271 files changed, 17594 insertions, 13768 deletions
diff --git a/arch/um/.gitignore b/arch/um/.gitignore
index a73d3a1cc746..d69ea5b562ce 100644
--- a/arch/um/.gitignore
+++ b/arch/um/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
 kernel/config.c
 kernel/config.tmp
 kernel/vmlinux.lds
+kernel/capflags.c
diff --git a/arch/um/Kbuild b/arch/um/Kbuild
new file mode 100644
index 000000000000..6cf0c1e5927b
--- /dev/null
+++ b/arch/um/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += kernel/ drivers/ os-Linux/
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
new file mode 100644
index 000000000000..8415d39b0d43
--- /dev/null
+++ b/arch/um/Kconfig
@@ -0,0 +1,303 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "UML-specific options"
+
+config UML
+	bool
+	default y
+	select ARCH_DISABLE_KASAN_INLINE if STATIC_LINK
+	select ARCH_NEEDS_DEFER_KASAN if STATIC_LINK
+	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_HAS_CACHE_LINE_SIZE
+	select ARCH_HAS_CPU_FINALIZE_INIT
+	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_KCOV
+	select ARCH_HAS_STRNCPY_FROM_USER
+	select ARCH_HAS_STRNLEN_USER
+	select ARCH_HAS_STRICT_KERNEL_RWX
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_KASAN if X86_64
+	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ASM_MODVERSIONS
+	select HAVE_UID16
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DEBUG_BUGVERBOSE
+	select HAVE_PAGE_SIZE_4KB
+	select NO_DMA if !UML_DMA_EMULATION
+	select OF_EARLY_FLATTREE if OF
+	select GENERIC_IRQ_SHOW
+	select GENERIC_CPU_DEVICES
+	select GENERIC_SMP_IDLE_THREAD
+	select HAVE_GCC_PLUGINS
+	select ARCH_SUPPORTS_LTO_CLANG
+	select ARCH_SUPPORTS_LTO_CLANG_THIN
+	select TRACE_IRQFLAGS_SUPPORT
+	select TTY # Needed for line.c
+	select HAVE_ARCH_VMAP_STACK
+	select HAVE_RUST
+	select ARCH_HAS_UBSAN
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_SYSCALL_TRACEPOINTS
+	select THREAD_INFO_IN_TASK
+	select SPARSE_IRQ
+
+config MMU
+	bool
+	default y
+
+config UML_DMA_EMULATION
+	bool
+
+config NO_IOMEM
+	bool "disable IOMEM" if EXPERT
+	depends on !INDIRECT_IOMEM
+	default y
+
+config UML_IOMEM_EMULATION
+	bool
+	select INDIRECT_IOMEM
+	select GENERIC_PCI_IOMAP
+
+config ISA
+	bool
+
+config SBUS
+	bool
+
+config LOCKDEP_SUPPORT
+	bool
+	default y
+
+config STACKTRACE_SUPPORT
+	bool
+	default y
+	select STACKTRACE
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config HZ
+	int
+	default 100
+
+config UML_SUBARCH_SUPPORTS_SMP
+	bool
+
+config SMP
+	bool "Symmetric multi-processing support"
+	default n
+	depends on UML_SUBARCH_SUPPORTS_SMP
+	help
+	  This option enables UML SMP support.
+
+	  With this enabled, users can tell UML to start multiple virtual
+	  processors. Each virtual processor is represented as a separate
+	  host thread.
+
+	  In UML, kthreads and normal threads (when running in kernel mode)
+	  can be scheduled and executed simultaneously on different virtual
+	  processors. However, the userspace code of normal threads still
+	  runs within their respective single-threaded stubs.
+
+	  That is, SMP support is available both within the kernel and
+	  across different processes, but remains limited within threads
+	  of the same process in userspace.
+
+config NR_CPUS_RANGE_BEGIN
+	int
+	default 1 if !SMP
+	default 2
+
+config NR_CPUS_RANGE_END
+	int
+	default 1 if !SMP
+	default 64
+
+config NR_CPUS_DEFAULT
+	int
+	default 1 if !SMP
+	default 2
+
+config NR_CPUS
+	int "Maximum number of CPUs" if SMP
+	range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+	default NR_CPUS_DEFAULT
+
+source "arch/$(HEADER_ARCH)/um/Kconfig"
+
+config MAY_HAVE_RUNTIME_DEPS
+	bool
+
+config STATIC_LINK
+	bool "Force a static link"
+	depends on !MAY_HAVE_RUNTIME_DEPS
+	help
+	  This option gives you the ability to force a static link of UML.
+	  Normally, UML is linked as a shared binary.  This is inconvenient for
+	  use in a chroot jail.  So, if you intend to run UML inside a chroot,
+	  you probably want to say Y here.
+	  Additionally, this option enables using higher memory spaces (up to
+	  2.75G) for UML.
+
+	  NOTE: This option is incompatible with some networking features which
+	  depend on features that require being dynamically loaded (like NSS).
+
+config LD_SCRIPT_STATIC
+	bool
+	default y
+	depends on STATIC_LINK
+
+config LD_SCRIPT_DYN
+	bool
+	default y
+	depends on !LD_SCRIPT_STATIC
+
+config LD_SCRIPT_DYN_RPATH
+	bool "set rpath in the binary" if EXPERT
+	default y
+	depends on LD_SCRIPT_DYN
+	help
+	  Add /lib (and /lib64 for 64-bit) to the linux binary's rpath
+	  explicitly.
+
+	  You may need to turn this off if compiling for nix systems
+	  that have their libraries in random /nix directories and
+	  might otherwise unexpected use libraries from /lib or /lib64
+	  instead of the desired ones.
+
+config HOSTFS
+	tristate "Host filesystem"
+	help
+	  While the User-Mode Linux port uses its own root file system for
+	  booting and normal file access, this module lets the UML user
+	  access files stored on the host.  It does not require any
+	  network connection between the Host and UML.  An example use of
+	  this might be:
+
+	  mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
+
+	  where /tmp/fromhost is an empty directory inside UML and
+	  /tmp/umlshare is a directory on the host with files the UML user
+	  wishes to access.
+
+	  For more information, see
+	  <http://user-mode-linux.sourceforge.net/hostfs.html>.
+
+	  If you'd like to be able to work with files stored on the host,
+	  say Y or M here; otherwise say N.
+
+config MCONSOLE
+	bool "Management console"
+	depends on PROC_FS
+	default y
+	help
+	  The user mode linux management console is a low-level interface to
+	  the kernel, somewhat like the i386 SysRq interface.  Since there is
+	  a full-blown operating system running under every user mode linux
+	  instance, there is much greater flexibility possible than with the
+	  SysRq mechanism.
+
+	  If you answer 'Y' to this option, to use this feature, you need the
+	  mconsole client (called uml_mconsole) which is present in CVS in
+	  2.4.5-9um and later (path /tools/mconsole), and is also in the
+	  distribution RPM package in 2.4.6 and later.
+
+	  It is safe to say 'Y' here.
+
+config MAGIC_SYSRQ
+	bool "Magic SysRq key"
+	depends on MCONSOLE
+	help
+	  If you say Y here, you will have some control over the system even
+	  if the system crashes for example during kernel debugging (e.g., you
+	  will be able to flush the buffer cache to disk, reboot the system
+	  immediately or dump some status information). A key for each of the
+	  possible requests is provided.
+
+	  This is the feature normally accomplished by pressing a key
+	  while holding SysRq (Alt+PrintScreen).
+
+	  On UML, this is accomplished by sending a "sysrq" command with
+	  mconsole, followed by the letter for the requested command.
+
+	  The keys are documented in <file:Documentation/admin-guide/sysrq.rst>. Don't say Y
+	  unless you really know what this hack does.
+
+config KERNEL_STACK_ORDER
+	int "Kernel stack size order"
+	default 2 if 64BIT
+	range 2 10 if 64BIT
+	default 1 if !64BIT
+	help
+	  This option determines the size of UML kernel stacks.  They will
+	  be 1 << order pages.  The default is OK unless you're running Valgrind
+	  on UML, in which case, set this to 3.
+	  It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on
+	  older (pre-2017) CPUs. It is not recommended on newer CPUs due to the
+	  increase in the size of the state which needs to be saved when handling
+	  signals.
+
+config PGTABLE_LEVELS
+	int
+	default 4 if 64BIT
+	default 2 if !64BIT
+
+config UML_TIME_TRAVEL_SUPPORT
+	bool
+	prompt "Support time-travel mode (e.g. for test execution)"
+	# inf-cpu mode is incompatible with the benchmarking
+	depends on !RAID6_PQ_BENCHMARK
+	depends on !SMP
+	help
+	  Enable this option to support time travel inside the UML instance.
+
+	  After enabling this option, two modes are accessible at runtime
+	  (selected by the kernel command line), see the kernel's command-
+	  line help for more details.
+
+	  It is safe to say Y, but you probably don't need this.
+
+config UML_MAX_USERSPACE_ITERATIONS
+	int
+	prompt "Maximum number of unscheduled userspace iterations"
+	default 10000
+	depends on UML_TIME_TRAVEL_SUPPORT
+	help
+	  In UML inf-cpu and ext time-travel mode userspace can run without being
+	  interrupted. This will eventually overwhelm the kernel and create OOM
+	  situations (mainly RCU not running). This setting specifies the number
+	  of kernel/userspace switches (minor/major page fault, signal or syscall)
+	  for the same userspace thread before the sched_clock is advanced by a
+	  jiffie to trigger scheduling.
+
+	  Setting it to zero disables the feature.
+
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0x100000000000
+	help
+	  This is the offset at which the ~16TB of shadow memory is
+	  mapped and used by KASAN for memory debugging. This can be any
+	  address that has at least KASAN_SHADOW_SIZE (total address space divided
+	  by 8) amount of space so that the KASAN shadow memory does not conflict
+	  with anything. The default is 0x100000000000, which works even if mem is
+	  set to a large value. On low-memory systems, try 0x7fff8000, as it fits
+	  into the immediate of most instructions, improving performance.
+
+endmenu
+
+source "arch/um/drivers/Kconfig"
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+	depends on !SMP
+
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+endmenu
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char
deleted file mode 100644
index b9d7c4276682..000000000000
--- a/arch/um/Kconfig.char
+++ /dev/null
@@ -1,127 +0,0 @@
-menu "UML Character Devices"
-
-config STDERR_CONSOLE
-	bool "stderr console"
-	default y
-	help
-	  console driver which dumps all printk messages to stderr.
-
-config STDIO_CONSOLE
-	bool
-	default y
-
-config SSL
-	bool "Virtual serial line"
-	help
-          The User-Mode Linux environment allows you to create virtual serial
-          lines on the UML that are usually made to show up on the host as
-          ttys or ptys.
-
-          See <http://user-mode-linux.sourceforge.net/old/input.html> for more
-          information and command line examples of how to use this facility.
-
-          Unless you have a specific reason for disabling this, say Y.
-
-config NULL_CHAN
-	bool "null channel support"
-	help
-          This option enables support for attaching UML consoles and serial
-          lines to a device similar to /dev/null.  Data written to it disappears
-          and there is never any data to be read.
-
-config PORT_CHAN
-	bool "port channel support"
-	help
-          This option enables support for attaching UML consoles and serial
-          lines to host portals.  They may be accessed with 'telnet <host>
-          <port number>'.  Any number of consoles and serial lines may be
-          attached to a single portal, although what UML device you get when
-          you telnet to that portal will be unpredictable.
-          It is safe to say 'Y' here.
-
-config PTY_CHAN
-	bool "pty channel support"
-	help
-          This option enables support for attaching UML consoles and serial
-          lines to host pseudo-terminals.  Access to both traditional
-          pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
-          with this option.  The assignment of UML devices to host devices
-          will be announced in the kernel message log.
-          It is safe to say 'Y' here.
-
-config TTY_CHAN
-	bool "tty channel support"
-	help
-          This option enables support for attaching UML consoles and serial
-          lines to host terminals.  Access to both virtual consoles
-          (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
-          /dev/pts/*) are controlled by this option.
-          It is safe to say 'Y' here.
-
-config XTERM_CHAN
-	bool "xterm channel support"
-	help
-          This option enables support for attaching UML consoles and serial
-          lines to xterms.  Each UML device so assigned will be brought up in
-          its own xterm.
-          It is safe to say 'Y' here.
-
-config NOCONFIG_CHAN
-	bool
-	default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN)
-
-config CON_ZERO_CHAN
-	string "Default main console channel initialization"
-	default "fd:0,fd:1"
-	help
-          This is the string describing the channel to which the main console
-          will be attached by default.  This value can be overridden from the
-          command line.  The default value is "fd:0,fd:1", which attaches the
-          main console to stdin and stdout.
-          It is safe to leave this unchanged.
-
-config CON_CHAN
-	string "Default console channel initialization"
-	default "xterm"
-	help
-          This is the string describing the channel to which all consoles
-          except the main console will be attached by default.  This value can
-          be overridden from the command line.  The default value is "xterm",
-          which brings them up in xterms.
-          It is safe to leave this unchanged, although you may wish to change
-          this if you expect the UML that you build to be run in environments
-          which don't have X or xterm available.
-
-config SSL_CHAN
-	string "Default serial line channel initialization"
-	default "pty"
-	help
-          This is the string describing the channel to which the serial lines
-          will be attached by default.  This value can be overridden from the
-          command line.  The default value is "pty", which attaches them to
-          traditional pseudo-terminals.
-          It is safe to leave this unchanged, although you may wish to change
-          this if you expect the UML that you build to be run in environments
-          which don't have a set of /dev/pty* devices.
-
-config UML_SOUND
-	tristate "Sound support"
-	help
-          This option enables UML sound support.  If enabled, it will pull in
-          soundcore and the UML hostaudio relay, which acts as a intermediary
-          between the host's dsp and mixer devices and the UML sound system.
-          It is safe to say 'Y' here.
-
-config SOUND
-	tristate
-	default UML_SOUND
-
-config SOUND_OSS_CORE
-	bool
-	default UML_SOUND
-
-config HOSTAUDIO
-	tristate
-	default UML_SOUND
-
-endmenu
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
deleted file mode 100644
index bceee6623b00..000000000000
--- a/arch/um/Kconfig.common
+++ /dev/null
@@ -1,64 +0,0 @@
-config DEFCONFIG_LIST
-	string
-	option defconfig_list
-	default "arch/$ARCH/defconfig"
-
-config UML
-	bool
-	default y
-	select HAVE_GENERIC_HARDIRQS
-	select HAVE_UID16
-	select GENERIC_IRQ_SHOW
-	select GENERIC_CPU_DEVICES
-	select GENERIC_IO
-	select GENERIC_CLOCKEVENTS
-	select TTY # Needed for line.c
-
-config MMU
-	bool
-	default y
-
-config NO_IOMEM
-	def_bool y
-
-config ISA
-	bool
-
-config SBUS
-	bool
-
-config PCI
-	bool
-
-config PCMCIA
-	bool
-
-# Yet to do!
-config TRACE_IRQFLAGS_SUPPORT
-	bool
-	default n
-
-config LOCKDEP_SUPPORT
-	bool
-	default y
-
-config STACKTRACE_SUPPORT
-	bool
-	default n
-
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config GENERIC_BUG
-	bool
-	default y
-	depends on BUG
-
-config HZ
-	int
-	default 100
-
-config SUBARCH
-	string
-	option env="SUBARCH"
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index 68205fd3b08c..1dfb2959c73b 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -1,6 +1,4 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
+# SPDX-License-Identifier: GPL-2.0
 
 config GPROF
 	bool "Enable gprof support"
@@ -18,6 +16,8 @@ config GPROF
 config GCOV
 	bool "Enable gcov support"
 	depends on DEBUG_INFO
+	depends on !KCOV
+	depends on !MODULES
 	help
 	  This option allows developers to retrieve coverage data from a UML
 	  session.
@@ -31,10 +31,8 @@ config GCOV
 config EARLY_PRINTK
 	bool "Early printk"
 	default y
-	---help---
+	help
 	  Write kernel log output directly to stdout.
 
 	  This is useful for kernel debugging when your machine crashes very
 	  early before the console code is initialized.
-
-endmenu
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
deleted file mode 100644
index 820a56f00332..000000000000
--- a/arch/um/Kconfig.net
+++ /dev/null
@@ -1,202 +0,0 @@
-
-menu "UML Network Devices"
-	depends on NET
-
-# UML virtual driver
-config UML_NET
-	bool "Virtual network device"
-	help
-        While the User-Mode port cannot directly talk to any physical
-        hardware devices, this choice and the following transport options
-        provide one or more virtual network devices through which the UML
-        kernels can talk to each other, the host, and with the host's help,
-        machines on the outside world.
-
-        For more information, including explanations of the networking and
-        sample configurations, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>.
-
-        If you'd like to be able to enable networking in the User-Mode
-        linux environment, say Y; otherwise say N.  Note that you must
-        enable at least one of the following transport options to actually
-        make use of UML networking.
-
-config UML_NET_ETHERTAP
-	bool "Ethertap transport"
-	depends on UML_NET
-	help
-        The Ethertap User-Mode Linux network transport allows a single
-        running UML to exchange packets with its host over one of the
-        host's Ethertap devices, such as /dev/tap0.  Additional running
-        UMLs can use additional Ethertap devices, one per running UML.
-        While the UML believes it's on a (multi-device, broadcast) virtual
-        Ethernet network, it's in fact communicating over a point-to-point
-        link with the host.
-
-        To use this, your host kernel must have support for Ethertap
-        devices.  Also, if your host kernel is 2.4.x, it must have
-        CONFIG_NETLINK_DEV configured as Y or M.
-
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
-        has examples of the UML command line to use to enable Ethertap
-        networking.
-
-        If you'd like to set up an IP network with the host and/or the
-        outside world, say Y to this, the Daemon Transport and/or the
-        Slip Transport.  You'll need at least one of them, but may choose
-        more than one without conflict.  If you don't need UML networking,
-        say N.
-
-config UML_NET_TUNTAP
-	bool "TUN/TAP transport"
-	depends on UML_NET
-	help
-        The UML TUN/TAP network transport allows a UML instance to exchange
-        packets with the host over a TUN/TAP device.  This option will only
-        work with a 2.4 host, unless you've applied the TUN/TAP patch to
-        your 2.2 host kernel.
-
-        To use this transport, your host kernel must have support for TUN/TAP
-        devices, either built-in or as a module.
-
-config UML_NET_SLIP
-	bool "SLIP transport"
-	depends on UML_NET
-	help
-        The slip User-Mode Linux network transport allows a running UML to
-        network with its host over a point-to-point link.  Unlike Ethertap,
-        which can carry any Ethernet frame (and hence even non-IP packets),
-        the slip transport can only carry IP packets.
-
-        To use this, your host must support slip devices.
-
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>.
-        has examples of the UML command line to use to enable slip
-        networking, and details of a few quirks with it.
-
-        The Ethertap Transport is preferred over slip because of its
-        limitations.  If you prefer slip, however, say Y here.  Otherwise
-        choose the Multicast transport (to network multiple UMLs on
-        multiple hosts), Ethertap (to network with the host and the
-        outside world), and/or the Daemon transport (to network multiple
-        UMLs on a single host).  You may choose more than one without
-        conflict.  If you don't need UML networking, say N.
-
-config UML_NET_DAEMON
-	bool "Daemon transport"
-	depends on UML_NET
-	help
-        This User-Mode Linux network transport allows one or more running
-        UMLs on a single host to communicate with each other, but not to
-        the host.
-
-        To use this form of networking, you'll need to run the UML
-        networking daemon on the host.
-
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
-        has examples of the UML command line to use to enable Daemon
-        networking.
-
-        If you'd like to set up a network with other UMLs on a single host,
-        say Y.  If you need a network between UMLs on multiple physical
-        hosts, choose the Multicast Transport.  To set up a network with
-        the host and/or other IP machines, say Y to the Ethertap or Slip
-        transports.  You'll need at least one of them, but may choose
-        more than one without conflict.  If you don't need UML networking,
-        say N.
-
-config UML_NET_VDE
-	bool "VDE transport"
-	depends on UML_NET
-	help
-	This User-Mode Linux network transport allows one or more running
-	UMLs on a single host to communicate with each other and also
-	with the rest of the world using Virtual Distributed Ethernet,
-	an improved fork of uml_switch.
-
-	You must have libvdeplug installed in order to build the vde
-	transport into UML.
-
-	To use this form of networking, you will need to run vde_switch
-	on the host.
-
-	For more information, see <http://wiki.virtualsquare.org/>
-	That site has a good overview of what VDE is and also examples
-	of the UML command line to use to enable VDE networking.
-
-	If you need UML networking with VDE,
-	say Y.
-
-config UML_NET_MCAST
-	bool "Multicast transport"
-	depends on UML_NET
-	help
-        This Multicast User-Mode Linux network transport allows multiple
-        UMLs (even ones running on different host machines!) to talk to
-        each other over a virtual ethernet network.  However, it requires
-        at least one UML with one of the other transports to act as a
-        bridge if any of them need to be able to talk to their hosts or any
-        other IP machines.
-
-        To use this, your host kernel(s) must support IP Multicasting.
-
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
-        has examples of the UML command line to use to enable Multicast
-        networking, and notes about the security of this approach.
-
-        If you need UMLs on multiple physical hosts to communicate as if
-        they shared an Ethernet network, say Y.  If you need to communicate
-        with other IP machines, make sure you select one of the other
-        transports (possibly in addition to Multicast; they're not
-        exclusive).  If you don't need to network UMLs say N to each of
-        the transports.
-
-config UML_NET_PCAP
-	bool "pcap transport"
-	depends on UML_NET
-	help
-	The pcap transport makes a pcap packet stream on the host look
-	like an ethernet device inside UML.  This is useful for making
-	UML act as a network monitor for the host.  You must have libcap
-	installed in order to build the pcap transport into UML.
-
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/old/networking.html>  That site
-        has examples of the UML command line to use to enable this option.
-
-	If you intend to use UML as a network monitor for the host, say
-	Y here.  Otherwise, say N.
-
-config UML_NET_SLIRP
-	bool "SLiRP transport"
-	depends on UML_NET
-	help
-        The SLiRP User-Mode Linux network transport allows a running UML
-        to network by invoking a program that can handle SLIP encapsulated
-        packets.  This is commonly (but not limited to) the application
-        known as SLiRP, a program that can re-socket IP packets back onto
-        the host on which it is run.  Only IP packets are supported,
-        unlike other network transports that can handle all Ethernet
-        frames.  In general, slirp allows the UML the same IP connectivity
-        to the outside world that the host user is permitted, and unlike
-        other transports, SLiRP works without the need of root level
-        privleges, setuid binaries, or SLIP devices on the host.  This
-        also means not every type of connection is possible, but most
-        situations can be accommodated with carefully crafted slirp
-        commands that can be passed along as part of the network device's
-        setup string.  The effect of this transport on the UML is similar
-        that of a host behind a firewall that masquerades all network
-        connections passing through it (but is less secure).
-
-        To use this you should first have slirp compiled somewhere
-        accessible on the host, and have read its documentation.  If you
-        don't need UML networking, say N.
-
-        Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
-
-endmenu
-
diff --git a/arch/um/Kconfig.rest b/arch/um/Kconfig.rest
deleted file mode 100644
index 567eb5fc21df..000000000000
--- a/arch/um/Kconfig.rest
+++ /dev/null
@@ -1,21 +0,0 @@
-source "init/Kconfig"
-
-source "kernel/Kconfig.freezer"
-
-source "arch/um/Kconfig.char"
-
-source "drivers/Kconfig"
-
-source "net/Kconfig"
-
-source "arch/um/Kconfig.net"
-
-source "fs/Kconfig"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-source "arch/um/Kconfig.debug"
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
deleted file mode 100644
index a7520c90f62d..000000000000
--- a/arch/um/Kconfig.um
+++ /dev/null
@@ -1,157 +0,0 @@
-config STATIC_LINK
-	bool "Force a static link"
-	default n
-	help
-	  This option gives you the ability to force a static link of UML.
-	  Normally, UML is linked as a shared binary.  This is inconvenient for
-	  use in a chroot jail.  So, if you intend to run UML inside a chroot,
-	  you probably want to say Y here.
-	  Additionally, this option enables using higher memory spaces (up to
-	  2.75G) for UML.
-
-source "mm/Kconfig"
-
-config LD_SCRIPT_STATIC
-	bool
-	default y
-	depends on STATIC_LINK
-
-config LD_SCRIPT_DYN
-	bool
-	default y
-	depends on !LD_SCRIPT_STATIC
-
-source "fs/Kconfig.binfmt"
-
-config HOSTFS
-	tristate "Host filesystem"
-	help
-          While the User-Mode Linux port uses its own root file system for
-          booting and normal file access, this module lets the UML user
-          access files stored on the host.  It does not require any
-          network connection between the Host and UML.  An example use of
-          this might be:
-
-          mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
-
-          where /tmp/fromhost is an empty directory inside UML and
-          /tmp/umlshare is a directory on the host with files the UML user
-          wishes to access.
-
-          For more information, see
-          <http://user-mode-linux.sourceforge.net/hostfs.html>.
-
-          If you'd like to be able to work with files stored on the host,
-          say Y or M here; otherwise say N.
-
-config HPPFS
-	tristate "HoneyPot ProcFS"
-	depends on PROC_FS
-	help
-	  hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
-	  entries to be overridden, removed, or fabricated from the host.
-	  Its purpose is to allow a UML to appear to be a physical machine
-	  by removing or changing anything in /proc which gives away the
-	  identity of a UML.
-
-	  See <http://user-mode-linux.sf.net/old/hppfs.html> for more information.
-
-	  You only need this if you are setting up a UML honeypot.  Otherwise,
-	  it is safe to say 'N' here.
-
-config MCONSOLE
-	bool "Management console"
-	default y
-	help
-          The user mode linux management console is a low-level interface to
-          the kernel, somewhat like the i386 SysRq interface.  Since there is
-          a full-blown operating system running under every user mode linux
-          instance, there is much greater flexibility possible than with the
-          SysRq mechanism.
-
-          If you answer 'Y' to this option, to use this feature, you need the
-          mconsole client (called uml_mconsole) which is present in CVS in
-          2.4.5-9um and later (path /tools/mconsole), and is also in the
-          distribution RPM package in 2.4.6 and later.
-
-          It is safe to say 'Y' here.
-
-config MAGIC_SYSRQ
-	bool "Magic SysRq key"
-	depends on MCONSOLE
-	help
-	  If you say Y here, you will have some control over the system even
-	  if the system crashes for example during kernel debugging (e.g., you
-	  will be able to flush the buffer cache to disk, reboot the system
-	  immediately or dump some status information). A key for each of the
-	  possible requests is provided.
-
-	  This is the feature normally accomplished by pressing a key
-	  while holding SysRq (Alt+PrintScreen).
-
-	  On UML, this is accomplished by sending a "sysrq" command with
-	  mconsole, followed by the letter for the requested command.
-
-	  The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
-	  unless you really know what this hack does.
-
-config SMP
-	bool "Symmetric multi-processing support"
-	default n
-	depends on BROKEN
-	help
-	  This option enables UML SMP support.
-	  It is NOT related to having a real SMP box. Not directly, at least.
-
-	  UML implements virtual SMP by allowing as many processes to run
-	  simultaneously on the host as there are virtual processors configured.
-
-	  Obviously, if the host is a uniprocessor, those processes will
-	  timeshare, but, inside UML, will appear to be running simultaneously.
-	  If the host is a multiprocessor, then UML processes may run
-	  simultaneously, depending on the host scheduler.
-
-	  This, however, is supported only in TT mode. So, if you use the SKAS
-	  patch on your host, switching to TT mode and enabling SMP usually
-	  gives	you worse performances.
-	  Also, since the support for SMP has been under-developed, there could
-	  be some bugs being exposed by enabling SMP.
-
-	  If you don't know what to do, say N.
-
-config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
-	depends on SMP
-	default "32"
-
-config HIGHMEM
-	bool "Highmem support"
-	depends on !64BIT && BROKEN
-	default n
-	help
-	  This was used to allow UML to run with big amounts of memory.
-	  Currently it is unstable, so if unsure say N.
-
-	  To use big amounts of memory, it is recommended enable static
-	  linking (i.e. CONFIG_STATIC_LINK) - this should allow the
-	  guest to use up to 2.75G of memory.
-
-config KERNEL_STACK_ORDER
-	int "Kernel stack size order"
-	default 1 if 64BIT
-	range 1 10 if 64BIT
-	default 0 if !64BIT
-	help
-	  This option determines the size of UML kernel stacks.  They will
-	  be 1 << order pages.  The default is OK unless you're running Valgrind
-	  on UML, in which case, set this to 3.
-
-config MMAPPER
-	tristate "iomem emulation driver"
-	help
-	  This driver allows a host file to be used as emulated IO memory inside
-	  UML.
-
-config NO_DMA
-	def_bool y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 133f7de2a13d..721b652ffb65 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -6,28 +6,31 @@
 # Licensed under the GPL
 #
 
+# select defconfig based on actual architecture
+ifeq ($(SUBARCH),x86)
+  ifeq ($(shell uname -m),x86_64)
+        KBUILD_DEFCONFIG := x86_64_defconfig
+  else
+        KBUILD_DEFCONFIG := i386_defconfig
+  endif
+else
+        KBUILD_DEFCONFIG := $(SUBARCH)_defconfig
+endif
+
 ARCH_DIR := arch/um
-OS := $(shell uname -s)
 # We require bash because the vmlinux link and loader script cpp use bash
 # features.
-SHELL := /bin/bash
-
-filechk_gen_header = $<
-
-core-y			+= $(ARCH_DIR)/kernel/		\
-			   $(ARCH_DIR)/drivers/		\
-			   $(ARCH_DIR)/os-$(OS)/
+SHELL := bash
 
 MODE_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/include/shared/skas
 
 HEADER_ARCH 	:= $(SUBARCH)
 
-# Additional ARCH settings for x86
-ifeq ($(SUBARCH),i386)
-        HEADER_ARCH := x86
+ifneq ($(filter $(SUBARCH),x86 x86_64 i386),)
+	HEADER_ARCH := x86
 endif
-ifeq ($(SUBARCH),x86_64)
-        HEADER_ARCH := x86
+
+ifdef CONFIG_64BIT
 	KBUILD_CFLAGS += -mcmodel=large
 endif
 
@@ -43,32 +46,40 @@ ARCH_INCLUDE	:= -I$(srctree)/$(SHARED_HEADERS)
 ARCH_INCLUDE	+= -I$(srctree)/$(HOST_DIR)/um/shared
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um
 
-# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so
-# named - it's a common symbol in libpcap, so we get a binary which crashes.
+# -Dstrrchr=kernel_strrchr (as well as the various in6addr symbols) prevents
+#  anything from referencing
+# libc symbols with the same name, which can cause a linker error.
 #
-# Same things for in6addr_loopback and mktime - found in libc. For these two we
-# only get link-time error, luckily.
+# -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a
+# embedded copy of longjmp, same thing for setjmp.
 #
-# These apply to USER_CFLAGS to.
+# These apply to USER_CFLAGS too.
 
 KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \
-	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
+	$(ARCH_INCLUDE) $(MODE_INCLUDE)	\
+	-Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \
 	-Din6addr_loopback=kernel_in6addr_loopback \
-	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
+	-Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \
+	-D__close_range=kernel__close_range
+
+KBUILD_RUSTFLAGS += -Crelocation-model=pie
 
 KBUILD_AFLAGS += $(ARCH_INCLUDE)
 
-USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
-	$(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
-	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
+USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
+		$(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \
+		-D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \
+		-idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ \
+		-include $(srctree)/include/linux/compiler-version.h \
+		-include $(srctree)/include/linux/kconfig.h
 
 #This will adjust *FLAGS accordingly to the platform.
-include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
+include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
 		   -I$(srctree)/$(HOST_DIR)/include/uapi \
-		   -I$(HOST_DIR)/include/generated \
-		   -I$(HOST_DIR)/include/generated/uapi
+		   -I$(objtree)/$(HOST_DIR)/include/generated \
+		   -I$(objtree)/$(HOST_DIR)/include/generated/uapi
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
@@ -98,20 +109,18 @@ define archhelp
   echo '		   find in the kernel root.'
 endef
 
-KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig
-
 archheaders:
-	$(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \
-		ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders
+	$(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) asm-generic archheaders
 
-archprepare: include/generated/user_constants.h
+archprepare:
+	$(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
 
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie
+LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
 
 CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
-	$(call cc-option, -fno-stack-protector,) \
-	$(call cc-option, -fno-stack-protector-all,)
+	-fno-stack-protector $(call cc-option, -fno-stack-protector-all)
 
 # Options used by linker script
 export LDS_START      := $(START)
@@ -121,38 +130,26 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT)
 # The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
 
-LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt))
+# Avoid binutils 2.39+ warnings by marking the stack non-executable and
+# ignorning warnings for the kallsyms sections.
+LDFLAGS_EXECSTACK = -z noexecstack
+ifeq ($(CONFIG_LD_IS_BFD),y)
+LDFLAGS_EXECSTACK += $(call ld-option,--no-warn-rwx-segments)
+endif
+
+LD_FLAGS_CMDLINE = $(foreach opt,$(KBUILD_LDFLAGS) $(LDFLAGS_EXECSTACK),-Wl,$(opt))
 
 # Used by link-vmlinux.sh which has special support for um link
-export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
+export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) $(CC_FLAGS_LTO)
 
 # When cleaning we don't include .config, so we don't include
 # TT or skas makefiles and don't clean skas_ptregs.h.
 CLEAN_FILES += linux x.i gmon.out
+MRPROPER_FILES += $(HOST_DIR)/include/generated
 
 archclean:
 	@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
 		-o -name '*.gcov' \) -type f -print | xargs rm -f
+	$(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) clean
 
-# Generated files
-
-$(HOST_DIR)/um/user-offsets.s: __headers FORCE
-	$(Q)$(MAKE) $(build)=$(HOST_DIR)/um $@
-
-define filechk_gen-asm-offsets
-        (set -e; \
-         echo "/*"; \
-         echo " * DO NOT MODIFY."; \
-         echo " *"; \
-         echo " * This file was generated by arch/$(ARCH)/Makefile"; \
-         echo " *"; \
-         echo " */"; \
-         echo ""; \
-         sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \
-         echo ""; )
-endef
-
-include/generated/user_constants.h: $(HOST_DIR)/um/user-offsets.s
-	$(call filechk,gen-asm-offsets)
-
-export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
+export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH
diff --git a/arch/um/Makefile-ia64 b/arch/um/Makefile-ia64
deleted file mode 100644
index f84dc23b0f6e..000000000000
--- a/arch/um/Makefile-ia64
+++ /dev/null
@@ -1 +0,0 @@
-START_ADDR = 0x1000000000000000
diff --git a/arch/um/Makefile-ppc b/arch/um/Makefile-ppc
deleted file mode 100644
index 66fd2003e165..000000000000
--- a/arch/um/Makefile-ppc
+++ /dev/null
@@ -1,9 +0,0 @@
-ifeq ($(CONFIG_HOST_2G_2G), y)
-START_ADDR = 0x80000000
-else
-START_ADDR = 0xc0000000
-endif
-ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__
-
-# The arch is ppc, but the elf32 name is powerpc
-ELF_SUBARCH = powerpc
diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas
index ac35de5316a6..1a27e65bcb9c 100644
--- a/arch/um/Makefile-skas
+++ b/arch/um/Makefile-skas
@@ -3,10 +3,15 @@
 # Licensed under the GPL
 #
 
-GPROF_OPT += -pg
-GCOV_OPT += -fprofile-arcs -ftest-coverage
+export UM_GPROF_OPT += -pg
 
-CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT)
-CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT)
-LINK-$(CONFIG_GCOV) += $(GCOV_OPT)
-LINK-$(CONFIG_GPROF) += $(GPROF_OPT)
+ifdef CONFIG_CC_IS_CLANG
+export UM_GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping
+else
+export UM_GCOV_OPT += -fprofile-arcs -ftest-coverage
+endif
+
+CFLAGS-$(CONFIG_GCOV) += $(UM_GCOV_OPT)
+CFLAGS-$(CONFIG_GPROF) += $(UM_GPROF_OPT)
+LINK-$(CONFIG_GCOV) += $(UM_GCOV_OPT)
+LINK-$(CONFIG_GPROF) += $(UM_GPROF_OPT)
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
new file mode 100644
index 000000000000..29d9666eceae
--- /dev/null
+++ b/arch/um/configs/i386_defconfig
@@ -0,0 +1,64 @@
+# CONFIG_COMPACTION is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HOSTFS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_KERNEL_STACK_ORDER=1
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+# CONFIG_PID_NS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_IOSCHED_BFQ=m
+CONFIG_SSL=y
+CONFIG_NULL_CHAN=y
+CONFIG_PORT_CHAN=y
+CONFIG_PTY_CHAN=y
+CONFIG_TTY_CHAN=y
+CONFIG_XTERM_CHAN=y
+CONFIG_CON_CHAN="pts"
+CONFIG_SSL_CHAN="pts"
+CONFIG_SOUND=m
+CONFIG_UML_SOUND=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_UBD=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_DUMMY=m
+CONFIG_TUN=m
+CONFIG_PPP=m
+CONFIG_SLIP=m
+CONFIG_LEGACY_PTY_COUNT=32
+# CONFIG_HW_RANDOM is not set
+CONFIG_UML_RANDOM=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+CONFIG_EXT4_FS=y
+CONFIG_QUOTA=y
+CONFIG_AUTOFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NLS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_KERNEL=y
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
new file mode 100644
index 000000000000..cf309c5406a2
--- /dev/null
+++ b/arch/um/configs/x86_64_defconfig
@@ -0,0 +1,64 @@
+# CONFIG_COMPACTION is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HOSTFS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+# CONFIG_PID_NS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_IOSCHED_BFQ=m
+CONFIG_SSL=y
+CONFIG_NULL_CHAN=y
+CONFIG_PORT_CHAN=y
+CONFIG_PTY_CHAN=y
+CONFIG_TTY_CHAN=y
+CONFIG_XTERM_CHAN=y
+CONFIG_CON_CHAN="pts"
+CONFIG_SSL_CHAN="pts"
+CONFIG_SOUND=m
+CONFIG_UML_SOUND=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_UBD=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_DUMMY=m
+CONFIG_TUN=m
+CONFIG_PPP=m
+CONFIG_SLIP=m
+CONFIG_LEGACY_PTY_COUNT=32
+# CONFIG_HW_RANDOM is not set
+CONFIG_UML_RANDOM=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IPV6 is not set
+CONFIG_EXT4_FS=y
+CONFIG_QUOTA=y
+CONFIG_AUTOFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NLS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_FRAME_WARN=1024
+CONFIG_DEBUG_KERNEL=y
diff --git a/arch/um/defconfig b/arch/um/defconfig
deleted file mode 100644
index 08107a795062..000000000000
--- a/arch/um/defconfig
+++ /dev/null
@@ -1,901 +0,0 @@
-#
-# Automatically generated file; DO NOT EDIT.
-# User Mode Linux/i386 3.3.0 Kernel Configuration
-#
-CONFIG_DEFCONFIG_LIST="arch/$ARCH/defconfig"
-CONFIG_UML=y
-CONFIG_MMU=y
-CONFIG_NO_IOMEM=y
-# CONFIG_TRACE_IRQFLAGS_SUPPORT is not set
-CONFIG_LOCKDEP_SUPPORT=y
-# CONFIG_STACKTRACE_SUPPORT is not set
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_BUG=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_HZ=100
-
-#
-# UML-specific options
-#
-
-#
-# Host processor type and features
-#
-# CONFIG_M486 is not set
-# CONFIG_M586 is not set
-# CONFIG_M586TSC is not set
-# CONFIG_M586MMX is not set
-CONFIG_M686=y
-# CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
-# CONFIG_MPENTIUMM is not set
-# CONFIG_MPENTIUM4 is not set
-# CONFIG_MK6 is not set
-# CONFIG_MK7 is not set
-# CONFIG_MK8 is not set
-# CONFIG_MCRUSOE is not set
-# CONFIG_MEFFICEON is not set
-# CONFIG_MWINCHIPC6 is not set
-# CONFIG_MWINCHIP3D is not set
-# CONFIG_MELAN is not set
-# CONFIG_MGEODEGX1 is not set
-# CONFIG_MGEODE_LX is not set
-# CONFIG_MCYRIXIII is not set
-# CONFIG_MVIAC3_2 is not set
-# CONFIG_MVIAC7 is not set
-# CONFIG_MCORE2 is not set
-# CONFIG_MATOM is not set
-# CONFIG_X86_GENERIC is not set
-CONFIG_X86_INTERNODE_CACHE_SHIFT=5
-CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=5
-CONFIG_X86_XADD=y
-CONFIG_X86_PPRO_FENCE=y
-CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_INVLPG=y
-CONFIG_X86_BSWAP=y
-CONFIG_X86_POPAD_OK=y
-CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_TSC=y
-CONFIG_X86_CMPXCHG64=y
-CONFIG_X86_CMOV=y
-CONFIG_X86_MINIMUM_CPU_FAMILY=5
-CONFIG_CPU_SUP_INTEL=y
-CONFIG_CPU_SUP_CYRIX_32=y
-CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR=y
-CONFIG_CPU_SUP_TRANSMETA_32=y
-CONFIG_CPU_SUP_UMC_32=y
-CONFIG_UML_X86=y
-# CONFIG_64BIT is not set
-CONFIG_X86_32=y
-# CONFIG_X86_64 is not set
-# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_3_LEVEL_PGTABLES is not set
-CONFIG_ARCH_HAS_SC_SIGNALS=y
-CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
-CONFIG_GENERIC_HWEIGHT=y
-# CONFIG_STATIC_LINK is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_COMPACTION is not set
-# CONFIG_PHYS_ADDR_T_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=0
-CONFIG_VIRT_TO_BUS=y
-# CONFIG_KSM is not set
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
-CONFIG_NEED_PER_CPU_KM=y
-# CONFIG_CLEANCACHE is not set
-CONFIG_TICK_ONESHOT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_LD_SCRIPT_DYN=y
-CONFIG_BINFMT_ELF=y
-CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
-CONFIG_HAVE_AOUT=y
-# CONFIG_BINFMT_AOUT is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_HOSTFS=y
-# CONFIG_HPPFS is not set
-CONFIG_MCONSOLE=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_KERNEL_STACK_ORDER=0
-# CONFIG_MMAPPER is not set
-CONFIG_NO_DMA=y
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=128
-CONFIG_CROSS_COMPILE=""
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_DEFAULT_HOSTNAME="(none)"
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_POSIX_MQUEUE_SYSCTL=y
-CONFIG_BSD_PROCESS_ACCT=y
-# CONFIG_BSD_PROCESS_ACCT_V3 is not set
-# CONFIG_FHANDLE is not set
-# CONFIG_TASKSTATS is not set
-# CONFIG_AUDIT is not set
-CONFIG_HAVE_GENERIC_HARDIRQS=y
-
-#
-# IRQ subsystem
-#
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_IRQ_SHOW=y
-
-#
-# RCU Subsystem
-#
-CONFIG_TINY_RCU=y
-# CONFIG_PREEMPT_RCU is not set
-# CONFIG_RCU_TRACE is not set
-# CONFIG_TREE_RCU_TRACE is not set
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_CGROUPS=y
-# CONFIG_CGROUP_DEBUG is not set
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_PROC_PID_CPUSET=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
-# CONFIG_CGROUP_MEMCG_SWAP_ENABLED is not set
-# CONFIG_CGROUP_MEMCG_KMEM is not set
-CONFIG_CGROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-# CONFIG_CFS_BANDWIDTH is not set
-# CONFIG_RT_GROUP_SCHED is not set
-CONFIG_BLK_CGROUP=y
-# CONFIG_DEBUG_BLK_CGROUP is not set
-# CONFIG_CHECKPOINT_RESTORE is not set
-CONFIG_NAMESPACES=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
-# CONFIG_USER_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_NET_NS=y
-# CONFIG_SCHED_AUTOGROUP is not set
-CONFIG_MM_OWNER=y
-CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_SYSFS_DEPRECATED_V2 is not set
-# CONFIG_RELAY is not set
-# CONFIG_BLK_DEV_INITRD is not set
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_ANON_INODES=y
-# CONFIG_EXPERT is not set
-CONFIG_UID16=y
-# CONFIG_SYSCTL_SYSCALL is not set
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_AIO=y
-# CONFIG_EMBEDDED is not set
-
-#
-# Kernel Performance Events And Counters
-#
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_COMPAT_BRK=y
-CONFIG_SLAB=y
-# CONFIG_SLUB is not set
-# CONFIG_PROFILING is not set
-
-#
-# GCOV-based kernel profiling
-#
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
-CONFIG_MODULES=y
-# CONFIG_MODULE_FORCE_LOAD is not set
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_BLOCK=y
-CONFIG_LBDAF=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_BLK_DEV_BSGLIB is not set
-# CONFIG_BLK_DEV_INTEGRITY is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=m
-# CONFIG_CFQ_GROUP_IOSCHED is not set
-CONFIG_DEFAULT_DEADLINE=y
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="deadline"
-# CONFIG_INLINE_SPIN_TRYLOCK is not set
-# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK is not set
-# CONFIG_INLINE_SPIN_LOCK_BH is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
-# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
-CONFIG_INLINE_SPIN_UNLOCK=y
-# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
-CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
-# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_READ_TRYLOCK is not set
-# CONFIG_INLINE_READ_LOCK is not set
-# CONFIG_INLINE_READ_LOCK_BH is not set
-# CONFIG_INLINE_READ_LOCK_IRQ is not set
-# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
-CONFIG_INLINE_READ_UNLOCK=y
-# CONFIG_INLINE_READ_UNLOCK_BH is not set
-CONFIG_INLINE_READ_UNLOCK_IRQ=y
-# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
-# CONFIG_INLINE_WRITE_TRYLOCK is not set
-# CONFIG_INLINE_WRITE_LOCK is not set
-# CONFIG_INLINE_WRITE_LOCK_BH is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
-# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
-CONFIG_INLINE_WRITE_UNLOCK=y
-# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
-CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
-# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
-# CONFIG_MUTEX_SPIN_ON_OWNER is not set
-CONFIG_FREEZER=y
-
-#
-# UML Character Devices
-#
-CONFIG_STDERR_CONSOLE=y
-CONFIG_STDIO_CONSOLE=y
-CONFIG_SSL=y
-CONFIG_NULL_CHAN=y
-CONFIG_PORT_CHAN=y
-CONFIG_PTY_CHAN=y
-CONFIG_TTY_CHAN=y
-CONFIG_XTERM_CHAN=y
-# CONFIG_NOCONFIG_CHAN is not set
-CONFIG_CON_ZERO_CHAN="fd:0,fd:1"
-CONFIG_CON_CHAN="xterm"
-CONFIG_SSL_CHAN="pts"
-CONFIG_UML_SOUND=m
-CONFIG_SOUND=m
-CONFIG_SOUND_OSS_CORE=y
-CONFIG_HOSTAUDIO=m
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
-CONFIG_FIRMWARE_IN_KERNEL=y
-CONFIG_EXTRA_FIRMWARE=""
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-# CONFIG_SYS_HYPERVISOR is not set
-CONFIG_GENERIC_CPU_DEVICES=y
-# CONFIG_DMA_SHARED_BUFFER is not set
-# CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
-CONFIG_BLK_DEV=y
-CONFIG_BLK_DEV_UBD=y
-# CONFIG_BLK_DEV_UBD_SYNC is not set
-CONFIG_BLK_DEV_COW_COMMON=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-
-#
-# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
-#
-CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_RAM is not set
-# CONFIG_ATA_OVER_ETH is not set
-# CONFIG_BLK_DEV_RBD is not set
-
-#
-# Misc devices
-#
-# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_C2PORT is not set
-
-#
-# EEPROM support
-#
-# CONFIG_EEPROM_93CX6 is not set
-
-#
-# Texas Instruments shared transport line discipline
-#
-
-#
-# Altera FPGA firmware download module
-#
-
-#
-# SCSI device support
-#
-CONFIG_SCSI_MOD=y
-# CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
-# CONFIG_SCSI_NETLINK is not set
-# CONFIG_MD is not set
-CONFIG_NETDEVICES=y
-CONFIG_NET_CORE=y
-# CONFIG_BONDING is not set
-CONFIG_DUMMY=m
-# CONFIG_EQUALIZER is not set
-# CONFIG_MII is not set
-# CONFIG_NET_TEAM is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-CONFIG_TUN=m
-# CONFIG_VETH is not set
-
-#
-# CAIF transport drivers
-#
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_CHELSIO=y
-CONFIG_NET_VENDOR_INTEL=y
-CONFIG_NET_VENDOR_I825XX=y
-CONFIG_NET_VENDOR_MARVELL=y
-CONFIG_NET_VENDOR_NATSEMI=y
-CONFIG_NET_VENDOR_8390=y
-# CONFIG_PHYLIB is not set
-CONFIG_PPP=m
-# CONFIG_PPP_BSDCOMP is not set
-# CONFIG_PPP_DEFLATE is not set
-# CONFIG_PPP_FILTER is not set
-# CONFIG_PPP_MPPE is not set
-# CONFIG_PPP_MULTILINK is not set
-# CONFIG_PPPOE is not set
-# CONFIG_PPP_ASYNC is not set
-# CONFIG_PPP_SYNC_TTY is not set
-CONFIG_SLIP=m
-CONFIG_SLHC=m
-# CONFIG_SLIP_COMPRESSED is not set
-# CONFIG_SLIP_SMART is not set
-# CONFIG_SLIP_MODE_SLIP6 is not set
-CONFIG_WLAN=y
-# CONFIG_HOSTAP is not set
-
-#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
-#
-# CONFIG_WAN is not set
-
-#
-# Character devices
-#
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=32
-# CONFIG_N_GSM is not set
-# CONFIG_TRACE_SINK is not set
-CONFIG_DEVKMEM=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_UML_RANDOM=y
-# CONFIG_R3964 is not set
-# CONFIG_NSC_GPIO is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# PPS support
-#
-# CONFIG_PPS is not set
-
-#
-# PPS generators support
-#
-
-#
-# PTP clock support
-#
-
-#
-# Enable Device Drivers -> PPS to see the PTP clock options.
-#
-# CONFIG_POWER_SUPPLY is not set
-# CONFIG_THERMAL is not set
-# CONFIG_WATCHDOG is not set
-# CONFIG_REGULATOR is not set
-CONFIG_SOUND_OSS_CORE_PRECLAIM=y
-# CONFIG_MEMSTICK is not set
-# CONFIG_NEW_LEDS is not set
-# CONFIG_ACCESSIBILITY is not set
-# CONFIG_AUXDISPLAY is not set
-# CONFIG_UIO is not set
-
-#
-# Virtio drivers
-#
-# CONFIG_VIRTIO_BALLOON is not set
-
-#
-# Microsoft Hyper-V guest support
-#
-# CONFIG_STAGING is not set
-
-#
-# Hardware Spinlock drivers
-#
-CONFIG_IOMMU_SUPPORT=y
-# CONFIG_VIRT_DRIVERS is not set
-# CONFIG_PM_DEVFREQ is not set
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-# CONFIG_UNIX_DIAG is not set
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_XFRM_STATISTICS is not set
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE_DEMUX is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=y
-CONFIG_INET_XFRM_MODE_TUNNEL=y
-CONFIG_INET_XFRM_MODE_BEET=y
-# CONFIG_INET_LRO is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_INET_UDP_DIAG is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_RDS is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_L2TP is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_NET_DSA is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_PHONET is not set
-# CONFIG_IEEE802154 is not set
-# CONFIG_NET_SCHED is not set
-# CONFIG_DCB is not set
-# CONFIG_BATMAN_ADV is not set
-# CONFIG_OPENVSWITCH is not set
-# CONFIG_NETPRIO_CGROUP is not set
-CONFIG_BQL=y
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_CAN is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-CONFIG_WIRELESS=y
-# CONFIG_CFG80211 is not set
-# CONFIG_LIB80211 is not set
-
-#
-# CFG80211 needs to be enabled for MAC80211
-#
-# CONFIG_WIMAX is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-# CONFIG_CAIF is not set
-# CONFIG_CEPH_LIB is not set
-# CONFIG_NFC is not set
-
-#
-# UML Network Devices
-#
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-# CONFIG_UML_NET_VDE is not set
-CONFIG_UML_NET_MCAST=y
-# CONFIG_UML_NET_PCAP is not set
-CONFIG_UML_NET_SLIRP=y
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_USE_FOR_EXT23=y
-CONFIG_EXT4_FS_XATTR=y
-# CONFIG_EXT4_FS_POSIX_ACL is not set
-# CONFIG_EXT4_FS_SECURITY is not set
-# CONFIG_EXT4_DEBUG is not set
-CONFIG_JBD2=y
-CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-# CONFIG_REISERFS_FS_XATTR is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_BTRFS_FS is not set
-# CONFIG_NILFS2_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
-CONFIG_FILE_LOCKING=y
-CONFIG_FSNOTIFY=y
-CONFIG_DNOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_FANOTIFY is not set
-CONFIG_QUOTA=y
-# CONFIG_QUOTA_NETLINK_INTERFACE is not set
-CONFIG_PRINT_QUOTA_WARNING=y
-# CONFIG_QUOTA_DEBUG is not set
-# CONFIG_QFMT_V1 is not set
-# CONFIG_QFMT_V2 is not set
-CONFIG_QUOTACTL=y
-CONFIG_AUTOFS4_FS=m
-# CONFIG_FUSE_FS is not set
-
-#
-# Caches
-#
-# CONFIG_FSCACHE is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
-# CONFIG_TMPFS_XATTR is not set
-# CONFIG_HUGETLB_PAGE is not set
-# CONFIG_CONFIGFS_FS is not set
-CONFIG_MISC_FILESYSTEMS=y
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_LOGFS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_SQUASHFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_OMFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_PSTORE is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
-# CONFIG_CEPH_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-1"
-# CONFIG_NLS_CODEPAGE_437 is not set
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
-# CONFIG_NLS_ISO8859_1 is not set
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITYFS is not set
-CONFIG_DEFAULT_SECURITY_DAC=y
-CONFIG_DEFAULT_SECURITY=""
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-# CONFIG_CRYPTO_FIPS is not set
-CONFIG_CRYPTO_ALGAPI=m
-CONFIG_CRYPTO_ALGAPI2=m
-CONFIG_CRYPTO_RNG=m
-CONFIG_CRYPTO_RNG2=m
-# CONFIG_CRYPTO_MANAGER is not set
-# CONFIG_CRYPTO_MANAGER2 is not set
-# CONFIG_CRYPTO_USER is not set
-# CONFIG_CRYPTO_GF128MUL is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_CRYPTD is not set
-# CONFIG_CRYPTO_AUTHENC is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Authenticated Encryption with Associated Data
-#
-# CONFIG_CRYPTO_CCM is not set
-# CONFIG_CRYPTO_GCM is not set
-# CONFIG_CRYPTO_SEQIV is not set
-
-#
-# Block modes
-#
-# CONFIG_CRYPTO_CBC is not set
-# CONFIG_CRYPTO_CTR is not set
-# CONFIG_CRYPTO_CTS is not set
-# CONFIG_CRYPTO_ECB is not set
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_PCBC is not set
-# CONFIG_CRYPTO_XTS is not set
-
-#
-# Hash modes
-#
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_VMAC is not set
-
-#
-# Digest
-#
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_GHASH is not set
-# CONFIG_CRYPTO_MD4 is not set
-# CONFIG_CRYPTO_MD5 is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_RMD128 is not set
-# CONFIG_CRYPTO_RMD160 is not set
-# CONFIG_CRYPTO_RMD256 is not set
-# CONFIG_CRYPTO_RMD320 is not set
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_WP512 is not set
-
-#
-# Ciphers
-#
-CONFIG_CRYPTO_AES=m
-# CONFIG_CRYPTO_AES_586 is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_DES is not set
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_SALSA20 is not set
-# CONFIG_CRYPTO_SALSA20_586 is not set
-# CONFIG_CRYPTO_SEED is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_TWOFISH_586 is not set
-
-#
-# Compression
-#
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_ZLIB is not set
-# CONFIG_CRYPTO_LZO is not set
-
-#
-# Random Number Generation
-#
-CONFIG_CRYPTO_ANSI_CPRNG=m
-# CONFIG_CRYPTO_USER_API_HASH is not set
-# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
-CONFIG_CRYPTO_HW=y
-# CONFIG_BINARY_PRINTF is not set
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-CONFIG_GENERIC_FIND_FIRST_BIT=y
-CONFIG_GENERIC_IO=y
-# CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=y
-# CONFIG_CRC_T10DIF is not set
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-# CONFIG_CRC8 is not set
-# CONFIG_XZ_DEC is not set
-# CONFIG_XZ_DEC_BCJ is not set
-CONFIG_DQL=y
-CONFIG_NLATTR=y
-# CONFIG_AVERAGE is not set
-# CONFIG_CORDIC is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4
-CONFIG_ENABLE_WARN_DEPRECATED=y
-CONFIG_ENABLE_MUST_CHECK=y
-CONFIG_FRAME_WARN=1024
-# CONFIG_STRIP_ASM_SYMS is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_DEBUG_SECTION_MISMATCH is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
-# CONFIG_LOCKUP_DETECTOR is not set
-# CONFIG_HARDLOCKUP_DETECTOR is not set
-# CONFIG_DETECT_HUNG_TASK is not set
-CONFIG_SCHED_DEBUG=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_SPARSE_RCU_POINTER is not set
-# CONFIG_DEBUG_ATOMIC_SLEEP is not set
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_INFO_REDUCED is not set
-# CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_WRITECOUNT is not set
-CONFIG_DEBUG_MEMORY_INIT=y
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_TEST_LIST_SORT is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_DEBUG_NOTIFIERS is not set
-# CONFIG_DEBUG_CREDENTIALS is not set
-CONFIG_FRAME_POINTER=y
-# CONFIG_BOOT_PRINTK_DELAY is not set
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
-# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_ATOMIC64_SELFTEST is not set
-# CONFIG_SAMPLES is not set
-# CONFIG_TEST_KSTRTOX is not set
-# CONFIG_GPROF is not set
-# CONFIG_GCOV is not set
-CONFIG_EARLY_PRINTK=y
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
new file mode 100644
index 000000000000..6a0354ca032f
--- /dev/null
+++ b/arch/um/drivers/Kconfig
@@ -0,0 +1,190 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "UML Character Devices"
+
+config STDERR_CONSOLE
+	bool "stderr console"
+	default y
+	help
+	  console driver which dumps all printk messages to stderr.
+
+config SSL
+	bool "Virtual serial line"
+	help
+	  The User-Mode Linux environment allows you to create virtual serial
+	  lines on the UML that are usually made to show up on the host as
+	  ttys or ptys.
+
+	  See <http://user-mode-linux.sourceforge.net/old/input.html> for more
+	  information and command line examples of how to use this facility.
+
+	  Unless you have a specific reason for disabling this, say Y.
+
+config NULL_CHAN
+	bool "null channel support"
+	help
+	  This option enables support for attaching UML consoles and serial
+	  lines to a device similar to /dev/null.  Data written to it disappears
+	  and there is never any data to be read.
+
+config PORT_CHAN
+	bool "port channel support"
+	help
+	  This option enables support for attaching UML consoles and serial
+	  lines to host portals.  They may be accessed with 'telnet <host>
+	  <port number>'.  Any number of consoles and serial lines may be
+	  attached to a single portal, although what UML device you get when
+	  you telnet to that portal will be unpredictable.
+	  It is safe to say 'Y' here.
+
+config PTY_CHAN
+	bool "pty channel support"
+	help
+	  This option enables support for attaching UML consoles and serial
+	  lines to host pseudo-terminals.  Access to both traditional
+	  pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
+	  with this option.  The assignment of UML devices to host devices
+	  will be announced in the kernel message log.
+	  It is safe to say 'Y' here.
+
+config TTY_CHAN
+	bool "tty channel support"
+	help
+	  This option enables support for attaching UML consoles and serial
+	  lines to host terminals.  Access to both virtual consoles
+	  (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
+	  /dev/pts/*) are controlled by this option.
+	  It is safe to say 'Y' here.
+
+config XTERM_CHAN
+	bool "xterm channel support"
+	help
+	  This option enables support for attaching UML consoles and serial
+	  lines to xterms.  Each UML device so assigned will be brought up in
+	  its own xterm.
+	  It is safe to say 'Y' here.
+
+config XTERM_CHAN_DEFAULT_EMULATOR
+	string "xterm channel default terminal emulator"
+	depends on XTERM_CHAN
+	default "xterm"
+	help
+	  This option allows changing the default terminal emulator.
+
+config NOCONFIG_CHAN
+	bool
+	default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN)
+
+config CON_ZERO_CHAN
+	string "Default main console channel initialization"
+	default "fd:0,fd:1"
+	help
+	  This is the string describing the channel to which the main console
+	  will be attached by default.  This value can be overridden from the
+	  command line.  The default value is "fd:0,fd:1", which attaches the
+	  main console to stdin and stdout.
+	  It is safe to leave this unchanged.
+
+config CON_CHAN
+	string "Default console channel initialization"
+	default "xterm"
+	help
+	  This is the string describing the channel to which all consoles
+	  except the main console will be attached by default.  This value can
+	  be overridden from the command line.  The default value is "xterm",
+	  which brings them up in xterms.
+	  It is safe to leave this unchanged, although you may wish to change
+	  this if you expect the UML that you build to be run in environments
+	  which don't have X or xterm available.
+
+config SSL_CHAN
+	string "Default serial line channel initialization"
+	default "pty"
+	help
+	  This is the string describing the channel to which the serial lines
+	  will be attached by default.  This value can be overridden from the
+	  command line.  The default value is "pty", which attaches them to
+	  traditional pseudo-terminals.
+	  It is safe to leave this unchanged, although you may wish to change
+	  this if you expect the UML that you build to be run in environments
+	  which don't have a set of /dev/pty* devices.
+
+config UML_SOUND
+	tristate "Sound support"
+	depends on SOUND
+	select SOUND_OSS_CORE
+	help
+	  This option enables UML sound support.  If enabled, it will pull in
+	  the UML hostaudio relay, which acts as a intermediary
+	  between the host's dsp and mixer devices and the UML sound system.
+	  It is safe to say 'Y' here.
+
+endmenu
+
+menu "UML Network Devices"
+	depends on NET
+
+config UML_NET_VECTOR
+	bool "Vector I/O high performance network devices"
+	select MAY_HAVE_RUNTIME_DEPS
+	help
+	  This User-Mode Linux network driver uses multi-message send
+	  and receive functions. The host running the UML guest must have
+	  a linux kernel version above 3.0 and a libc version > 2.13.
+	  This driver provides tap, raw, gre and l2tpv3 network transports.
+
+	  For more information, including explanations of the networking
+	  and sample configurations, see
+	  <file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>.
+
+endmenu
+
+config VIRTIO_UML
+	bool "UML driver for virtio devices"
+	select VIRTIO
+	help
+	  This driver provides support for virtio based paravirtual device
+	  drivers over vhost-user sockets.
+
+config UML_RTC
+	bool "UML RTC driver"
+	depends on RTC_CLASS
+	# there's no use in this if PM_SLEEP isn't enabled ...
+	depends on PM_SLEEP
+	help
+	  When PM_SLEEP is configured, it may be desirable to wake up using
+	  rtcwake, especially in time-travel mode. This driver enables that
+	  by providing a fake RTC clock that causes a wakeup at the right
+	  time.
+
+config UML_PCI
+	bool
+	select FORCE_PCI
+	select IRQ_MSI_LIB
+	select UML_IOMEM_EMULATION
+	select UML_DMA_EMULATION
+	select PCI_MSI
+	select PCI_LOCKLESS_CONFIG
+
+config UML_PCI_OVER_VIRTIO
+	bool "Enable PCI over VIRTIO device simulation"
+	# in theory, just VIRTIO is enough, but that causes recursion
+	depends on VIRTIO_UML
+	select UML_PCI
+
+config UML_PCI_OVER_VIRTIO_DEVICE_ID
+	int "set the virtio device ID for PCI emulation"
+	default -1
+	depends on UML_PCI_OVER_VIRTIO
+	help
+	  There's no official device ID assigned (yet), set the one you
+	  wish to use for experimentation here. The default of -1 is
+	  not valid and will cause the driver to fail at probe.
+
+config UML_PCI_OVER_VFIO
+	bool "Enable VFIO-based PCI passthrough"
+	select UML_PCI
+	help
+	  This driver provides support for VFIO-based PCI passthrough.
+	  Currently, only MSI-X capable devices are supported, and it
+	  is assumed that drivers will use MSI-X.
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..36dc57840084 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -1,33 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
 # 
 # Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
 #
 
 # pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked
 # in to pcap.o
 
-slip-objs := slip_kern.o slip_user.o
-slirp-objs := slirp_kern.o slirp_user.o
-daemon-objs := daemon_kern.o daemon_user.o
-umcast-objs := umcast_kern.o umcast_user.o
-net-objs := net_kern.o net_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
 ubd-objs := ubd_kern.o ubd_user.o
 port-objs := port_kern.o port_user.o
-harddog-objs := harddog_kern.o harddog_user.o
-
-LDFLAGS_pcap.o := -r $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
-
-LDFLAGS_vde.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
-
-targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
-
-$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
-	$(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o)
-
-$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
-	$(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_vde.o)
+harddog-objs := harddog_kern.o
+harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
+rtc-objs := rtc_kern.o rtc_user.o
+vfio_uml-objs := vfio_kern.o vfio_user.o
 
 #XXX: The call below does not work because the flags are added before the
 # object name, so nothing from the library gets linked.
@@ -40,28 +27,29 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
 obj-$(CONFIG_SSL) += ssl.o
 obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 
-obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
-obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
-obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
-obj-$(CONFIG_UML_NET_VDE) += vde.o
-obj-$(CONFIG_UML_NET_MCAST) += umcast.o
-obj-$(CONFIG_UML_NET_PCAP) += pcap.o
-obj-$(CONFIG_UML_NET) += net.o 
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_MCONSOLE) += mconsole.o
-obj-$(CONFIG_MMAPPER) += mmapper_kern.o 
 obj-$(CONFIG_BLK_DEV_UBD) += ubd.o 
-obj-$(CONFIG_HOSTAUDIO) += hostaudio.o
+obj-$(CONFIG_UML_SOUND) += hostaudio.o
 obj-$(CONFIG_NULL_CHAN) += null.o 
 obj-$(CONFIG_PORT_CHAN) += port.o
 obj-$(CONFIG_PTY_CHAN) += pty.o
 obj-$(CONFIG_TTY_CHAN) += tty.o 
 obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
 obj-$(CONFIG_UML_WATCHDOG) += harddog.o
+obj-y += $(harddog-builtin-y) $(harddog-builtin-m)
 obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
+obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
+obj-$(CONFIG_UML_RTC) += rtc.o
+obj-$(CONFIG_UML_PCI) += virt-pci.o
+obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o
+obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
-include arch/um/scripts/Makefile.rules
+CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
+
+include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
index c512b0306dd4..5a61db512ffb 100644
--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __CHAN_KERN_H__
@@ -22,7 +22,8 @@ struct chan {
 	unsigned int output:1;
 	unsigned int opened:1;
 	unsigned int enabled:1;
-	int fd;
+	int fd_in;
+	int fd_out; /* only different to fd_in if blocking output is needed */
 	const struct chan_ops *ops;
 	void *data;
 };
@@ -30,13 +31,12 @@ struct chan {
 extern void chan_interrupt(struct line *line, int irq);
 extern int parse_chan_pair(char *str, struct line *line, int device,
 			   const struct chan_opts *opts, char **error_out);
-extern int write_chan(struct chan *chan, const char *buf, int len,
+extern int write_chan(struct chan *chan, const u8 *buf, size_t len,
 			     int write_irq);
 extern int console_write_chan(struct chan *chan, const char *buf, 
 			      int len);
 extern int console_open_chan(struct line *line, struct console *co);
 extern void deactivate_chan(struct chan *chan, int irq);
-extern void reactivate_chan(struct chan *chan, int irq);
 extern void chan_enable_winch(struct chan *chan, struct tty_port *port);
 extern int enable_chan(struct line *line);
 extern void close_chan(struct line *line);
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..26442db7d608 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/slab.h>
@@ -33,14 +33,14 @@ static void not_configged_close(int fd, void *data)
 	       "UML\n");
 }
 
-static int not_configged_read(int fd, char *c_out, void *data)
+static int not_configged_read(int fd, u8 *c_out, void *data)
 {
 	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -EIO;
 }
 
-static int not_configged_write(int fd, const char *buf, int len, void *data)
+static int not_configged_write(int fd, const u8 *buf, size_t len, void *data)
 {
 	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
@@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = {
 };
 #endif /* CONFIG_NOCONFIG_CHAN */
 
+static inline bool need_output_blocking(void)
+{
+	return time_travel_mode == TT_MODE_INFCPU ||
+	       time_travel_mode == TT_MODE_EXTERNAL;
+}
+
 static int open_one_chan(struct chan *chan)
 {
 	int fd, err;
@@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan)
 		return fd;
 
 	err = os_set_fd_block(fd, 0);
-	if (err) {
-		(*chan->ops->close)(fd, chan->data);
-		return err;
-	}
+	if (err)
+		goto out_close;
+
+	chan->fd_in = fd;
+	chan->fd_out = fd;
+
+	/*
+	 * In time-travel modes infinite-CPU and external we need to guarantee
+	 * that any writes to the output succeed immdiately from the point of
+	 * the VM. The best way to do this is to put the FD in blocking mode
+	 * and simply wait/retry until everything is written.
+	 * As every write is guaranteed to complete, we also do not need to
+	 * request an IRQ for the output.
+	 *
+	 * Note that input cannot happen in a time synchronized way. We permit
+	 * it, but time passes very quickly if anything waits for a read.
+	 */
+	if (chan->output && need_output_blocking()) {
+		err = os_dup_file(chan->fd_out);
+		if (err < 0)
+			goto out_close;
 
-	chan->fd = fd;
+		chan->fd_out = err;
+
+		err = os_set_fd_block(chan->fd_out, 1);
+		if (err) {
+			os_close_file(chan->fd_out);
+			goto out_close;
+		}
+	}
 
 	chan->opened = 1;
 	return 0;
+
+out_close:
+	(*chan->ops->close)(fd, chan->data);
+	return err;
 }
 
 static int open_chan(struct list_head *chans)
@@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans)
 void chan_enable_winch(struct chan *chan, struct tty_port *port)
 {
 	if (chan && chan->primary && chan->ops->winch)
-		register_winch(chan->fd, port);
+		register_winch(chan->fd_in, port);
 }
 
 static void line_timer_cb(struct work_struct *work)
@@ -133,7 +167,7 @@ static void line_timer_cb(struct work_struct *work)
 	struct line *line = container_of(work, struct line, task.work);
 
 	if (!line->throttled)
-		chan_interrupt(line, line->driver->read_irq);
+		chan_interrupt(line, line->read_irq);
 }
 
 int enable_chan(struct line *line)
@@ -156,8 +190,9 @@ int enable_chan(struct line *line)
 
 		if (chan->enabled)
 			continue;
-		err = line_setup_irq(chan->fd, chan->input, chan->output, line,
-				     chan);
+		err = line_setup_irq(chan->fd_in, chan->input,
+				     chan->output && !need_output_blocking(),
+				     line, chan);
 		if (err)
 			goto out_close;
 
@@ -177,7 +212,7 @@ int enable_chan(struct line *line)
  * be permanently disabled.  This is discovered in IRQ context, but
  * the freeing of the IRQ must be done later.
  */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
+static DEFINE_RAW_SPINLOCK(irqs_to_free_lock);
 static LIST_HEAD(irqs_to_free);
 
 void free_irqs(void)
@@ -187,17 +222,18 @@ void free_irqs(void)
 	struct list_head *ele;
 	unsigned long flags;
 
-	spin_lock_irqsave(&irqs_to_free_lock, flags);
+	raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
 	list_splice_init(&irqs_to_free, &list);
-	spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+	raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
 
 	list_for_each(ele, &list) {
 		chan = list_entry(ele, struct chan, free_list);
 
 		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
+			um_free_irq(chan->line->read_irq, chan);
+		if (chan->output && chan->enabled &&
+		    !need_output_blocking())
+			um_free_irq(chan->line->write_irq, chan);
 		chan->enabled = 0;
 	}
 }
@@ -210,22 +246,25 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
 		return;
 
 	if (delay_free_irq) {
-		spin_lock_irqsave(&irqs_to_free_lock, flags);
+		raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
 		list_add(&chan->free_list, &irqs_to_free);
-		spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-	}
-	else {
+		raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
+	} else {
 		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
+			um_free_irq(chan->line->read_irq, chan);
+		if (chan->output && chan->enabled &&
+		    !need_output_blocking())
+			um_free_irq(chan->line->write_irq, chan);
 		chan->enabled = 0;
 	}
+	if (chan->fd_out != chan->fd_in)
+		os_close_file(chan->fd_out);
 	if (chan->ops->close != NULL)
-		(*chan->ops->close)(chan->fd, chan->data);
+		(*chan->ops->close)(chan->fd_in, chan->data);
 
 	chan->opened = 0;
-	chan->fd = -1;
+	chan->fd_in = -1;
+	chan->fd_out = -1;
 }
 
 void close_chan(struct line *line)
@@ -245,28 +284,19 @@ void close_chan(struct line *line)
 void deactivate_chan(struct chan *chan, int irq)
 {
 	if (chan && chan->enabled)
-		deactivate_fd(chan->fd, irq);
-}
-
-void reactivate_chan(struct chan *chan, int irq)
-{
-	if (chan && chan->enabled)
-		reactivate_fd(chan->fd, irq);
+		deactivate_fd(chan->fd_in, irq);
 }
 
-int write_chan(struct chan *chan, const char *buf, int len,
-	       int write_irq)
+int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
 {
 	int n, ret = 0;
 
 	if (len == 0 || !chan || !chan->ops->write)
 		return 0;
 
-	n = chan->ops->write(chan->fd, buf, len, chan->data);
+	n = chan->ops->write(chan->fd_out, buf, len, chan->data);
 	if (chan->primary) {
 		ret = n;
-		if ((ret == -EAGAIN) || ((ret >= 0) && (ret < len)))
-			reactivate_fd(chan->fd, write_irq);
 	}
 	return ret;
 }
@@ -278,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len)
 	if (!chan || !chan->ops->console_write)
 		return 0;
 
-	n = chan->ops->console_write(chan->fd, buf, len);
+	n = chan->ops->console_write(chan->fd_out, buf, len);
 	if (chan->primary)
 		ret = n;
 	return ret;
@@ -306,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out,
 	if (chan && chan->primary) {
 		if (chan->ops->window_size == NULL)
 			return 0;
-		return chan->ops->window_size(chan->fd, chan->data,
+		return chan->ops->window_size(chan->fd_in, chan->data,
 					      rows_out, cols_out);
 	}
 	chan = line->chan_out;
 	if (chan && chan->primary) {
 		if (chan->ops->window_size == NULL)
 			return 0;
-		return chan->ops->window_size(chan->fd, chan->data,
+		return chan->ops->window_size(chan->fd_in, chan->data,
 					      rows_out, cols_out);
 	}
 	return 0;
@@ -329,7 +359,7 @@ static void free_one_chan(struct chan *chan)
 		(*chan->ops->free)(chan->data);
 
 	if (chan->primary && chan->output)
-		ignore_sigio_fd(chan->fd);
+		ignore_sigio_fd(chan->fd_in);
 	kfree(chan);
 }
 
@@ -488,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
 				 .output 	= 0,
 				 .opened  	= 0,
 				 .enabled  	= 0,
-				 .fd 		= -1,
+				 .fd_in		= -1,
+				 .fd_out	= -1,
 				 .ops 		= ops,
 				 .data 		= data });
 	return chan;
@@ -549,7 +580,7 @@ void chan_interrupt(struct line *line, int irq)
 	struct tty_port *port = &line->port;
 	struct chan *chan = line->chan_in;
 	int err;
-	char c;
+	u8 c;
 
 	if (!chan || !chan->ops->read)
 		goto out;
@@ -559,13 +590,11 @@ void chan_interrupt(struct line *line, int irq)
 			schedule_delayed_work(&line->task, 1);
 			goto out;
 		}
-		err = chan->ops->read(chan->fd, &c, chan->data);
+		err = chan->ops->read(chan->fd_in, &c, chan->data);
 		if (err > 0)
 			tty_insert_flip_char(port, c, TTY_NORMAL);
 	} while (err > 0);
 
-	if (err == 0)
-		reactivate_fd(chan->fd, irq);
 	if (err == -EIO) {
 		if (chan->primary) {
 			tty_port_tty_hangup(&line->port, false);
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 3fd7c3efdb18..35f9beeb19b3 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
@@ -19,29 +19,41 @@ void generic_close(int fd, void *unused)
 	close(fd);
 }
 
-int generic_read(int fd, char *c_out, void *unused)
+int generic_read(int fd, __u8 *c_out, void *unused)
 {
 	int n;
 
-	n = read(fd, c_out, sizeof(*c_out));
+	CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out)));
 	if (n > 0)
 		return n;
-	else if (errno == EAGAIN)
-		return 0;
 	else if (n == 0)
 		return -EIO;
+	else if (errno == EAGAIN)
+		return 0;
 	return -errno;
 }
 
 /* XXX Trivial wrapper around write */
 
-int generic_write(int fd, const char *buf, int n, void *unused)
+int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
 {
+	int written = 0;
 	int err;
 
-	err = write(fd, buf, n);
-	if (err > 0)
-		return err;
+	/* The FD may be in blocking mode, as such, need to retry short writes,
+	 * they may have been interrupted by a signal.
+	 */
+	do {
+		errno = 0;
+		err = write(fd, buf + written, n - written);
+		if (err > 0) {
+			written += err;
+			continue;
+		}
+	} while (err < 0 && errno == EINTR);
+
+	if (written > 0)
+		return written;
 	else if (errno == EAGAIN)
 		return 0;
 	else if (err == 0)
@@ -141,7 +153,7 @@ struct winch_data {
 	int pipe_fd;
 };
 
-static int winch_thread(void *arg)
+static __noreturn int winch_thread(void *arg)
 {
 	struct winch_data *data = arg;
 	sigset_t sigs;
@@ -149,12 +161,14 @@ static int winch_thread(void *arg)
 	int count;
 	char c = 1;
 
+	os_set_pdeathsig();
+
 	pty_fd = data->pty_fd;
 	pipe_fd = data->pipe_fd;
 	count = write(pipe_fd, &c, sizeof(c));
 	if (count != sizeof(c))
-		printk(UM_KERN_ERR "winch_thread : failed to write "
-		       "synchronization byte, err = %d\n", -count);
+		os_info("winch_thread : failed to write synchronization byte, err = %d\n",
+			-count);
 
 	/*
 	 * We are not using SIG_IGN on purpose, so don't fix it as I thought to
@@ -166,29 +180,29 @@ static int winch_thread(void *arg)
 	sigfillset(&sigs);
 	/* Block all signals possible. */
 	if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
-		printk(UM_KERN_ERR "winch_thread : sigprocmask failed, "
-		       "errno = %d\n", errno);
-		exit(1);
+		os_info("winch_thread : sigprocmask failed, errno = %d\n",
+			errno);
+		goto wait_kill;
 	}
 	/* In sigsuspend(), block anything else than SIGWINCH. */
 	sigdelset(&sigs, SIGWINCH);
 
 	if (setsid() < 0) {
-		printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n",
+		os_info("winch_thread : setsid failed, errno = %d\n",
 		       errno);
-		exit(1);
+		goto wait_kill;
 	}
 
 	if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
-		printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on "
-		       "fd %d err = %d\n", pty_fd, errno);
-		exit(1);
+		os_info("winch_thread : TIOCSCTTY failed on "
+			"fd %d err = %d\n", pty_fd, errno);
+		goto wait_kill;
 	}
 
 	if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
-		printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on "
-		       "fd %d err = %d\n", pty_fd, errno);
-		exit(1);
+		os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n",
+			pty_fd, errno);
+		goto wait_kill;
 	}
 
 	/*
@@ -199,8 +213,8 @@ static int winch_thread(void *arg)
 	 */
 	count = read(pipe_fd, &c, sizeof(c));
 	if (count != sizeof(c))
-		printk(UM_KERN_ERR "winch_thread : failed to read "
-		       "synchronization byte, err = %d\n", errno);
+		os_info("winch_thread : failed to read synchronization byte, err = %d\n",
+			errno);
 
 	while(1) {
 		/*
@@ -211,16 +225,22 @@ static int winch_thread(void *arg)
 
 		count = write(pipe_fd, &c, sizeof(c));
 		if (count != sizeof(c))
-			printk(UM_KERN_ERR "winch_thread : write failed, "
-			       "err = %d\n", errno);
+			os_info("winch_thread : write failed, err = %d\n",
+				errno);
 	}
+
+wait_kill:
+	c = 2;
+	count = write(pipe_fd, &c, sizeof(c));
+	while (1)
+		pause();
 }
 
 static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
 		       unsigned long *stack_out)
 {
 	struct winch_data data;
-	int fds[2], n, err;
+	int fds[2], n, err, pid;
 	char c;
 
 	err = os_pipe(fds, 1, 1);
@@ -238,8 +258,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
 	 * problem with /dev/net/tun, which if held open by this
 	 * thread, prevents the TUN/TAP device from being reused.
 	 */
-	err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out);
-	if (err < 0) {
+	pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out);
+	if (pid < 0) {
+		err = pid;
 		printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n",
 		       -err);
 		goto out_close;
@@ -256,13 +277,14 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
 		goto out_close;
 	}
 
-	if (os_set_fd_block(*fd_out, 0)) {
+	err = os_set_fd_block(*fd_out, 0);
+	if (err) {
 		printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd "
 		       "non-blocking.\n");
 		goto out_close;
 	}
 
-	return err;
+	return pid;
 
  out_close:
 	close(fds[1]);
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 03f1b565c5f9..e158e16fb3cc 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -1,17 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __CHAN_USER_H__
 #define __CHAN_USER_H__
 
 #include <init.h>
+#include <linux/types.h>
 
 struct chan_opts {
 	void (*const announce)(char *dev_name, int dev);
 	char *xterm_title;
-	const int raw;
+	int raw;
 };
 
 struct chan_ops {
@@ -19,8 +20,8 @@ struct chan_ops {
 	void *(*init)(char *, int, const struct chan_opts *);
 	int (*open)(int, int, int, void *, char **);
 	void (*close)(int, void *);
-	int (*read)(int, char *, void *);
-	int (*write)(int, const char *, int, void *);
+	int (*read)(int, __u8 *, void *);
+	int (*write)(int, const __u8 *, size_t, void *);
 	int (*console_write)(int, const char *, int);
 	int (*window_size)(int, void *, unsigned short *, unsigned short *);
 	void (*free)(void *);
@@ -31,8 +32,8 @@ extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops,
 	tty_ops, xterm_ops;
 
 extern void generic_close(int fd, void *unused);
-extern int generic_read(int fd, char *c_out, void *unused);
-extern int generic_write(int fd, const char *buf, int n, void *unused);
+extern int generic_read(int fd, __u8 *c_out, void *unused);
+extern int generic_write(int fd, const __u8 *buf, size_t n, void *unused);
 extern int generic_console_write(int fd, const char *buf, int n);
 extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
 			       unsigned short *cols_out);
diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
index 6673508f3426..9a67c017000f 100644
--- a/arch/um/drivers/cow.h
+++ b/arch/um/drivers/cow.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __COW_H__
 #define __COW_H__
 
@@ -10,7 +11,7 @@ extern int init_cow_file(int fd, char *cow_file, char *backing_file,
 extern int file_reader(__u64 offset, char *buf, int len, void *arg);
 extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
 			   void *arg, __u32 *version_out,
-			   char **backing_file_out, time_t *mtime_out,
+			   char **backing_file_out, long long *mtime_out,
 			   unsigned long long *size_out, int *sectorsize_out,
 			   __u32 *align_out, int *bitmap_offset_out);
 
@@ -23,10 +24,3 @@ extern void cow_sizes(int version, __u64 size, int sectorsize, int align,
 		      int *data_offset_out);
 
 #endif
-
-/*
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
index 67cbee63e702..916811ef5317 100644
--- a/arch/um/drivers/cow_sys.h
+++ b/arch/um/drivers/cow_sys.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __COW_SYS_H__
 #define __COW_SYS_H__
 
diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 0ee9cc6cc4c7..29b46581ddd1 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 /*
@@ -17,6 +17,7 @@
 
 #define PATH_LEN_V1 256
 
+/* unsigned time_t works until year 2106 */
 typedef __u32 time32_t;
 
 struct cow_header_v1 {
@@ -197,7 +198,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 		     int sectorsize, int alignment, unsigned long long *size)
 {
 	struct cow_header_v3 *header;
-	unsigned long modtime;
+	long long modtime;
 	int err;
 
 	err = cow_seek_file(fd, 0);
@@ -276,7 +277,7 @@ int file_reader(__u64 offset, char *buf, int len, void *arg)
 
 int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 		    __u32 *version_out, char **backing_file_out,
-		    time_t *mtime_out, unsigned long long *size_out,
+		    long long *mtime_out, unsigned long long *size_out,
 		    int *sectorsize_out, __u32 *align_out,
 		    int *bitmap_offset_out)
 {
@@ -363,7 +364,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 
 		/*
 		 * this was used until Dec2005 - 64bits are needed to represent
-		 * 2038+. I.e. we can safely do this truncating cast.
+		 * 2106+. I.e. we can safely do this truncating cast.
 		 *
 		 * Additionally, we must use be32toh() instead of be64toh(), since
 		 * the program used to use the former (tested - I got mtime
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
deleted file mode 100644
index c2dd1951559f..000000000000
--- a/arch/um/drivers/daemon.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __DAEMON_H__
-#define __DAEMON_H__
-
-#include <net_user.h>
-
-#define SWITCH_VERSION 3
-
-struct daemon_data {
-	char *sock_type;
-	char *ctl_sock;
-	void *ctl_addr;
-	void *data_addr;
-	void *local_addr;
-	int fd;
-	int control;
-	void *dev;
-};
-
-extern const struct net_user_info daemon_user_info;
-
-extern int daemon_user_write(int fd, void *buf, int len,
-			     struct daemon_data *pri);
-
-#endif
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
deleted file mode 100644
index 7568cc2f3cd6..000000000000
--- a/arch/um/drivers/daemon_kern.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "daemon.h"
-
-struct daemon_init {
-	char *sock_type;
-	char *ctl_sock;
-};
-
-static void daemon_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct daemon_data *dpri;
-	struct daemon_init *init = data;
-
-	pri = netdev_priv(dev);
-	dpri = (struct daemon_data *) pri->user;
-	dpri->sock_type = init->sock_type;
-	dpri->ctl_sock = init->ctl_sock;
-	dpri->fd = -1;
-	dpri->control = -1;
-	dpri->dev = dev;
-	/* We will free this pointer. If it contains crap we're burned. */
-	dpri->ctl_addr = NULL;
-	dpri->data_addr = NULL;
-	dpri->local_addr = NULL;
-
-	printk("daemon backend (uml_switch version %d) - %s:%s",
-	       SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock);
-	printk("\n");
-}
-
-static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return net_recvfrom(fd, skb_mac_header(skb),
-			    skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return daemon_user_write(fd, skb->data, skb->len,
-				 (struct daemon_data *) &lp->user);
-}
-
-static const struct net_kern_info daemon_kern_info = {
-	.init			= daemon_init,
-	.protocol		= eth_protocol,
-	.read			= daemon_read,
-	.write			= daemon_write,
-};
-
-static int daemon_setup(char *str, char **mac_out, void *data)
-{
-	struct daemon_init *init = data;
-	char *remain;
-
-	*init = ((struct daemon_init)
-		{ .sock_type 		= "unix",
-		  .ctl_sock 		= "/tmp/uml.ctl" });
-
-	remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock,
-			       NULL);
-	if (remain != NULL)
-		printk(KERN_WARNING "daemon_setup : Ignoring data socket "
-		       "specification\n");
-
-	return 1;
-}
-
-static struct transport daemon_transport = {
-	.list 		= LIST_HEAD_INIT(daemon_transport.list),
-	.name 		= "daemon",
-	.setup  	= daemon_setup,
-	.user 		= &daemon_user_info,
-	.kern 		= &daemon_kern_info,
-	.private_size 	= sizeof(struct daemon_data),
-	.setup_size 	= sizeof(struct daemon_init),
-};
-
-static int register_daemon(void)
-{
-	register_transport(&daemon_transport);
-	return 0;
-}
-
-late_initcall(register_daemon);
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
deleted file mode 100644
index 8813c10d0177..000000000000
--- a/arch/um/drivers/daemon_user.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
- */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include "daemon.h"
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-enum request_type { REQ_NEW_CONTROL };
-
-#define SWITCH_MAGIC 0xfeedface
-
-struct request_v3 {
-	uint32_t magic;
-	uint32_t version;
-	enum request_type type;
-	struct sockaddr_un sock;
-};
-
-static struct sockaddr_un *new_addr(void *name, int len)
-{
-	struct sockaddr_un *sun;
-
-	sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
-	if (sun == NULL) {
-		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
-		       "failed\n");
-		return NULL;
-	}
-	sun->sun_family = AF_UNIX;
-	memcpy(sun->sun_path, name, len);
-	return sun;
-}
-
-static int connect_to_switch(struct daemon_data *pri)
-{
-	struct sockaddr_un *ctl_addr = pri->ctl_addr;
-	struct sockaddr_un *local_addr = pri->local_addr;
-	struct sockaddr_un *sun;
-	struct request_v3 req;
-	int fd, n, err;
-
-	pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (pri->control < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "daemon_open : control socket failed, "
-		       "errno = %d\n", -err);
-		return err;
-	}
-
-	if (connect(pri->control, (struct sockaddr *) ctl_addr,
-		   sizeof(*ctl_addr)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "daemon_open : control connect failed, "
-		       "errno = %d\n", -err);
-		goto out;
-	}
-
-	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if (fd < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "daemon_open : data socket failed, "
-		       "errno = %d\n", -err);
-		goto out;
-	}
-	if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "daemon_open : data bind failed, "
-		       "errno = %d\n", -err);
-		goto out_close;
-	}
-
-	sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
-	if (sun == NULL) {
-		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
-		       "failed\n");
-		err = -ENOMEM;
-		goto out_close;
-	}
-
-	req.magic = SWITCH_MAGIC;
-	req.version = SWITCH_VERSION;
-	req.type = REQ_NEW_CONTROL;
-	req.sock = *local_addr;
-	n = write(pri->control, &req, sizeof(req));
-	if (n != sizeof(req)) {
-		printk(UM_KERN_ERR "daemon_open : control setup request "
-		       "failed, err = %d\n", -errno);
-		err = -ENOTCONN;
-		goto out_free;
-	}
-
-	n = read(pri->control, sun, sizeof(*sun));
-	if (n != sizeof(*sun)) {
-		printk(UM_KERN_ERR "daemon_open : read of data socket failed, "
-		       "err = %d\n", -errno);
-		err = -ENOTCONN;
-		goto out_free;
-	}
-
-	pri->data_addr = sun;
-	return fd;
-
- out_free:
-	kfree(sun);
- out_close:
-	close(fd);
- out:
-	close(pri->control);
-	return err;
-}
-
-static int daemon_user_init(void *data, void *dev)
-{
-	struct daemon_data *pri = data;
-	struct timeval tv;
-	struct {
-		char zero;
-		int pid;
-		int usecs;
-	} name;
-
-	if (!strcmp(pri->sock_type, "unix"))
-		pri->ctl_addr = new_addr(pri->ctl_sock,
-					 strlen(pri->ctl_sock) + 1);
-	name.zero = 0;
-	name.pid = os_getpid();
-	gettimeofday(&tv, NULL);
-	name.usecs = tv.tv_usec;
-	pri->local_addr = new_addr(&name, sizeof(name));
-	pri->dev = dev;
-	pri->fd = connect_to_switch(pri);
-	if (pri->fd < 0) {
-		kfree(pri->local_addr);
-		pri->local_addr = NULL;
-		return pri->fd;
-	}
-
-	return 0;
-}
-
-static int daemon_open(void *data)
-{
-	struct daemon_data *pri = data;
-	return pri->fd;
-}
-
-static void daemon_remove(void *data)
-{
-	struct daemon_data *pri = data;
-
-	close(pri->fd);
-	pri->fd = -1;
-	close(pri->control);
-	pri->control = -1;
-
-	kfree(pri->data_addr);
-	pri->data_addr = NULL;
-	kfree(pri->ctl_addr);
-	pri->ctl_addr = NULL;
-	kfree(pri->local_addr);
-	pri->local_addr = NULL;
-}
-
-int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri)
-{
-	struct sockaddr_un *data_addr = pri->data_addr;
-
-	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info daemon_user_info = {
-	.init		= daemon_user_init,
-	.open		= daemon_open,
-	.close	 	= NULL,
-	.remove	 	= daemon_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index a13a427b996b..082d739dc052 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h
new file mode 100644
index 000000000000..6d9ea60e7133
--- /dev/null
+++ b/arch/um/drivers/harddog.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UM_WATCHDOG_H
+#define UM_WATCHDOG_H
+
+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
+void stop_watchdog(int in_fd, int out_fd);
+int ping_watchdog(int fd);
+
+#endif /* UM_WATCHDOG_H */
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 2d0266d0254d..819aabb4ecdc 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -45,9 +45,11 @@
 #include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "mconsole.h"
+#include "harddog.h"
 
+MODULE_DESCRIPTION("UML hardware watchdog");
 MODULE_LICENSE("GPL");
 
 static DEFINE_MUTEX(harddog_mutex);
@@ -60,8 +62,6 @@ static int harddog_out_fd = -1;
  *	Allow only one person to hold it open
  */
 
-extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
-
 static int harddog_open(struct inode *inode, struct file *file)
 {
 	int err = -EBUSY;
@@ -85,15 +85,13 @@ static int harddog_open(struct inode *inode, struct file *file)
 	timer_alive = 1;
 	spin_unlock(&lock);
 	mutex_unlock(&harddog_mutex);
-	return nonseekable_open(inode, file);
+	return stream_open(inode, file);
 err:
 	spin_unlock(&lock);
 	mutex_unlock(&harddog_mutex);
 	return err;
 }
 
-extern void stop_watchdog(int in_fd, int out_fd);
-
 static int harddog_release(struct inode *inode, struct file *file)
 {
 	/*
@@ -112,8 +110,6 @@ static int harddog_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-extern int ping_watchdog(int fd);
-
 static ssize_t harddog_write(struct file *file, const char __user *data, size_t len,
 			     loff_t *ppos)
 {
@@ -165,9 +161,9 @@ static const struct file_operations harddog_fops = {
 	.owner		= THIS_MODULE,
 	.write		= harddog_write,
 	.unlocked_ioctl	= harddog_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.open		= harddog_open,
 	.release	= harddog_release,
-	.llseek		= no_llseek,
 };
 
 static struct miscdevice harddog_miscdev = {
@@ -175,27 +171,4 @@ static struct miscdevice harddog_miscdev = {
 	.name		= "watchdog",
 	.fops		= &harddog_fops,
 };
-
-static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n";
-
-static int __init harddog_init(void)
-{
-	int ret;
-
-	ret = misc_register(&harddog_miscdev);
-
-	if (ret)
-		return ret;
-
-	printk(banner);
-
-	return 0;
-}
-
-static void __exit harddog_exit(void)
-{
-	misc_deregister(&harddog_miscdev);
-}
-
-module_init(harddog_init);
-module_exit(harddog_exit);
+module_misc_device(harddog_miscdev);
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index f99b32a4dbff..9ed89304975e 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -1,16 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
 #include <os.h>
+#include "harddog.h"
 
 struct dog_data {
-	int stdin;
-	int stdout;
+	int stdin_fd;
+	int stdout_fd;
 	int close_me[2];
 };
 
@@ -18,11 +19,11 @@ static void pre_exec(void *d)
 {
 	struct dog_data *data = d;
 
-	dup2(data->stdin, 0);
-	dup2(data->stdout, 1);
-	dup2(data->stdout, 2);
-	close(data->stdin);
-	close(data->stdout);
+	dup2(data->stdin_fd, 0);
+	dup2(data->stdout_fd, 1);
+	dup2(data->stdout_fd, 2);
+	close(data->stdin_fd);
+	close(data->stdout_fd);
 	close(data->close_me[0]);
 	close(data->close_me[1]);
 }
@@ -49,8 +50,8 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
 		goto out_close_in;
 	}
 
-	data.stdin = out_fds[0];
-	data.stdout = in_fds[1];
+	data.stdin_fd = out_fds[0];
+	data.stdout_fd = in_fds[1];
 	data.close_me[0] = out_fds[1];
 	data.close_me[1] = in_fds[0];
 
diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c
new file mode 100644
index 000000000000..c74d4b815d14
--- /dev/null
+++ b/arch/um/drivers/harddog_user_exp.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include "harddog.h"
+
+#if IS_MODULE(CONFIG_UML_WATCHDOG)
+EXPORT_SYMBOL(start_watchdog);
+EXPORT_SYMBOL(stop_watchdog);
+EXPORT_SYMBOL(ping_watchdog);
+#endif
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 9b90fdc4b151..0ac149de1ac0 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 Steve Schmidtke
- * Licensed under the GPL
  */
 
 #include <linux/fs.h>
@@ -9,7 +9,7 @@
 #include <linux/sound.h>
 #include <linux/soundcard.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <init.h>
 #include <os.h>
 
@@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP);
 #ifndef MODULE
 static int set_dsp(char *name, int *add)
 {
+	*add = 0;
 	dsp = name;
 	return 0;
 }
@@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP);
 
 static int set_mixer(char *name, int *add)
 {
+	*add = 0;
 	mixer = name;
 	return 0;
 }
@@ -105,13 +107,9 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer,
 	printk(KERN_DEBUG "hostaudio: write called, count = %d\n", count);
 #endif
 
-	kbuf = kmalloc(count, GFP_KERNEL);
-	if (kbuf == NULL)
-		return -ENOMEM;
-
-	err = -EFAULT;
-	if (copy_from_user(kbuf, buffer, count))
-		goto out;
+	kbuf = memdup_user(buffer, count);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
 
 	err = os_write_file(state->fd, kbuf, count);
 	if (err < 0)
@@ -123,16 +121,14 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer,
 	return err;
 }
 
-static unsigned int hostaudio_poll(struct file *file,
-				   struct poll_table_struct *wait)
+static __poll_t hostaudio_poll(struct file *file,
+				struct poll_table_struct *wait)
 {
-	unsigned int mask = 0;
-
 #ifdef DEBUG
 	printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n");
 #endif
 
-	return mask;
+	return 0;
 }
 
 static long hostaudio_ioctl(struct file *file,
@@ -185,9 +181,9 @@ static int hostaudio_open(struct inode *inode, struct file *file)
 	int ret;
 
 #ifdef DEBUG
-	kparam_block_sysfs_write(dsp);
+	kernel_param_lock(THIS_MODULE);
 	printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
-	kparam_unblock_sysfs_write(dsp);
+	kernel_param_unlock(THIS_MODULE);
 #endif
 
 	state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
@@ -199,11 +195,11 @@ static int hostaudio_open(struct inode *inode, struct file *file)
 	if (file->f_mode & FMODE_WRITE)
 		w = 1;
 
-	kparam_block_sysfs_write(dsp);
+	kernel_param_lock(THIS_MODULE);
 	mutex_lock(&hostaudio_mutex);
 	ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
 	mutex_unlock(&hostaudio_mutex);
-	kparam_unblock_sysfs_write(dsp);
+	kernel_param_unlock(THIS_MODULE);
 
 	if (ret < 0) {
 		kfree(state);
@@ -260,17 +256,17 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
 	if (file->f_mode & FMODE_WRITE)
 		w = 1;
 
-	kparam_block_sysfs_write(mixer);
+	kernel_param_lock(THIS_MODULE);
 	mutex_lock(&hostaudio_mutex);
 	ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
 	mutex_unlock(&hostaudio_mutex);
-	kparam_unblock_sysfs_write(mixer);
+	kernel_param_unlock(THIS_MODULE);
 
 	if (ret < 0) {
-		kparam_block_sysfs_write(dsp);
+		kernel_param_lock(THIS_MODULE);
 		printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
 		       "err = %d\n", dsp, -ret);
-		kparam_unblock_sysfs_write(dsp);
+		kernel_param_unlock(THIS_MODULE);
 		kfree(state);
 		return ret;
 	}
@@ -297,11 +293,11 @@ static int hostmixer_release(struct inode *inode, struct file *file)
 
 static const struct file_operations hostaudio_fops = {
 	.owner          = THIS_MODULE,
-	.llseek         = no_llseek,
 	.read           = hostaudio_read,
 	.write          = hostaudio_write,
 	.poll           = hostaudio_poll,
 	.unlocked_ioctl	= hostaudio_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.mmap           = NULL,
 	.open           = hostaudio_open,
 	.release        = hostaudio_release,
@@ -309,13 +305,12 @@ static const struct file_operations hostaudio_fops = {
 
 static const struct file_operations hostmixer_fops = {
 	.owner          = THIS_MODULE,
-	.llseek         = no_llseek,
 	.unlocked_ioctl	= hostmixer_ioctl_mixdev,
 	.open           = hostmixer_open_mixdev,
 	.release        = hostmixer_release,
 };
 
-struct {
+static struct {
 	int dev_audio;
 	int dev_mixer;
 } module_data;
@@ -326,10 +321,10 @@ MODULE_LICENSE("GPL");
 
 static int __init hostaudio_init_module(void)
 {
-	__kernel_param_lock();
+	kernel_param_lock(THIS_MODULE);
 	printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
 	       dsp, mixer);
-	__kernel_param_unlock();
+	kernel_param_unlock(THIS_MODULE);
 
 	module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
 	if (module_data.dev_audio < 0) {
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 8035145f043b..43d8959cc746 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,12 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/irqreturn.h>
 #include <linux/kd.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
+
 #include "chan.h"
 #include <irq_kern.h>
 #include <irq_user.h>
@@ -31,7 +32,7 @@ static irqreturn_t line_interrupt(int irq, void *data)
  *
  * Should be called while holding line->lock (this does not modify data).
  */
-static int write_room(struct line *line)
+static unsigned int write_room(struct line *line)
 {
 	int n;
 
@@ -46,11 +47,11 @@ static int write_room(struct line *line)
 	return n - 1;
 }
 
-int line_write_room(struct tty_struct *tty)
+unsigned int line_write_room(struct tty_struct *tty)
 {
 	struct line *line = tty->driver_data;
 	unsigned long flags;
-	int room;
+	unsigned int room;
 
 	spin_lock_irqsave(&line->lock, flags);
 	room = write_room(line);
@@ -59,11 +60,11 @@ int line_write_room(struct tty_struct *tty)
 	return room;
 }
 
-int line_chars_in_buffer(struct tty_struct *tty)
+unsigned int line_chars_in_buffer(struct tty_struct *tty)
 {
 	struct line *line = tty->driver_data;
 	unsigned long flags;
-	int ret;
+	unsigned int ret;
 
 	spin_lock_irqsave(&line->lock, flags);
 	/* write_room subtracts 1 for the needed NULL, so we readd it.*/
@@ -82,7 +83,7 @@ int line_chars_in_buffer(struct tty_struct *tty)
  *
  * Must be called while holding line->lock!
  */
-static int buffer_data(struct line *line, const char *buf, int len)
+static int buffer_data(struct line *line, const u8 *buf, size_t len)
 {
 	int end, room;
 
@@ -138,7 +139,7 @@ static int flush_buffer(struct line *line)
 		count = line->buffer + LINE_BUFSIZE - line->head;
 
 		n = write_chan(line->chan_out, line->head, count,
-			       line->driver->write_irq);
+			       line->write_irq);
 		if (n < 0)
 			return n;
 		if (n == count) {
@@ -155,7 +156,7 @@ static int flush_buffer(struct line *line)
 
 	count = line->tail - line->head;
 	n = write_chan(line->chan_out, line->head, count,
-		       line->driver->write_irq);
+		       line->write_irq);
 
 	if (n < 0)
 		return n;
@@ -183,12 +184,7 @@ void line_flush_chars(struct tty_struct *tty)
 	line_flush_buffer(tty);
 }
 
-int line_put_char(struct tty_struct *tty, unsigned char ch)
-{
-	return line_write(tty, &ch, sizeof(ch));
-}
-
-int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
+ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len)
 {
 	struct line *line = tty->driver_data;
 	unsigned long flags;
@@ -199,7 +195,7 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
 		ret = buffer_data(line, buf, len);
 	else {
 		n = write_chan(line->chan_out, buf, len,
-			       line->driver->write_irq);
+			       line->write_irq);
 		if (n < 0) {
 			ret = n;
 			goto out_up;
@@ -215,16 +211,11 @@ out_up:
 	return ret;
 }
 
-void line_set_termios(struct tty_struct *tty, struct ktermios * old)
-{
-	/* nothing */
-}
-
 void line_throttle(struct tty_struct *tty)
 {
 	struct line *line = tty->driver_data;
 
-	deactivate_chan(line->chan_in, line->driver->read_irq);
+	deactivate_chan(line->chan_in, line->read_irq);
 	line->throttled = 1;
 }
 
@@ -233,15 +224,7 @@ void line_unthrottle(struct tty_struct *tty)
 	struct line *line = tty->driver_data;
 
 	line->throttled = 0;
-	chan_interrupt(line, line->driver->read_irq);
-
-	/*
-	 * Maybe there is enough stuff pending that calling the interrupt
-	 * throttles us again.  In this case, line->throttled will be 1
-	 * again and we shouldn't turn the interrupt back on.
-	 */
-	if (!line->throttled)
-		reactivate_chan(line->chan_in, line->driver->read_irq);
+	chan_interrupt(line, line->read_irq);
 }
 
 static irqreturn_t line_write_interrupt(int irq, void *data)
@@ -260,7 +243,7 @@ static irqreturn_t line_write_interrupt(int irq, void *data)
 	if (err == 0) {
 		spin_unlock(&line->lock);
 		return IRQ_NONE;
-	} else if (err < 0) {
+	} else if ((err < 0) && (err != -EAGAIN)) {
 		line->head = line->buffer;
 		line->tail = line->buffer;
 	}
@@ -274,19 +257,29 @@ static irqreturn_t line_write_interrupt(int irq, void *data)
 int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 {
 	const struct line_driver *driver = line->driver;
-	int err = 0;
+	int err;
 
-	if (input)
-		err = um_request_irq(driver->read_irq, fd, IRQ_READ,
-				     line_interrupt, IRQF_SHARED,
+	if (input) {
+		err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ,
+				     line_interrupt, 0,
 				     driver->read_irq_name, data);
-	if (err)
-		return err;
-	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
-				     line_write_interrupt, IRQF_SHARED,
+		if (err < 0)
+			return err;
+
+		line->read_irq = err;
+	}
+
+	if (output) {
+		err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE,
+				     line_write_interrupt, 0,
 				     driver->write_irq_name, data);
-	return err;
+		if (err < 0)
+			return err;
+
+		line->write_irq = err;
+	}
+
+	return 0;
 }
 
 static int line_activate(struct tty_port *port, struct tty_struct *tty)
@@ -390,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init,
 			parse_chan_pair(NULL, line, n, opts, error_out);
 			err = 0;
 		}
+		*error_out = "configured as 'none'";
 	} else {
 		char *new = kstrdup(init, GFP_KERNEL);
 		if (!new) {
@@ -413,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init,
 			}
 		}
 		if (err) {
+			*error_out = "failed to parse channel pair";
 			line->init_str = NULL;
 			line->valid = 0;
 			kfree(new);
@@ -549,12 +544,14 @@ int register_lines(struct line_driver *line_driver,
 		   const struct tty_operations *ops,
 		   struct line *lines, int nlines)
 {
-	struct tty_driver *driver = alloc_tty_driver(nlines);
+	struct tty_driver *driver;
 	int err;
 	int i;
 
-	if (!driver)
-		return -ENOMEM;
+	driver = tty_alloc_driver(nlines, TTY_DRIVER_REAL_RAW |
+			TTY_DRIVER_DYNAMIC_DEV);
+	if (IS_ERR(driver))
+		return PTR_ERR(driver);
 
 	driver->driver_name = line_driver->name;
 	driver->name = line_driver->device_name;
@@ -562,9 +559,8 @@ int register_lines(struct line_driver *line_driver,
 	driver->minor_start = line_driver->minor_start;
 	driver->type = line_driver->type;
 	driver->subtype = line_driver->subtype;
-	driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	driver->init_termios = tty_std_termios;
-	
+
 	for (i = 0; i < nlines; i++) {
 		tty_port_init(&lines[i].port);
 		lines[i].port.ops = &line_port_ops;
@@ -578,7 +574,7 @@ int register_lines(struct line_driver *line_driver,
 	if (err) {
 		printk(KERN_ERR "register_lines : can't register %s driver\n",
 		       line_driver->name);
-		put_tty_driver(driver);
+		tty_driver_kref_put(driver);
 		for (i = 0; i < nlines; i++)
 			tty_port_destroy(&lines[i].port);
 		return err;
@@ -620,7 +616,6 @@ static void free_winch(struct winch *winch)
 	winch->fd = -1;
 	if (fd != -1)
 		os_close_file(fd);
-	list_del(&winch->list);
 	__free_winch(&winch->work);
 }
 
@@ -632,18 +627,22 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 	int fd = winch->fd;
 	int err;
 	char c;
+	struct pid *pgrp;
 
 	if (fd != -1) {
 		err = generic_read(fd, &c, NULL);
-		if (err < 0) {
+		/* A read of 2 means the winch thread failed and has warned */
+		if (err < 0 || (err == 1 && c == 2)) {
 			if (err != -EAGAIN) {
 				winch->fd = -1;
 				list_del(&winch->list);
 				os_close_file(fd);
-				printk(KERN_ERR "winch_interrupt : "
-				       "read failed, errno = %d\n", -err);
-				printk(KERN_ERR "fd %d is losing SIGWINCH "
-				       "support\n", winch->tty_fd);
+				if (err < 0) {
+					printk(KERN_ERR "winch_interrupt : read failed, errno = %d\n",
+					       -err);
+					printk(KERN_ERR "fd %d is losing SIGWINCH support\n",
+					       winch->tty_fd);
+				}
 				INIT_WORK(&winch->work, __free_winch);
 				schedule_work(&winch->work);
 				return IRQ_HANDLED;
@@ -657,13 +656,14 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 		if (line != NULL) {
 			chan_window_size(line, &tty->winsize.ws_row,
 					 &tty->winsize.ws_col);
-			kill_pgrp(tty->pgrp, SIGWINCH, 1);
+			pgrp = tty_get_pgrp(tty);
+			if (pgrp)
+				kill_pgrp(pgrp, SIGWINCH, 1);
+			put_pid(pgrp);
 		}
 		tty_kref_put(tty);
 	}
  out:
-	if (winch->fd != -1)
-		reactivate_fd(winch->fd, WINCH_IRQ);
 	return IRQ_HANDLED;
 }
 
@@ -678,24 +678,26 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port,
 		goto cleanup;
 	}
 
-	*winch = ((struct winch) { .list  	= LIST_HEAD_INIT(winch->list),
-				   .fd  	= fd,
+	*winch = ((struct winch) { .fd  	= fd,
 				   .tty_fd 	= tty_fd,
 				   .pid  	= pid,
 				   .port 	= port,
 				   .stack	= stack });
 
+	spin_lock(&winch_handler_lock);
+	list_add(&winch->list, &winch_handlers);
+	spin_unlock(&winch_handler_lock);
+
 	if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt,
 			   IRQF_SHARED, "winch", winch) < 0) {
 		printk(KERN_ERR "register_winch_irq - failed to register "
 		       "IRQ\n");
+		spin_lock(&winch_handler_lock);
+		list_del(&winch->list);
+		spin_unlock(&winch_handler_lock);
 		goto out_free;
 	}
 
-	spin_lock(&winch_handler_lock);
-	list_add(&winch->list, &winch_handlers);
-	spin_unlock(&winch_handler_lock);
-
 	return;
 
  out_free:
@@ -719,6 +721,8 @@ static void unregister_winch(struct tty_struct *tty)
 		winch = list_entry(ele, struct winch, list);
 		wtty = tty_port_tty_get(winch->port);
 		if (wtty == tty) {
+			list_del(&winch->list);
+			spin_unlock(&winch_handler_lock);
 			free_winch(winch);
 			break;
 		}
@@ -729,14 +733,17 @@ static void unregister_winch(struct tty_struct *tty)
 
 static void winch_cleanup(void)
 {
-	struct list_head *ele, *next;
 	struct winch *winch;
 
 	spin_lock(&winch_handler_lock);
+	while ((winch = list_first_entry_or_null(&winch_handlers,
+						 struct winch, list))) {
+		list_del(&winch->list);
+		spin_unlock(&winch_handler_lock);
 
-	list_for_each_safe(ele, next, &winch_handlers) {
-		winch = list_entry(ele, struct winch, list);
 		free_winch(winch);
+
+		spin_lock(&winch_handler_lock);
 	}
 
 	spin_unlock(&winch_handler_lock);
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index 138a14526d9c..e8bd6f3dfb50 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __LINE_H__
@@ -23,9 +23,7 @@ struct line_driver {
 	const short minor_start;
 	const short type;
 	const short subtype;
-	const int read_irq;
 	const char *read_irq_name;
-	const int write_irq;
 	const char *write_irq_name;
 	struct mc_device mc;
 	struct tty_driver *driver;
@@ -35,6 +33,8 @@ struct line {
 	struct tty_port port;
 	int valid;
 
+	int read_irq, write_irq;
+
 	char *init_str;
 	struct list_head chan_list;
 	struct chan *chan_in, *chan_out;
@@ -47,9 +47,9 @@ struct line {
 	 *
 	 * buffer points to a buffer allocated on demand, of length
 	 * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/
-	char *buffer;
-	char *head;
-	char *tail;
+	u8 *buffer;
+	u8 *head;
+	u8 *tail;
 
 	int sigio;
 	struct delayed_work task;
@@ -64,14 +64,11 @@ extern void line_cleanup(struct tty_struct *tty);
 extern void line_hangup(struct tty_struct *tty);
 extern int line_setup(char **conf, unsigned nlines, char **def,
 		      char *init, char *name);
-extern int line_write(struct tty_struct *tty, const unsigned char *buf,
-		      int len);
-extern int line_put_char(struct tty_struct *tty, unsigned char ch);
-extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
-extern int line_chars_in_buffer(struct tty_struct *tty);
+extern ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len);
+extern unsigned int line_chars_in_buffer(struct tty_struct *tty);
 extern void line_flush_buffer(struct tty_struct *tty);
 extern void line_flush_chars(struct tty_struct *tty);
-extern int line_write_room(struct tty_struct *tty);
+extern unsigned int line_write_room(struct tty_struct *tty);
 extern void line_throttle(struct tty_struct *tty);
 extern void line_unthrottle(struct tty_struct *tty);
 
diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h
index 8b22535c62ce..6356378304fd 100644
--- a/arch/um/drivers/mconsole.h
+++ b/arch/um/drivers/mconsole.h
@@ -1,13 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __MCONSOLE_H__
 #define __MCONSOLE_H__
 
-#ifndef __KERNEL__
+#ifdef __UM_HOST__
 #include <stdint.h>
 #define u32 uint32_t
 #endif
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 3df3bd544492..ff4bda95b9c7 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/console.h>
@@ -12,7 +12,9 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/panic_notifier.h>
 #include <linux/reboot.h>
+#include <linux/sched/debug.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
@@ -24,7 +26,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/switch_to.h>
 
 #include <init.h>
@@ -35,6 +37,8 @@
 #include "mconsole_kern.h"
 #include <os.h>
 
+static struct vfsmount *proc_mnt = NULL;
+
 static int do_unlink_socket(struct notifier_block *notifier,
 			    unsigned long what, void *data)
 {
@@ -95,7 +99,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
 	}
 	if (!list_empty(&mc_requests))
 		schedule_work(&mconsole_work);
-	reactivate_fd(fd, MCONSOLE_IRQ);
 	return IRQ_HANDLED;
 }
 
@@ -123,17 +126,22 @@ void mconsole_log(struct mc_request *req)
 
 void mconsole_proc(struct mc_request *req)
 {
-	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
+	struct vfsmount *mnt = proc_mnt;
 	char *buf;
 	int len;
 	struct file *file;
 	int first_chunk = 1;
 	char *ptr = req->request.data;
+	loff_t pos = 0;
 
 	ptr += strlen("proc");
 	ptr = skip_spaces(ptr);
 
-	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
+	if (!mnt) {
+		mconsole_reply(req, "Proc not available", 1, 0);
+		goto out;
+	}
+	file = file_open_root_mnt(mnt, ptr, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));
@@ -147,12 +155,7 @@ void mconsole_proc(struct mc_request *req)
 	}
 
 	do {
-		loff_t pos = file->f_pos;
-		mm_segment_t old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		len = vfs_read(file, buf, PAGE_SIZE - 1, &pos);
-		set_fs(old_fs);
-		file->f_pos = pos;
+		len = kernel_read(file, buf, PAGE_SIZE - 1, &pos);
 		if (len < 0) {
 			mconsole_reply(req, "Read of file failed", 1, 0);
 			goto out_free;
@@ -221,7 +224,7 @@ void mconsole_go(struct mc_request *req)
 
 void mconsole_stop(struct mc_request *req)
 {
-	deactivate_fd(req->originating_fd, MCONSOLE_IRQ);
+	block_signals();
 	os_set_fd_block(req->originating_fd, 1);
 	mconsole_reply(req, "stopped", 0, 0);
 	for (;;) {
@@ -243,8 +246,8 @@ void mconsole_stop(struct mc_request *req)
 		(*req->cmd->handler)(req);
 	}
 	os_set_fd_block(req->originating_fd, 0);
-	reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
 	mconsole_reply(req, "", 0, 0);
+	unblock_signals();
 }
 
 static DEFINE_SPINLOCK(mc_devices_lock);
@@ -280,7 +283,7 @@ struct unplugged_pages {
 };
 
 static DEFINE_MUTEX(plug_mem_mutex);
-static unsigned long long unplugged_pages_count = 0;
+static unsigned long long unplugged_pages_count;
 static LIST_HEAD(unplugged_pages);
 static int unplug_index = UNPLUGGED_PER_PAGE;
 
@@ -551,7 +554,7 @@ struct mconsole_output {
 
 static DEFINE_SPINLOCK(client_lock);
 static LIST_HEAD(clients);
-static char console_buf[MCONSOLE_MAX_DATA];
+static char console_buf[MCONSOLE_MAX_DATA] __nonstring;
 
 static void console_write(struct console *console, const char *string,
 			  unsigned int len)
@@ -564,7 +567,7 @@ static void console_write(struct console *console, const char *string,
 
 	while (len > 0) {
 		n = min((size_t) len, ARRAY_SIZE(console_buf));
-		strncpy(console_buf, string, n);
+		memcpy(console_buf, string, n);
 		string += n;
 		len -= n;
 
@@ -645,11 +648,9 @@ void mconsole_sysrq(struct mc_request *req)
 
 static void stack_proc(void *arg)
 {
-	struct task_struct *from = current, *to = arg;
+	struct task_struct *task = arg;
 
-	to->thread.saved_task = from;
-	rcu_user_hooks_switch(from, to);
-	switch_to(from, to, from);
+	show_stack(task, NULL, KERN_INFO);
 }
 
 /*
@@ -690,6 +691,24 @@ void mconsole_stack(struct mc_request *req)
 	with_console(req, stack_proc, to);
 }
 
+static int __init mount_proc(void)
+{
+	struct file_system_type *proc_fs_type;
+	struct vfsmount *mnt;
+
+	proc_fs_type = get_fs_type("proc");
+	if (!proc_fs_type)
+		return -ENODEV;
+
+	mnt = kern_mount(proc_fs_type);
+	put_filesystem(proc_fs_type);
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+
+	proc_mnt = mnt;
+	return 0;
+}
+
 /*
  * Changed by mconsole_setup, which is __setup, and called before SMP is
  * active.
@@ -703,6 +722,8 @@ static int __init mconsole_init(void)
 	int err;
 	char file[UNIX_PATH_MAX];
 
+	mount_proc();
+
 	if (umid_file_name("mconsole", file, sizeof(file)))
 		return -1;
 	snprintf(mconsole_socket_name, sizeof(file), "%s", file);
@@ -719,7 +740,7 @@ static int __init mconsole_init(void)
 
 	err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt,
 			     IRQF_SHARED, "mconsole", (void *)sock);
-	if (err) {
+	if (err < 0) {
 		printk(KERN_ERR "Failed to get IRQ for management console\n");
 		goto out;
 	}
@@ -750,27 +771,18 @@ static ssize_t mconsole_proc_write(struct file *file,
 {
 	char *buf;
 
-	buf = kmalloc(count + 1, GFP_KERNEL);
-	if (buf == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(buf, buffer, count)) {
-		count = -EFAULT;
-		goto out;
-	}
-
-	buf[count] = '\0';
+	buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
 
 	mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count);
- out:
 	kfree(buf);
 	return count;
 }
 
-static const struct file_operations mconsole_proc_fops = {
-	.owner		= THIS_MODULE,
-	.write		= mconsole_proc_write,
-	.llseek		= noop_llseek,
+static const struct proc_ops mconsole_proc_ops = {
+	.proc_write	= mconsole_proc_write,
+	.proc_lseek	= noop_llseek,
 };
 
 static int create_proc_mconsole(void)
@@ -780,7 +792,7 @@ static int create_proc_mconsole(void)
 	if (notify_socket == NULL)
 		return 0;
 
-	ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_fops);
+	ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops);
 	if (ent == NULL) {
 		printk(KERN_INFO "create_proc_mconsole : proc_create failed\n");
 		return 0;
@@ -834,13 +846,12 @@ static int notify_panic(struct notifier_block *self, unsigned long unused1,
 
 	mconsole_notify(notify_socket, MCONSOLE_PANIC, message,
 			strlen(message) + 1);
-	return 0;
+	return NOTIFY_DONE;
 }
 
 static struct notifier_block panic_exit_notifier = {
-	.notifier_call 		= notify_panic,
-	.next 			= NULL,
-	.priority 		= 1
+	.notifier_call	= notify_panic,
+	.priority	= INT_MAX, /* run as soon as possible */
 };
 
 static int add_notifier(void)
diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
index 7a0c6a1ad1d4..56d8d6a3ff76 100644
--- a/arch/um/drivers/mconsole_kern.h
+++ b/arch/um/drivers/mconsole_kern.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __MCONSOLE_KERN_H__
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 99209826adb1..a04cd13c6315 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
@@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req)
 	return NULL;
 }
 
+#ifndef MIN
 #define MIN(a,b) ((a)<(b) ? (a):(b))
+#endif
 
 #define STRINGX(x) #x
 #define STRING(x) STRINGX(x)
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
deleted file mode 100644
index 62145c276167..000000000000
--- a/arch/um/drivers/mmapper_kern.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * arch/um/drivers/mmapper_kern.c
- *
- * BRIEF MODULE DESCRIPTION
- *
- * Copyright (C) 2000 RidgeRun, Inc.
- * Author: RidgeRun, Inc.
- *         Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com
- *
- */
-
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-
-#include <asm/uaccess.h>
-#include <mem_user.h>
-
-/* These are set in mmapper_init, which is called at boot time */
-static unsigned long mmapper_size;
-static unsigned long p_buf;
-static char *v_buf;
-
-static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
-{
-	return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size);
-}
-
-static ssize_t mmapper_write(struct file *file, const char __user *buf,
-			     size_t count, loff_t *ppos)
-{
-	if (*ppos > mmapper_size)
-		return -EINVAL;
-
-	return simple_write_to_buffer(v_buf, mmapper_size, ppos, buf, count);
-}
-
-static long mmapper_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	return -ENOIOCTLCMD;
-}
-
-static int mmapper_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	int ret = -EINVAL;
-	int size;
-
-	if (vma->vm_pgoff != 0)
-		goto out;
-
-	size = vma->vm_end - vma->vm_start;
-	if (size > mmapper_size)
-		return -EFAULT;
-
-	/*
-	 * XXX A comment above remap_pfn_range says it should only be
-	 * called when the mm semaphore is held
-	 */
-	if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size,
-			    vma->vm_page_prot))
-		goto out;
-	ret = 0;
-out:
-	return ret;
-}
-
-static int mmapper_open(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-static int mmapper_release(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-static const struct file_operations mmapper_fops = {
-	.owner		= THIS_MODULE,
-	.read		= mmapper_read,
-	.write		= mmapper_write,
-	.unlocked_ioctl	= mmapper_ioctl,
-	.mmap		= mmapper_mmap,
-	.open		= mmapper_open,
-	.release	= mmapper_release,
-	.llseek		= default_llseek,
-};
-
-/*
- * No locking needed - only used (and modified) by below initcall and exitcall.
- */
-static struct miscdevice mmapper_dev = {
-	.minor		= MISC_DYNAMIC_MINOR,
-	.name		= "mmapper",
-	.fops		= &mmapper_fops
-};
-
-static int __init mmapper_init(void)
-{
-	int err;
-
-	printk(KERN_INFO "Mapper v0.1\n");
-
-	v_buf = (char *) find_iomem("mmapper", &mmapper_size);
-	if (mmapper_size == 0) {
-		printk(KERN_ERR "mmapper_init - find_iomem failed\n");
-		return -ENODEV;
-	}
-	p_buf = __pa(v_buf);
-
-	err = misc_register(&mmapper_dev);
-	if (err) {
-		printk(KERN_ERR "mmapper - misc_register failed, err = %d\n",
-		       err);
-		return err;
-	}
-	return 0;
-}
-
-static void mmapper_exit(void)
-{
-	misc_deregister(&mmapper_dev);
-}
-
-module_init(mmapper_init);
-module_exit(mmapper_exit);
-
-MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>");
-MODULE_DESCRIPTION("DSPLinux simulator mmapper driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
deleted file mode 100644
index 39f186252e02..000000000000
--- a/arch/um/drivers/net_kern.c
+++ /dev/null
@@ -1,915 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
- */
-
-#include <linux/bootmem.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/inetdevice.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/rtnetlink.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <init.h>
-#include <irq_kern.h>
-#include <irq_user.h>
-#include "mconsole_kern.h"
-#include <net_kern.h>
-#include <net_user.h>
-
-#define DRIVER_NAME "uml-netdev"
-
-static DEFINE_SPINLOCK(opened_lock);
-static LIST_HEAD(opened);
-
-/*
- * The drop_skb is used when we can't allocate an skb.  The
- * packet is read into drop_skb in order to get the data off the
- * connection to the host.
- * It is reallocated whenever a maximum packet size is seen which is
- * larger than any seen before.  update_drop_skb is called from
- * eth_configure when a new interface is added.
- */
-static DEFINE_SPINLOCK(drop_lock);
-static struct sk_buff *drop_skb;
-static int drop_max;
-
-static int update_drop_skb(int max)
-{
-	struct sk_buff *new;
-	unsigned long flags;
-	int err = 0;
-
-	spin_lock_irqsave(&drop_lock, flags);
-
-	if (max <= drop_max)
-		goto out;
-
-	err = -ENOMEM;
-	new = dev_alloc_skb(max);
-	if (new == NULL)
-		goto out;
-
-	skb_put(new, max);
-
-	kfree_skb(drop_skb);
-	drop_skb = new;
-	drop_max = max;
-	err = 0;
-out:
-	spin_unlock_irqrestore(&drop_lock, flags);
-
-	return err;
-}
-
-static int uml_net_rx(struct net_device *dev)
-{
-	struct uml_net_private *lp = netdev_priv(dev);
-	int pkt_len;
-	struct sk_buff *skb;
-
-	/* If we can't allocate memory, try again next round. */
-	skb = dev_alloc_skb(lp->max_packet);
-	if (skb == NULL) {
-		drop_skb->dev = dev;
-		/* Read a packet into drop_skb and don't do anything with it. */
-		(*lp->read)(lp->fd, drop_skb, lp);
-		dev->stats.rx_dropped++;
-		return 0;
-	}
-
-	skb->dev = dev;
-	skb_put(skb, lp->max_packet);
-	skb_reset_mac_header(skb);
-	pkt_len = (*lp->read)(lp->fd, skb, lp);
-
-	if (pkt_len > 0) {
-		skb_trim(skb, pkt_len);
-		skb->protocol = (*lp->protocol)(skb);
-
-		dev->stats.rx_bytes += skb->len;
-		dev->stats.rx_packets++;
-		netif_rx(skb);
-		return pkt_len;
-	}
-
-	kfree_skb(skb);
-	return pkt_len;
-}
-
-static void uml_dev_close(struct work_struct *work)
-{
-	struct uml_net_private *lp =
-		container_of(work, struct uml_net_private, work);
-	dev_close(lp->dev);
-}
-
-static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	struct uml_net_private *lp = netdev_priv(dev);
-	int err;
-
-	if (!netif_running(dev))
-		return IRQ_NONE;
-
-	spin_lock(&lp->lock);
-	while ((err = uml_net_rx(dev)) > 0) ;
-	if (err < 0) {
-		printk(KERN_ERR
-		       "Device '%s' read returned %d, shutting it down\n",
-		       dev->name, err);
-		/* dev_close can't be called in interrupt context, and takes
-		 * again lp->lock.
-		 * And dev_close() can be safely called multiple times on the
-		 * same device, since it tests for (dev->flags & IFF_UP). So
-		 * there's no harm in delaying the device shutdown.
-		 * Furthermore, the workqueue will not re-enqueue an already
-		 * enqueued work item. */
-		schedule_work(&lp->work);
-		goto out;
-	}
-	reactivate_fd(lp->fd, UM_ETH_IRQ);
-
-out:
-	spin_unlock(&lp->lock);
-	return IRQ_HANDLED;
-}
-
-static int uml_net_open(struct net_device *dev)
-{
-	struct uml_net_private *lp = netdev_priv(dev);
-	int err;
-
-	if (lp->fd >= 0) {
-		err = -ENXIO;
-		goto out;
-	}
-
-	lp->fd = (*lp->open)(&lp->user);
-	if (lp->fd < 0) {
-		err = lp->fd;
-		goto out;
-	}
-
-	err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
-			     IRQF_SHARED, dev->name, dev);
-	if (err != 0) {
-		printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
-		err = -ENETUNREACH;
-		goto out_close;
-	}
-
-	lp->tl.data = (unsigned long) &lp->user;
-	netif_start_queue(dev);
-
-	/* clear buffer - it can happen that the host side of the interface
-	 * is full when we get here.  In this case, new data is never queued,
-	 * SIGIOs never arrive, and the net never works.
-	 */
-	while ((err = uml_net_rx(dev)) > 0) ;
-
-	spin_lock(&opened_lock);
-	list_add(&lp->list, &opened);
-	spin_unlock(&opened_lock);
-
-	return 0;
-out_close:
-	if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
-	lp->fd = -1;
-out:
-	return err;
-}
-
-static int uml_net_close(struct net_device *dev)
-{
-	struct uml_net_private *lp = netdev_priv(dev);
-
-	netif_stop_queue(dev);
-
-	um_free_irq(dev->irq, dev);
-	if (lp->close != NULL)
-		(*lp->close)(lp->fd, &lp->user);
-	lp->fd = -1;
-
-	spin_lock(&opened_lock);
-	list_del(&lp->list);
-	spin_unlock(&opened_lock);
-
-	return 0;
-}
-
-static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct uml_net_private *lp = netdev_priv(dev);
-	unsigned long flags;
-	int len;
-
-	netif_stop_queue(dev);
-
-	spin_lock_irqsave(&lp->lock, flags);
-
-	len = (*lp->write)(lp->fd, skb, lp);
-	skb_tx_timestamp(skb);
-
-	if (len == skb->len) {
-		dev->stats.tx_packets++;
-		dev->stats.tx_bytes += skb->len;
-		dev->trans_start = jiffies;
-		netif_start_queue(dev);
-
-		/* this is normally done in the interrupt when tx finishes */
-		netif_wake_queue(dev);
-	}
-	else if (len == 0) {
-		netif_start_queue(dev);
-		dev->stats.tx_dropped++;
-	}
-	else {
-		netif_start_queue(dev);
-		printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len);
-	}
-
-	spin_unlock_irqrestore(&lp->lock, flags);
-
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static void uml_net_set_multicast_list(struct net_device *dev)
-{
-	return;
-}
-
-static void uml_net_tx_timeout(struct net_device *dev)
-{
-	dev->trans_start = jiffies;
-	netif_wake_queue(dev);
-}
-
-static int uml_net_change_mtu(struct net_device *dev, int new_mtu)
-{
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void uml_net_poll_controller(struct net_device *dev)
-{
-	disable_irq(dev->irq);
-	uml_net_interrupt(dev->irq, dev);
-	enable_irq(dev->irq);
-}
-#endif
-
-static void uml_net_get_drvinfo(struct net_device *dev,
-				struct ethtool_drvinfo *info)
-{
-	strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
-	strlcpy(info->version, "42", sizeof(info->version));
-}
-
-static const struct ethtool_ops uml_net_ethtool_ops = {
-	.get_drvinfo	= uml_net_get_drvinfo,
-	.get_link	= ethtool_op_get_link,
-	.get_ts_info	= ethtool_op_get_ts_info,
-};
-
-static void uml_net_user_timer_expire(unsigned long _conn)
-{
-#ifdef undef
-	struct connection *conn = (struct connection *)_conn;
-
-	dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn);
-	do_connect(conn);
-#endif
-}
-
-static void setup_etheraddr(struct net_device *dev, char *str)
-{
-	unsigned char *addr = dev->dev_addr;
-	char *end;
-	int i;
-
-	if (str == NULL)
-		goto random;
-
-	for (i = 0; i < 6; i++) {
-		addr[i] = simple_strtoul(str, &end, 16);
-		if ((end == str) ||
-		   ((*end != ':') && (*end != ',') && (*end != '\0'))) {
-			printk(KERN_ERR
-			       "setup_etheraddr: failed to parse '%s' "
-			       "as an ethernet address\n", str);
-			goto random;
-		}
-		str = end + 1;
-	}
-	if (is_multicast_ether_addr(addr)) {
-		printk(KERN_ERR
-		       "Attempt to assign a multicast ethernet address to a "
-		       "device disallowed\n");
-		goto random;
-	}
-	if (!is_valid_ether_addr(addr)) {
-		printk(KERN_ERR
-		       "Attempt to assign an invalid ethernet address to a "
-		       "device disallowed\n");
-		goto random;
-	}
-	if (!is_local_ether_addr(addr)) {
-		printk(KERN_WARNING
-		       "Warning: Assigning a globally valid ethernet "
-		       "address to a device\n");
-		printk(KERN_WARNING "You should set the 2nd rightmost bit in "
-		       "the first byte of the MAC,\n");
-		printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
-		       addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
-		       addr[5]);
-	}
-	return;
-
-random:
-	printk(KERN_INFO
-	       "Choosing a random ethernet address for device %s\n", dev->name);
-	eth_hw_addr_random(dev);
-}
-
-static DEFINE_SPINLOCK(devices_lock);
-static LIST_HEAD(devices);
-
-static struct platform_driver uml_net_driver = {
-	.driver = {
-		.name  = DRIVER_NAME,
-	},
-};
-
-static void net_device_release(struct device *dev)
-{
-	struct uml_net *device = dev_get_drvdata(dev);
-	struct net_device *netdev = device->dev;
-	struct uml_net_private *lp = netdev_priv(netdev);
-
-	if (lp->remove != NULL)
-		(*lp->remove)(&lp->user);
-	list_del(&device->list);
-	kfree(device);
-	free_netdev(netdev);
-}
-
-static const struct net_device_ops uml_netdev_ops = {
-	.ndo_open 		= uml_net_open,
-	.ndo_stop 		= uml_net_close,
-	.ndo_start_xmit 	= uml_net_start_xmit,
-	.ndo_set_rx_mode	= uml_net_set_multicast_list,
-	.ndo_tx_timeout 	= uml_net_tx_timeout,
-	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu 	= uml_net_change_mtu,
-	.ndo_validate_addr	= eth_validate_addr,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller = uml_net_poll_controller,
-#endif
-};
-
-/*
- * Ensures that platform_driver_register is called only once by
- * eth_configure.  Will be set in an initcall.
- */
-static int driver_registered;
-
-static void eth_configure(int n, void *init, char *mac,
-			  struct transport *transport)
-{
-	struct uml_net *device;
-	struct net_device *dev;
-	struct uml_net_private *lp;
-	int err, size;
-
-	size = transport->private_size + sizeof(struct uml_net_private);
-
-	device = kzalloc(sizeof(*device), GFP_KERNEL);
-	if (device == NULL) {
-		printk(KERN_ERR "eth_configure failed to allocate struct "
-		       "uml_net\n");
-		return;
-	}
-
-	dev = alloc_etherdev(size);
-	if (dev == NULL) {
-		printk(KERN_ERR "eth_configure: failed to allocate struct "
-		       "net_device for eth%d\n", n);
-		goto out_free_device;
-	}
-
-	INIT_LIST_HEAD(&device->list);
-	device->index = n;
-
-	/* If this name ends up conflicting with an existing registered
-	 * netdevice, that is OK, register_netdev{,ice}() will notice this
-	 * and fail.
-	 */
-	snprintf(dev->name, sizeof(dev->name), "eth%d", n);
-
-	setup_etheraddr(dev, mac);
-
-	printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
-
-	lp = netdev_priv(dev);
-	/* This points to the transport private data. It's still clear, but we
-	 * must memset it to 0 *now*. Let's help the drivers. */
-	memset(lp, 0, size);
-	INIT_WORK(&lp->work, uml_dev_close);
-
-	/* sysfs register */
-	if (!driver_registered) {
-		platform_driver_register(&uml_net_driver);
-		driver_registered = 1;
-	}
-	device->pdev.id = n;
-	device->pdev.name = DRIVER_NAME;
-	device->pdev.dev.release = net_device_release;
-	dev_set_drvdata(&device->pdev.dev, device);
-	if (platform_device_register(&device->pdev))
-		goto out_free_netdev;
-	SET_NETDEV_DEV(dev,&device->pdev.dev);
-
-	device->dev = dev;
-
-	/*
-	 * These just fill in a data structure, so there's no failure
-	 * to be worried about.
-	 */
-	(*transport->kern->init)(dev, init);
-
-	*lp = ((struct uml_net_private)
-		{ .list  		= LIST_HEAD_INIT(lp->list),
-		  .dev 			= dev,
-		  .fd 			= -1,
-		  .mac 			= { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
-		  .max_packet		= transport->user->max_packet,
-		  .protocol 		= transport->kern->protocol,
-		  .open 		= transport->user->open,
-		  .close 		= transport->user->close,
-		  .remove 		= transport->user->remove,
-		  .read 		= transport->kern->read,
-		  .write 		= transport->kern->write,
-		  .add_address 		= transport->user->add_address,
-		  .delete_address  	= transport->user->delete_address });
-
-	init_timer(&lp->tl);
-	spin_lock_init(&lp->lock);
-	lp->tl.function = uml_net_user_timer_expire;
-	memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
-
-	if ((transport->user->init != NULL) &&
-	    ((*transport->user->init)(&lp->user, dev) != 0))
-		goto out_unregister;
-
-	dev->mtu = transport->user->mtu;
-	dev->netdev_ops = &uml_netdev_ops;
-	dev->ethtool_ops = &uml_net_ethtool_ops;
-	dev->watchdog_timeo = (HZ >> 1);
-	dev->irq = UM_ETH_IRQ;
-
-	err = update_drop_skb(lp->max_packet);
-	if (err)
-		goto out_undo_user_init;
-
-	rtnl_lock();
-	err = register_netdevice(dev);
-	rtnl_unlock();
-	if (err)
-		goto out_undo_user_init;
-
-	spin_lock(&devices_lock);
-	list_add(&device->list, &devices);
-	spin_unlock(&devices_lock);
-
-	return;
-
-out_undo_user_init:
-	if (transport->user->remove != NULL)
-		(*transport->user->remove)(&lp->user);
-out_unregister:
-	platform_device_unregister(&device->pdev);
-	return; /* platform_device_unregister frees dev and device */
-out_free_netdev:
-	free_netdev(dev);
-out_free_device:
-	kfree(device);
-}
-
-static struct uml_net *find_device(int n)
-{
-	struct uml_net *device;
-	struct list_head *ele;
-
-	spin_lock(&devices_lock);
-	list_for_each(ele, &devices) {
-		device = list_entry(ele, struct uml_net, list);
-		if (device->index == n)
-			goto out;
-	}
-	device = NULL;
- out:
-	spin_unlock(&devices_lock);
-	return device;
-}
-
-static int eth_parse(char *str, int *index_out, char **str_out,
-		     char **error_out)
-{
-	char *end;
-	int n, err = -EINVAL;
-
-	n = simple_strtoul(str, &end, 0);
-	if (end == str) {
-		*error_out = "Bad device number";
-		return err;
-	}
-
-	str = end;
-	if (*str != '=') {
-		*error_out = "Expected '=' after device number";
-		return err;
-	}
-
-	str++;
-	if (find_device(n)) {
-		*error_out = "Device already configured";
-		return err;
-	}
-
-	*index_out = n;
-	*str_out = str;
-	return 0;
-}
-
-struct eth_init {
-	struct list_head list;
-	char *init;
-	int index;
-};
-
-static DEFINE_SPINLOCK(transports_lock);
-static LIST_HEAD(transports);
-
-/* Filled in during early boot */
-static LIST_HEAD(eth_cmd_line);
-
-static int check_transport(struct transport *transport, char *eth, int n,
-			   void **init_out, char **mac_out)
-{
-	int len;
-
-	len = strlen(transport->name);
-	if (strncmp(eth, transport->name, len))
-		return 0;
-
-	eth += len;
-	if (*eth == ',')
-		eth++;
-	else if (*eth != '\0')
-		return 0;
-
-	*init_out = kmalloc(transport->setup_size, GFP_KERNEL);
-	if (*init_out == NULL)
-		return 1;
-
-	if (!transport->setup(eth, mac_out, *init_out)) {
-		kfree(*init_out);
-		*init_out = NULL;
-	}
-	return 1;
-}
-
-void register_transport(struct transport *new)
-{
-	struct list_head *ele, *next;
-	struct eth_init *eth;
-	void *init;
-	char *mac = NULL;
-	int match;
-
-	spin_lock(&transports_lock);
-	BUG_ON(!list_empty(&new->list));
-	list_add(&new->list, &transports);
-	spin_unlock(&transports_lock);
-
-	list_for_each_safe(ele, next, &eth_cmd_line) {
-		eth = list_entry(ele, struct eth_init, list);
-		match = check_transport(new, eth->init, eth->index, &init,
-					&mac);
-		if (!match)
-			continue;
-		else if (init != NULL) {
-			eth_configure(eth->index, init, mac, new);
-			kfree(init);
-		}
-		list_del(&eth->list);
-	}
-}
-
-static int eth_setup_common(char *str, int index)
-{
-	struct list_head *ele;
-	struct transport *transport;
-	void *init;
-	char *mac = NULL;
-	int found = 0;
-
-	spin_lock(&transports_lock);
-	list_for_each(ele, &transports) {
-		transport = list_entry(ele, struct transport, list);
-	        if (!check_transport(transport, str, index, &init, &mac))
-			continue;
-		if (init != NULL) {
-			eth_configure(index, init, mac, transport);
-			kfree(init);
-		}
-		found = 1;
-		break;
-	}
-
-	spin_unlock(&transports_lock);
-	return found;
-}
-
-static int __init eth_setup(char *str)
-{
-	struct eth_init *new;
-	char *error;
-	int n, err;
-
-	err = eth_parse(str, &n, &str, &error);
-	if (err) {
-		printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n",
-		       str, error);
-		return 1;
-	}
-
-	new = alloc_bootmem(sizeof(*new));
-	if (new == NULL) {
-		printk(KERN_ERR "eth_init : alloc_bootmem failed\n");
-		return 1;
-	}
-
-	INIT_LIST_HEAD(&new->list);
-	new->index = n;
-	new->init = str;
-
-	list_add_tail(&new->list, &eth_cmd_line);
-	return 1;
-}
-
-__setup("eth", eth_setup);
-__uml_help(eth_setup,
-"eth[0-9]+=<transport>,<options>\n"
-"    Configure a network device.\n\n"
-);
-
-static int net_config(char *str, char **error_out)
-{
-	int n, err;
-
-	err = eth_parse(str, &n, &str, error_out);
-	if (err)
-		return err;
-
-	/* This string is broken up and the pieces used by the underlying
-	 * driver.  So, it is freed only if eth_setup_common fails.
-	 */
-	str = kstrdup(str, GFP_KERNEL);
-	if (str == NULL) {
-	        *error_out = "net_config failed to strdup string";
-		return -ENOMEM;
-	}
-	err = !eth_setup_common(str, n);
-	if (err)
-		kfree(str);
-	return err;
-}
-
-static int net_id(char **str, int *start_out, int *end_out)
-{
-	char *end;
-	int n;
-
-	n = simple_strtoul(*str, &end, 0);
-	if ((*end != '\0') || (end == *str))
-		return -1;
-
-	*start_out = n;
-	*end_out = n;
-	*str = end;
-	return n;
-}
-
-static int net_remove(int n, char **error_out)
-{
-	struct uml_net *device;
-	struct net_device *dev;
-	struct uml_net_private *lp;
-
-	device = find_device(n);
-	if (device == NULL)
-		return -ENODEV;
-
-	dev = device->dev;
-	lp = netdev_priv(dev);
-	if (lp->fd > 0)
-		return -EBUSY;
-	unregister_netdev(dev);
-	platform_device_unregister(&device->pdev);
-
-	return 0;
-}
-
-static struct mc_device net_mc = {
-	.list		= LIST_HEAD_INIT(net_mc.list),
-	.name		= "eth",
-	.config		= net_config,
-	.get_config	= NULL,
-	.id		= net_id,
-	.remove		= net_remove,
-};
-
-#ifdef CONFIG_INET
-static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
-			      void *ptr)
-{
-	struct in_ifaddr *ifa = ptr;
-	struct net_device *dev = ifa->ifa_dev->dev;
-	struct uml_net_private *lp;
-	void (*proc)(unsigned char *, unsigned char *, void *);
-	unsigned char addr_buf[4], netmask_buf[4];
-
-	if (dev->netdev_ops->ndo_open != uml_net_open)
-		return NOTIFY_DONE;
-
-	lp = netdev_priv(dev);
-
-	proc = NULL;
-	switch (event) {
-	case NETDEV_UP:
-		proc = lp->add_address;
-		break;
-	case NETDEV_DOWN:
-		proc = lp->delete_address;
-		break;
-	}
-	if (proc != NULL) {
-		memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf));
-		memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf));
-		(*proc)(addr_buf, netmask_buf, &lp->user);
-	}
-	return NOTIFY_DONE;
-}
-
-/* uml_net_init shouldn't be called twice on two CPUs at the same time */
-static struct notifier_block uml_inetaddr_notifier = {
-	.notifier_call		= uml_inetaddr_event,
-};
-
-static void inet_register(void)
-{
-	struct list_head *ele;
-	struct uml_net_private *lp;
-	struct in_device *ip;
-	struct in_ifaddr *in;
-
-	register_inetaddr_notifier(&uml_inetaddr_notifier);
-
-	/* Devices may have been opened already, so the uml_inetaddr_notifier
-	 * didn't get a chance to run for them.  This fakes it so that
-	 * addresses which have already been set up get handled properly.
-	 */
-	spin_lock(&opened_lock);
-	list_for_each(ele, &opened) {
-		lp = list_entry(ele, struct uml_net_private, list);
-		ip = lp->dev->ip_ptr;
-		if (ip == NULL)
-			continue;
-		in = ip->ifa_list;
-		while (in != NULL) {
-			uml_inetaddr_event(NULL, NETDEV_UP, in);
-			in = in->ifa_next;
-		}
-	}
-	spin_unlock(&opened_lock);
-}
-#else
-static inline void inet_register(void)
-{
-}
-#endif
-
-static int uml_net_init(void)
-{
-	mconsole_register_dev(&net_mc);
-	inet_register();
-	return 0;
-}
-
-__initcall(uml_net_init);
-
-static void close_devices(void)
-{
-	struct list_head *ele;
-	struct uml_net_private *lp;
-
-	spin_lock(&opened_lock);
-	list_for_each(ele, &opened) {
-		lp = list_entry(ele, struct uml_net_private, list);
-		um_free_irq(lp->dev->irq, lp->dev);
-		if ((lp->close != NULL) && (lp->fd >= 0))
-			(*lp->close)(lp->fd, &lp->user);
-		if (lp->remove != NULL)
-			(*lp->remove)(&lp->user);
-	}
-	spin_unlock(&opened_lock);
-}
-
-__uml_exitcall(close_devices);
-
-void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
-					void *),
-		    void *arg)
-{
-	struct net_device *dev = d;
-	struct in_device *ip = dev->ip_ptr;
-	struct in_ifaddr *in;
-	unsigned char address[4], netmask[4];
-
-	if (ip == NULL) return;
-	in = ip->ifa_list;
-	while (in != NULL) {
-		memcpy(address, &in->ifa_address, sizeof(address));
-		memcpy(netmask, &in->ifa_mask, sizeof(netmask));
-		(*cb)(address, netmask, arg);
-		in = in->ifa_next;
-	}
-}
-
-int dev_netmask(void *d, void *m)
-{
-	struct net_device *dev = d;
-	struct in_device *ip = dev->ip_ptr;
-	struct in_ifaddr *in;
-	__be32 *mask_out = m;
-
-	if (ip == NULL)
-		return 1;
-
-	in = ip->ifa_list;
-	if (in == NULL)
-		return 1;
-
-	*mask_out = in->ifa_mask;
-	return 0;
-}
-
-void *get_output_buffer(int *len_out)
-{
-	void *ret;
-
-	ret = (void *) __get_free_pages(GFP_KERNEL, 0);
-	if (ret) *len_out = PAGE_SIZE;
-	else *len_out = 0;
-	return ret;
-}
-
-void free_output_buffer(void *buffer)
-{
-	free_pages((unsigned long) buffer, 0);
-}
-
-int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out,
-		     char **gate_addr)
-{
-	char *remain;
-
-	remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "tap_setup_common - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 1;
-	}
-
-	return 0;
-}
-
-unsigned short eth_protocol(struct sk_buff *skb)
-{
-	return eth_type_trans(skb, skb->dev);
-}
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
deleted file mode 100644
index cd14157b556d..000000000000
--- a/arch/um/drivers/net_user.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include <um_malloc.h>
-
-int tap_open_common(void *dev, char *gate_addr)
-{
-	int tap_addr[4];
-
-	if (gate_addr == NULL)
-		return 0;
-	if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
-		  &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) {
-		printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n",
-		       gate_addr);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-void tap_check_ips(char *gate_addr, unsigned char *eth_addr)
-{
-	int tap_addr[4];
-
-	if ((gate_addr != NULL) &&
-	    (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
-		    &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
-	    (eth_addr[0] == tap_addr[0]) &&
-	    (eth_addr[1] == tap_addr[1]) &&
-	    (eth_addr[2] == tap_addr[2]) &&
-	    (eth_addr[3] == tap_addr[3])) {
-		printk(UM_KERN_ERR "The tap IP address and the UML eth IP "
-		       "address must be different\n");
-	}
-}
-
-/* Do reliable error handling as this fails frequently enough. */
-void read_output(int fd, char *output, int len)
-{
-	int remain, ret, expected;
-	char c;
-	char *str;
-
-	if (output == NULL) {
-		output = &c;
-		len = sizeof(c);
-	}
-
-	*output = '\0';
-	ret = read(fd, &remain, sizeof(remain));
-
-	if (ret != sizeof(remain)) {
-		if (ret < 0)
-			ret = -errno;
-		expected = sizeof(remain);
-		str = "length";
-		goto err;
-	}
-
-	while (remain != 0) {
-		expected = (remain < len) ? remain : len;
-		ret = read(fd, output, expected);
-		if (ret != expected) {
-			if (ret < 0)
-				ret = -errno;
-			str = "data";
-			goto err;
-		}
-		remain -= ret;
-	}
-
-	return;
-
-err:
-	if (ret < 0)
-		printk(UM_KERN_ERR "read_output - read of %s failed, "
-		       "errno = %d\n", str, -ret);
-	else
-		printk(UM_KERN_ERR "read_output - read of %s failed, read only "
-		       "%d of %d bytes\n", str, ret, expected);
-}
-
-int net_read(int fd, void *buf, int len)
-{
-	int n;
-
-	n = read(fd,  buf,  len);
-
-	if ((n < 0) && (errno == EAGAIN))
-		return 0;
-	else if (n == 0)
-		return -ENOTCONN;
-	return n;
-}
-
-int net_recvfrom(int fd, void *buf, int len)
-{
-	int n;
-
-	CATCH_EINTR(n = recvfrom(fd,  buf,  len, 0, NULL, NULL));
-	if (n < 0) {
-		if (errno == EAGAIN)
-			return 0;
-		return -errno;
-	}
-	else if (n == 0)
-		return -ENOTCONN;
-	return n;
-}
-
-int net_write(int fd, void *buf, int len)
-{
-	int n;
-
-	n = write(fd, buf, len);
-
-	if ((n < 0) && (errno == EAGAIN))
-		return 0;
-	else if (n == 0)
-		return -ENOTCONN;
-	return n;
-}
-
-int net_send(int fd, void *buf, int len)
-{
-	int n;
-
-	CATCH_EINTR(n = send(fd, buf, len, 0));
-	if (n < 0) {
-		if (errno == EAGAIN)
-			return 0;
-		return -errno;
-	}
-	else if (n == 0)
-		return -ENOTCONN;
-	return n;
-}
-
-int net_sendto(int fd, void *buf, int len, void *to, int sock_len)
-{
-	int n;
-
-	CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to,
-			       sock_len));
-	if (n < 0) {
-		if (errno == EAGAIN)
-			return 0;
-		return -errno;
-	}
-	else if (n == 0)
-		return -ENOTCONN;
-	return n;
-}
-
-struct change_pre_exec_data {
-	int close_me;
-	int stdout;
-};
-
-static void change_pre_exec(void *arg)
-{
-	struct change_pre_exec_data *data = arg;
-
-	close(data->close_me);
-	dup2(data->stdout, 1);
-}
-
-static int change_tramp(char **argv, char *output, int output_len)
-{
-	int pid, fds[2], err;
-	struct change_pre_exec_data pe_data;
-
-	err = os_pipe(fds, 1, 0);
-	if (err < 0) {
-		printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n",
-		       -err);
-		return err;
-	}
-	pe_data.close_me = fds[0];
-	pe_data.stdout = fds[1];
-	pid = run_helper(change_pre_exec, &pe_data, argv);
-
-	if (pid > 0)	/* Avoid hang as we won't get data in failure case. */
-		read_output(fds[0], output, output_len);
-
-	close(fds[0]);
-	close(fds[1]);
-
-	if (pid > 0)
-		helper_wait(pid);
-	return pid;
-}
-
-static void change(char *dev, char *what, unsigned char *addr,
-		   unsigned char *netmask)
-{
-	char addr_buf[sizeof("255.255.255.255\0")];
-	char netmask_buf[sizeof("255.255.255.255\0")];
-	char version[sizeof("nnnnn\0")];
-	char *argv[] = { "uml_net", version, what, dev, addr_buf,
-			 netmask_buf, NULL };
-	char *output;
-	int output_len, pid;
-
-	sprintf(version, "%d", UML_NET_VERSION);
-	sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
-	sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1],
-		netmask[2], netmask[3]);
-
-	output_len = UM_KERN_PAGE_SIZE;
-	output = uml_kmalloc(output_len, UM_GFP_KERNEL);
-	if (output == NULL)
-		printk(UM_KERN_ERR "change : failed to allocate output "
-		       "buffer\n");
-
-	pid = change_tramp(argv, output, output_len);
-	if (pid < 0) {
-		kfree(output);
-		return;
-	}
-
-	if (output != NULL) {
-		printk("%s", output);
-		kfree(output);
-	}
-}
-
-void open_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
-	change(arg, "add", addr, netmask);
-}
-
-void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
-{
-	change(arg, "del", addr, netmask);
-}
-
-char *split_if_spec(char *str, ...)
-{
-	char **arg, *end;
-	va_list ap;
-
-	va_start(ap, str);
-	while ((arg = va_arg(ap, char **)) != NULL) {
-		if (*str == '\0')
-			return NULL;
-		end = strchr(str, ',');
-		if (end != str)
-			*arg = str;
-		if (end == NULL)
-			return NULL;
-		*end++ = '\0';
-		str = end;
-	}
-	va_end(ap);
-	return str;
-}
diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c
index 10495747ce8e..30d59b8481b4 100644
--- a/arch/um/drivers/null.c
+++ b/arch/um/drivers/null.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
@@ -28,7 +28,7 @@ static int null_open(int input, int output, int primary, void *d,
 	return (fd < 0) ? -errno : fd;
 }
 
-static int null_read(int fd, char *c_out, void *unused)
+static int null_read(int fd, __u8 *c_out, void *unused)
 {
 	return -ENODEV;
 }
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
deleted file mode 100644
index be0fb57bd1d7..000000000000
--- a/arch/um/drivers/pcap_kern.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "pcap_user.h"
-
-struct pcap_init {
-	char *host_if;
-	int promisc;
-	int optimize;
-	char *filter;
-};
-
-void pcap_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct pcap_data *ppri;
-	struct pcap_init *init = data;
-
-	pri = netdev_priv(dev);
-	ppri = (struct pcap_data *) pri->user;
-	ppri->host_if = init->host_if;
-	ppri->promisc = init->promisc;
-	ppri->optimize = init->optimize;
-	ppri->filter = init->filter;
-
-	printk("pcap backend, host interface %s\n", ppri->host_if);
-}
-
-static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return pcap_user_read(fd, skb_mac_header(skb),
-			      skb->dev->mtu + ETH_HEADER_OTHER,
-			      (struct pcap_data *) &lp->user);
-}
-
-static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return -EPERM;
-}
-
-static const struct net_kern_info pcap_kern_info = {
-	.init			= pcap_init,
-	.protocol		= eth_protocol,
-	.read			= pcap_read,
-	.write			= pcap_write,
-};
-
-int pcap_setup(char *str, char **mac_out, void *data)
-{
-	struct pcap_init *init = data;
-	char *remain, *host_if = NULL, *options[2] = { NULL, NULL };
-	int i;
-
-	*init = ((struct pcap_init)
-		{ .host_if 	= "eth0",
-		  .promisc 	= 1,
-		  .optimize 	= 0,
-		  .filter 	= NULL });
-
-	remain = split_if_spec(str, &host_if, &init->filter,
-			       &options[0], &options[1], mac_out, NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "pcap_setup - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 0;
-	}
-
-	if (host_if != NULL)
-		init->host_if = host_if;
-
-	for (i = 0; i < ARRAY_SIZE(options); i++) {
-		if (options[i] == NULL)
-			continue;
-		if (!strcmp(options[i], "promisc"))
-			init->promisc = 1;
-		else if (!strcmp(options[i], "nopromisc"))
-			init->promisc = 0;
-		else if (!strcmp(options[i], "optimize"))
-			init->optimize = 1;
-		else if (!strcmp(options[i], "nooptimize"))
-			init->optimize = 0;
-		else {
-			printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
-			       options[i]);
-			return 0;
-		}
-	}
-
-	return 1;
-}
-
-static struct transport pcap_transport = {
-	.list 		= LIST_HEAD_INIT(pcap_transport.list),
-	.name 		= "pcap",
-	.setup  	= pcap_setup,
-	.user 		= &pcap_user_info,
-	.kern 		= &pcap_kern_info,
-	.private_size 	= sizeof(struct pcap_data),
-	.setup_size 	= sizeof(struct pcap_init),
-};
-
-static int register_pcap(void)
-{
-	register_transport(&pcap_transport);
-	return 0;
-}
-
-late_initcall(register_pcap);
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
deleted file mode 100644
index c07b9c752c86..000000000000
--- a/arch/um/drivers/pcap_user.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <errno.h>
-#include <pcap.h>
-#include <string.h>
-#include <asm/types.h>
-#include <net_user.h>
-#include "pcap_user.h"
-#include <um_malloc.h>
-
-#define PCAP_FD(p) (*(int *)(p))
-
-static int pcap_user_init(void *data, void *dev)
-{
-	struct pcap_data *pri = data;
-	pcap_t *p;
-	char errors[PCAP_ERRBUF_SIZE];
-
-	p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
-			   pri->promisc, 0, errors);
-	if (p == NULL) {
-		printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - "
-		       "'%s'\n", errors);
-		return -EINVAL;
-	}
-
-	pri->dev = dev;
-	pri->pcap = p;
-	return 0;
-}
-
-static int pcap_open(void *data)
-{
-	struct pcap_data *pri = data;
-	__u32 netmask;
-	int err;
-
-	if (pri->pcap == NULL)
-		return -ENODEV;
-
-	if (pri->filter != NULL) {
-		err = dev_netmask(pri->dev, &netmask);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n");
-			return -EIO;
-		}
-
-		pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
-					UM_GFP_KERNEL);
-		if (pri->compiled == NULL) {
-			printk(UM_KERN_ERR "pcap_open : kmalloc failed\n");
-			return -ENOMEM;
-		}
-
-		err = pcap_compile(pri->pcap,
-				   (struct bpf_program *) pri->compiled,
-				   pri->filter, pri->optimize, netmask);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : pcap_compile failed - "
-			       "'%s'\n", pcap_geterr(pri->pcap));
-			goto out;
-		}
-
-		err = pcap_setfilter(pri->pcap, pri->compiled);
-		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : pcap_setfilter "
-			       "failed - '%s'\n", pcap_geterr(pri->pcap));
-			goto out;
-		}
-	}
-
-	return PCAP_FD(pri->pcap);
-
- out:
-	kfree(pri->compiled);
-	return -EIO;
-}
-
-static void pcap_remove(void *data)
-{
-	struct pcap_data *pri = data;
-
-	if (pri->compiled != NULL)
-		pcap_freecode(pri->compiled);
-
-	if (pri->pcap != NULL)
-		pcap_close(pri->pcap);
-}
-
-struct pcap_handler_data {
-	char *buffer;
-	int len;
-};
-
-static void handler(u_char *data, const struct pcap_pkthdr *header,
-		    const u_char *packet)
-{
-	int len;
-
-	struct pcap_handler_data *hdata = (struct pcap_handler_data *) data;
-
-	len = hdata->len < header->caplen ? hdata->len : header->caplen;
-	memcpy(hdata->buffer, packet, len);
-	hdata->len = len;
-}
-
-int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
-{
-	struct pcap_handler_data hdata = ((struct pcap_handler_data)
-		                          { .buffer  	= buffer,
-					    .len 	= len });
-	int n;
-
-	n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
-	if (n < 0) {
-		printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
-		       pcap_geterr(pri->pcap));
-		return -EIO;
-	}
-	else if (n == 0)
-		return 0;
-	return hdata.len;
-}
-
-const struct net_user_info pcap_user_info = {
-	.init		= pcap_user_init,
-	.open		= pcap_open,
-	.close	 	= NULL,
-	.remove	 	= pcap_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h
deleted file mode 100644
index 1ca7c764cc63..000000000000
--- a/arch/um/drivers/pcap_user.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <net_user.h>
-
-struct pcap_data {
-	char *host_if;
-	int promisc;
-	int optimize;
-	char *filter;
-	void *compiled;
-	void *pcap;
-	void *dev;
-};
-
-extern const struct net_user_info pcap_user_info;
-
-extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri);
-
diff --git a/arch/um/drivers/port.h b/arch/um/drivers/port.h
index 372a80c0556a..9085b336e683 100644
--- a/arch/um/drivers/port.h
+++ b/arch/um/drivers/port.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __PORT_H__
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 40ca5cc275e9..a4508470df78 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/completion.h>
@@ -45,15 +45,17 @@ struct connection {
 static irqreturn_t pipe_interrupt(int irq, void *data)
 {
 	struct connection *conn = data;
-	int fd;
+	int n_fds = 1, fd = -1;
+	ssize_t ret;
 
-	fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
-	if (fd < 0) {
-		if (fd == -EAGAIN)
+	ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid,
+			    sizeof(conn->helper_pid));
+	if (ret != sizeof(conn->helper_pid)) {
+		if (ret == -EAGAIN)
 			return IRQ_NONE;
 
-		printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
-		       -fd);
+		printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n",
+		       ret);
 		os_close_file(conn->fd);
 	}
 
@@ -100,7 +102,7 @@ static int port_accept(struct port_list *port)
 		  .port 	= port });
 
 	if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt,
-			  IRQF_SHARED, "telnetd", conn)) {
+			  IRQF_SHARED, "telnetd", conn) < 0) {
 		printk(KERN_ERR "port_accept : failed to get IRQ for "
 		       "telnetd\n");
 		goto out_free;
@@ -137,7 +139,6 @@ static void port_work_proc(struct work_struct *unused)
 		if (!port->has_connection)
 			continue;
 
-		reactivate_fd(port->fd, ACCEPT_IRQ);
 		while (port_accept(port))
 			;
 		port->has_connection = 0;
@@ -145,7 +146,7 @@ static void port_work_proc(struct work_struct *unused)
 	local_irq_restore(flags);
 }
 
-DECLARE_WORK(port_work, port_work_proc);
+static DECLARE_WORK(port_work, port_work_proc);
 
 static irqreturn_t port_interrupt(int irq, void *data)
 {
@@ -183,7 +184,7 @@ void *port_data(int port_num)
 	}
 
 	if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt,
-			  IRQF_SHARED, "port", port)) {
+			  IRQF_SHARED, "port", port) < 0) {
 		printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num);
 		goto out_close;
 	}
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index 9a8e1b64c22e..3c62ae81df62 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <errno.h>
 #include <termios.h>
 #include <unistd.h>
@@ -167,14 +168,29 @@ static void port_pre_exec(void *arg)
 int port_connection(int fd, int *socket, int *pid_out)
 {
 	int new, err;
-	char *argv[] = { "/usr/sbin/in.telnetd", "-L",
-			 "/usr/lib/uml/port-helper", NULL };
+	char *env;
+	char *argv[] = { "in.telnetd", "-L",
+			 OS_LIB_PATH "/uml/port-helper", NULL };
 	struct port_pre_exec_data data;
 
+	if ((env = getenv("UML_PORT_HELPER")))
+		argv[2] = env;
+
 	new = accept(fd, NULL, 0);
 	if (new < 0)
 		return -errno;
 
+	err = os_access(argv[2], X_OK);
+	if (err < 0) {
+		printk(UM_KERN_ERR "port_connection : error accessing port-helper "
+		       "executable at %s: %s\n", argv[2], strerror(-err));
+		if (env == NULL)
+			printk(UM_KERN_ERR "Set UML_PORT_HELPER environment "
+				"variable to path to uml-utilities port-helper "
+				"binary\n");
+		goto out_close;
+	}
+
 	err = os_pipe(socket, 0, 0);
 	if (err < 0)
 		goto out_close;
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index f1fcc2cedb5e..39c60068cfdf 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 9e3a72205827..ca08c91f47a3 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -6,113 +6,58 @@
  * This software may be used and distributed according to the terms
  * of the GNU General Public License, incorporated herein by reference.
  */
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/interrupt.h>
 #include <linux/miscdevice.h>
+#include <linux/hw_random.h>
 #include <linux/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <init.h>
 #include <irq_kern.h>
 #include <os.h>
 
 /*
- * core module and version information
+ * core module information
  */
-#define RNG_VERSION "1.0.0"
 #define RNG_MODULE_NAME "hw_random"
 
-#define RNG_MISCDEV_MINOR		183 /* official */
-
 /* Changed at init time, in the non-modular case, and at module load
  * time, in the module case.  Presumably, the module subsystem
  * protects against a module being loaded twice at the same time.
  */
 static int random_fd = -1;
-static DECLARE_WAIT_QUEUE_HEAD(host_read_wait);
-
-static int rng_dev_open (struct inode *inode, struct file *filp)
-{
-	/* enforce read-only access to this chrdev */
-	if ((filp->f_mode & FMODE_READ) == 0)
-		return -EINVAL;
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		return -EINVAL;
-
-	return 0;
-}
+static struct hwrng hwrng;
+static DECLARE_COMPLETION(have_data);
 
-static atomic_t host_sleep_count = ATOMIC_INIT(0);
-
-static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
-			     loff_t *offp)
+static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block)
 {
-	u32 data;
-	int n, ret = 0, have_data;
-
-	while (size) {
-		n = os_read_file(random_fd, &data, sizeof(data));
-		if (n > 0) {
-			have_data = n;
-			while (have_data && size) {
-				if (put_user((u8) data, buf++)) {
-					ret = ret ? : -EFAULT;
-					break;
-				}
-				size--;
-				ret++;
-				have_data--;
-				data >>= 8;
-			}
-		}
-		else if (n == -EAGAIN) {
-			DECLARE_WAITQUEUE(wait, current);
-
-			if (filp->f_flags & O_NONBLOCK)
-				return ret ? : -EAGAIN;
+	int ret;
 
-			atomic_inc(&host_sleep_count);
-			reactivate_fd(random_fd, RANDOM_IRQ);
+	for (;;) {
+		ret = os_read_file(random_fd, buf, max);
+		if (block && ret == -EAGAIN) {
 			add_sigio_fd(random_fd);
 
-			add_wait_queue(&host_read_wait, &wait);
-			set_task_state(current, TASK_INTERRUPTIBLE);
+			ret = wait_for_completion_killable(&have_data);
 
-			schedule();
-			set_task_state(current, TASK_RUNNING);
-			remove_wait_queue(&host_read_wait, &wait);
+			ignore_sigio_fd(random_fd);
+			deactivate_fd(random_fd, RANDOM_IRQ);
 
-			if (atomic_dec_and_test(&host_sleep_count)) {
-				ignore_sigio_fd(random_fd);
-				deactivate_fd(random_fd, RANDOM_IRQ);
-			}
+			if (ret < 0)
+				break;
+		} else {
+			break;
 		}
-		else
-			return n;
-
-		if (signal_pending (current))
-			return ret ? : -ERESTARTSYS;
 	}
-	return ret;
-}
 
-static const struct file_operations rng_chrdev_ops = {
-	.owner		= THIS_MODULE,
-	.open		= rng_dev_open,
-	.read		= rng_dev_read,
-	.llseek		= noop_llseek,
-};
-
-/* rng_init shouldn't be called more than once at boot time */
-static struct miscdevice rng_miscdev = {
-	RNG_MISCDEV_MINOR,
-	RNG_MODULE_NAME,
-	&rng_chrdev_ops,
-};
+	return ret != -EAGAIN ? ret : 0;
+}
 
 static irqreturn_t random_interrupt(int irq, void *data)
 {
-	wake_up(&host_read_wait);
+	complete(&have_data);
 
 	return IRQ_HANDLED;
 }
@@ -129,18 +74,18 @@ static int __init rng_init (void)
 		goto out;
 
 	random_fd = err;
-
 	err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt,
 			     0, "random", NULL);
-	if (err)
+	if (err < 0)
 		goto err_out_cleanup_hw;
 
-	sigio_broken(random_fd, 1);
+	sigio_broken();
+	hwrng.name = RNG_MODULE_NAME;
+	hwrng.read = rng_dev_read;
 
-	err = misc_register (&rng_miscdev);
+	err = hwrng_register(&hwrng);
 	if (err) {
-		printk (KERN_ERR RNG_MODULE_NAME ": misc device register "
-			"failed\n");
+		pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err);
 		goto err_out_cleanup_hw;
 	}
 out:
@@ -155,14 +100,22 @@ err_out_cleanup_hw:
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+	free_irq_by_fd(random_fd);
+	os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
+	hwrng_unregister(&hwrng);
 	os_close_file(random_fd);
-	misc_deregister (&rng_miscdev);
 }
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/rtc.h b/arch/um/drivers/rtc.h
new file mode 100644
index 000000000000..95e41c7d35c4
--- /dev/null
+++ b/arch/um/drivers/rtc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef __UM_RTC_H__
+#define __UM_RTC_H__
+
+int uml_rtc_start(bool timetravel);
+int uml_rtc_enable_alarm(unsigned long long delta_seconds);
+void uml_rtc_disable_alarm(void);
+void uml_rtc_stop(bool timetravel);
+void uml_rtc_send_timetravel_alarm(void);
+
+#endif /* __UM_RTC_H__ */
diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c
new file mode 100644
index 000000000000..9158c936c128
--- /dev/null
+++ b/arch/um/drivers/rtc_kern.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/platform_device.h>
+#include <linux/time-internal.h>
+#include <linux/suspend.h>
+#include <linux/err.h>
+#include <linux/rtc.h>
+#include <kern_util.h>
+#include <irq_kern.h>
+#include <os.h>
+#include "rtc.h"
+
+static time64_t uml_rtc_alarm_time;
+static bool uml_rtc_alarm_enabled;
+static struct rtc_device *uml_rtc;
+static int uml_rtc_irq_fd, uml_rtc_irq;
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+
+static void uml_rtc_time_travel_alarm(struct time_travel_event *ev)
+{
+	uml_rtc_send_timetravel_alarm();
+}
+
+static struct time_travel_event uml_rtc_alarm_event = {
+	.fn = uml_rtc_time_travel_alarm,
+};
+#endif
+
+static int uml_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct timespec64 ts;
+
+	/* Use this to get correct time in time-travel mode */
+	read_persistent_clock64(&ts);
+	rtc_time64_to_tm(timespec64_to_ktime(ts) / NSEC_PER_SEC, tm);
+
+	return 0;
+}
+
+static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	rtc_time64_to_tm(uml_rtc_alarm_time, &alrm->time);
+	alrm->enabled = uml_rtc_alarm_enabled;
+
+	return 0;
+}
+
+static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
+{
+	struct timespec64 ts;
+	unsigned long long secs;
+
+	if (!enable && !uml_rtc_alarm_enabled)
+		return 0;
+
+	uml_rtc_alarm_enabled = enable;
+
+	read_persistent_clock64(&ts);
+	secs = uml_rtc_alarm_time - ts.tv_sec;
+
+	if (time_travel_mode == TT_MODE_OFF) {
+		if (!enable) {
+			uml_rtc_disable_alarm();
+			return 0;
+		}
+
+		/* enable or update */
+		return uml_rtc_enable_alarm(secs);
+	} else {
+		time_travel_del_event(&uml_rtc_alarm_event);
+
+		if (enable)
+			time_travel_add_event_rel(&uml_rtc_alarm_event,
+						  secs * NSEC_PER_SEC -
+						  ts.tv_nsec);
+	}
+
+	return 0;
+}
+
+static int uml_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	uml_rtc_alarm_irq_enable(dev, 0);
+	uml_rtc_alarm_time = rtc_tm_to_time64(&alrm->time);
+	uml_rtc_alarm_irq_enable(dev, alrm->enabled);
+
+	return 0;
+}
+
+static const struct rtc_class_ops uml_rtc_ops = {
+	.read_time = uml_rtc_read_time,
+	.read_alarm = uml_rtc_read_alarm,
+	.alarm_irq_enable = uml_rtc_alarm_irq_enable,
+	.set_alarm = uml_rtc_set_alarm,
+};
+
+static irqreturn_t uml_rtc_interrupt(int irq, void *data)
+{
+	unsigned long long c = 0;
+
+	/* alarm triggered, it's now off */
+	uml_rtc_alarm_enabled = false;
+
+	os_read_file(uml_rtc_irq_fd, &c, sizeof(c));
+	WARN_ON(c == 0);
+
+	pm_system_wakeup();
+	rtc_update_irq(uml_rtc, 1, RTC_IRQF | RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+static int uml_rtc_setup(void)
+{
+	int err;
+
+	err = uml_rtc_start(time_travel_mode != TT_MODE_OFF);
+	if (WARN(err < 0, "err = %d\n", err))
+		return err;
+
+	uml_rtc_irq_fd = err;
+
+	err = um_request_irq(UM_IRQ_ALLOC, uml_rtc_irq_fd, IRQ_READ,
+			     uml_rtc_interrupt, 0, "rtc", NULL);
+	if (err < 0) {
+		uml_rtc_stop(time_travel_mode != TT_MODE_OFF);
+		return err;
+	}
+
+	irq_set_irq_wake(err, 1);
+
+	uml_rtc_irq = err;
+	return 0;
+}
+
+static void uml_rtc_cleanup(void)
+{
+	um_free_irq(uml_rtc_irq, NULL);
+	uml_rtc_stop(time_travel_mode != TT_MODE_OFF);
+}
+
+static int uml_rtc_probe(struct platform_device *pdev)
+{
+	int err;
+
+	err = uml_rtc_setup();
+	if (err)
+		return err;
+
+	uml_rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(uml_rtc)) {
+		err = PTR_ERR(uml_rtc);
+		goto cleanup;
+	}
+
+	uml_rtc->ops = &uml_rtc_ops;
+
+	device_init_wakeup(&pdev->dev, 1);
+
+	err = devm_rtc_register_device(uml_rtc);
+	if (err)
+		goto cleanup;
+
+	return 0;
+cleanup:
+	uml_rtc_cleanup();
+	return err;
+}
+
+static void uml_rtc_remove(struct platform_device *pdev)
+{
+	device_init_wakeup(&pdev->dev, 0);
+	uml_rtc_cleanup();
+}
+
+static struct platform_driver uml_rtc_driver = {
+	.probe = uml_rtc_probe,
+	.remove = uml_rtc_remove,
+	.driver = {
+		.name = "uml-rtc",
+	},
+};
+
+static int __init uml_rtc_init(void)
+{
+	struct platform_device *pdev;
+	int err;
+
+	err = platform_driver_register(&uml_rtc_driver);
+	if (err)
+		return err;
+
+	pdev = platform_device_alloc("uml-rtc", 0);
+	if (!pdev) {
+		err = -ENOMEM;
+		goto unregister;
+	}
+
+	err = platform_device_add(pdev);
+	if (err)
+		goto unregister;
+	return 0;
+
+unregister:
+	platform_device_put(pdev);
+	platform_driver_unregister(&uml_rtc_driver);
+	return err;
+}
+device_initcall(uml_rtc_init);
diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c
new file mode 100644
index 000000000000..67912fcf7b28
--- /dev/null
+++ b/arch/um/drivers/rtc_user.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <stdbool.h>
+#include <os.h>
+#include <errno.h>
+#include <sched.h>
+#include <unistd.h>
+#include <kern_util.h>
+#include <sys/select.h>
+#include <stdio.h>
+#include <sys/timerfd.h>
+#include "rtc.h"
+
+static int uml_rtc_irq_fds[2];
+
+void uml_rtc_send_timetravel_alarm(void)
+{
+	unsigned long long c = 1;
+
+	CATCH_EINTR(write(uml_rtc_irq_fds[1], &c, sizeof(c)));
+}
+
+int uml_rtc_start(bool timetravel)
+{
+	int err;
+
+	if (timetravel) {
+		err = os_pipe(uml_rtc_irq_fds, 1, 1);
+		if (err)
+			goto fail;
+	} else {
+		uml_rtc_irq_fds[0] = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
+		if (uml_rtc_irq_fds[0] < 0) {
+			err = -errno;
+			goto fail;
+		}
+
+		/* apparently timerfd won't send SIGIO, use workaround */
+		sigio_broken();
+		err = add_sigio_fd(uml_rtc_irq_fds[0]);
+		if (err < 0) {
+			close(uml_rtc_irq_fds[0]);
+			goto fail;
+		}
+	}
+
+	return uml_rtc_irq_fds[0];
+fail:
+	uml_rtc_stop(timetravel);
+	return err;
+}
+
+int uml_rtc_enable_alarm(unsigned long long delta_seconds)
+{
+	struct itimerspec it = {
+		.it_value = {
+			.tv_sec = delta_seconds,
+		},
+	};
+
+	if (timerfd_settime(uml_rtc_irq_fds[0], 0, &it, NULL))
+		return -errno;
+	return 0;
+}
+
+void uml_rtc_disable_alarm(void)
+{
+	uml_rtc_enable_alarm(0);
+}
+
+void uml_rtc_stop(bool timetravel)
+{
+	if (timetravel)
+		os_close_file(uml_rtc_irq_fds[1]);
+	else
+		ignore_sigio_fd(uml_rtc_irq_fds[0]);
+	os_close_file(uml_rtc_irq_fds[0]);
+}
diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h
deleted file mode 100644
index c64f8c61d274..000000000000
--- a/arch/um/drivers/slip.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __UM_SLIP_H
-#define __UM_SLIP_H
-
-#include "slip_common.h"
-
-struct slip_data {
-	void *dev;
-	char name[sizeof("slnnnnn\0")];
-	char *addr;
-	char *gate_addr;
-	int slave;
-	struct slip_proto slip;
-};
-
-extern const struct net_user_info slip_user_info;
-
-extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri);
-extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c
deleted file mode 100644
index f597fa7c91d3..000000000000
--- a/arch/um/drivers/slip_common.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <string.h>
-#include "slip_common.h"
-#include <net_user.h>
-
-int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
-{
-	int i, n, size, start;
-
-	if(slip->more > 0){
-		i = 0;
-		while(i < slip->more){
-			size = slip_unesc(slip->ibuf[i++], slip->ibuf,
-					  &slip->pos, &slip->esc);
-			if(size){
-				memcpy(buf, slip->ibuf, size);
-				memmove(slip->ibuf, &slip->ibuf[i],
-					slip->more - i);
-				slip->more = slip->more - i;
-				return size;
-			}
-		}
-		slip->more = 0;
-	}
-
-	n = net_read(fd, &slip->ibuf[slip->pos],
-		     sizeof(slip->ibuf) - slip->pos);
-	if(n <= 0)
-		return n;
-
-	start = slip->pos;
-	for(i = 0; i < n; i++){
-		size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos,
-				  &slip->esc);
-		if(size){
-			memcpy(buf, slip->ibuf, size);
-			memmove(slip->ibuf, &slip->ibuf[start+i+1],
-				n - (i + 1));
-			slip->more = n - (i + 1);
-			return size;
-		}
-	}
-	return 0;
-}
-
-int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip)
-{
-	int actual, n;
-
-	actual = slip_esc(buf, slip->obuf, len);
-	n = net_write(fd, slip->obuf, actual);
-	if(n < 0)
-		return n;
-	else return len;
-}
diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h
deleted file mode 100644
index d574e0a9dc13..000000000000
--- a/arch/um/drivers/slip_common.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef __UM_SLIP_COMMON_H
-#define __UM_SLIP_COMMON_H
-
-#define BUF_SIZE 1500
- /* two bytes each for a (pathological) max packet of escaped chars +  *
-  * terminating END char + initial END char                            */
-#define ENC_BUF_SIZE (2 * BUF_SIZE + 2)
-
-/* SLIP protocol characters. */
-#define SLIP_END             0300	/* indicates end of frame	*/
-#define SLIP_ESC             0333	/* indicates byte stuffing	*/
-#define SLIP_ESC_END         0334	/* ESC ESC_END means END 'data'	*/
-#define SLIP_ESC_ESC         0335	/* ESC ESC_ESC means ESC 'data'	*/
-
-static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos,
-                             int *esc)
-{
-	int ret;
-
-	switch(c){
-	case SLIP_END:
-		*esc = 0;
-		ret=*pos;
-		*pos=0;
-		return(ret);
-	case SLIP_ESC:
-		*esc = 1;
-		return(0);
-	case SLIP_ESC_ESC:
-		if(*esc){
-			*esc = 0;
-			c = SLIP_ESC;
-		}
-		break;
-	case SLIP_ESC_END:
-		if(*esc){
-			*esc = 0;
-			c = SLIP_END;
-		}
-		break;
-	}
-	buf[(*pos)++] = c;
-	return(0);
-}
-
-static inline int slip_esc(unsigned char *s, unsigned char *d, int len)
-{
-	unsigned char *ptr = d;
-	unsigned char c;
-
-	/*
-	 * Send an initial END character to flush out any
-	 * data that may have accumulated in the receiver
-	 * due to line noise.
-	 */
-
-	*ptr++ = SLIP_END;
-
-	/*
-	 * For each byte in the packet, send the appropriate
-	 * character sequence, according to the SLIP protocol.
-	 */
-
-	while (len-- > 0) {
-		switch(c = *s++) {
-		case SLIP_END:
-			*ptr++ = SLIP_ESC;
-			*ptr++ = SLIP_ESC_END;
-			break;
-		case SLIP_ESC:
-			*ptr++ = SLIP_ESC;
-			*ptr++ = SLIP_ESC_ESC;
-			break;
-		default:
-			*ptr++ = c;
-			break;
-		}
-	}
-	*ptr++ = SLIP_END;
-	return (ptr - d);
-}
-
-struct slip_proto {
-	unsigned char ibuf[ENC_BUF_SIZE];
-	unsigned char obuf[ENC_BUF_SIZE];
-	int more; /* more data: do not read fd until ibuf has been drained */
-	int pos;
-	int esc;
-};
-
-static inline void slip_proto_init(struct slip_proto * slip)
-{
-	memset(slip->ibuf, 0, sizeof(slip->ibuf));
-	memset(slip->obuf, 0, sizeof(slip->obuf));
-	slip->more = 0;
-	slip->pos = 0;
-	slip->esc = 0;
-}
-
-extern int slip_proto_read(int fd, void *buf, int len,
-			   struct slip_proto *slip);
-extern int slip_proto_write(int fd, void *buf, int len,
-			    struct slip_proto *slip);
-
-#endif
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
deleted file mode 100644
index ed5249fc0574..000000000000
--- a/arch/um/drivers/slip_kern.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include "slip.h"
-
-struct slip_init {
-	char *gate_addr;
-};
-
-static void slip_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *private;
-	struct slip_data *spri;
-	struct slip_init *init = data;
-
-	private = netdev_priv(dev);
-	spri = (struct slip_data *) private->user;
-
-	memset(spri->name, 0, sizeof(spri->name));
-	spri->addr = NULL;
-	spri->gate_addr = init->gate_addr;
-	spri->slave = -1;
-	spri->dev = dev;
-
-	slip_proto_init(&spri->slip);
-
-	dev->hard_header_len = 0;
-	dev->header_ops = NULL;
-	dev->addr_len = 0;
-	dev->type = ARPHRD_SLIP;
-	dev->tx_queue_len = 256;
-	dev->flags = IFF_NOARP;
-	printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr);
-}
-
-static unsigned short slip_protocol(struct sk_buff *skbuff)
-{
-	return htons(ETH_P_IP);
-}
-
-static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
-			      (struct slip_data *) &lp->user);
-}
-
-static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return slip_user_write(fd, skb->data, skb->len,
-			       (struct slip_data *) &lp->user);
-}
-
-static const struct net_kern_info slip_kern_info = {
-	.init			= slip_init,
-	.protocol		= slip_protocol,
-	.read			= slip_read,
-	.write			= slip_write,
-};
-
-static int slip_setup(char *str, char **mac_out, void *data)
-{
-	struct slip_init *init = data;
-
-	*init = ((struct slip_init) { .gate_addr = NULL });
-
-	if (str[0] != '\0')
-		init->gate_addr = str;
-	return 1;
-}
-
-static struct transport slip_transport = {
-	.list 		= LIST_HEAD_INIT(slip_transport.list),
-	.name 		= "slip",
-	.setup  	= slip_setup,
-	.user 		= &slip_user_info,
-	.kern 		= &slip_kern_info,
-	.private_size 	= sizeof(struct slip_data),
-	.setup_size 	= sizeof(struct slip_init),
-};
-
-static int register_slip(void)
-{
-	register_transport(&slip_transport);
-	return 0;
-}
-
-late_initcall(register_slip);
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
deleted file mode 100644
index 55c290d925f3..000000000000
--- a/arch/um/drivers/slip_user.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/termios.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slip.h"
-#include <um_malloc.h>
-
-static int slip_user_init(void *data, void *dev)
-{
-	struct slip_data *pri = data;
-
-	pri->dev = dev;
-	return 0;
-}
-
-static int set_up_tty(int fd)
-{
-	int i;
-	struct termios tios;
-
-	if (tcgetattr(fd, &tios) < 0) {
-		printk(UM_KERN_ERR "could not get initial terminal "
-		       "attributes\n");
-		return -1;
-	}
-
-	tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL;
-	tios.c_iflag = IGNBRK | IGNPAR;
-	tios.c_oflag = 0;
-	tios.c_lflag = 0;
-	for (i = 0; i < NCCS; i++)
-		tios.c_cc[i] = 0;
-	tios.c_cc[VMIN] = 1;
-	tios.c_cc[VTIME] = 0;
-
-	cfsetospeed(&tios, B38400);
-	cfsetispeed(&tios, B38400);
-
-	if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) {
-		printk(UM_KERN_ERR "failed to set terminal attributes\n");
-		return -1;
-	}
-	return 0;
-}
-
-struct slip_pre_exec_data {
-	int stdin;
-	int stdout;
-	int close_me;
-};
-
-static void slip_pre_exec(void *arg)
-{
-	struct slip_pre_exec_data *data = arg;
-
-	if (data->stdin >= 0)
-		dup2(data->stdin, 0);
-	dup2(data->stdout, 1);
-	if (data->close_me >= 0)
-		close(data->close_me);
-}
-
-static int slip_tramp(char **argv, int fd)
-{
-	struct slip_pre_exec_data pe_data;
-	char *output;
-	int pid, fds[2], err, output_len;
-
-	err = os_pipe(fds, 1, 0);
-	if (err < 0) {
-		printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n",
-		       -err);
-		goto out;
-	}
-
-	err = 0;
-	pe_data.stdin = fd;
-	pe_data.stdout = fds[1];
-	pe_data.close_me = fds[0];
-	err = run_helper(slip_pre_exec, &pe_data, argv);
-	if (err < 0)
-		goto out_close;
-	pid = err;
-
-	output_len = UM_KERN_PAGE_SIZE;
-	output = uml_kmalloc(output_len, UM_GFP_KERNEL);
-	if (output == NULL) {
-		printk(UM_KERN_ERR "slip_tramp : failed to allocate output "
-		       "buffer\n");
-		os_kill_process(pid, 1);
-		err = -ENOMEM;
-		goto out_close;
-	}
-
-	close(fds[1]);
-	read_output(fds[0], output, output_len);
-	printk("%s", output);
-
-	err = helper_wait(pid);
-	close(fds[0]);
-
-	kfree(output);
-	return err;
-
-out_close:
-	close(fds[0]);
-	close(fds[1]);
-out:
-	return err;
-}
-
-static int slip_open(void *data)
-{
-	struct slip_data *pri = data;
-	char version_buf[sizeof("nnnnn\0")];
-	char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
-	char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
-			 NULL };
-	int sfd, mfd, err;
-
-	err = get_pty();
-	if (err < 0) {
-		printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n",
-		       -err);
-		goto out;
-	}
-	mfd = err;
-
-	err = open(ptsname(mfd), O_RDWR, 0);
-	if (err < 0) {
-		printk(UM_KERN_ERR "Couldn't open tty for slip line, "
-		       "err = %d\n", -err);
-		goto out_close;
-	}
-	sfd = err;
-
-	if (set_up_tty(sfd))
-		goto out_close2;
-
-	pri->slave = sfd;
-	pri->slip.pos = 0;
-	pri->slip.esc = 0;
-	if (pri->gate_addr != NULL) {
-		sprintf(version_buf, "%d", UML_NET_VERSION);
-		strcpy(gate_buf, pri->gate_addr);
-
-		err = slip_tramp(argv, sfd);
-
-		if (err < 0) {
-			printk(UM_KERN_ERR "slip_tramp failed - err = %d\n",
-			       -err);
-			goto out_close2;
-		}
-		err = os_get_ifname(pri->slave, pri->name);
-		if (err < 0) {
-			printk(UM_KERN_ERR "get_ifname failed, err = %d\n",
-			       -err);
-			goto out_close2;
-		}
-		iter_addresses(pri->dev, open_addr, pri->name);
-	}
-	else {
-		err = os_set_slip(sfd);
-		if (err < 0) {
-			printk(UM_KERN_ERR "Failed to set slip discipline "
-			       "encapsulation - err = %d\n", -err);
-			goto out_close2;
-		}
-	}
-	return mfd;
-out_close2:
-	close(sfd);
-out_close:
-	close(mfd);
-out:
-	return err;
-}
-
-static void slip_close(int fd, void *data)
-{
-	struct slip_data *pri = data;
-	char version_buf[sizeof("nnnnn\0")];
-	char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name,
-			 NULL };
-	int err;
-
-	if (pri->gate_addr != NULL)
-		iter_addresses(pri->dev, close_addr, pri->name);
-
-	sprintf(version_buf, "%d", UML_NET_VERSION);
-
-	err = slip_tramp(argv, pri->slave);
-
-	if (err != 0)
-		printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err);
-	close(fd);
-	close(pri->slave);
-	pri->slave = -1;
-}
-
-int slip_user_read(int fd, void *buf, int len, struct slip_data *pri)
-{
-	return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slip_user_write(int fd, void *buf, int len, struct slip_data *pri)
-{
-	return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-static void slip_add_addr(unsigned char *addr, unsigned char *netmask,
-			  void *data)
-{
-	struct slip_data *pri = data;
-
-	if (pri->slave < 0)
-		return;
-	open_addr(addr, netmask, pri->name);
-}
-
-static void slip_del_addr(unsigned char *addr, unsigned char *netmask,
-			    void *data)
-{
-	struct slip_data *pri = data;
-
-	if (pri->slave < 0)
-		return;
-	close_addr(addr, netmask, pri->name);
-}
-
-const struct net_user_info slip_user_info = {
-	.init		= slip_user_init,
-	.open		= slip_open,
-	.close	 	= slip_close,
-	.remove	 	= NULL,
-	.add_address	= slip_add_addr,
-	.delete_address = slip_del_addr,
-	.mtu		= BUF_SIZE,
-	.max_packet	= BUF_SIZE,
-};
diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h
deleted file mode 100644
index 89ccf83b7577..000000000000
--- a/arch/um/drivers/slirp.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __UM_SLIRP_H
-#define __UM_SLIRP_H
-
-#include "slip_common.h"
-
-#define SLIRP_MAX_ARGS 100
-/*
- * XXX this next definition is here because I don't understand why this
- * initializer doesn't work in slirp_kern.c:
- *
- *   argv :  { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] },
- *
- * or why I can't typecast like this:
- *
- *   argv :  (char* [SLIRP_MAX_ARGS])(init->argv), 
- */
-struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; };
-
-struct slirp_data {
-	void *dev;
-	struct arg_list_dummy_wrapper argw;
-	int pid;
-	int slave;
-	struct slip_proto slip;
-};
-
-extern const struct net_user_info slirp_user_info;
-
-extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri);
-extern int slirp_user_write(int fd, void *buf, int len,
-			    struct slirp_data *pri);
-
-#endif
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
deleted file mode 100644
index 4ef11ca7cacf..000000000000
--- a/arch/um/drivers/slirp_kern.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "slirp.h"
-
-struct slirp_init {
-	struct arg_list_dummy_wrapper argw;  /* XXX should be simpler... */
-};
-
-void slirp_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *private;
-	struct slirp_data *spri;
-	struct slirp_init *init = data;
-	int i;
-
-	private = netdev_priv(dev);
-	spri = (struct slirp_data *) private->user;
-
-	spri->argw = init->argw;
-	spri->pid = -1;
-	spri->slave = -1;
-	spri->dev = dev;
-
-	slip_proto_init(&spri->slip);
-
-	dev->hard_header_len = 0;
-	dev->header_ops = NULL;
-	dev->addr_len = 0;
-	dev->type = ARPHRD_SLIP;
-	dev->tx_queue_len = 256;
-	dev->flags = IFF_NOARP;
-	printk("SLIRP backend - command line:");
-	for (i = 0; spri->argw.argv[i] != NULL; i++)
-		printk(" '%s'",spri->argw.argv[i]);
-	printk("\n");
-}
-
-static unsigned short slirp_protocol(struct sk_buff *skbuff)
-{
-	return htons(ETH_P_IP);
-}
-
-static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
-			       (struct slirp_data *) &lp->user);
-}
-
-static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return slirp_user_write(fd, skb->data, skb->len,
-				(struct slirp_data *) &lp->user);
-}
-
-const struct net_kern_info slirp_kern_info = {
-	.init			= slirp_init,
-	.protocol		= slirp_protocol,
-	.read			= slirp_read,
-	.write			= slirp_write,
-};
-
-static int slirp_setup(char *str, char **mac_out, void *data)
-{
-	struct slirp_init *init = data;
-	int i=0;
-
-	*init = ((struct slirp_init) { .argw = { { "slirp", NULL  } } });
-
-	str = split_if_spec(str, mac_out, NULL);
-
-	if (str == NULL) /* no command line given after MAC addr */
-		return 1;
-
-	do {
-		if (i >= SLIRP_MAX_ARGS - 1) {
-			printk(KERN_WARNING "slirp_setup: truncating slirp "
-			       "arguments\n");
-			break;
-		}
-		init->argw.argv[i++] = str;
-		while(*str && *str!=',') {
-			if (*str == '_')
-				*str=' ';
-			str++;
-		}
-		if (*str != ',')
-			break;
-		*str++ = '\0';
-	} while (1);
-
-	init->argw.argv[i] = NULL;
-	return 1;
-}
-
-static struct transport slirp_transport = {
-	.list 		= LIST_HEAD_INIT(slirp_transport.list),
-	.name 		= "slirp",
-	.setup  	= slirp_setup,
-	.user 		= &slirp_user_info,
-	.kern 		= &slirp_kern_info,
-	.private_size 	= sizeof(struct slirp_data),
-	.setup_size 	= sizeof(struct slirp_init),
-};
-
-static int register_slirp(void)
-{
-	register_transport(&slirp_transport);
-	return 0;
-}
-
-late_initcall(register_slirp);
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
deleted file mode 100644
index c999d187abb9..000000000000
--- a/arch/um/drivers/slirp_user.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL.
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/wait.h>
-#include <net_user.h>
-#include <os.h>
-#include "slirp.h"
-
-static int slirp_user_init(void *data, void *dev)
-{
-	struct slirp_data *pri = data;
-
-	pri->dev = dev;
-	return 0;
-}
-
-struct slirp_pre_exec_data {
-	int stdin;
-	int stdout;
-};
-
-static void slirp_pre_exec(void *arg)
-{
-	struct slirp_pre_exec_data *data = arg;
-
-	if (data->stdin != -1)
-		dup2(data->stdin, 0);
-	if (data->stdout != -1)
-		dup2(data->stdout, 1);
-}
-
-static int slirp_tramp(char **argv, int fd)
-{
-	struct slirp_pre_exec_data pe_data;
-	int pid;
-
-	pe_data.stdin = fd;
-	pe_data.stdout = fd;
-	pid = run_helper(slirp_pre_exec, &pe_data, argv);
-
-	return pid;
-}
-
-static int slirp_open(void *data)
-{
-	struct slirp_data *pri = data;
-	int fds[2], pid, err;
-
-	err = os_pipe(fds, 1, 1);
-	if (err)
-		return err;
-
-	err = slirp_tramp(pri->argw.argv, fds[1]);
-	if (err < 0) {
-		printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err);
-		goto out;
-	}
-	pid = err;
-
-	pri->slave = fds[1];
-	pri->slip.pos = 0;
-	pri->slip.esc = 0;
-	pri->pid = err;
-
-	return fds[0];
-out:
-	close(fds[0]);
-	close(fds[1]);
-	return err;
-}
-
-static void slirp_close(int fd, void *data)
-{
-	struct slirp_data *pri = data;
-	int err;
-
-	close(fd);
-	close(pri->slave);
-
-	pri->slave = -1;
-
-	if (pri->pid<1) {
-		printk(UM_KERN_ERR "slirp_close: no child process to shut "
-		       "down\n");
-		return;
-	}
-
-#if 0
-	if (kill(pri->pid, SIGHUP)<0) {
-		printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed "
-		       "(%d)\n", pri->pid, errno);
-	}
-#endif
-	err = helper_wait(pri->pid);
-	if (err < 0)
-		return;
-
-	pri->pid = -1;
-}
-
-int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri)
-{
-	return slip_proto_read(fd, buf, len, &pri->slip);
-}
-
-int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri)
-{
-	return slip_proto_write(fd, buf, len, &pri->slip);
-}
-
-const struct net_user_info slirp_user_info = {
-	.init		= slirp_user_init,
-	.open		= slirp_open,
-	.close	 	= slirp_close,
-	.remove	 	= NULL,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= BUF_SIZE,
-	.max_packet	= BUF_SIZE,
-};
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index b8d14fa52059..8006a5bd578c 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <linux/fs.h>
@@ -12,7 +12,6 @@
 #include <linux/console.h>
 #include <asm/termbits.h>
 #include <asm/irq.h>
-#include "ssl.h"
 #include "chan.h"
 #include <init.h>
 #include <irq_user.h>
@@ -48,9 +47,7 @@ static struct line_driver driver = {
 	.minor_start 		= 64,
 	.type 		 	= TTY_DRIVER_TYPE_SERIAL,
 	.subtype 	 	= 0,
-	.read_irq 		= SSL_IRQ,
 	.read_irq_name 		= "ssl",
-	.write_irq 		= SSL_WRITE_IRQ,
 	.write_irq_name 	= "ssl-write",
 	.mc  = {
 		.list		= LIST_HEAD_INIT(driver.mc.list),
@@ -96,12 +93,10 @@ static const struct tty_operations ssl_ops = {
 	.open 	 		= line_open,
 	.close 	 		= line_close,
 	.write 	 		= line_write,
-	.put_char 		= line_put_char,
 	.write_room		= line_write_room,
 	.chars_in_buffer 	= line_chars_in_buffer,
 	.flush_buffer 		= line_flush_buffer,
 	.flush_chars 		= line_flush_chars,
-	.set_termios 		= line_set_termios,
 	.throttle 		= line_throttle,
 	.unthrottle 		= line_unthrottle,
 	.install		= ssl_install,
@@ -111,7 +106,7 @@ static const struct tty_operations ssl_ops = {
 /* Changed by ssl_init and referenced by ssl_exit, which are both serialized
  * by being an initcall and exitcall, respectively.
  */
-static int ssl_init_done = 0;
+static int ssl_init_done;
 
 static void ssl_console_write(struct console *c, const char *string,
 			      unsigned len)
@@ -197,3 +192,14 @@ static int ssl_chan_setup(char *str)
 
 __setup("ssl", ssl_chan_setup);
 __channel_help(ssl_chan_setup, "ssl");
+
+static int ssl_non_raw_setup(char *str)
+{
+	opts.raw = 0;
+	return 1;
+}
+__setup("ssl-non-raw", ssl_non_raw_setup);
+__uml_help(ssl_non_raw_setup,
+"ssl-non-raw\n"
+"    Set serial lines to non-raw mode.\n\n"
+);
diff --git a/arch/um/drivers/ssl.h b/arch/um/drivers/ssl.h
deleted file mode 100644
index 314d17725ce6..000000000000
--- a/arch/um/drivers/ssl.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SSL_H__
-#define __SSL_H__
-
-extern int ssl_read(int fd, int line);
-extern void ssl_receive_char(int line, char ch);
-
-#endif
-
diff --git a/arch/um/drivers/stderr_console.c b/arch/um/drivers/stderr_console.c
index d07a97f8b994..ecc3a5814932 100644
--- a/arch/um/drivers/stderr_console.c
+++ b/arch/um/drivers/stderr_console.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/console.h>
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 7b361f36ca96..1c239737d88e 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <linux/posix_types.h>
@@ -53,9 +53,7 @@ static struct line_driver driver = {
 	.minor_start 		= 0,
 	.type 		 	= TTY_DRIVER_TYPE_CONSOLE,
 	.subtype 	 	= SYSTEM_TYPE_CONSOLE,
-	.read_irq 		= CONSOLE_IRQ,
 	.read_irq_name 		= "console",
-	.write_irq 		= CONSOLE_WRITE_IRQ,
 	.write_irq_name 	= "console-write",
 	.mc  = {
 		.list		= LIST_HEAD_INIT(driver.mc.list),
@@ -90,7 +88,7 @@ static int con_remove(int n, char **error_out)
 }
 
 /* Set in an initcall, checked in an exitcall */
-static int con_init_done = 0;
+static int con_init_done;
 
 static int con_install(struct tty_driver *driver, struct tty_struct *tty)
 {
@@ -102,12 +100,10 @@ static const struct tty_operations console_ops = {
 	.install		= con_install,
 	.close 	 		= line_close,
 	.write 	 		= line_write,
-	.put_char 		= line_put_char,
 	.write_room		= line_write_room,
 	.chars_in_buffer 	= line_chars_in_buffer,
 	.flush_buffer 		= line_flush_buffer,
 	.flush_chars 		= line_flush_chars,
-	.set_termios 		= line_set_termios,
 	.throttle 		= line_throttle,
 	.unthrottle 		= line_unthrottle,
 	.hangup			= line_hangup,
@@ -192,6 +188,9 @@ __uml_exitcall(console_exit);
 
 static int console_chan_setup(char *str)
 {
+	if (!strncmp(str, "sole=", 5))	/* console= option specifies tty */
+		return 0;
+
 	line_setup(vt_conf, MAX_TTYS, &def_conf, str, "console");
 	return 1;
 }
diff --git a/arch/um/drivers/stdio_console.h b/arch/um/drivers/stdio_console.h
index 6d8275f71fd4..3a409ec23d63 100644
--- a/arch/um/drivers/stdio_console.h
+++ b/arch/um/drivers/stdio_console.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __STDIO_CONSOLE_H
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index eaa201bca5ed..884a762d21c7 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
index 3845051f1b10..2985c14661f4 100644
--- a/arch/um/drivers/ubd.h
+++ b/arch/um/drivers/ubd.h
@@ -1,16 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_UBD_USER_H
 #define __UM_UBD_USER_H
 
-extern void ignore_sigwinch_sig(void);
-extern int start_io_thread(unsigned long sp, int *fds_out);
-extern int io_thread(void *arg);
+#include <os.h>
+
+int start_io_thread(struct os_helper_thread **td_out, int *fd_out);
+void *io_thread(void *arg);
 extern int kernel_fd;
 
+extern int ubd_read_poll(int timeout);
+extern int ubd_write_poll(int timeout);
+
+#define UBD_REQ_BUFFER_SIZE 64
+
 #endif
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 879990cb66c6..37455e74d314 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1,6 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2018 Cambridge Greys Ltd
+ * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 /* 2001-09-28...2002-04-17
@@ -22,8 +24,10 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
+#include <linux/major.h>
 #include <linux/cdrom.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -32,7 +36,6 @@
 #include <linux/vmalloc.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
-#include <asm/tlbflush.h>
 #include <kern_util.h>
 #include "mconsole_kern.h"
 #include <init.h>
@@ -41,23 +44,41 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE };
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
+
+struct io_desc {
+	char *buffer;
+	unsigned long length;
+	unsigned long sector_mask;
+	unsigned long long cow_offset;
+	unsigned long bitmap_words[2];
+};
 
 struct io_thread_req {
 	struct request *req;
-	enum ubd_req op;
 	int fds[2];
 	unsigned long offsets[2];
 	unsigned long long offset;
-	unsigned long length;
-	char *buffer;
 	int sectorsize;
-	unsigned long sector_mask;
-	unsigned long long cow_offset;
-	unsigned long bitmap_words[2];
 	int error;
+
+	int desc_cnt;
+	/* io_desc has to be the last element of the struct */
+	struct io_desc io_desc[];
 };
 
+
+static struct io_thread_req * (*irq_req_buffer)[];
+static struct io_thread_req *irq_remainder;
+static int irq_remainder_size;
+
+static struct io_thread_req * (*io_req_buffer)[];
+static struct io_thread_req *io_remainder;
+static int io_remainder_size;
+
+
+
 static inline int ubd_test_bit(__u64 bit, unsigned char *data)
 {
 	__u64 n;
@@ -84,29 +105,20 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
 #define DRIVER_NAME "uml-blkdev"
 
 static DEFINE_MUTEX(ubd_lock);
-static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 
-static int ubd_open(struct block_device *bdev, fmode_t mode);
-static void ubd_release(struct gendisk *disk, fmode_t mode);
-static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
+static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
 		     unsigned int cmd, unsigned long arg);
-static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+static int ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo);
 
 #define MAX_DEV (16)
 
 static const struct block_device_operations ubd_blops = {
         .owner		= THIS_MODULE,
-        .open		= ubd_open,
-        .release	= ubd_release,
         .ioctl		= ubd_ioctl,
+        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 	.getgeo		= ubd_getgeo,
 };
 
-/* Protected by ubd_lock */
-static int fake_major = UBD_MAJOR;
-static struct gendisk *ubd_gendisk[MAX_DEV];
-static struct gendisk *fake_gendisk[MAX_DEV];
-
 #ifdef CONFIG_BLK_DEV_UBD_SYNC
 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 					 .cl = 1 })
@@ -130,25 +142,22 @@ struct cow {
 #define MAX_SG 64
 
 struct ubd {
-	struct list_head restart;
 	/* name (and fd, below) of the file opened for writing, either the
 	 * backing or the cow file. */
 	char *file;
-	int count;
+	char *serial;
 	int fd;
 	__u64 size;
 	struct openflags boot_openflags;
 	struct openflags openflags;
 	unsigned shared:1;
 	unsigned no_cow:1;
+	unsigned no_trim:1;
 	struct cow cow;
 	struct platform_device pdev;
-	struct request_queue *queue;
+	struct gendisk *disk;
+	struct blk_mq_tag_set tag_set;
 	spinlock_t lock;
-	struct scatterlist sg[MAX_SG];
-	struct request *request;
-	int start_sg, end_sg;
-	sector_t rq_pos;
 };
 
 #define DEFAULT_COW { \
@@ -161,81 +170,34 @@ struct ubd {
 
 #define DEFAULT_UBD { \
 	.file = 		NULL, \
-	.count =		0, \
+	.serial =		NULL, \
 	.fd =			-1, \
 	.size =			-1, \
 	.boot_openflags =	OPEN_FLAGS, \
 	.openflags =		OPEN_FLAGS, \
 	.no_cow =               0, \
+	.no_trim =		0, \
 	.shared =		0, \
 	.cow =			DEFAULT_COW, \
 	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
-	.request =		NULL, \
-	.start_sg =		0, \
-	.end_sg =		0, \
-	.rq_pos =		0, \
 }
 
 /* Protected by ubd_lock */
 static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 
-/* Only changed by fake_ide_setup which is a setup */
-static int fake_ide = 0;
-static struct proc_dir_entry *proc_ide_root = NULL;
-static struct proc_dir_entry *proc_ide = NULL;
-
-static void make_proc_ide(void)
-{
-	proc_ide_root = proc_mkdir("ide", NULL);
-	proc_ide = proc_mkdir("ide0", proc_ide_root);
-}
-
-static int fake_ide_media_proc_show(struct seq_file *m, void *v)
-{
-	seq_puts(m, "disk\n");
-	return 0;
-}
-
-static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, fake_ide_media_proc_show, NULL);
-}
-
-static const struct file_operations fake_ide_media_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= fake_ide_media_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static void make_ide_entries(const char *dev_name)
-{
-	struct proc_dir_entry *dir, *ent;
-	char name[64];
-
-	if(proc_ide_root == NULL) make_proc_ide();
-
-	dir = proc_mkdir(dev_name, proc_ide);
-	if(!dir) return;
-
-	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
-	if(!ent) return;
-	snprintf(name, sizeof(name), "ide0/%s", dev_name);
-	proc_symlink(dev_name, proc_ide_root, name);
-}
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd);
 
 static int fake_ide_setup(char *str)
 {
-	fake_ide = 1;
+	pr_warn("The fake_ide option has been removed\n");
 	return 1;
 }
-
 __setup("fake_ide", fake_ide_setup);
 
 __uml_help(fake_ide_setup,
 "fake_ide\n"
-"    Create ide0 entries that map onto ubd devices.\n\n"
+"    Obsolete stub.\n\n"
 );
 
 static int parse_unit(char **ptr)
@@ -265,42 +227,20 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 {
 	struct ubd *ubd_dev;
 	struct openflags flags = global_openflags;
-	char *backing_file;
+	char *file, *backing_file, *serial;
 	int n, err = 0, i;
 
 	if(index_out) *index_out = -1;
 	n = *str;
 	if(n == '='){
-		char *end;
-		int major;
-
 		str++;
 		if(!strcmp(str, "sync")){
 			global_openflags = of_sync(global_openflags);
-			goto out1;
-		}
-
-		err = -EINVAL;
-		major = simple_strtoul(str, &end, 0);
-		if((*end != '\0') || (end == str)){
-			*error_out = "Didn't parse major number";
-			goto out1;
+			return err;
 		}
 
-		mutex_lock(&ubd_lock);
-		if (fake_major != UBD_MAJOR) {
-			*error_out = "Can't assign a fake major twice";
-			goto out1;
-		}
-
-		fake_major = major;
-
-		printk(KERN_INFO "Setting extra ubd major number to %d\n",
-		       major);
-		err = 0;
-	out1:
-		mutex_unlock(&ubd_lock);
-		return err;
+		pr_warn("fake major not supported any more\n");
+		return 0;
 	}
 
 	n = parse_unit(&str);
@@ -326,7 +266,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		*index_out = n;
 
 	err = -EINVAL;
-	for (i = 0; i < sizeof("rscd="); i++) {
+	for (i = 0; i < sizeof("rscdt="); i++) {
 		switch (*str) {
 		case 'r':
 			flags.w = 0;
@@ -340,12 +280,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 		case 'c':
 			ubd_dev->shared = 1;
 			break;
+		case 't':
+			ubd_dev->no_trim = 1;
+			break;
 		case '=':
 			str++;
 			goto break_loop;
 		default:
 			*error_out = "Expected '=' or flag letter "
-				"(r, s, c, or d)";
+				"(r, s, c, t or d)";
 			goto out;
 		}
 		str++;
@@ -358,24 +301,27 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 	goto out;
 
 break_loop:
-	backing_file = strchr(str, ',');
+	file = strsep(&str, ",:");
+	if (*file == '\0')
+		file = NULL;
 
-	if (backing_file == NULL)
-		backing_file = strchr(str, ':');
+	backing_file = strsep(&str, ",:");
+	if (backing_file && *backing_file == '\0')
+		backing_file = NULL;
 
-	if(backing_file != NULL){
-		if(ubd_dev->no_cow){
-			*error_out = "Can't specify both 'd' and a cow file";
-			goto out;
-		}
-		else {
-			*backing_file = '\0';
-			backing_file++;
-		}
+	serial = strsep(&str, ",:");
+	if (serial && *serial == '\0')
+		serial = NULL;
+
+	if (backing_file && ubd_dev->no_cow) {
+		*error_out = "Can't specify both 'd' and a cow file";
+		goto out;
 	}
+
 	err = 0;
-	ubd_dev->file = str;
+	ubd_dev->file = file;
 	ubd_dev->cow.file = backing_file;
+	ubd_dev->serial = serial;
 	ubd_dev->boot_openflags = flags;
 out:
 	mutex_unlock(&ubd_lock);
@@ -396,7 +342,7 @@ static int ubd_setup(char *str)
 
 __setup("ubd", ubd_setup);
 __uml_help(ubd_setup,
-"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
+"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 "    This is used to associate a device with a file in the underlying\n"
 "    filesystem. When specifying two filenames, the first one is the\n"
 "    COW name and the second is the backing file name. As separator you can\n"
@@ -418,6 +364,13 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
+"    An optional device serial number can be exposed using the serial parameter\n"
+"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
+"    useful when a unique number should be given to the device. Note when\n"
+"    specifying a label, the filename2 must be also presented. It can be\n"
+"    an empty string, in which case the backing file is not used:\n"
+"       ubd0=File,,Serial\n\n"
 );
 
 static int udb_setup(char *str)
@@ -436,60 +389,97 @@ __uml_help(udb_setup,
 "    in the boot output.\n\n"
 );
 
-static void do_ubd_request(struct request_queue * q);
-
 /* Only changed by ubd_init, which is an initcall. */
 static int thread_fd = -1;
-static LIST_HEAD(restart);
 
-/* XXX - move this inside ubd_intr. */
-/* Called without dev->lock held, and only in interrupt context. */
-static void ubd_handler(void)
+/* Function to read several request pointers at a time
+* handling fractional reads if (and as) needed
+*/
+
+static int bulk_req_safe_read(
+	int fd,
+	struct io_thread_req * (*request_buffer)[],
+	struct io_thread_req **remainder,
+	int *remainder_size,
+	int max_recs
+	)
 {
-	struct io_thread_req *req;
-	struct ubd *ubd;
-	struct list_head *list, *next_ele;
-	unsigned long flags;
-	int n;
+	int n = 0;
+	int res = 0;
+
+	if (*remainder_size > 0) {
+		memmove(
+			(char *) request_buffer,
+			(char *) remainder, *remainder_size
+		);
+		n = *remainder_size;
+	}
 
-	while(1){
-		n = os_read_file(thread_fd, &req,
-				 sizeof(struct io_thread_req *));
-		if(n != sizeof(req)){
-			if(n == -EAGAIN)
-				break;
-			printk(KERN_ERR "spurious interrupt in ubd_handler, "
-			       "err = %d\n", -n);
-			return;
+	res = os_read_file(
+			fd,
+			((char *) request_buffer) + *remainder_size,
+			sizeof(struct io_thread_req *)*max_recs
+				- *remainder_size
+		);
+	if (res > 0) {
+		n += res;
+		if ((n % sizeof(struct io_thread_req *)) > 0) {
+			/*
+			* Read somehow returned not a multiple of dword
+			* theoretically possible, but never observed in the
+			* wild, so read routine must be able to handle it
+			*/
+			*remainder_size = n % sizeof(struct io_thread_req *);
+			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
+			memmove(
+				remainder,
+				((char *) request_buffer) +
+					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
+				*remainder_size
+			);
+			n = n - *remainder_size;
 		}
-
-		blk_end_request(req->req, 0, req->length);
-		kfree(req);
+	} else {
+		n = res;
 	}
-	reactivate_fd(thread_fd, UBD_IRQ);
-
-	list_for_each_safe(list, next_ele, &restart){
-		ubd = container_of(list, struct ubd, restart);
-		list_del_init(&ubd->restart);
-		spin_lock_irqsave(&ubd->lock, flags);
-		do_ubd_request(ubd->queue);
-		spin_unlock_irqrestore(&ubd->lock, flags);
+	return n;
+}
+
+static void ubd_end_request(struct io_thread_req *io_req)
+{
+	if (io_req->error == BLK_STS_NOTSUPP) {
+		if (req_op(io_req->req) == REQ_OP_DISCARD)
+			blk_queue_disable_discard(io_req->req->q);
+		else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
+			blk_queue_disable_write_zeroes(io_req->req->q);
 	}
+	blk_mq_end_request(io_req->req, io_req->error);
+	kfree(io_req);
 }
 
 static irqreturn_t ubd_intr(int irq, void *dev)
 {
-	ubd_handler();
+	int len, i;
+
+	while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
+			&irq_remainder, &irq_remainder_size,
+			UBD_REQ_BUFFER_SIZE)) >= 0) {
+		for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
+			ubd_end_request((*irq_req_buffer)[i]);
+	}
+
+	if (len < 0 && len != -EAGAIN)
+		pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
 	return IRQ_HANDLED;
 }
 
 /* Only changed by ubd_init, which is an initcall. */
-static int io_pid = -1;
+static struct os_helper_thread *io_td;
 
 static void kill_io_thread(void)
 {
-	if(io_pid != -1)
-		os_kill_process(io_pid, 1);
+	if (io_td)
+		os_kill_helper_thread(io_td);
 }
 
 __uml_exitcall(kill_io_thread);
@@ -503,7 +493,7 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 	__u32 version;
 	__u32 align;
 	char *backing_file;
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	int sector_size;
 	int bitmap_offset;
@@ -535,20 +525,16 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 {
 	int err;
 
-	err = os_seek_file(fd, offset);
-	if (err < 0)
-		return err;
-
-	err = os_read_file(fd, buf, len);
+	err = os_pread_file(fd, buf, len, offset);
 	if (err < 0)
 		return err;
 
 	return 0;
 }
 
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 {
-	unsigned long modtime;
+	time64_t modtime;
 	unsigned long long actual;
 	int err;
 
@@ -574,7 +560,7 @@ static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 		return -EINVAL;
 	}
 	if (modtime != mtime) {
-		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 		       "backing file\n", mtime, modtime);
 		return -EINVAL;
 	}
@@ -617,7 +603,7 @@ static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 		  unsigned long *bitmap_len_out, int *data_offset_out,
 		  int *create_cow_out)
 {
-	time_t mtime;
+	time64_t mtime;
 	unsigned long long size;
 	__u32 version, align;
 	char *backing_file;
@@ -747,7 +733,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 
 	if((fd == -ENOENT) && create_cow){
 		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
-					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
+					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 					  &ubd_dev->cow.bitmap_offset,
 					  &ubd_dev->cow.bitmap_len,
 					  &ubd_dev->cow.data_offset);
@@ -765,15 +751,12 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	ubd_dev->fd = fd;
 
 	if(ubd_dev->cow.file != NULL){
-		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
-
 		err = -ENOMEM;
 		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 		if(ubd_dev->cow.bitmap == NULL){
 			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 			goto error;
 		}
-		flush_tlb_kernel_vm();
 
 		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 				      ubd_dev->cow.bitmap_offset,
@@ -796,102 +779,136 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 
 static void ubd_device_release(struct device *dev)
 {
-	struct ubd *ubd_dev = dev_get_drvdata(dev);
+	struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev);
 
-	blk_cleanup_queue(ubd_dev->queue);
+	blk_mq_free_tag_set(&ubd_dev->tag_set);
 	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 }
 
-static int ubd_disk_register(int major, u64 size, int unit,
-			     struct gendisk **disk_out)
+static ssize_t serial_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
-	struct gendisk *disk;
+	struct gendisk *disk = dev_to_disk(dev);
+	struct ubd *ubd_dev = disk->private_data;
 
-	disk = alloc_disk(1 << UBD_SHIFT);
-	if(disk == NULL)
-		return -ENOMEM;
+	if (!ubd_dev)
+		return 0;
 
-	disk->major = major;
-	disk->first_minor = unit << UBD_SHIFT;
-	disk->fops = &ubd_blops;
-	set_capacity(disk, size / 512);
-	if (major == UBD_MAJOR)
-		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
-	else
-		sprintf(disk->disk_name, "ubd_fake%d", unit);
-
-	/* sysfs register (not for ide fake devices) */
-	if (major == UBD_MAJOR) {
-		ubd_devs[unit].pdev.id   = unit;
-		ubd_devs[unit].pdev.name = DRIVER_NAME;
-		ubd_devs[unit].pdev.dev.release = ubd_device_release;
-		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
-		platform_device_register(&ubd_devs[unit].pdev);
-		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
-	}
+	return sprintf(buf, "%s", ubd_dev->serial);
+}
 
-	disk->private_data = &ubd_devs[unit];
-	disk->queue = ubd_devs[unit].queue;
-	add_disk(disk);
+static DEVICE_ATTR_RO(serial);
 
-	*disk_out = disk;
-	return 0;
+static struct attribute *ubd_attrs[] = {
+	&dev_attr_serial.attr,
+	NULL,
+};
+
+static umode_t ubd_attrs_are_visible(struct kobject *kobj,
+				     struct attribute *a, int n)
+{
+	return a->mode;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+static const struct attribute_group ubd_attr_group = {
+	.attrs = ubd_attrs,
+	.is_visible = ubd_attrs_are_visible,
+};
+
+static const struct attribute_group *ubd_attr_groups[] = {
+	&ubd_attr_group,
+	NULL,
+};
+
+#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
+
+static const struct blk_mq_ops ubd_mq_ops = {
+	.queue_rq = ubd_queue_rq,
+};
 
 static int ubd_add(int n, char **error_out)
 {
 	struct ubd *ubd_dev = &ubd_devs[n];
+	struct queue_limits lim = {
+		.max_segments		= MAX_SG,
+		.seg_boundary_mask	= PAGE_SIZE - 1,
+		.features		= BLK_FEAT_WRITE_CACHE,
+	};
+	struct gendisk *disk;
 	int err = 0;
 
 	if(ubd_dev->file == NULL)
 		goto out;
 
+	if (ubd_dev->cow.file)
+		lim.max_hw_sectors = 8 * sizeof(long);
+	if (!ubd_dev->no_trim) {
+		lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
+		lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
+	}
+
 	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 	if(err < 0){
 		*error_out = "Couldn't determine size of device's file";
 		goto out;
 	}
 
+	err = ubd_open_dev(ubd_dev);
+	if (err) {
+		pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
+			'a' + n, ubd_dev->file, -err);
+		goto out;
+	}
+
 	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 
-	INIT_LIST_HEAD(&ubd_dev->restart);
-	sg_init_table(ubd_dev->sg, MAX_SG);
+	ubd_dev->tag_set.ops = &ubd_mq_ops;
+	ubd_dev->tag_set.queue_depth = 64;
+	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
+	ubd_dev->tag_set.driver_data = ubd_dev;
+	ubd_dev->tag_set.nr_hw_queues = 1;
 
-	err = -ENOMEM;
-	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
-	if (ubd_dev->queue == NULL) {
-		*error_out = "Failed to initialize device queue";
-		goto out;
-	}
-	ubd_dev->queue->queuedata = ubd_dev;
+	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
+	if (err)
+		goto out_close;
 
-	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
-	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
-	if(err){
-		*error_out = "Failed to register device";
-		goto out_cleanup;
+	disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
+	if (IS_ERR(disk)) {
+		err = PTR_ERR(disk);
+		goto out_cleanup_tags;
 	}
 
-	if (fake_major != UBD_MAJOR)
-		ubd_disk_register(fake_major, ubd_dev->size, n,
-				  &fake_gendisk[n]);
+	disk->major = UBD_MAJOR;
+	disk->first_minor = n << UBD_SHIFT;
+	disk->minors = 1 << UBD_SHIFT;
+	disk->fops = &ubd_blops;
+	set_capacity(disk, ubd_dev->size / 512);
+	sprintf(disk->disk_name, "ubd%c", 'a' + n);
+	disk->private_data = ubd_dev;
+	set_disk_ro(disk, !ubd_dev->openflags.w);
 
-	/*
-	 * Perhaps this should also be under the "if (fake_major)" above
-	 * using the fake_disk->disk_name
-	 */
-	if (fake_ide)
-		make_ide_entries(ubd_gendisk[n]->disk_name);
+	ubd_dev->pdev.id = n;
+	ubd_dev->pdev.name = DRIVER_NAME;
+	ubd_dev->pdev.dev.release = ubd_device_release;
+	dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
+	platform_device_register(&ubd_dev->pdev);
 
-	err = 0;
+	err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
+	if (err)
+		goto out_cleanup_disk;
+
+	ubd_dev->disk = disk;
+
+	return 0;
+
+out_cleanup_disk:
+	put_disk(disk);
+out_cleanup_tags:
+	blk_mq_free_tag_set(&ubd_dev->tag_set);
+out_close:
+	ubd_close_dev(ubd_dev);
 out:
 	return err;
-
-out_cleanup:
-	blk_cleanup_queue(ubd_dev->queue);
-	goto out;
 }
 
 static int ubd_config(char *str, char **error_out)
@@ -975,7 +992,6 @@ static int ubd_id(char **str, int *start_out, int *end_out)
 
 static int ubd_remove(int n, char **error_out)
 {
-	struct gendisk *disk = ubd_gendisk[n];
 	struct ubd *ubd_dev;
 	int err = -ENODEV;
 
@@ -986,21 +1002,15 @@ static int ubd_remove(int n, char **error_out)
 	if(ubd_dev->file == NULL)
 		goto out;
 
-	/* you cannot remove a open disk */
-	err = -EBUSY;
-	if(ubd_dev->count > 0)
-		goto out;
-
-	ubd_gendisk[n] = NULL;
-	if(disk != NULL){
-		del_gendisk(disk);
-		put_disk(disk);
-	}
+	if (ubd_dev->disk) {
+		/* you cannot remove a open disk */
+		err = -EBUSY;
+		if (disk_openers(ubd_dev->disk))
+			goto out;
 
-	if(fake_gendisk[n] != NULL){
-		del_gendisk(fake_gendisk[n]);
-		put_disk(fake_gendisk[n]);
-		fake_gendisk[n] = NULL;
+		del_gendisk(ubd_dev->disk);
+		ubd_close_dev(ubd_dev);
+		put_disk(ubd_dev->disk);
 	}
 
 	err = 0;
@@ -1059,12 +1069,26 @@ static int __init ubd_init(void)
 	if (register_blkdev(UBD_MAJOR, "ubd"))
 		return -1;
 
-	if (fake_major != UBD_MAJOR) {
-		char name[sizeof("ubd_nnn\0")];
+	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
+				       sizeof(struct io_thread_req *),
+				       GFP_KERNEL
+		);
+	irq_remainder = 0;
 
-		snprintf(name, sizeof(name), "ubd_%d", fake_major);
-		if (register_blkdev(fake_major, "ubd"))
-			return -1;
+	if (irq_req_buffer == NULL) {
+		printk(KERN_ERR "Failed to initialize ubd buffering\n");
+		return -ENOMEM;
+	}
+	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
+				      sizeof(struct io_thread_req *),
+				      GFP_KERNEL
+		);
+
+	io_remainder = 0;
+
+	if (io_req_buffer == NULL) {
+		printk(KERN_ERR "Failed to initialize ubd buffering\n");
+		return -ENOMEM;
 	}
 	platform_driver_register(&ubd_driver);
 	mutex_lock(&ubd_lock);
@@ -1080,8 +1104,8 @@ static int __init ubd_init(void)
 
 late_initcall(ubd_init);
 
-static int __init ubd_driver_init(void){
-	unsigned long stack;
+static int __init ubd_driver_init(void)
+{
 	int err;
 
 	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
@@ -1090,73 +1114,31 @@ static int __init ubd_driver_init(void){
 		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
 		 * enough. So use anyway the io thread. */
 	}
-	stack = alloc_stack(0, 0);
-	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
-				 &thread_fd);
-	if(io_pid < 0){
+	err = start_io_thread(&io_td, &thread_fd);
+	if (err < 0) {
 		printk(KERN_ERR
 		       "ubd : Failed to start I/O thread (errno = %d) - "
-		       "falling back to synchronous I/O\n", -io_pid);
-		io_pid = -1;
+		       "falling back to synchronous I/O\n", -err);
 		return 0;
 	}
 	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
 			     0, "ubd", ubd_devs);
-	if(err != 0)
+	if(err < 0)
 		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
 	return 0;
 }
 
 device_initcall(ubd_driver_init);
 
-static int ubd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct ubd *ubd_dev = disk->private_data;
-	int err = 0;
-
-	mutex_lock(&ubd_mutex);
-	if(ubd_dev->count == 0){
-		err = ubd_open_dev(ubd_dev);
-		if(err){
-			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
-			       disk->disk_name, ubd_dev->file, -err);
-			goto out;
-		}
-	}
-	ubd_dev->count++;
-	set_disk_ro(disk, !ubd_dev->openflags.w);
-
-	/* This should no more be needed. And it didn't work anyway to exclude
-	 * read-write remounting of filesystems.*/
-	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
-	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
-	        err = -EROFS;
-	}*/
-out:
-	mutex_unlock(&ubd_mutex);
-	return err;
-}
-
-static void ubd_release(struct gendisk *disk, fmode_t mode)
-{
-	struct ubd *ubd_dev = disk->private_data;
-
-	mutex_lock(&ubd_mutex);
-	if(--ubd_dev->count == 0)
-		ubd_close_dev(ubd_dev);
-	mutex_unlock(&ubd_mutex);
-}
-
 static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
 			  __u64 *cow_offset, unsigned long *bitmap,
 			  __u64 bitmap_offset, unsigned long *bitmap_words,
 			  __u64 bitmap_len)
 {
-	__u64 sector = io_offset >> 9;
+	__u64 sector = io_offset >> SECTOR_SHIFT;
 	int i, update_bitmap = 0;
 
-	for(i = 0; i < length >> 9; i++){
+	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
 		if(cow_mask != NULL)
 			ubd_set_bit(i, (unsigned char *) cow_mask);
 		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
@@ -1187,115 +1169,164 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
 	*cow_offset += bitmap_offset;
 }
 
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
+static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
+		       unsigned long offset, unsigned long *bitmap,
 		       __u64 bitmap_offset, __u64 bitmap_len)
 {
-	__u64 sector = req->offset >> 9;
+	__u64 sector = offset >> SECTOR_SHIFT;
 	int i;
 
-	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
+	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
 		panic("Operation too long");
 
-	if(req->op == UBD_READ) {
-		for(i = 0; i < req->length >> 9; i++){
+	if (req_op(req->req) == REQ_OP_READ) {
+		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
 			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
 				ubd_set_bit(i, (unsigned char *)
-					    &req->sector_mask);
+					    &segment->sector_mask);
 		}
+	} else {
+		cowify_bitmap(offset, segment->length, &segment->sector_mask,
+			      &segment->cow_offset, bitmap, bitmap_offset,
+			      segment->bitmap_words, bitmap_len);
 	}
-	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
-			   &req->cow_offset, bitmap, bitmap_offset,
-			   req->bitmap_words, bitmap_len);
 }
 
-/* Called with dev->lock held */
-static void prepare_request(struct request *req, struct io_thread_req *io_req,
-			    unsigned long long offset, int page_offset,
-			    int len, struct page *page)
+static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
+			struct request *req)
 {
-	struct gendisk *disk = req->rq_disk;
-	struct ubd *ubd_dev = disk->private_data;
+	struct bio_vec bvec;
+	struct req_iterator iter;
+	int i = 0;
+	unsigned long byte_offset = io_req->offset;
+	enum req_op op = req_op(req);
+
+	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
+		io_req->io_desc[0].buffer = NULL;
+		io_req->io_desc[0].length = blk_rq_bytes(req);
+	} else {
+		rq_for_each_segment(bvec, req, iter) {
+			BUG_ON(i >= io_req->desc_cnt);
+
+			io_req->io_desc[i].buffer = bvec_virt(&bvec);
+			io_req->io_desc[i].length = bvec.bv_len;
+			i++;
+		}
+	}
+
+	if (dev->cow.file) {
+		for (i = 0; i < io_req->desc_cnt; i++) {
+			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
+				   dev->cow.bitmap, dev->cow.bitmap_offset,
+				   dev->cow.bitmap_len);
+			byte_offset += io_req->io_desc[i].length;
+		}
+
+	}
+}
+
+static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
+					   int desc_cnt)
+{
+	struct io_thread_req *io_req;
+	int i;
+
+	io_req = kmalloc(sizeof(*io_req) +
+			 (desc_cnt * sizeof(struct io_desc)),
+			 GFP_ATOMIC);
+	if (!io_req)
+		return NULL;
 
 	io_req->req = req;
-	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
-		ubd_dev->fd;
-	io_req->fds[1] = ubd_dev->fd;
-	io_req->cow_offset = -1;
-	io_req->offset = offset;
-	io_req->length = len;
+	if (dev->cow.file)
+		io_req->fds[0] = dev->cow.fd;
+	else
+		io_req->fds[0] = dev->fd;
 	io_req->error = 0;
-	io_req->sector_mask = 0;
-
-	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
+	io_req->sectorsize = SECTOR_SIZE;
+	io_req->fds[1] = dev->fd;
+	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
 	io_req->offsets[0] = 0;
-	io_req->offsets[1] = ubd_dev->cow.data_offset;
-	io_req->buffer = page_address(page) + page_offset;
-	io_req->sectorsize = 1 << 9;
+	io_req->offsets[1] = dev->cow.data_offset;
 
-	if(ubd_dev->cow.file != NULL)
-		cowify_req(io_req, ubd_dev->cow.bitmap,
-			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
+	for (i = 0 ; i < desc_cnt; i++) {
+		io_req->io_desc[i].sector_mask = 0;
+		io_req->io_desc[i].cow_offset = -1;
+	}
 
+	return io_req;
 }
 
-/* Called with dev->lock held */
-static void do_ubd_request(struct request_queue *q)
+static int ubd_submit_request(struct ubd *dev, struct request *req)
 {
+	int segs = 0;
 	struct io_thread_req *io_req;
-	struct request *req;
-	int n;
+	int ret;
+	enum req_op op = req_op(req);
 
-	while(1){
-		struct ubd *dev = q->queuedata;
-		if(dev->end_sg == 0){
-			struct request *req = blk_fetch_request(q);
-			if(req == NULL)
-				return;
+	if (op == REQ_OP_FLUSH)
+		segs = 0;
+	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
+		segs = 1;
+	else
+		segs = blk_rq_nr_phys_segments(req);
 
-			dev->request = req;
-			dev->rq_pos = blk_rq_pos(req);
-			dev->start_sg = 0;
-			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
-		}
+	io_req = ubd_alloc_req(dev, req, segs);
+	if (!io_req)
+		return -ENOMEM;
 
-		req = dev->request;
-		while(dev->start_sg < dev->end_sg){
-			struct scatterlist *sg = &dev->sg[dev->start_sg];
+	io_req->desc_cnt = segs;
+	if (segs)
+		ubd_map_req(dev, io_req, req);
 
-			io_req = kmalloc(sizeof(struct io_thread_req),
-					 GFP_ATOMIC);
-			if(io_req == NULL){
-				if(list_empty(&dev->restart))
-					list_add(&dev->restart, &restart);
-				return;
-			}
-			prepare_request(req, io_req,
-					(unsigned long long)dev->rq_pos << 9,
-					sg->offset, sg->length, sg_page(sg));
-
-			n = os_write_file(thread_fd, &io_req,
-					  sizeof(struct io_thread_req *));
-			if(n != sizeof(struct io_thread_req *)){
-				if(n != -EAGAIN)
-					printk("write to io thread failed, "
-					       "errno = %d\n", -n);
-				else if(list_empty(&dev->restart))
-					list_add(&dev->restart, &restart);
-				kfree(io_req);
-				return;
-			}
+	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
+	if (ret != sizeof(io_req)) {
+		if (ret != -EAGAIN)
+			pr_err("write to io thread failed: %d\n", -ret);
+		kfree(io_req);
+	}
+	return ret;
+}
 
-			dev->rq_pos += sg->length >> 9;
-			dev->start_sg++;
-		}
-		dev->end_sg = 0;
-		dev->request = NULL;
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd)
+{
+	struct ubd *ubd_dev = hctx->queue->queuedata;
+	struct request *req = bd->rq;
+	int ret = 0, res = BLK_STS_OK;
+
+	blk_mq_start_request(req);
+
+	spin_lock_irq(&ubd_dev->lock);
+
+	switch (req_op(req)) {
+	case REQ_OP_FLUSH:
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
+	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
+		ret = ubd_submit_request(ubd_dev, req);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		res = BLK_STS_NOTSUPP;
 	}
+
+	spin_unlock_irq(&ubd_dev->lock);
+
+	if (ret < 0) {
+		if (ret == -ENOMEM)
+			res = BLK_STS_RESOURCE;
+		else
+			res = BLK_STS_DEV_RESOURCE;
+	}
+
+	return res;
 }
 
-static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo)
 {
-	struct ubd *ubd_dev = bdev->bd_disk->private_data;
+	struct ubd *ubd_dev = disk->private_data;
 
 	geo->heads = 128;
 	geo->sectors = 32;
@@ -1303,7 +1334,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
-static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
+static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
 		     unsigned int cmd, unsigned long arg)
 {
 	struct ubd *ubd_dev = bdev->bd_disk->private_data;
@@ -1335,87 +1366,124 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 	return -EINVAL;
 }
 
-static int update_bitmap(struct io_thread_req *req)
+static int map_error(int error_code)
 {
-	int n;
+	switch (error_code) {
+	case 0:
+		return BLK_STS_OK;
+	case ENOSYS:
+	case EOPNOTSUPP:
+		return BLK_STS_NOTSUPP;
+	case ENOSPC:
+		return BLK_STS_NOSPC;
+	}
+	return BLK_STS_IOERR;
+}
 
-	if(req->cow_offset == -1)
-		return 0;
+/*
+ * Everything from here onwards *IS NOT PART OF THE KERNEL*
+ *
+ * The following functions are part of UML hypervisor code.
+ * All functions from here onwards are executed as a helper
+ * thread and are not allowed to execute any kernel functions.
+ *
+ * Any communication must occur strictly via shared memory and IPC.
+ *
+ * Do not add printks, locks, kernel memory operations, etc - it
+ * will result in unpredictable behaviour and/or crashes.
+ */
 
-	n = os_seek_file(req->fds[1], req->cow_offset);
-	if(n < 0){
-		printk("do_io - bitmap lseek failed : err = %d\n", -n);
-		return 1;
-	}
+static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
+{
+	int n;
 
-	n = os_write_file(req->fds[1], &req->bitmap_words,
-			  sizeof(req->bitmap_words));
-	if(n != sizeof(req->bitmap_words)){
-		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-		       req->fds[1]);
-		return 1;
-	}
+	if (segment->cow_offset == -1)
+		return map_error(0);
 
-	return 0;
+	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
+			  sizeof(segment->bitmap_words), segment->cow_offset);
+	if (n != sizeof(segment->bitmap_words))
+		return map_error(-n);
+
+	return map_error(0);
 }
 
-static void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req, struct io_desc *desc)
 {
-	char *buf;
+	char *buf = NULL;
 	unsigned long len;
 	int n, nsectors, start, end, bit;
-	int err;
 	__u64 off;
 
-	nsectors = req->length / req->sectorsize;
+	/* FLUSH is really a special case, we cannot "case" it with others */
+
+	if (req_op(req->req) == REQ_OP_FLUSH) {
+		/* fds[0] is always either the rw image or our cow file */
+		req->error = map_error(-os_sync_file(req->fds[0]));
+		return;
+	}
+
+	nsectors = desc->length / req->sectorsize;
 	start = 0;
 	do {
-		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
+		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
 		end = start;
 		while((end < nsectors) &&
-		      (ubd_test_bit(end, (unsigned char *)
-				    &req->sector_mask) == bit))
+		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
 			end++;
 
 		off = req->offset + req->offsets[bit] +
 			start * req->sectorsize;
 		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
+		if (desc->buffer != NULL)
+			buf = &desc->buffer[start * req->sectorsize];
 
-		err = os_seek_file(req->fds[bit], off);
-		if(err < 0){
-			printk("do_io - lseek failed : err = %d\n", -err);
-			req->error = 1;
-			return;
-		}
-		if(req->op == UBD_READ){
+		switch (req_op(req->req)) {
+		case REQ_OP_READ:
 			n = 0;
 			do {
 				buf = &buf[n];
 				len -= n;
-				n = os_read_file(req->fds[bit], buf, len);
+				n = os_pread_file(req->fds[bit], buf, len, off);
 				if (n < 0) {
-					printk("do_io - read failed, err = %d "
-					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
+					req->error = map_error(-n);
 					return;
 				}
 			} while((n < len) && (n != 0));
 			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
-			n = os_write_file(req->fds[bit], buf, len);
+			break;
+		case REQ_OP_WRITE:
+			n = os_pwrite_file(req->fds[bit], buf, len, off);
 			if(n != len){
-				printk("do_io - write failed err = %d "
-				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
+				req->error = map_error(-n);
 				return;
 			}
+			break;
+		case REQ_OP_DISCARD:
+			n = os_falloc_punch(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		case REQ_OP_WRITE_ZEROES:
+			n = os_falloc_zeroes(req->fds[bit], off, len);
+			if (n) {
+				req->error = map_error(-n);
+				return;
+			}
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			req->error = BLK_STS_NOTSUPP;
+			return;
 		}
 
 		start = end;
 	} while(start < nsectors);
 
-	req->error = update_bitmap(req);
+	req->offset += len;
+	req->error = update_bitmap(req, desc);
 }
 
 /* Changed in start_io_thread, which is serialized by being called only
@@ -1424,35 +1492,53 @@ static void do_io(struct io_thread_req *req)
 int kernel_fd = -1;
 
 /* Only changed by the io thread. XXX: currently unused. */
-static int io_count = 0;
+static int io_count;
 
-int io_thread(void *arg)
+void *io_thread(void *arg)
 {
-	struct io_thread_req *req;
-	int n;
+	int n, count, written, res;
+
+	os_fix_helper_thread_signals();
 
-	ignore_sigwinch_sig();
 	while(1){
-		n = os_read_file(kernel_fd, &req,
-				 sizeof(struct io_thread_req *));
-		if(n != sizeof(struct io_thread_req *)){
-			if(n < 0)
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d\n", kernel_fd, -n);
-			else {
-				printk("io_thread - short read, fd = %d, "
-				       "length = %d\n", kernel_fd, n);
-			}
+		n = bulk_req_safe_read(
+			kernel_fd,
+			io_req_buffer,
+			&io_remainder,
+			&io_remainder_size,
+			UBD_REQ_BUFFER_SIZE
+		);
+		if (n <= 0) {
+			if (n == -EAGAIN)
+				ubd_read_poll(-1);
+
 			continue;
 		}
-		io_count++;
-		do_io(req);
-		n = os_write_file(kernel_fd, &req,
-				  sizeof(struct io_thread_req *));
-		if(n != sizeof(struct io_thread_req *))
-			printk("io_thread - write failed, fd = %d, err = %d\n",
-			       kernel_fd, -n);
+
+		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
+			struct io_thread_req *req = (*io_req_buffer)[count];
+			int i;
+
+			io_count++;
+			for (i = 0; !req->error && i < req->desc_cnt; i++)
+				do_io(req, &(req->io_desc[i]));
+
+		}
+
+		written = 0;
+
+		do {
+			res = os_write_file(kernel_fd,
+					    ((char *) io_req_buffer) + written,
+					    n - written);
+			if (res >= 0) {
+				written += res;
+			}
+			if (written < n) {
+				ubd_write_poll(-1);
+			}
+		} while (written < n);
 	}
 
-	return 0;
+	return NULL;
 }
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index a703e45d8aac..8e8a8bf518b6 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -1,7 +1,8 @@
-/* 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
@@ -20,15 +21,13 @@
 
 #include "ubd.h"
 #include <os.h>
+#include <poll.h>
 
-void ignore_sigwinch_sig(void)
-{
-	signal(SIGWINCH, SIG_IGN);
-}
+static struct pollfd kernel_pollfd;
 
-int start_io_thread(unsigned long sp, int *fd_out)
+int start_io_thread(struct os_helper_thread **td_out, int *fd_out)
 {
-	int pid, fds[2], err;
+	int fds[2], err;
 
 	err = os_pipe(fds, 1, 1);
 	if(err < 0){
@@ -37,22 +36,25 @@ int start_io_thread(unsigned long sp, int *fd_out)
 	}
 
 	kernel_fd = fds[0];
+	kernel_pollfd.fd = kernel_fd;
+	kernel_pollfd.events = POLLIN;
 	*fd_out = fds[1];
 
 	err = os_set_fd_block(*fd_out, 0);
+	err |= os_set_fd_block(kernel_fd, 0);
 	if (err) {
 		printk("start_io_thread - failed to set nonblocking I/O.\n");
 		goto out_close;
 	}
 
-	pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
-	if(pid < 0){
-		err = -errno;
-		printk("start_io_thread - clone failed : errno = %d\n", errno);
+	err = os_run_helper_thread(td_out, io_thread, NULL);
+	if (err < 0) {
+		printk("%s - failed to run helper thread, err = %d\n",
+		       __func__, -err);
 		goto out_close;
 	}
 
-	return(pid);
+	return 0;
 
  out_close:
 	os_close_file(fds[0]);
@@ -62,3 +64,15 @@ int start_io_thread(unsigned long sp, int *fd_out)
  out:
 	return err;
 }
+
+int ubd_read_poll(int timeout)
+{
+	kernel_pollfd.events = POLLIN;
+	return poll(&kernel_pollfd, 1, timeout);
+}
+int ubd_write_poll(int timeout)
+{
+	kernel_pollfd.events = POLLOUT;
+	return poll(&kernel_pollfd, 1, timeout);
+}
+
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
deleted file mode 100644
index c190c6440911..000000000000
--- a/arch/um/drivers/umcast.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __DRIVERS_UMCAST_H
-#define __DRIVERS_UMCAST_H
-
-#include <net_user.h>
-
-struct umcast_data {
-	char *addr;
-	unsigned short lport;
-	unsigned short rport;
-	void *listen_addr;
-	void *remote_addr;
-	int ttl;
-	int unicast;
-	void *dev;
-};
-
-extern const struct net_user_info umcast_user_info;
-
-extern int umcast_user_write(int fd, void *buf, int len,
-			     struct umcast_data *pri);
-
-#endif
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
deleted file mode 100644
index f5ba6e377913..000000000000
--- a/arch/um/drivers/umcast_kern.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- * Licensed under the GPL.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "umcast.h"
-#include <net_kern.h>
-
-struct umcast_init {
-	char *addr;
-	int lport;
-	int rport;
-	int ttl;
-	bool unicast;
-};
-
-static void umcast_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct umcast_data *dpri;
-	struct umcast_init *init = data;
-
-	pri = netdev_priv(dev);
-	dpri = (struct umcast_data *) pri->user;
-	dpri->addr = init->addr;
-	dpri->lport = init->lport;
-	dpri->rport = init->rport;
-	dpri->unicast = init->unicast;
-	dpri->ttl = init->ttl;
-	dpri->dev = dev;
-
-	if (dpri->unicast) {
-		printk(KERN_INFO "ucast backend address: %s:%u listen port: "
-		       "%u\n", dpri->addr, dpri->rport, dpri->lport);
-	} else {
-		printk(KERN_INFO "mcast backend multicast address: %s:%u, "
-		       "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl);
-	}
-}
-
-static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return net_recvfrom(fd, skb_mac_header(skb),
-			    skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return umcast_user_write(fd, skb->data, skb->len,
-				(struct umcast_data *) &lp->user);
-}
-
-static const struct net_kern_info umcast_kern_info = {
-	.init			= umcast_init,
-	.protocol		= eth_protocol,
-	.read			= umcast_read,
-	.write			= umcast_write,
-};
-
-static int mcast_setup(char *str, char **mac_out, void *data)
-{
-	struct umcast_init *init = data;
-	char *port_str = NULL, *ttl_str = NULL, *remain;
-	char *last;
-
-	*init = ((struct umcast_init)
-		{ .addr	= "239.192.168.1",
-		  .lport	= 1102,
-		  .ttl	= 1 });
-
-	remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
-			       NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "mcast_setup - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 0;
-	}
-
-	if (port_str != NULL) {
-		init->lport = simple_strtoul(port_str, &last, 10);
-		if ((*last != '\0') || (last == port_str)) {
-			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
-			       port_str);
-			return 0;
-		}
-	}
-
-	if (ttl_str != NULL) {
-		init->ttl = simple_strtoul(ttl_str, &last, 10);
-		if ((*last != '\0') || (last == ttl_str)) {
-			printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
-			       ttl_str);
-			return 0;
-		}
-	}
-
-	init->unicast = false;
-	init->rport = init->lport;
-
-	printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
-	       init->lport, init->ttl);
-
-	return 1;
-}
-
-static int ucast_setup(char *str, char **mac_out, void *data)
-{
-	struct umcast_init *init = data;
-	char *lport_str = NULL, *rport_str = NULL, *remain;
-	char *last;
-
-	*init = ((struct umcast_init)
-		{ .addr		= "",
-		  .lport	= 1102,
-		  .rport	= 1102 });
-
-	remain = split_if_spec(str, mac_out, &init->addr,
-			       &lport_str, &rport_str, NULL);
-	if (remain != NULL) {
-		printk(KERN_ERR "ucast_setup - Extra garbage on "
-		       "specification : '%s'\n", remain);
-		return 0;
-	}
-
-	if (lport_str != NULL) {
-		init->lport = simple_strtoul(lport_str, &last, 10);
-		if ((*last != '\0') || (last == lport_str)) {
-			printk(KERN_ERR "ucast_setup - Bad listen port : "
-			       "'%s'\n", lport_str);
-			return 0;
-		}
-	}
-
-	if (rport_str != NULL) {
-		init->rport = simple_strtoul(rport_str, &last, 10);
-		if ((*last != '\0') || (last == rport_str)) {
-			printk(KERN_ERR "ucast_setup - Bad remote port : "
-			       "'%s'\n", rport_str);
-			return 0;
-		}
-	}
-
-	init->unicast = true;
-
-	printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n",
-	       init->lport, init->addr, init->rport);
-
-	return 1;
-}
-
-static struct transport mcast_transport = {
-	.list	= LIST_HEAD_INIT(mcast_transport.list),
-	.name	= "mcast",
-	.setup	= mcast_setup,
-	.user	= &umcast_user_info,
-	.kern	= &umcast_kern_info,
-	.private_size	= sizeof(struct umcast_data),
-	.setup_size	= sizeof(struct umcast_init),
-};
-
-static struct transport ucast_transport = {
-	.list	= LIST_HEAD_INIT(ucast_transport.list),
-	.name	= "ucast",
-	.setup	= ucast_setup,
-	.user	= &umcast_user_info,
-	.kern	= &umcast_kern_info,
-	.private_size	= sizeof(struct umcast_data),
-	.setup_size	= sizeof(struct umcast_init),
-};
-
-static int register_umcast(void)
-{
-	register_transport(&mcast_transport);
-	register_transport(&ucast_transport);
-	return 0;
-}
-
-late_initcall(register_umcast);
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
deleted file mode 100644
index 6074184bb51b..000000000000
--- a/arch/um/drivers/umcast_user.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * user-mode-linux networking multicast transport
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
- *
- * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- *
- * Licensed under the GPL.
- *
- */
-
-#include <unistd.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include "umcast.h"
-#include <net_user.h>
-#include <um_malloc.h>
-
-static struct sockaddr_in *new_addr(char *addr, unsigned short port)
-{
-	struct sockaddr_in *sin;
-
-	sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
-	if (sin == NULL) {
-		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
-		       "failed\n");
-		return NULL;
-	}
-	sin->sin_family = AF_INET;
-	if (addr)
-		sin->sin_addr.s_addr = in_aton(addr);
-	else
-		sin->sin_addr.s_addr = INADDR_ANY;
-	sin->sin_port = htons(port);
-	return sin;
-}
-
-static int umcast_user_init(void *data, void *dev)
-{
-	struct umcast_data *pri = data;
-
-	pri->remote_addr = new_addr(pri->addr, pri->rport);
-	if (pri->unicast)
-		pri->listen_addr = new_addr(NULL, pri->lport);
-	else
-		pri->listen_addr = pri->remote_addr;
-	pri->dev = dev;
-	return 0;
-}
-
-static void umcast_remove(void *data)
-{
-	struct umcast_data *pri = data;
-
-	kfree(pri->listen_addr);
-	if (pri->unicast)
-		kfree(pri->remote_addr);
-	pri->listen_addr = pri->remote_addr = NULL;
-}
-
-static int umcast_open(void *data)
-{
-	struct umcast_data *pri = data;
-	struct sockaddr_in *lsin = pri->listen_addr;
-	struct sockaddr_in *rsin = pri->remote_addr;
-	struct ip_mreq mreq;
-	int fd, yes = 1, err = -EINVAL;
-
-
-	if ((!pri->unicast && lsin->sin_addr.s_addr == 0) ||
-	    (rsin->sin_addr.s_addr == 0) ||
-	    (lsin->sin_port == 0) || (rsin->sin_port == 0))
-		goto out;
-
-	fd = socket(AF_INET, SOCK_DGRAM, 0);
-
-	if (fd < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "umcast_open : data socket failed, "
-		       "errno = %d\n", errno);
-		goto out;
-	}
-
-	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, "
-		       "errno = %d\n", errno);
-		goto out_close;
-	}
-
-	if (!pri->unicast) {
-		/* set ttl according to config */
-		if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
-			       sizeof(pri->ttl)) < 0) {
-			err = -errno;
-			printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL "
-			       "failed, error = %d\n", errno);
-			goto out_close;
-		}
-
-		/* set LOOP, so data does get fed back to local sockets */
-		if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP,
-			       &yes, sizeof(yes)) < 0) {
-			err = -errno;
-			printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP "
-			       "failed, error = %d\n", errno);
-			goto out_close;
-		}
-	}
-
-	/* bind socket to the address */
-	if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "umcast_open : data bind failed, "
-		       "errno = %d\n", errno);
-		goto out_close;
-	}
-
-	if (!pri->unicast) {
-		/* subscribe to the multicast group */
-		mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
-		mreq.imr_interface.s_addr = 0;
-		if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
-			       &mreq, sizeof(mreq)) < 0) {
-			err = -errno;
-			printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP "
-			       "failed, error = %d\n", errno);
-			printk(UM_KERN_ERR "There appears not to be a "
-			       "multicast-capable network interface on the "
-			       "host.\n");
-			printk(UM_KERN_ERR "eth0 should be configured in order "
-			       "to use the multicast transport.\n");
-			goto out_close;
-		}
-	}
-
-	return fd;
-
- out_close:
-	close(fd);
- out:
-	return err;
-}
-
-static void umcast_close(int fd, void *data)
-{
-	struct umcast_data *pri = data;
-
-	if (!pri->unicast) {
-		struct ip_mreq mreq;
-		struct sockaddr_in *lsin = pri->listen_addr;
-
-		mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
-		mreq.imr_interface.s_addr = 0;
-		if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
-			       &mreq, sizeof(mreq)) < 0) {
-			printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP "
-			       "failed, error = %d\n", errno);
-		}
-	}
-
-	close(fd);
-}
-
-int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri)
-{
-	struct sockaddr_in *data_addr = pri->remote_addr;
-
-	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
-}
-
-const struct net_user_info umcast_user_info = {
-	.init	= umcast_user_init,
-	.open	= umcast_open,
-	.close	= umcast_close,
-	.remove	= umcast_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu	= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h
deleted file mode 100644
index fc3a05902ba1..000000000000
--- a/arch/um/drivers/vde.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
- */
-
-#ifndef __UM_VDE_H__
-#define __UM_VDE_H__
-
-struct vde_data {
-	char *vde_switch;
-	char *descr;
-	void *args;
-	void *conn;
-	void *dev;
-};
-
-struct vde_init {
-	char *vde_switch;
-	char *descr;
-	int port;
-	char *group;
-	int mode;
-};
-
-extern const struct net_user_info vde_user_info;
-
-extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init);
-
-extern int vde_user_read(void *conn, void *buf, int len);
-extern int vde_user_write(void *conn, void *buf, int len);
-
-#endif
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
deleted file mode 100644
index 6a365fadc7c4..000000000000
--- a/arch/um/drivers/vde_kern.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
- *
- * Transport usage:
- *  ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
- *
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <net_kern.h>
-#include <net_user.h>
-#include "vde.h"
-
-static void vde_init(struct net_device *dev, void *data)
-{
-	struct vde_init *init = data;
-	struct uml_net_private *pri;
-	struct vde_data *vpri;
-
-	pri = netdev_priv(dev);
-	vpri = (struct vde_data *) pri->user;
-
-	vpri->vde_switch = init->vde_switch;
-	vpri->descr = init->descr ? init->descr : "UML vde_transport";
-	vpri->args = NULL;
-	vpri->conn = NULL;
-	vpri->dev = dev;
-
-	printk("vde backend - %s, ", vpri->vde_switch ?
-	       vpri->vde_switch : "(default socket)");
-
-	vde_init_libstuff(vpri, init);
-
-	printk("\n");
-}
-
-static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	struct vde_data *pri = (struct vde_data *) &lp->user;
-
-	if (pri->conn != NULL)
-		return vde_user_read(pri->conn, skb_mac_header(skb),
-				     skb->dev->mtu + ETH_HEADER_OTHER);
-
-	printk(KERN_ERR "vde_read - we have no VDECONN to read from");
-	return -EBADF;
-}
-
-static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	struct vde_data *pri = (struct vde_data *) &lp->user;
-
-	if (pri->conn != NULL)
-		return vde_user_write((void *)pri->conn, skb->data,
-				      skb->len);
-
-	printk(KERN_ERR "vde_write - we have no VDECONN to write to");
-	return -EBADF;
-}
-
-static const struct net_kern_info vde_kern_info = {
-	.init			= vde_init,
-	.protocol		= eth_protocol,
-	.read			= vde_read,
-	.write			= vde_write,
-};
-
-static int vde_setup(char *str, char **mac_out, void *data)
-{
-	struct vde_init *init = data;
-	char *remain, *port_str = NULL, *mode_str = NULL, *last;
-
-	*init = ((struct vde_init)
-		{ .vde_switch		= NULL,
-		  .descr		= NULL,
-		  .port			= 0,
-		  .group		= NULL,
-		  .mode			= 0 });
-
-	remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str,
-				&init->group, &mode_str, &init->descr, NULL);
-
-	if (remain != NULL)
-		printk(KERN_WARNING "vde_setup - Ignoring extra data :"
-		       "'%s'\n", remain);
-
-	if (port_str != NULL) {
-		init->port = simple_strtoul(port_str, &last, 10);
-		if ((*last != '\0') || (last == port_str)) {
-			printk(KERN_ERR "vde_setup - Bad port : '%s'\n",
-						port_str);
-			return 0;
-		}
-	}
-
-	if (mode_str != NULL) {
-		init->mode = simple_strtoul(mode_str, &last, 8);
-		if ((*last != '\0') || (last == mode_str)) {
-			printk(KERN_ERR "vde_setup - Bad mode : '%s'\n",
-						mode_str);
-			return 0;
-		}
-	}
-
-	printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ?
-	       init->vde_switch : "(default socket)");
-
-	return 1;
-}
-
-static struct transport vde_transport = {
-	.list 		= LIST_HEAD_INIT(vde_transport.list),
-	.name 		= "vde",
-	.setup  	= vde_setup,
-	.user 		= &vde_user_info,
-	.kern 		= &vde_kern_info,
-	.private_size 	= sizeof(struct vde_data),
-	.setup_size 	= sizeof(struct vde_init),
-};
-
-static int register_vde(void)
-{
-	register_transport(&vde_transport);
-	return 0;
-}
-
-late_initcall(register_vde);
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
deleted file mode 100644
index 64cb630d1157..000000000000
--- a/arch/um/drivers/vde_user.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
- * Licensed under the GPL.
- */
-
-#include <stddef.h>
-#include <errno.h>
-#include <libvdeplug.h>
-#include <net_user.h>
-#include <um_malloc.h>
-#include "vde.h"
-
-static int vde_user_init(void *data, void *dev)
-{
-	struct vde_data *pri = data;
-	VDECONN *conn = NULL;
-	int err = -EINVAL;
-
-	pri->dev = dev;
-
-	conn = vde_open(pri->vde_switch, pri->descr, pri->args);
-
-	if (conn == NULL) {
-		err = -errno;
-		printk(UM_KERN_ERR "vde_user_init: vde_open failed, "
-		       "errno = %d\n", errno);
-		return err;
-	}
-
-	printk(UM_KERN_INFO "vde backend - connection opened\n");
-
-	pri->conn = conn;
-
-	return 0;
-}
-
-static int vde_user_open(void *data)
-{
-	struct vde_data *pri = data;
-
-	if (pri->conn != NULL)
-		return vde_datafd(pri->conn);
-
-	printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open");
-	return -EINVAL;
-}
-
-static void vde_remove(void *data)
-{
-	struct vde_data *pri = data;
-
-	if (pri->conn != NULL) {
-		printk(UM_KERN_INFO "vde backend - closing connection\n");
-		vde_close(pri->conn);
-		pri->conn = NULL;
-		kfree(pri->args);
-		pri->args = NULL;
-		return;
-	}
-
-	printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove");
-}
-
-const struct net_user_info vde_user_info = {
-	.init		= vde_user_init,
-	.open		= vde_user_open,
-	.close	 	= NULL,
-	.remove	 	= vde_remove,
-	.add_address	= NULL,
-	.delete_address = NULL,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
-
-void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init)
-{
-	struct vde_open_args *args;
-
-	vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
-	if (vpri->args == NULL) {
-		printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
-		       "allocation failed");
-		return;
-	}
-
-	args = vpri->args;
-
-	args->port = init->port;
-	args->group = init->group;
-	args->mode = init->mode ? init->mode : 0700;
-
-	args->port ?  printk("port %d", args->port) :
-		printk("undefined port");
-}
-
-int vde_user_read(void *conn, void *buf, int len)
-{
-	VDECONN *vconn = conn;
-	int rv;
-
-	if (vconn == NULL)
-		return 0;
-
-	rv = vde_recv(vconn, buf, len, 0);
-	if (rv < 0) {
-		if (errno == EAGAIN)
-			return 0;
-		return -errno;
-	}
-	else if (rv == 0)
-		return -ENOTCONN;
-
-	return rv;
-}
-
-int vde_user_write(void *conn, void *buf, int len)
-{
-	VDECONN *vconn = conn;
-
-	if (vconn == NULL)
-		return 0;
-
-	return vde_send(vconn, buf, len, 0);
-}
-
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..25d9258fa592
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1771 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 - 2019 Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ */
+
+#define pr_fmt(fmt) "uml-vector: " fmt
+
+#include <linux/memblock.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include <asm/atomic.h>
+#include <uapi/linux/filter.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <os.h>
+#include "mconsole_kern.h"
+#include "vector_user.h"
+#include "vector_kern.h"
+
+/*
+ * Adapted from network devices with the following major changes:
+ * All transports are static - simplifies the code significantly
+ * Multiple FDs/IRQs per device
+ * Vector IO optionally used for read/write, falling back to legacy
+ * based on configuration and/or availability
+ * Configuration is no longer positional - L2TPv3 and GRE require up to
+ * 10 parameters, passing this as positional is not fit for purpose.
+ * Only socket transports are supported
+ */
+
+
+#define DRIVER_NAME "uml-vector"
+struct vector_cmd_line_arg {
+	struct list_head list;
+	int unit;
+	char *arguments;
+};
+
+struct vector_device {
+	struct list_head list;
+	struct net_device *dev;
+	struct platform_device pdev;
+	int unit;
+	int opened;
+};
+
+static LIST_HEAD(vec_cmd_line);
+
+static DEFINE_SPINLOCK(vector_devices_lock);
+static LIST_HEAD(vector_devices);
+
+static int driver_registered;
+
+static void vector_eth_configure(int n, struct arglist *def);
+static int vector_mmsg_rx(struct vector_private *vp, int budget);
+
+/* Argument accessors to set variables (and/or set default values)
+ * mtu, buffer sizing, default headroom, etc
+ */
+
+#define DEFAULT_HEADROOM 2
+#define SAFETY_MARGIN 32
+#define DEFAULT_VECTOR_SIZE 64
+#define TX_SMALL_PACKET 128
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+
+static const struct {
+	const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+	{ "rx_queue_max" },
+	{ "rx_queue_running_average" },
+	{ "tx_queue_max" },
+	{ "tx_queue_running_average" },
+	{ "rx_encaps_errors" },
+	{ "tx_timeout_count" },
+	{ "tx_restart_queue" },
+	{ "tx_kicks" },
+	{ "tx_flow_control_xon" },
+	{ "tx_flow_control_xoff" },
+	{ "rx_csum_offload_good" },
+	{ "rx_csum_offload_errors"},
+	{ "sg_ok"},
+	{ "sg_linearized"},
+};
+
+#define VECTOR_NUM_STATS	ARRAY_SIZE(ethtool_stats_keys)
+
+static void vector_reset_stats(struct vector_private *vp)
+{
+	/* We reuse the existing queue locks for stats */
+
+	/* RX stats are modified with RX head_lock held
+	 * in vector_poll.
+	 */
+
+	spin_lock(&vp->rx_queue->head_lock);
+	vp->estats.rx_queue_max = 0;
+	vp->estats.rx_queue_running_average = 0;
+	vp->estats.rx_encaps_errors = 0;
+	vp->estats.sg_ok = 0;
+	vp->estats.sg_linearized = 0;
+	spin_unlock(&vp->rx_queue->head_lock);
+
+	/* TX stats are modified with TX head_lock held
+	 * in vector_send.
+	 */
+
+	spin_lock(&vp->tx_queue->head_lock);
+	vp->estats.tx_timeout_count = 0;
+	vp->estats.tx_restart_queue = 0;
+	vp->estats.tx_kicks = 0;
+	vp->estats.tx_flow_control_xon = 0;
+	vp->estats.tx_flow_control_xoff = 0;
+	vp->estats.tx_queue_max = 0;
+	vp->estats.tx_queue_running_average = 0;
+	spin_unlock(&vp->tx_queue->head_lock);
+}
+
+static int get_mtu(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "mtu");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			if ((result < (1 << 16) - 1) && (result >= 576))
+				return result;
+	}
+	return ETH_MAX_PACKET;
+}
+
+static char *get_bpf_file(struct arglist *def)
+{
+	return uml_vector_fetch_arg(def, "bpffile");
+}
+
+static bool get_bpf_flash(struct arglist *def)
+{
+	char *allow = uml_vector_fetch_arg(def, "bpfflash");
+	long result;
+
+	if (allow != NULL) {
+		if (kstrtoul(allow, 10, &result) == 0)
+			return result > 0;
+	}
+	return false;
+}
+
+static int get_depth(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "depth");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_VECTOR_SIZE;
+}
+
+static int get_headroom(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "headroom");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_HEADROOM;
+}
+
+static int get_req_size(struct arglist *def)
+{
+	char *gro = uml_vector_fetch_arg(def, "gro");
+	long result;
+
+	if (gro != NULL) {
+		if (kstrtoul(gro, 10, &result) == 0) {
+			if (result > 0)
+				return 65536;
+		}
+	}
+	return get_mtu(def) + ETH_HEADER_OTHER +
+		get_headroom(def) + SAFETY_MARGIN;
+}
+
+
+static int get_transport_options(struct arglist *def)
+{
+	char *transport = uml_vector_fetch_arg(def, "transport");
+	char *vector = uml_vector_fetch_arg(def, "vec");
+
+	int vec_rx = VECTOR_RX;
+	int vec_tx = VECTOR_TX;
+	long parsed;
+	int result = 0;
+
+	if (transport == NULL)
+		return -EINVAL;
+
+	if (vector != NULL) {
+		if (kstrtoul(vector, 10, &parsed) == 0) {
+			if (parsed == 0) {
+				vec_rx = 0;
+				vec_tx = 0;
+			}
+		}
+	}
+
+	if (get_bpf_flash(def))
+		result = VECTOR_BPF_FLASH;
+
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return result;
+	if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
+		return (result | vec_rx | VECTOR_BPF);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
+	return (result | vec_rx | vec_tx);
+}
+
+
+/* A mini-buffer for packet drop read
+ * All of our supported transports are datagram oriented and we always
+ * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
+ * than the packet size it still counts as full packet read and will
+ * clean the incoming stream to keep sigio/epoll happy
+ */
+
+#define DROP_BUFFER_SIZE 32
+
+static char *drop_buffer;
+
+
+/*
+ * Advance the mmsg queue head by n = advance. Resets the queue to
+ * maximum enqueue/dequeue-at-once capacity if possible. Called by
+ * dequeuers. Caller must hold the head_lock!
+ */
+
+static int vector_advancehead(struct vector_queue *qi, int advance)
+{
+	qi->head =
+		(qi->head + advance)
+			% qi->max_depth;
+
+
+	atomic_sub(advance, &qi->queue_depth);
+	return atomic_read(&qi->queue_depth);
+}
+
+/*	Advance the queue tail by n = advance.
+ *	This is called by enqueuers which should hold the
+ *	head lock already
+ */
+
+static int vector_advancetail(struct vector_queue *qi, int advance)
+{
+	qi->tail =
+		(qi->tail + advance)
+			% qi->max_depth;
+	atomic_add(advance, &qi->queue_depth);
+	return atomic_read(&qi->queue_depth);
+}
+
+static int prep_msg(struct vector_private *vp,
+	struct sk_buff *skb,
+	struct iovec *iov)
+{
+	int iov_index = 0;
+	int nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags > MAX_IOV_SIZE) {
+		if (skb_linearize(skb) != 0)
+			goto drop;
+	}
+	if (vp->header_size > 0) {
+		iov[iov_index].iov_len = vp->header_size;
+		vp->form_header(iov[iov_index].iov_base, skb, vp);
+		iov_index++;
+	}
+	iov[iov_index].iov_base = skb->data;
+	if (nr_frags > 0) {
+		iov[iov_index].iov_len = skb->len - skb->data_len;
+		vp->estats.sg_ok++;
+	} else
+		iov[iov_index].iov_len = skb->len;
+	iov_index++;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(skb)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		iov_index++;
+	}
+	return iov_index;
+drop:
+	return -1;
+}
+/*
+ * Generic vector enqueue with support for forming headers using transport
+ * specific callback. Allows GRE, L2TPv3, RAW and other transports
+ * to use a common enqueue procedure in vector mode
+ */
+
+static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	int queue_depth;
+	int packet_len;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	int iov_count;
+
+	spin_lock(&qi->tail_lock);
+	queue_depth = atomic_read(&qi->queue_depth);
+
+	if (skb)
+		packet_len = skb->len;
+
+	if (queue_depth < qi->max_depth) {
+
+		*(qi->skbuff_vector + qi->tail) = skb;
+		mmsg_vector += qi->tail;
+		iov_count = prep_msg(
+			vp,
+			skb,
+			mmsg_vector->msg_hdr.msg_iov
+		);
+		if (iov_count < 1)
+			goto drop;
+		mmsg_vector->msg_hdr.msg_iovlen = iov_count;
+		mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
+		mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+		wmb(); /* Make the packet visible to the NAPI poll thread */
+		queue_depth = vector_advancetail(qi, 1);
+	} else
+		goto drop;
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+drop:
+	qi->dev->stats.tx_dropped++;
+	if (skb != NULL) {
+		packet_len = skb->len;
+		dev_consume_skb_any(skb);
+		netdev_completed_queue(qi->dev, 1, packet_len);
+	}
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+}
+
+static int consume_vector_skbs(struct vector_queue *qi, int count)
+{
+	struct sk_buff *skb;
+	int skb_index;
+	int bytes_compl = 0;
+
+	for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
+		skb = *(qi->skbuff_vector + skb_index);
+		/* mark as empty to ensure correct destruction if
+		 * needed
+		 */
+		bytes_compl += skb->len;
+		*(qi->skbuff_vector + skb_index) = NULL;
+		dev_consume_skb_any(skb);
+	}
+	qi->dev->stats.tx_bytes += bytes_compl;
+	qi->dev->stats.tx_packets += count;
+	netdev_completed_queue(qi->dev, count, bytes_compl);
+	return vector_advancehead(qi, count);
+}
+
+/*
+ * Generic vector dequeue via sendmmsg with support for forming headers
+ * using transport specific callback. Allows GRE, L2TPv3, RAW and
+ * other transports to use a common dequeue procedure in vector mode
+ */
+
+
+static int vector_send(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *send_from;
+	int result = 0, send_len;
+
+	if (spin_trylock(&qi->head_lock)) {
+		/* update queue_depth to current value */
+		while (atomic_read(&qi->queue_depth) > 0) {
+			/* Calculate the start of the vector */
+			send_len = atomic_read(&qi->queue_depth);
+			send_from = qi->mmsg_vector;
+			send_from += qi->head;
+			/* Adjust vector size if wraparound */
+			if (send_len + qi->head > qi->max_depth)
+				send_len = qi->max_depth - qi->head;
+			/* Try to TX as many packets as possible */
+			if (send_len > 0) {
+				result = uml_vector_sendmmsg(
+					 vp->fds->tx_fd,
+					 send_from,
+					 send_len,
+					 0
+				);
+				vp->in_write_poll =
+					(result != send_len);
+			}
+			/* For some of the sendmmsg error scenarios
+			 * we may end being unsure in the TX success
+			 * for all packets. It is safer to declare
+			 * them all TX-ed and blame the network.
+			 */
+			if (result < 0) {
+				if (net_ratelimit())
+					netdev_err(vp->dev, "sendmmsg err=%i\n",
+						result);
+				vp->in_error = true;
+				result = send_len;
+			}
+			if (result > 0) {
+				consume_vector_skbs(qi, result);
+				/* This is equivalent to an TX IRQ.
+				 * Restart the upper layers to feed us
+				 * more packets.
+				 */
+				if (result > vp->estats.tx_queue_max)
+					vp->estats.tx_queue_max = result;
+				vp->estats.tx_queue_running_average =
+					(vp->estats.tx_queue_running_average + result) >> 1;
+			}
+			netif_wake_queue(qi->dev);
+			/* if TX is busy, break out of the send loop,
+			 *  poll write IRQ will reschedule xmit for us.
+			 */
+			if (result != send_len) {
+				vp->estats.tx_restart_queue++;
+				break;
+			}
+		}
+		spin_unlock(&qi->head_lock);
+	}
+	return atomic_read(&qi->queue_depth);
+}
+
+/* Queue destructor. Deliberately stateless so we can use
+ * it in queue cleanup if initialization fails.
+ */
+
+static void destroy_queue(struct vector_queue *qi)
+{
+	int i;
+	struct iovec *iov;
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *mmsg_vector;
+
+	if (qi == NULL)
+		return;
+	/* deallocate any skbuffs - we rely on any unused to be
+	 * set to NULL.
+	 */
+	if (qi->skbuff_vector != NULL) {
+		for (i = 0; i < qi->max_depth; i++) {
+			if (*(qi->skbuff_vector + i) != NULL)
+				dev_kfree_skb_any(*(qi->skbuff_vector + i));
+		}
+		kfree(qi->skbuff_vector);
+	}
+	/* deallocate matching IOV structures including header buffs */
+	if (qi->mmsg_vector != NULL) {
+		mmsg_vector = qi->mmsg_vector;
+		for (i = 0; i < qi->max_depth; i++) {
+			iov = mmsg_vector->msg_hdr.msg_iov;
+			if (iov != NULL) {
+				if ((vp->header_size > 0) &&
+					(iov->iov_base != NULL))
+					kfree(iov->iov_base);
+				kfree(iov);
+			}
+			mmsg_vector++;
+		}
+		kfree(qi->mmsg_vector);
+	}
+	kfree(qi);
+}
+
+/*
+ * Queue constructor. Create a queue with a given side.
+ */
+static struct vector_queue *create_queue(
+	struct vector_private *vp,
+	int max_size,
+	int header_size,
+	int num_extra_frags)
+{
+	struct vector_queue *result;
+	int i;
+	struct iovec *iov;
+	struct mmsghdr *mmsg_vector;
+
+	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	result->max_depth = max_size;
+	result->dev = vp->dev;
+	result->mmsg_vector = kmalloc(
+		(sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
+	if (result->mmsg_vector == NULL)
+		goto out_mmsg_fail;
+	result->skbuff_vector = kmalloc(
+		(sizeof(void *) * max_size), GFP_KERNEL);
+	if (result->skbuff_vector == NULL)
+		goto out_skb_fail;
+
+	/* further failures can be handled safely by destroy_queue*/
+
+	mmsg_vector = result->mmsg_vector;
+	for (i = 0; i < max_size; i++) {
+		/* Clear all pointers - we use non-NULL as marking on
+		 * what to free on destruction
+		 */
+		*(result->skbuff_vector + i) = NULL;
+		mmsg_vector->msg_hdr.msg_iov = NULL;
+		mmsg_vector++;
+	}
+	mmsg_vector = result->mmsg_vector;
+	result->max_iov_frags = num_extra_frags;
+	for (i = 0; i < max_size; i++) {
+		if (vp->header_size > 0)
+			iov = kmalloc_array(3 + num_extra_frags,
+					    sizeof(struct iovec),
+					    GFP_KERNEL
+			);
+		else
+			iov = kmalloc_array(2 + num_extra_frags,
+					    sizeof(struct iovec),
+					    GFP_KERNEL
+			);
+		if (iov == NULL)
+			goto out_fail;
+		mmsg_vector->msg_hdr.msg_iov = iov;
+		mmsg_vector->msg_hdr.msg_iovlen = 1;
+		mmsg_vector->msg_hdr.msg_control = NULL;
+		mmsg_vector->msg_hdr.msg_controllen = 0;
+		mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
+		mmsg_vector->msg_hdr.msg_name = NULL;
+		mmsg_vector->msg_hdr.msg_namelen = 0;
+		if (vp->header_size > 0) {
+			iov->iov_base = kmalloc(header_size, GFP_KERNEL);
+			if (iov->iov_base == NULL)
+				goto out_fail;
+			iov->iov_len = header_size;
+			mmsg_vector->msg_hdr.msg_iovlen = 2;
+			iov++;
+		}
+		iov->iov_base = NULL;
+		iov->iov_len = 0;
+		mmsg_vector++;
+	}
+	spin_lock_init(&result->head_lock);
+	spin_lock_init(&result->tail_lock);
+	atomic_set(&result->queue_depth, 0);
+	result->head = 0;
+	result->tail = 0;
+	return result;
+out_skb_fail:
+	kfree(result->mmsg_vector);
+out_mmsg_fail:
+	kfree(result);
+	return NULL;
+out_fail:
+	destroy_queue(result);
+	return NULL;
+}
+
+/*
+ * We do not use the RX queue as a proper wraparound queue for now
+ * This is not necessary because the consumption via napi_gro_receive()
+ * happens in-line. While we can try using the return code of
+ * netif_rx() for flow control there are no drivers doing this today.
+ * For this RX specific use we ignore the tail/head locks and
+ * just read into a prepared queue filled with skbuffs.
+ */
+
+static struct sk_buff *prep_skb(
+	struct vector_private *vp,
+	struct user_msghdr *msg)
+{
+	int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	struct sk_buff *result;
+	int iov_index = 0, len;
+	struct iovec *iov = msg->msg_iov;
+	int err, nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	if (vp->req_size <= linear)
+		len = linear;
+	else
+		len = vp->req_size;
+	result = alloc_skb_with_frags(
+		linear,
+		len - vp->max_packet,
+		3,
+		&err,
+		GFP_ATOMIC
+	);
+	if (vp->header_size > 0)
+		iov_index++;
+	if (result == NULL) {
+		iov[iov_index].iov_base = NULL;
+		iov[iov_index].iov_len = 0;
+		goto done;
+	}
+	skb_reserve(result, vp->headroom);
+	result->dev = vp->dev;
+	skb_put(result, vp->max_packet);
+	result->data_len = len - vp->max_packet;
+	result->len += len - vp->max_packet;
+	skb_reset_mac_header(result);
+	result->ip_summed = CHECKSUM_NONE;
+	iov[iov_index].iov_base = result->data;
+	iov[iov_index].iov_len = vp->max_packet;
+	iov_index++;
+
+	nr_frags = skb_shinfo(result)->nr_frags;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(result)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		if (iov[iov_index].iov_base != NULL)
+			iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		else
+			iov[iov_index].iov_len = 0;
+		iov_index++;
+	}
+done:
+	msg->msg_iovlen = iov_index;
+	return result;
+}
+
+
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */
+
+static void prep_queue_for_rx(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	void **skbuff_vector = qi->skbuff_vector;
+	int i, queue_depth;
+
+	queue_depth = atomic_read(&qi->queue_depth);
+
+	if (queue_depth == 0)
+		return;
+
+	/* RX is always emptied 100% during each cycle, so we do not
+	 * have to do the tail wraparound math for it.
+	 */
+
+	qi->head = qi->tail = 0;
+
+	for (i = 0; i < queue_depth; i++) {
+		/* it is OK if allocation fails - recvmmsg with NULL data in
+		 * iov argument still performs an RX, just drops the packet
+		 * This allows us stop faffing around with a "drop buffer"
+		 */
+
+		*skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
+		skbuff_vector++;
+		mmsg_vector++;
+	}
+	atomic_set(&qi->queue_depth, 0);
+}
+
+static struct vector_device *find_device(int n)
+{
+	struct vector_device *device;
+	struct list_head *ele;
+
+	spin_lock(&vector_devices_lock);
+	list_for_each(ele, &vector_devices) {
+		device = list_entry(ele, struct vector_device, list);
+		if (device->unit == n)
+			goto out;
+	}
+	device = NULL;
+ out:
+	spin_unlock(&vector_devices_lock);
+	return device;
+}
+
+static int vector_parse(char *str, int *index_out, char **str_out,
+			char **error_out)
+{
+	int n, err;
+	char *start = str;
+
+	while ((*str != ':') && (strlen(str) > 1))
+		str++;
+	if (*str != ':') {
+		*error_out = "Expected ':' after device number";
+		return -EINVAL;
+	}
+	*str = '\0';
+
+	err = kstrtouint(start, 0, &n);
+	if (err < 0) {
+		*error_out = "Bad device number";
+		return err;
+	}
+
+	str++;
+	if (find_device(n)) {
+		*error_out = "Device already configured";
+		return -EINVAL;
+	}
+
+	*index_out = n;
+	*str_out = str;
+	return 0;
+}
+
+static int vector_config(char *str, char **error_out)
+{
+	int err, n;
+	char *params;
+	struct arglist *parsed;
+
+	err = vector_parse(str, &n, &params, error_out);
+	if (err != 0)
+		return err;
+
+	/* This string is broken up and the pieces used by the underlying
+	 * driver. We should copy it to make sure things do not go wrong
+	 * later.
+	 */
+
+	params = kstrdup(params, GFP_KERNEL);
+	if (params == NULL) {
+		*error_out = "vector_config failed to strdup string";
+		return -ENOMEM;
+	}
+
+	parsed = uml_parse_vector_ifspec(params);
+
+	if (parsed == NULL) {
+		*error_out = "vector_config failed to parse parameters";
+		kfree(params);
+		return -EINVAL;
+	}
+
+	vector_eth_configure(n, parsed);
+	return 0;
+}
+
+static int vector_id(char **str, int *start_out, int *end_out)
+{
+	char *end;
+	int n;
+
+	n = simple_strtoul(*str, &end, 0);
+	if ((*end != '\0') || (end == *str))
+		return -1;
+
+	*start_out = n;
+	*end_out = n;
+	*str = end;
+	return n;
+}
+
+static int vector_remove(int n, char **error_out)
+{
+	struct vector_device *vec_d;
+	struct net_device *dev;
+	struct vector_private *vp;
+
+	vec_d = find_device(n);
+	if (vec_d == NULL)
+		return -ENODEV;
+	dev = vec_d->dev;
+	vp = netdev_priv(dev);
+	if (vp->fds != NULL)
+		return -EBUSY;
+	unregister_netdev(dev);
+	platform_device_unregister(&vec_d->pdev);
+	return 0;
+}
+
+/*
+ * There is no shared per-transport initialization code, so
+ * we will just initialize each interface one by one and
+ * add them to a list
+ */
+
+static struct platform_driver uml_net_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+	},
+};
+
+
+static void vector_device_release(struct device *dev)
+{
+	struct vector_device *device =
+		container_of(dev, struct vector_device, pdev.dev);
+	struct net_device *netdev = device->dev;
+
+	list_del(&device->list);
+	kfree(device);
+	free_netdev(netdev);
+}
+
+/* Bog standard recv using recvmsg - not used normally unless the user
+ * explicitly specifies not to use recvmmsg vector RX.
+ */
+
+static int vector_legacy_rx(struct vector_private *vp)
+{
+	int pkt_len;
+	struct user_msghdr hdr;
+	struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
+	int iovpos = 0;
+	struct sk_buff *skb;
+	int header_check;
+
+	hdr.msg_name = NULL;
+	hdr.msg_namelen = 0;
+	hdr.msg_iov = (struct iovec *) &iov;
+	hdr.msg_control = NULL;
+	hdr.msg_controllen = 0;
+	hdr.msg_flags = 0;
+
+	if (vp->header_size > 0) {
+		iov[0].iov_base = vp->header_rxbuffer;
+		iov[0].iov_len = vp->header_size;
+	}
+
+	skb = prep_skb(vp, &hdr);
+
+	if (skb == NULL) {
+		/* Read a packet into drop_buffer and don't do
+		 * anything with it.
+		 */
+		iov[iovpos].iov_base = drop_buffer;
+		iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+		hdr.msg_iovlen = 1;
+		vp->dev->stats.rx_dropped++;
+	}
+
+	pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+	if (pkt_len < 0) {
+		vp->in_error = true;
+		return pkt_len;
+	}
+
+	if (skb != NULL) {
+		if (pkt_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				header_check = vp->verify_header(
+					vp->header_rxbuffer, skb, vp);
+				if (header_check < 0) {
+					dev_kfree_skb_irq(skb);
+					vp->dev->stats.rx_dropped++;
+					vp->estats.rx_encaps_errors++;
+					return 0;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			pskb_trim(skb, pkt_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			napi_gro_receive(&vp->napi, skb);
+		} else {
+			dev_kfree_skb_irq(skb);
+		}
+	}
+	return pkt_len;
+}
+
+/*
+ * Packet at a time TX which falls back to vector TX if the
+ * underlying transport is busy.
+ */
+
+
+
+static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
+{
+	struct iovec iov[3 + MAX_IOV_SIZE];
+	int iov_count, pkt_len = 0;
+
+	iov[0].iov_base = vp->header_txbuffer;
+	iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
+
+	if (iov_count < 1)
+		goto drop;
+
+	pkt_len = uml_vector_writev(
+		vp->fds->tx_fd,
+		(struct iovec *) &iov,
+		iov_count
+	);
+
+	if (pkt_len < 0)
+		goto drop;
+
+	netif_trans_update(vp->dev);
+	netif_wake_queue(vp->dev);
+
+	if (pkt_len > 0) {
+		vp->dev->stats.tx_bytes += skb->len;
+		vp->dev->stats.tx_packets++;
+	} else {
+		vp->dev->stats.tx_dropped++;
+	}
+	consume_skb(skb);
+	return pkt_len;
+drop:
+	vp->dev->stats.tx_dropped++;
+	consume_skb(skb);
+	if (pkt_len < 0)
+		vp->in_error = true;
+	return pkt_len;
+}
+
+/*
+ * Receive as many messages as we can in one call using the special
+ * mmsg vector matched to an skb vector which we prepared earlier.
+ */
+
+static int vector_mmsg_rx(struct vector_private *vp, int budget)
+{
+	int packet_count, i;
+	struct vector_queue *qi = vp->rx_queue;
+	struct sk_buff *skb;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	void **skbuff_vector = qi->skbuff_vector;
+	int header_check;
+
+	/* Refresh the vector and make sure it is with new skbs and the
+	 * iovs are updated to point to them.
+	 */
+
+	prep_queue_for_rx(qi);
+
+	/* Fire the Lazy Gun - get as many packets as we can in one go. */
+
+	if (budget > qi->max_depth)
+		budget = qi->max_depth;
+
+	packet_count = uml_vector_recvmmsg(
+		vp->fds->rx_fd, qi->mmsg_vector, budget, 0);
+
+	if (packet_count < 0)
+		vp->in_error = true;
+
+	if (packet_count <= 0)
+		return packet_count;
+
+	/* We treat packet processing as enqueue, buffer refresh as dequeue
+	 * The queue_depth tells us how many buffers have been used and how
+	 * many do we need to prep the next time prep_queue_for_rx() is called.
+	 */
+
+	atomic_add(packet_count, &qi->queue_depth);
+
+	for (i = 0; i < packet_count; i++) {
+		skb = (*skbuff_vector);
+		if (mmsg_vector->msg_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				header_check = vp->verify_header(
+					mmsg_vector->msg_hdr.msg_iov->iov_base,
+					skb,
+					vp
+				);
+				if (header_check < 0) {
+				/* Overlay header failed to verify - discard.
+				 * We can actually keep this skb and reuse it,
+				 * but that will make the prep logic too
+				 * complex.
+				 */
+					dev_kfree_skb_irq(skb);
+					vp->estats.rx_encaps_errors++;
+					continue;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			pskb_trim(skb,
+				mmsg_vector->msg_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			/*
+			 * We do not need to lock on updating stats here
+			 * The interrupt loop is non-reentrant.
+			 */
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			napi_gro_receive(&vp->napi, skb);
+		} else {
+			/* Overlay header too short to do anything - discard.
+			 * We can actually keep this skb and reuse it,
+			 * but that will make the prep logic too complex.
+			 */
+			if (skb != NULL)
+				dev_kfree_skb_irq(skb);
+		}
+		(*skbuff_vector) = NULL;
+		/* Move to the next buffer element */
+		mmsg_vector++;
+		skbuff_vector++;
+	}
+	if (packet_count > 0) {
+		if (vp->estats.rx_queue_max < packet_count)
+			vp->estats.rx_queue_max = packet_count;
+		vp->estats.rx_queue_running_average =
+			(vp->estats.rx_queue_running_average + packet_count) >> 1;
+	}
+	return packet_count;
+}
+
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	int queue_depth = 0;
+
+	if (vp->in_error) {
+		deactivate_fd(vp->fds->rx_fd, vp->rx_irq);
+		if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0))
+			deactivate_fd(vp->fds->tx_fd, vp->tx_irq);
+		return NETDEV_TX_BUSY;
+	}
+
+	if ((vp->options & VECTOR_TX) == 0) {
+		writev_tx(vp, skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* We do BQL only in the vector path, no point doing it in
+	 * packet at a time mode as there is no device queue
+	 */
+
+	netdev_sent_queue(vp->dev, skb->len);
+	queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+	if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) {
+		mod_timer(&vp->tl, vp->coalesce);
+		return NETDEV_TX_OK;
+	} else {
+		queue_depth = vector_send(vp->tx_queue);
+		if (queue_depth > 0)
+			napi_schedule(&vp->napi);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	napi_schedule(&vp->napi);
+	return IRQ_HANDLED;
+
+}
+
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	/* We need to pay attention to it only if we got
+	 * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
+	 * we ignore it. In the future, it may be worth
+	 * it to improve the IRQ controller a bit to make
+	 * tweaking the IRQ mask less costly
+	 */
+
+	napi_schedule(&vp->napi);
+	return IRQ_HANDLED;
+
+}
+
+static int irq_rr;
+
+static int vector_net_close(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	netif_stop_queue(dev);
+	timer_delete(&vp->tl);
+
+	vp->opened = false;
+
+	if (vp->fds == NULL)
+		return 0;
+
+	/* Disable and free all IRQS */
+	if (vp->rx_irq > 0) {
+		um_free_irq(vp->rx_irq, dev);
+		vp->rx_irq = 0;
+	}
+	if (vp->tx_irq > 0) {
+		um_free_irq(vp->tx_irq, dev);
+		vp->tx_irq = 0;
+	}
+	napi_disable(&vp->napi);
+	netif_napi_del(&vp->napi);
+	if (vp->fds->rx_fd > 0) {
+		if (vp->bpf)
+			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
+		os_close_file(vp->fds->rx_fd);
+		vp->fds->rx_fd = -1;
+	}
+	if (vp->fds->tx_fd > 0) {
+		os_close_file(vp->fds->tx_fd);
+		vp->fds->tx_fd = -1;
+	}
+	if (vp->bpf != NULL)
+		kfree(vp->bpf->filter);
+	kfree(vp->bpf);
+	vp->bpf = NULL;
+	kfree(vp->fds->remote_addr);
+	kfree(vp->transport_data);
+	kfree(vp->header_rxbuffer);
+	kfree(vp->header_txbuffer);
+	if (vp->rx_queue != NULL)
+		destroy_queue(vp->rx_queue);
+	if (vp->tx_queue != NULL)
+		destroy_queue(vp->tx_queue);
+	kfree(vp->fds);
+	vp->fds = NULL;
+	vp->in_error = false;
+	return 0;
+}
+
+static int vector_poll(struct napi_struct *napi, int budget)
+{
+	struct vector_private *vp = container_of(napi, struct vector_private, napi);
+	int work_done = 0;
+	int err;
+	bool tx_enqueued = false;
+
+	if ((vp->options & VECTOR_TX) != 0)
+		tx_enqueued = (vector_send(vp->tx_queue) > 0);
+	spin_lock(&vp->rx_queue->head_lock);
+	if ((vp->options & VECTOR_RX) > 0)
+		err = vector_mmsg_rx(vp, budget);
+	else {
+		err = vector_legacy_rx(vp);
+		if (err > 0)
+			err = 1;
+	}
+	spin_unlock(&vp->rx_queue->head_lock);
+	if (err > 0)
+		work_done += err;
+
+	if (tx_enqueued || err > 0)
+		napi_schedule(napi);
+	if (work_done <= budget)
+		napi_complete_done(napi, work_done);
+	return work_done;
+}
+
+static void vector_reset_tx(struct work_struct *work)
+{
+	struct vector_private *vp =
+		container_of(work, struct vector_private, reset_tx);
+	netdev_reset_queue(vp->dev);
+	netif_start_queue(vp->dev);
+	netif_wake_queue(vp->dev);
+}
+
+static int vector_net_open(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	int err = -EINVAL;
+	struct vector_device *vdevice;
+
+	if (vp->opened)
+		return -ENXIO;
+	vp->opened = true;
+
+	vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
+
+	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+	if (vp->fds == NULL)
+		goto out_close;
+
+	if (build_transport_data(vp) < 0)
+		goto out_close;
+
+	if ((vp->options & VECTOR_RX) > 0) {
+		vp->rx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->rx_header_size,
+			MAX_IOV_SIZE
+		);
+		atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed));
+	} else {
+		vp->header_rxbuffer = kmalloc(
+			vp->rx_header_size,
+			GFP_KERNEL
+		);
+		if (vp->header_rxbuffer == NULL)
+			goto out_close;
+	}
+	if ((vp->options & VECTOR_TX) > 0) {
+		vp->tx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->header_size,
+			MAX_IOV_SIZE
+		);
+	} else {
+		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+		if (vp->header_txbuffer == NULL)
+			goto out_close;
+	}
+
+	netif_napi_add_weight(vp->dev, &vp->napi, vector_poll,
+			      get_depth(vp->parsed));
+	napi_enable(&vp->napi);
+
+	/* READ IRQ */
+	err = um_request_irq(
+		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+			IRQ_READ, vector_rx_interrupt,
+			IRQF_SHARED, dev->name, dev);
+	if (err < 0) {
+		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+		err = -ENETUNREACH;
+		goto out_close;
+	}
+	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+	dev->irq = irq_rr + VECTOR_BASE_IRQ;
+	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+	/* WRITE IRQ - we need it only if we have vector TX */
+	if ((vp->options & VECTOR_TX) > 0) {
+		err = um_request_irq(
+			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+				IRQ_WRITE, vector_tx_interrupt,
+				IRQF_SHARED, dev->name, dev);
+		if (err < 0) {
+			netdev_err(dev,
+				"vector_open: failed to get tx irq(%d)\n", err);
+			err = -ENETUNREACH;
+			goto out_close;
+		}
+		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+	}
+
+	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+			vp->options |= VECTOR_BPF;
+	}
+	if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
+		vp->bpf = uml_vector_default_bpf(dev->dev_addr);
+
+	if (vp->bpf != NULL)
+		uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
+
+	netif_start_queue(dev);
+	vector_reset_stats(vp);
+
+	/* clear buffer - it can happen that the host side of the interface
+	 * is full when we get here. In this case, new data is never queued,
+	 * SIGIOs never arrive, and the net never works.
+	 */
+
+	napi_schedule(&vp->napi);
+
+	vdevice = find_device(vp->unit);
+	vdevice->opened = 1;
+
+	if ((vp->options & VECTOR_TX) != 0)
+		add_timer(&vp->tl);
+	return 0;
+out_close:
+	vector_net_close(dev);
+	return err;
+}
+
+
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+	/* TODO: - we can do some BPF games here */
+	return;
+}
+
+static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	vp->estats.tx_timeout_count++;
+	netif_trans_update(dev);
+	schedule_work(&vp->reset_tx);
+}
+
+static netdev_features_t vector_fix_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	return features;
+}
+
+static int vector_set_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+	 * no way to negotiate it on raw sockets, so we can change
+	 * only our side.
+	 */
+	if (features & NETIF_F_GRO)
+		/* All new frame buffers will be GRO-sized */
+		vp->req_size = 65536;
+	else
+		/* All new frame buffers will be normal sized */
+		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vector_net_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);
+	vector_rx_interrupt(dev->irq, dev);
+	enable_irq(dev->irq);
+}
+#endif
+
+static void vector_net_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	strscpy(info->driver, DRIVER_NAME);
+}
+
+static int vector_net_load_bpf_flash(struct net_device *dev,
+				struct ethtool_flash *efl)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	struct vector_device *vdevice;
+	const struct firmware *fw;
+	int result = 0;
+
+	if (!(vp->options & VECTOR_BPF_FLASH)) {
+		netdev_err(dev, "loading firmware not permitted: %s\n", efl->data);
+		return -1;
+	}
+
+	if (vp->bpf != NULL) {
+		if (vp->opened)
+			uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
+		kfree(vp->bpf->filter);
+		vp->bpf->filter = NULL;
+	} else {
+		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
+		if (vp->bpf == NULL) {
+			netdev_err(dev, "failed to allocate memory for firmware\n");
+			goto flash_fail;
+		}
+	}
+
+	vdevice = find_device(vp->unit);
+
+	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
+		goto flash_fail;
+
+	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
+	if (!vp->bpf->filter)
+		goto free_buffer;
+
+	vp->bpf->len = fw->size / sizeof(struct sock_filter);
+	release_firmware(fw);
+
+	if (vp->opened)
+		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
+
+	return result;
+
+free_buffer:
+	release_firmware(fw);
+
+flash_fail:
+	if (vp->bpf != NULL)
+		kfree(vp->bpf->filter);
+	kfree(vp->bpf);
+	vp->bpf = NULL;
+	return -1;
+}
+
+static void vector_get_ringparam(struct net_device *netdev,
+				 struct ethtool_ringparam *ring,
+				 struct kernel_ethtool_ringparam *kernel_ring,
+				 struct netlink_ext_ack *extack)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ring->rx_max_pending = vp->rx_queue->max_depth;
+	ring->tx_max_pending = vp->tx_queue->max_depth;
+	ring->rx_pending = vp->rx_queue->max_depth;
+	ring->tx_pending = vp->tx_queue->max_depth;
+}
+
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		*buf = '\0';
+		break;
+	case ETH_SS_STATS:
+		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return 0;
+	case ETH_SS_STATS:
+		return VECTOR_NUM_STATS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void vector_get_ethtool_stats(struct net_device *dev,
+	struct ethtool_stats *estats,
+	u64 *tmp_stats)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	/* Stats are modified in the dequeue portions of
+	 * rx/tx which are protected by the head locks
+	 * grabbing these locks here ensures they are up
+	 * to date.
+	 */
+
+	spin_lock(&vp->tx_queue->head_lock);
+	spin_lock(&vp->rx_queue->head_lock);
+	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+	spin_unlock(&vp->rx_queue->head_lock);
+	spin_unlock(&vp->tx_queue->head_lock);
+}
+
+static int vector_get_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec,
+			       struct kernel_ethtool_coalesce *kernel_coal,
+			       struct netlink_ext_ack *extack)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+	return 0;
+}
+
+static int vector_set_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec,
+			       struct kernel_ethtool_coalesce *kernel_coal,
+			       struct netlink_ext_ack *extack)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
+	if (vp->coalesce == 0)
+		vp->coalesce = 1;
+	return 0;
+}
+
+static const struct ethtool_ops vector_net_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS,
+	.get_drvinfo	= vector_net_get_drvinfo,
+	.get_link	= ethtool_op_get_link,
+	.get_ts_info	= ethtool_op_get_ts_info,
+	.get_ringparam	= vector_get_ringparam,
+	.get_strings	= vector_get_strings,
+	.get_sset_count	= vector_get_sset_count,
+	.get_ethtool_stats = vector_get_ethtool_stats,
+	.get_coalesce	= vector_get_coalesce,
+	.set_coalesce	= vector_set_coalesce,
+	.flash_device	= vector_net_load_bpf_flash,
+};
+
+
+static const struct net_device_ops vector_netdev_ops = {
+	.ndo_open		= vector_net_open,
+	.ndo_stop		= vector_net_close,
+	.ndo_start_xmit		= vector_net_start_xmit,
+	.ndo_set_rx_mode	= vector_net_set_multicast_list,
+	.ndo_tx_timeout		= vector_net_tx_timeout,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_fix_features	= vector_fix_features,
+	.ndo_set_features	= vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = vector_net_poll_controller,
+#endif
+};
+
+static void vector_timer_expire(struct timer_list *t)
+{
+	struct vector_private *vp = timer_container_of(vp, t, tl);
+
+	vp->estats.tx_kicks++;
+	napi_schedule(&vp->napi);
+}
+
+static void vector_setup_etheraddr(struct net_device *dev, char *str)
+{
+	u8 addr[ETH_ALEN];
+
+	if (str == NULL)
+		goto random;
+
+	if (!mac_pton(str, addr)) {
+		netdev_err(dev,
+			"Failed to parse '%s' as an ethernet address\n", str);
+		goto random;
+	}
+	if (is_multicast_ether_addr(addr)) {
+		netdev_err(dev,
+			"Attempt to assign a multicast ethernet address to a device disallowed\n");
+		goto random;
+	}
+	if (!is_valid_ether_addr(addr)) {
+		netdev_err(dev,
+			"Attempt to assign an invalid ethernet address to a device disallowed\n");
+		goto random;
+	}
+	if (!is_local_ether_addr(addr)) {
+		netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n");
+		netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n");
+		netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
+			addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]);
+	}
+	eth_hw_addr_set(dev, addr);
+	return;
+
+random:
+	netdev_info(dev, "Choosing a random ethernet address\n");
+	eth_hw_addr_random(dev);
+}
+
+static void vector_eth_configure(
+		int n,
+		struct arglist *def
+	)
+{
+	struct vector_device *device;
+	struct net_device *dev;
+	struct vector_private *vp;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (device == NULL) {
+		pr_err("Failed to allocate struct vector_device for vec%d\n", n);
+		return;
+	}
+	dev = alloc_etherdev(sizeof(struct vector_private));
+	if (dev == NULL) {
+		pr_err("Failed to allocate struct net_device for vec%d\n", n);
+		goto out_free_device;
+	}
+
+	dev->mtu = get_mtu(def);
+
+	INIT_LIST_HEAD(&device->list);
+	device->unit = n;
+
+	/* If this name ends up conflicting with an existing registered
+	 * netdevice, that is OK, register_netdev{,ice}() will notice this
+	 * and fail.
+	 */
+	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+	vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+	vp = netdev_priv(dev);
+
+	/* sysfs register */
+	if (!driver_registered) {
+		platform_driver_register(&uml_net_driver);
+		driver_registered = 1;
+	}
+	device->pdev.id = n;
+	device->pdev.name = DRIVER_NAME;
+	device->pdev.dev.release = vector_device_release;
+	dev_set_drvdata(&device->pdev.dev, device);
+	if (platform_device_register(&device->pdev))
+		goto out_free_netdev;
+	SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+	device->dev = dev;
+
+	INIT_LIST_HEAD(&vp->list);
+	vp->dev		= dev;
+	vp->unit	= n;
+	vp->options	= get_transport_options(def);
+	vp->parsed	= def;
+	vp->max_packet	= get_mtu(def) + ETH_HEADER_OTHER;
+	/*
+	 * TODO - we need to calculate headroom so that ip header
+	 * is 16 byte aligned all the time
+	 */
+	vp->headroom	= get_headroom(def);
+	vp->coalesce	= 2;
+	vp->req_size	= get_req_size(def);
+
+	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+	INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+	timer_setup(&vp->tl, vector_timer_expire, 0);
+
+	/* FIXME */
+	dev->netdev_ops = &vector_netdev_ops;
+	dev->ethtool_ops = &vector_net_ethtool_ops;
+	dev->watchdog_timeo = (HZ >> 1);
+	/* primary IRQ - fixme */
+	dev->irq = 0; /* we will adjust this once opened */
+
+	rtnl_lock();
+	err = register_netdevice(dev);
+	rtnl_unlock();
+	if (err)
+		goto out_undo_user_init;
+
+	spin_lock(&vector_devices_lock);
+	list_add(&device->list, &vector_devices);
+	spin_unlock(&vector_devices_lock);
+
+	return;
+
+out_undo_user_init:
+	return;
+out_free_netdev:
+	free_netdev(dev);
+out_free_device:
+	kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+static int __init vector_init(void)
+{
+	struct list_head *ele;
+	struct vector_cmd_line_arg *def;
+	struct arglist *parsed;
+
+	list_for_each(ele, &vec_cmd_line) {
+		def = list_entry(ele, struct vector_cmd_line_arg, list);
+		parsed = uml_parse_vector_ifspec(def->arguments);
+		if (parsed != NULL)
+			vector_eth_configure(def->unit, parsed);
+	}
+	return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+static int __init vector_setup(char *str)
+{
+	char *error;
+	int n, err;
+	struct vector_cmd_line_arg *new;
+
+	err = vector_parse(str, &n, &str, &error);
+	if (err) {
+		pr_err("Couldn't parse '%s': %s\n", str, error);
+		return 1;
+	}
+	new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
+	INIT_LIST_HEAD(&new->list);
+	new->unit = n;
+	new->arguments = str;
+	list_add_tail(&new->list, &vec_cmd_line);
+	return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+"    Configure a vector io network device.\n\n"
+);
+
+late_initcall(vector_init);
+
+static struct mc_device vector_mc = {
+	.list		= LIST_HEAD_INIT(vector_mc.list),
+	.name		= "vec",
+	.config		= vector_config,
+	.get_config	= NULL,
+	.id		= vector_id,
+	.remove		= vector_remove,
+};
+
+#ifdef CONFIG_INET
+static int vector_inetaddr_event(
+	struct notifier_block *this,
+	unsigned long event,
+	void *ptr)
+{
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+	.notifier_call		= vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+	register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+static inline void inet_register(void)
+{
+}
+#endif
+
+static int vector_net_init(void)
+{
+	mconsole_register_dev(&vector_mc);
+	inet_register();
+	return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..417834793658
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <asm/atomic.h>
+
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a mmsgrecv/mmsgsend context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG 0
+#define QUEUE_SENDMMSG 1
+
+#define VECTOR_RX 1
+#define VECTOR_TX (1 << 1)
+#define VECTOR_BPF (1 << 2)
+#define VECTOR_QDISC_BYPASS (1 << 3)
+#define VECTOR_BPF_FLASH (1 << 4)
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+#define MAX_FILTER_PROG (2 << 16)
+
+struct vector_queue {
+	struct mmsghdr *mmsg_vector;
+	void **skbuff_vector;
+	 /* backlink to device which owns us */
+	struct net_device *dev;
+	spinlock_t head_lock;
+	spinlock_t tail_lock;
+	atomic_t queue_depth;
+	int head, tail, max_depth, max_iov_frags;
+	short options;
+};
+
+struct vector_estats {
+	uint64_t rx_queue_max;
+	uint64_t rx_queue_running_average;
+	uint64_t tx_queue_max;
+	uint64_t tx_queue_running_average;
+	uint64_t rx_encaps_errors;
+	uint64_t tx_timeout_count;
+	uint64_t tx_restart_queue;
+	uint64_t tx_kicks;
+	uint64_t tx_flow_control_xon;
+	uint64_t tx_flow_control_xoff;
+	uint64_t rx_csum_offload_good;
+	uint64_t rx_csum_offload_errors;
+	uint64_t sg_ok;
+	uint64_t sg_linearized;
+};
+
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+struct vector_private {
+	struct list_head list;
+	struct net_device *dev;
+	struct napi_struct		napi	____cacheline_aligned;
+
+	int unit;
+
+	/* Timeout timer in TX */
+
+	struct timer_list tl;
+
+	/* Scheduled "remove device" work */
+	struct work_struct reset_tx;
+	struct vector_fds *fds;
+
+	struct vector_queue *rx_queue;
+	struct vector_queue *tx_queue;
+
+	int rx_irq;
+	int tx_irq;
+
+	struct arglist *parsed;
+
+	void *transport_data; /* transport specific params if needed */
+
+	int max_packet;
+	int req_size; /* different from max packet - used for TSO */
+	int headroom;
+
+	int options;
+
+	/* remote address if any - some transports will leave this as null */
+
+	int header_size;
+	int rx_header_size;
+	int coalesce;
+
+	void *header_rxbuffer;
+	void *header_txbuffer;
+
+	int (*form_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+	int (*verify_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+
+	spinlock_t stats_lock;
+
+	bool rexmit_scheduled;
+	bool opened;
+	bool in_write_poll;
+	bool in_error;
+
+	/* guest allowed to use ethtool flash to load bpf */
+	bool bpf_via_flash;
+
+	/* ethtool stats */
+
+	struct vector_estats estats;
+	struct sock_fprog *bpf;
+
+	char user[];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..0794d23f07cb
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/virtio_net.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/netdev_features.h>
+#include "vector_user.h"
+#include "vector_kern.h"
+
+#define GOOD_LINEAR 512
+#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
+
+struct gre_minimal_header {
+	uint16_t header;
+	uint16_t arptype;
+};
+
+
+struct uml_gre_data {
+	uint32_t rx_key;
+	uint32_t tx_key;
+	uint32_t sequence;
+
+	bool ipv6;
+	bool has_sequence;
+	bool pin_sequence;
+	bool checksum;
+	bool key;
+	struct gre_minimal_header expected_header;
+
+	uint32_t checksum_offset;
+	uint32_t key_offset;
+	uint32_t sequence_offset;
+
+};
+
+struct uml_l2tpv3_data {
+	uint64_t rx_cookie;
+	uint64_t tx_cookie;
+	uint64_t rx_session;
+	uint64_t tx_session;
+	uint32_t counter;
+
+	bool udp;
+	bool ipv6;
+	bool has_counter;
+	bool pin_counter;
+	bool cookie;
+	bool cookie_is_64;
+
+	uint32_t cookie_offset;
+	uint32_t session_offset;
+	uint32_t counter_offset;
+};
+
+static int l2tpv3_form_header(uint8_t *header,
+	struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *counter;
+
+	if (td->udp)
+		*(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
+	(*(uint32_t *) (header + td->session_offset)) = td->tx_session;
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			(*(uint64_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+		else
+			(*(uint32_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+	}
+	if (td->has_counter) {
+		counter = (uint32_t *)(header + td->counter_offset);
+		if (td->pin_counter) {
+			*counter = 0;
+		} else {
+			td->counter++;
+			*counter = cpu_to_be32(td->counter);
+		}
+	}
+	return 0;
+}
+
+static int gre_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_gre_data *td = vp->transport_data;
+	uint32_t *sequence;
+	*((uint32_t *) header) = *((uint32_t *) &td->expected_header);
+	if (td->key)
+		(*(uint32_t *) (header + td->key_offset)) = td->tx_key;
+	if (td->has_sequence) {
+		sequence = (uint32_t *)(header + td->sequence_offset);
+		if (td->pin_sequence)
+			*sequence = 0;
+		else
+			*sequence = cpu_to_be32(++td->sequence);
+	}
+	return 0;
+}
+
+static int raw_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	virtio_net_hdr_from_skb(
+		skb,
+		vheader,
+		virtio_legacy_is_little_endian(),
+		false,
+		0
+	);
+
+	return 0;
+}
+
+static int l2tpv3_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *session;
+	uint64_t cookie;
+
+	if ((!td->udp) && (!td->ipv6))
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	/* we do not do a strict check for "data" packets as per
+	 * the RFC spec because the pure IP spec does not have
+	 * that anyway.
+	 */
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			cookie = *(uint64_t *)(header + td->cookie_offset);
+		else
+			cookie = *(uint32_t *)(header + td->cookie_offset);
+		if (cookie != td->rx_cookie) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
+			return -1;
+		}
+	}
+	session = (uint32_t *) (header + td->session_offset);
+	if (*session != td->rx_session) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
+		return -1;
+	}
+	return 0;
+}
+
+static int gre_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+
+	uint32_t key;
+	struct uml_gre_data *td = vp->transport_data;
+
+	if (!td->ipv6)
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
+				*((uint32_t *) &td->expected_header),
+				*((uint32_t *) header)
+			);
+		return -1;
+	}
+
+	if (td->key) {
+		key = (*(uint32_t *)(header + td->key_offset));
+		if (key != td->rx_key) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
+						key, td->rx_key);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int raw_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
+		(vp->req_size != 65536)) {
+		if (net_ratelimit())
+			netdev_err(
+				vp->dev,
+				GSO_ERROR
+		);
+	}
+	if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
+		return 1;
+
+	virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
+	return 0;
+}
+
+static bool get_uint_param(
+	struct arglist *def, char *param, unsigned int *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg != NULL) {
+		if (kstrtoint(arg, 0, result) == 0)
+			return true;
+	}
+	return false;
+}
+
+static bool get_ulong_param(
+	struct arglist *def, char *param, unsigned long *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg != NULL) {
+		if (kstrtoul(arg, 0, result) == 0)
+			return true;
+		return true;
+	}
+	return false;
+}
+
+static int build_gre_transport_data(struct vector_private *vp)
+{
+	struct uml_gre_data *td;
+	int temp_int;
+	int temp_rx;
+	int temp_tx;
+
+	vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+	td = vp->transport_data;
+	td->sequence = 0;
+
+	td->expected_header.arptype = GRE_IRB;
+	td->expected_header.header = 0;
+
+	vp->form_header = &gre_form_header;
+	vp->verify_header = &gre_verify_header;
+	vp->header_size = 4;
+	td->key_offset = 4;
+	td->sequence_offset = 4;
+	td->checksum_offset = 4;
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+	td->key = false;
+	if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
+		if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
+			td->key = true;
+			td->expected_header.header |= GRE_MODE_KEY;
+			td->rx_key = cpu_to_be32(temp_rx);
+			td->tx_key = cpu_to_be32(temp_tx);
+			vp->header_size += 4;
+			td->sequence_offset += 4;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->sequence = false;
+	if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
+		if (temp_int > 0) {
+			vp->header_size += 4;
+			td->has_sequence = true;
+			td->expected_header.header |= GRE_MODE_SEQUENCE;
+			if (get_uint_param(
+				vp->parsed, "pin_sequence", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_sequence = true;
+			}
+		}
+	}
+	vp->rx_header_size = vp->header_size;
+	if (!td->ipv6)
+		vp->rx_header_size += sizeof(struct iphdr);
+	return 0;
+}
+
+static int build_l2tpv3_transport_data(struct vector_private *vp)
+{
+
+	struct uml_l2tpv3_data *td;
+	int temp_int, temp_rxs, temp_txs;
+	unsigned long temp_rx;
+	unsigned long temp_tx;
+
+	vp->transport_data = kmalloc(
+		sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
+
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+
+	td = vp->transport_data;
+
+	vp->form_header = &l2tpv3_form_header;
+	vp->verify_header = &l2tpv3_verify_header;
+	td->counter = 0;
+
+	vp->header_size = 4;
+	td->session_offset = 0;
+	td->cookie_offset = 4;
+	td->counter_offset = 4;
+
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+
+	if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
+		if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
+			td->tx_session = cpu_to_be32(temp_txs);
+			td->rx_session = cpu_to_be32(temp_rxs);
+		} else {
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	td->cookie_is_64  = false;
+	if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
+		if (temp_int > 0)
+			td->cookie_is_64  = true;
+	}
+	td->cookie = false;
+	if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
+		if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
+			td->cookie = true;
+			if (td->cookie_is_64) {
+				td->rx_cookie = cpu_to_be64(temp_rx);
+				td->tx_cookie = cpu_to_be64(temp_tx);
+				vp->header_size += 8;
+				td->counter_offset += 8;
+			} else {
+				td->rx_cookie = cpu_to_be32(temp_rx);
+				td->tx_cookie = cpu_to_be32(temp_tx);
+				vp->header_size += 4;
+				td->counter_offset += 4;
+			}
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->has_counter = false;
+	if (get_uint_param(vp->parsed, "counter", &temp_int)) {
+		if (temp_int > 0) {
+			td->has_counter = true;
+			vp->header_size += 4;
+			if (get_uint_param(
+				vp->parsed, "pin_counter", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_counter = true;
+			}
+		}
+	}
+
+	if (get_uint_param(vp->parsed, "udp", &temp_int)) {
+		if (temp_int > 0) {
+			td->udp = true;
+			vp->header_size += 4;
+			td->counter_offset += 4;
+			td->session_offset += 4;
+			td->cookie_offset += 4;
+		}
+	}
+
+	vp->rx_header_size = vp->header_size;
+	if ((!td->ipv6) && (!td->udp))
+		vp->rx_header_size += sizeof(struct iphdr);
+
+	return 0;
+}
+
+static int build_raw_transport_data(struct vector_private *vp)
+{
+	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+		if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
+			return -1;
+		vp->form_header = &raw_form_header;
+		vp->verify_header = &raw_verify_header;
+		vp->header_size = sizeof(struct virtio_net_hdr);
+		vp->rx_header_size = sizeof(struct virtio_net_hdr);
+		vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
+		vp->dev->features |=
+			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GRO);
+		netdev_info(
+			vp->dev,
+			"raw: using vnet headers for tso and tx/rx checksum"
+		);
+	}
+	return 0;
+}
+
+static int build_hybrid_transport_data(struct vector_private *vp)
+{
+	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+		vp->form_header = &raw_form_header;
+		vp->verify_header = &raw_verify_header;
+		vp->header_size = sizeof(struct virtio_net_hdr);
+		vp->rx_header_size = sizeof(struct virtio_net_hdr);
+		vp->dev->hw_features |=
+			(NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		vp->dev->features |=
+			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		netdev_info(
+			vp->dev,
+			"tap/raw hybrid: using vnet headers for tso and tx/rx checksum"
+		);
+	} else {
+		return 0; /* do not try to enable tap too if raw failed */
+	}
+	if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
+		return 0;
+	return -1;
+}
+
+static int build_tap_transport_data(struct vector_private *vp)
+{
+	/* "Pure" tap uses the same fd for rx and tx */
+	if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) {
+		vp->form_header = &raw_form_header;
+		vp->verify_header = &raw_verify_header;
+		vp->header_size = sizeof(struct virtio_net_hdr);
+		vp->rx_header_size = sizeof(struct virtio_net_hdr);
+		vp->dev->hw_features |=
+			(NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		vp->dev->features |=
+			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		netdev_info(
+			vp->dev,
+			"tap: using vnet headers for tso and tx/rx checksum"
+		);
+		return 0;
+	}
+	return -1;
+}
+
+
+static int build_bess_transport_data(struct vector_private *vp)
+{
+	vp->form_header = NULL;
+	vp->verify_header = NULL;
+	vp->header_size = 0;
+	vp->rx_header_size = 0;
+	return 0;
+}
+
+int build_transport_data(struct vector_private *vp)
+{
+	char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
+
+	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+		return build_gre_transport_data(vp);
+	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+		return build_l2tpv3_transport_data(vp);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return build_raw_transport_data(vp);
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return build_tap_transport_data(vp);
+	if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
+		return build_hybrid_transport_data(vp);
+	if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0)
+		return build_bess_transport_data(vp);
+	return 0;
+}
+
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..2ea67e6fd067
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/ip.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <limits.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0
+#define ID_L2TPV3 1
+#define ID_BESS 2
+#define ID_MAX 2
+
+#define TOKEN_IFNAME "ifname"
+#define TOKEN_SCRIPT "ifup"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_FD "fd"
+#define TRANS_FD_LEN strlen(TRANS_FD)
+
+#define TRANS_VDE "vde"
+#define TRANS_VDE_LEN strlen(TRANS_VDE)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d prog %px to %d, err %d\n"
+#define BPF_DETACH_FAIL "Failed to detach filter size %d prog %px to %d, err %d\n"
+
+#define MAX_UN_LEN 107
+
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char *template = "tapXXXXXX";
+
+/* This is very ugly and brute force lookup, but it is done
+ * only once at initialization so not worth doing hashes or
+ * anything more intelligent
+ */
+
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+	int i;
+
+	for (i = 0; i < ifspec->numargs; i++) {
+		if (strcmp(ifspec->tokens[i], token) == 0)
+			return ifspec->values[i];
+	}
+	return NULL;
+
+}
+
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+	struct arglist *result;
+	int pos, len;
+	bool parsing_token = true, next_starts = true;
+
+	if (arg == NULL)
+		return NULL;
+	result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	result->numargs = 0;
+	len = strlen(arg);
+	for (pos = 0; pos < len; pos++) {
+		if (next_starts) {
+			if (parsing_token) {
+				result->tokens[result->numargs] = arg + pos;
+			} else {
+				result->values[result->numargs] = arg + pos;
+				result->numargs++;
+			}
+			next_starts = false;
+		}
+		if (*(arg + pos) == '=') {
+			if (parsing_token)
+				parsing_token = false;
+			else
+				goto cleanup;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+		if (*(arg + pos) == ',') {
+			parsing_token = true;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+	}
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+	kfree(result);
+	return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return an structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (f.e. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+
+static int create_tap_fd(char *iface)
+{
+	struct ifreq ifr;
+	int fd = -1;
+	int err = -ENOMEM, offload;
+
+	fd = open(PATH_NET_TUN, O_RDWR);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+		goto tap_fd_cleanup;
+	}
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+	strscpy(ifr.ifr_name, iface);
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (err != 0) {
+		printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+		goto tap_fd_cleanup;
+	}
+
+	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+	ioctl(fd, TUNSETOFFLOAD, offload);
+	return fd;
+tap_fd_cleanup:
+	if (fd >= 0)
+		os_close_file(fd);
+	return err;
+}
+
+static int create_raw_fd(char *iface, int flags, int proto)
+{
+	struct ifreq ifr;
+	int fd = -1;
+	struct sockaddr_ll sock;
+	int err = -ENOMEM;
+
+	fd = socket(AF_PACKET, SOCK_RAW, flags);
+	if (fd == -1) {
+		err = -errno;
+		goto raw_fd_cleanup;
+	}
+	memset(&ifr, 0, sizeof(ifr));
+	strscpy(ifr.ifr_name, iface);
+	if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		goto raw_fd_cleanup;
+	}
+
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(proto);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(fd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto raw_fd_cleanup;
+	}
+	return fd;
+raw_fd_cleanup:
+	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+	if (fd >= 0)
+		os_close_file(fd);
+	return err;
+}
+
+
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+	int fd = -1, i;
+	char *iface;
+	struct vector_fds *result = NULL;
+	bool dynamic = false;
+	char dynamic_ifname[IFNAMSIZ];
+	char *argv[] = {NULL, NULL, NULL, NULL};
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL) {
+		dynamic = true;
+		iface = dynamic_ifname;
+		srand(getpid());
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+		goto tap_cleanup;
+	}
+	result->rx_fd = -1;
+	result->tx_fd = -1;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+
+	/* TAP */
+	do {
+		if (dynamic) {
+			strcpy(iface, template);
+			for (i = 0; i < strlen(iface); i++) {
+				if (iface[i] == 'X') {
+					iface[i] = padchar[rand() % strlen(padchar)];
+				}
+			}
+		}
+		fd = create_tap_fd(iface);
+		if ((fd < 0) && (!dynamic)) {
+			printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
+			goto tap_cleanup;
+		}
+		result->tx_fd = fd;
+		result->rx_fd = fd;
+	} while (fd < 0);
+
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
+	}
+
+	return result;
+tap_cleanup:
+	printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd);
+	kfree(result);
+	return NULL;
+}
+
+static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
+{
+	char *iface;
+	struct vector_fds *result = NULL;
+	char *argv[] = {NULL, NULL, NULL, NULL};
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+		goto hybrid_cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+		goto hybrid_cleanup;
+	}
+	result->rx_fd = -1;
+	result->tx_fd = -1;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+
+	/* TAP */
+
+	result->tx_fd = create_tap_fd(iface);
+	if (result->tx_fd < 0) {
+		printk(UM_KERN_ERR "uml_tap: failed to create tun interface: %i\n", result->tx_fd);
+		goto hybrid_cleanup;
+	}
+
+	/* RAW */
+
+	result->rx_fd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL);
+	if (result->rx_fd == -1) {
+		printk(UM_KERN_ERR
+			"uml_tap: failed to create paired raw socket: %i\n", result->rx_fd);
+		goto hybrid_cleanup;
+	}
+
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
+	}
+	return result;
+hybrid_cleanup:
+	printk(UM_KERN_ERR "user_init_hybrid: init failed");
+	kfree(result);
+	return NULL;
+}
+
+static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id)
+{
+	int fd = -1;
+	int socktype;
+	char *src, *dst;
+	struct vector_fds *result = NULL;
+	struct sockaddr_un *local_addr = NULL, *remote_addr = NULL;
+
+	src = uml_vector_fetch_arg(ifspec, "src");
+	dst = uml_vector_fetch_arg(ifspec, "dst");
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "unix open:cannot allocate remote addr");
+		goto unix_cleanup;
+	}
+	remote_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+	if (remote_addr == NULL) {
+		printk(UM_KERN_ERR "unix open:cannot allocate remote addr");
+		goto unix_cleanup;
+	}
+
+	switch (id) {
+	case ID_BESS:
+		socktype = SOCK_SEQPACKET;
+		if ((src != NULL) && (strlen(src) <= MAX_UN_LEN)) {
+			local_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+			if (local_addr == NULL) {
+				printk(UM_KERN_ERR "bess open:cannot allocate local addr");
+				goto unix_cleanup;
+			}
+			local_addr->sun_family = AF_UNIX;
+			memcpy(local_addr->sun_path, src, strlen(src) + 1);
+		}
+		if ((dst == NULL) || (strlen(dst) > MAX_UN_LEN))
+			goto unix_cleanup;
+		remote_addr->sun_family = AF_UNIX;
+		memcpy(remote_addr->sun_path, dst, strlen(dst) + 1);
+		break;
+	default:
+		printk(KERN_ERR "Unsupported unix socket type\n");
+		return NULL;
+	}
+
+	fd = socket(AF_UNIX, socktype, 0);
+	if (fd == -1) {
+		printk(UM_KERN_ERR
+			"unix open: could not open socket, error = %d",
+			-errno
+		);
+		goto unix_cleanup;
+	}
+	if (local_addr != NULL) {
+		if (bind(fd, (struct sockaddr *) local_addr, sizeof(struct sockaddr_un))) {
+			printk(UM_KERN_ERR UNIX_BIND_FAIL, errno);
+			goto unix_cleanup;
+		}
+	}
+	switch (id) {
+	case ID_BESS:
+		if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) {
+			printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno);
+			goto unix_cleanup;
+		}
+		break;
+	}
+	result->rx_fd = fd;
+	result->tx_fd = fd;
+	result->remote_addr_size = sizeof(struct sockaddr_un);
+	result->remote_addr = remote_addr;
+	return result;
+unix_cleanup:
+	if (fd >= 0)
+		os_close_file(fd);
+	kfree(remote_addr);
+	kfree(result);
+	return NULL;
+}
+
+static int strtofd(const char *nptr)
+{
+	long fd;
+	char *endptr;
+
+	if (nptr == NULL)
+		return -1;
+
+	errno = 0;
+	fd = strtol(nptr, &endptr, 10);
+	if (nptr == endptr ||
+		errno != 0 ||
+		*endptr != '\0' ||
+		fd < 0 ||
+		fd > INT_MAX) {
+		return -1;
+	}
+	return fd;
+}
+
+static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
+{
+	int fd = -1;
+	char *fdarg = NULL;
+	struct vector_fds *result = NULL;
+
+	fdarg = uml_vector_fetch_arg(ifspec, "fd");
+	fd = strtofd(fdarg);
+	if (fd == -1) {
+		printk(UM_KERN_ERR "fd open: bad or missing fd argument");
+		goto fd_cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "fd open: allocation failed");
+		goto fd_cleanup;
+	}
+
+	result->rx_fd = fd;
+	result->tx_fd = fd;
+	result->remote_addr_size = 0;
+	result->remote_addr = NULL;
+	return result;
+
+fd_cleanup:
+	if (fd >= 0)
+		os_close_file(fd);
+	kfree(result);
+	return NULL;
+}
+
+/* enough char to store an int type */
+#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2)
+#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3)
+/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */
+#define VDE_MAX_ARGC 12
+#define VDE_SEQPACKET_HEAD "seqpacket://"
+#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1)
+#define VDE_DEFAULT_DESCRIPTION "UML"
+
+static struct vector_fds *user_init_vde_fds(struct arglist *ifspec)
+{
+	char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1];
+	char *argv[VDE_MAX_ARGC] = {"vde_plug"};
+	int argc = 1;
+	int rv;
+	int sv[2];
+	struct vector_fds *result = NULL;
+
+	char *vnl = uml_vector_fetch_arg(ifspec,"vnl");
+	char *descr = uml_vector_fetch_arg(ifspec,"descr");
+	char *port = uml_vector_fetch_arg(ifspec,"port");
+	char *mode = uml_vector_fetch_arg(ifspec,"mode");
+	char *group = uml_vector_fetch_arg(ifspec,"group");
+	if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION;
+
+	argv[argc++] = "--descr";
+	argv[argc++] = descr;
+	if (port != NULL) {
+		argv[argc++] = "--port2";
+		argv[argc++] = port;
+	}
+	if (mode != NULL) {
+		argv[argc++] = "--mod2";
+		argv[argc++] = mode;
+	}
+	if (group != NULL) {
+		argv[argc++] = "--group2";
+		argv[argc++] = group;
+	}
+	argv[argc++] = seqpacketvnl;
+	argv[argc++] = vnl;
+	argv[argc++] = NULL;
+
+	rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
+	if (rv  < 0) {
+		printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno);
+		return NULL;
+	}
+	rv = os_set_exec_close(sv[0]);
+	if (rv  < 0) {
+		printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno);
+		goto vde_cleanup_sv;
+	}
+	snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]);
+
+	run_helper(NULL, NULL, argv);
+
+	close(sv[1]);
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "fd open: allocation failed");
+		goto vde_cleanup;
+	}
+
+	result->rx_fd = sv[0];
+	result->tx_fd = sv[0];
+	result->remote_addr_size = 0;
+	result->remote_addr = NULL;
+	return result;
+
+vde_cleanup_sv:
+	close(sv[1]);
+vde_cleanup:
+	close(sv[0]);
+	return NULL;
+}
+
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+	int rxfd = -1, txfd = -1;
+	int err = -ENOMEM;
+	char *iface;
+	struct vector_fds *result = NULL;
+	char *argv[] = {NULL, NULL, NULL, NULL};
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL)
+		goto raw_cleanup;
+
+	rxfd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL);
+	if (rxfd == -1) {
+		err = -errno;
+		goto raw_cleanup;
+	}
+	txfd = create_raw_fd(iface, 0, ETH_P_IP); /* Turn off RX on this fd */
+	if (txfd == -1) {
+		err = -errno;
+		goto raw_cleanup;
+	}
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result != NULL) {
+		result->rx_fd = rxfd;
+		result->tx_fd = txfd;
+		result->remote_addr = NULL;
+		result->remote_addr_size = 0;
+	}
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
+	}
+	return result;
+raw_cleanup:
+	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+	kfree(result);
+	return NULL;
+}
+
+
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_QDISC_BYPASS,
+		&optval, sizeof(optval)) != 0) {
+		return false;
+	}
+	return true;
+}
+
+bool uml_raw_enable_vnet_headers(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_VNET_HDR,
+		&optval, sizeof(optval)) != 0) {
+		printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+		return false;
+	}
+	return true;
+}
+bool uml_tap_enable_vnet_headers(int fd)
+{
+	unsigned int features;
+	int len = sizeof(struct virtio_net_hdr);
+
+	if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+		printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+		return false;
+	}
+	if ((features & IFF_VNET_HDR) == 0) {
+		printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+		return false;
+	}
+	ioctl(fd, TUNSETVNETHDRSZ, &len);
+	return true;
+}
+
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+	int err = -ENOMEM;
+	int fd = -1, gairet;
+	struct addrinfo srchints;
+	struct addrinfo dsthints;
+	bool v6, udp;
+	char *value;
+	char *src, *dst, *srcport, *dstport;
+	struct addrinfo *gairesult = NULL;
+	struct vector_fds *result = NULL;
+
+
+	value = uml_vector_fetch_arg(ifspec, "v6");
+	v6 = false;
+	udp = false;
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			v6 = true;
+	}
+
+	value = uml_vector_fetch_arg(ifspec, "udp");
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			udp = true;
+	}
+	src = uml_vector_fetch_arg(ifspec, "src");
+	dst = uml_vector_fetch_arg(ifspec, "dst");
+	srcport = uml_vector_fetch_arg(ifspec, "srcport");
+	dstport = uml_vector_fetch_arg(ifspec, "dstport");
+
+	memset(&dsthints, 0, sizeof(dsthints));
+
+	if (v6)
+		dsthints.ai_family = AF_INET6;
+	else
+		dsthints.ai_family = AF_INET;
+
+	switch (id) {
+	case ID_GRE:
+		dsthints.ai_socktype = SOCK_RAW;
+		dsthints.ai_protocol = IPPROTO_GRE;
+		break;
+	case ID_L2TPV3:
+		if (udp) {
+			dsthints.ai_socktype = SOCK_DGRAM;
+			dsthints.ai_protocol = 0;
+		} else {
+			dsthints.ai_socktype = SOCK_RAW;
+			dsthints.ai_protocol = IPPROTO_L2TP;
+		}
+		break;
+	default:
+		printk(KERN_ERR "Unsupported socket type\n");
+		return NULL;
+	}
+	memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
+
+	gairet = getaddrinfo(src, srcport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve src, error = %s",
+			gai_strerror(gairet)
+		);
+		return NULL;
+	}
+	fd = socket(gairesult->ai_family,
+		gairesult->ai_socktype, gairesult->ai_protocol);
+	if (fd == -1) {
+		printk(UM_KERN_ERR
+			"socket_open : could not open socket, error = %d",
+			-errno
+		);
+		goto cleanup;
+	}
+	if (bind(fd,
+		(struct sockaddr *) gairesult->ai_addr,
+		gairesult->ai_addrlen)) {
+		printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
+		goto cleanup;
+	}
+
+	if (gairesult != NULL)
+		freeaddrinfo(gairesult);
+
+	gairesult = NULL;
+
+	gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve dst, error = %s",
+			gai_strerror(gairet)
+		);
+		return NULL;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result != NULL) {
+		result->rx_fd = fd;
+		result->tx_fd = fd;
+		result->remote_addr = uml_kmalloc(
+			gairesult->ai_addrlen, UM_GFP_KERNEL);
+		if (result->remote_addr == NULL)
+			goto cleanup;
+		result->remote_addr_size = gairesult->ai_addrlen;
+		memcpy(
+			result->remote_addr,
+			gairesult->ai_addr,
+			gairesult->ai_addrlen
+		);
+	}
+	freeaddrinfo(gairesult);
+	return result;
+cleanup:
+	if (gairesult != NULL)
+		freeaddrinfo(gairesult);
+	printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
+	if (fd >= 0)
+		os_close_file(fd);
+	if (result != NULL) {
+		kfree(result->remote_addr);
+		kfree(result);
+	}
+	return NULL;
+}
+
+struct vector_fds *uml_vector_user_open(
+	int unit,
+	struct arglist *parsed
+)
+{
+	char *transport;
+
+	if (parsed == NULL) {
+		printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
+		return NULL;
+	}
+	transport = uml_vector_fetch_arg(parsed, "transport");
+	if (transport == NULL) {
+		printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
+		return NULL;
+	}
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return user_init_raw_fds(parsed);
+	if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0)
+		return user_init_hybrid_fds(parsed);
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return user_init_tap_fds(parsed);
+	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+		return user_init_socket_fds(parsed, ID_GRE);
+	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+		return user_init_socket_fds(parsed, ID_L2TPV3);
+	if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0)
+		return user_init_unix_fds(parsed, ID_BESS);
+	if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
+		return user_init_fd_fds(parsed);
+	if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0)
+		return user_init_vde_fds(parsed);
+	return NULL;
+}
+
+
+int uml_vector_sendmsg(int fd, void *hdr, int flags)
+{
+	int n;
+
+	CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr,  flags));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_recvmsg(int fd, void *hdr, int flags)
+{
+	int n;
+	struct msghdr *msg = (struct msghdr *) hdr;
+
+	CATCH_EINTR(n = readv(fd, msg->msg_iov, msg->msg_iovlen));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_writev(int fd, void *hdr, int iovcount)
+{
+	int n;
+
+	CATCH_EINTR(n = writev(fd, (struct iovec *) hdr,  iovcount));
+	if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS)))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_sendmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags)
+{
+	int n;
+
+	CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
+	if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS)))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_recvmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags)
+{
+	int n;
+
+	CATCH_EINTR(
+		n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+int uml_vector_attach_bpf(int fd, void *bpf)
+{
+	struct sock_fprog *prog = bpf;
+
+	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, sizeof(struct sock_fprog));
+
+	if (err < 0)
+		printk(KERN_ERR BPF_ATTACH_FAIL, prog->len, prog->filter, fd, -errno);
+	return err;
+}
+
+int uml_vector_detach_bpf(int fd, void *bpf)
+{
+	struct sock_fprog *prog = bpf;
+
+	int err = setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, bpf, sizeof(struct sock_fprog));
+	if (err < 0)
+		printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno);
+	return err;
+}
+void *uml_vector_default_bpf(const void *mac)
+{
+	struct sock_filter *bpf;
+	uint32_t *mac1 = (uint32_t *)(mac + 2);
+	uint16_t *mac2 = (uint16_t *) mac;
+	struct sock_fprog *bpf_prog;
+
+	bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+	if (bpf_prog) {
+		bpf_prog->len = DEFAULT_BPF_LEN;
+		bpf_prog->filter = NULL;
+	} else {
+		return NULL;
+	}
+	bpf = uml_kmalloc(
+		sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
+	if (bpf) {
+		bpf_prog->filter = bpf;
+		/* ld	[8] */
+		bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
+		/* jeq	#0xMAC[2-6] jt 2 jf 5*/
+		bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
+		/* ldh	[6] */
+		bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
+		/* jeq	#0xMAC[0-1] jt 4 jf 5 */
+		bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
+		/* ret	#0 */
+		bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
+		/* ret	#0x40000 */
+		bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
+	} else {
+		kfree(bpf_prog);
+		bpf_prog = NULL;
+	}
+	return bpf_prog;
+}
+
+/* Note - this function requires a valid mac being passed as an arg */
+
+void *uml_vector_user_bpf(char *filename)
+{
+	struct sock_filter *bpf;
+	struct sock_fprog *bpf_prog;
+	struct stat statbuf;
+	int res, ffd = -1;
+
+	if (filename == NULL)
+		return NULL;
+
+	if (stat(filename, &statbuf) < 0) {
+		printk(KERN_ERR "Error %d reading bpf file", -errno);
+		return false;
+	}
+	bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
+	if (bpf_prog == NULL) {
+		printk(KERN_ERR "Failed to allocate bpf prog buffer");
+		return NULL;
+	}
+	bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
+	bpf_prog->filter = NULL;
+	ffd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+	if (ffd < 0) {
+		printk(KERN_ERR "Error %d opening bpf file", -errno);
+		goto bpf_failed;
+	}
+	bpf = uml_kmalloc(statbuf.st_size, UM_GFP_KERNEL);
+	if (bpf == NULL) {
+		printk(KERN_ERR "Failed to allocate bpf buffer");
+		goto bpf_failed;
+	}
+	bpf_prog->filter = bpf;
+	res = os_read_file(ffd, bpf, statbuf.st_size);
+	if (res < statbuf.st_size) {
+		printk(KERN_ERR "Failed to read bpf program %s, error %d", filename, res);
+		kfree(bpf);
+		goto bpf_failed;
+	}
+	os_close_file(ffd);
+	return bpf_prog;
+bpf_failed:
+	if (ffd > 0)
+		os_close_file(ffd);
+	kfree(bpf_prog);
+	return NULL;
+}
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..59ed5f9e6e41
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#ifndef __UM_VECTOR_USER_H
+#define __UM_VECTOR_USER_H
+
+#define MAXVARGS	20
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_TAP "tap"
+#define TRANS_TAP_LEN strlen(TRANS_TAP)
+
+#define TRANS_GRE "gre"
+#define TRANS_GRE_LEN strlen(TRANS_GRE)
+
+#define TRANS_L2TPV3 "l2tpv3"
+#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
+
+#define TRANS_HYBRID "hybrid"
+#define TRANS_HYBRID_LEN strlen(TRANS_HYBRID)
+
+#define TRANS_BESS "bess"
+#define TRANS_BESS_LEN strlen(TRANS_BESS)
+
+#define DEFAULT_BPF_LEN 6
+
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE 0x2F
+#endif
+
+#define GRE_MODE_CHECKSUM	cpu_to_be16(8 << 12)	/* checksum */
+#define GRE_MODE_RESERVED	cpu_to_be16(4 << 12)	/* unused */
+#define GRE_MODE_KEY		cpu_to_be16(2 << 12)	/* KEY present */
+#define GRE_MODE_SEQUENCE	cpu_to_be16(1 << 12)	/* sequence */
+
+#define GRE_IRB cpu_to_be16(0x6558)
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+struct arglist {
+	int	numargs;
+	char	*tokens[MAXVARGS];
+	char	*values[MAXVARGS];
+};
+
+/* Separating read and write FDs allows us to have different
+ * rx and tx method. Example - read tap via raw socket using
+ * recvmmsg, write using legacy tap write calls
+ */
+
+struct vector_fds {
+	int rx_fd;
+	int tx_fd;
+	void *remote_addr;
+	int remote_addr_size;
+};
+
+#define VECTOR_READ	1
+
+extern struct arglist *uml_parse_vector_ifspec(char *arg);
+
+extern struct vector_fds *uml_vector_user_open(
+	int unit,
+	struct arglist *parsed
+);
+
+extern char *uml_vector_fetch_arg(
+	struct arglist *ifspec,
+	char *token
+);
+
+extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
+extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
+extern int uml_vector_writev(int fd, void *hdr, int iovcount);
+extern int uml_vector_sendmmsg(
+	int fd, void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern int uml_vector_recvmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern void *uml_vector_default_bpf(const void *mac);
+extern void *uml_vector_user_bpf(char *filename);
+extern int uml_vector_attach_bpf(int fd, void *bpf);
+extern int uml_vector_detach_bpf(int fd, void *bpf);
+extern bool uml_raw_enable_qdisc_bypass(int fd);
+extern bool uml_raw_enable_vnet_headers(int fd);
+extern bool uml_tap_enable_vnet_headers(int fd);
+
+
+#endif
diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c
new file mode 100644
index 000000000000..915812a79bfc
--- /dev/null
+++ b/arch/um/drivers/vfio_kern.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+
+#define pr_fmt(fmt) "vfio-uml: " fmt
+
+#include <linux/module.h>
+#include <linux/logic_iomem.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+
+#include "mconsole_kern.h"
+#include "virt-pci.h"
+#include "vfio_user.h"
+
+#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev)
+
+struct uml_vfio_intr_ctx {
+	struct uml_vfio_device *dev;
+	int irq;
+};
+
+struct uml_vfio_device {
+	const char *name;
+	int group;
+
+	struct um_pci_device pdev;
+	struct uml_vfio_user_device udev;
+	struct uml_vfio_intr_ctx *intr_ctx;
+
+	int msix_cap;
+	int msix_bar;
+	int msix_offset;
+	int msix_size;
+	u32 *msix_data;
+
+	struct list_head list;
+};
+
+struct uml_vfio_group {
+	int id;
+	int fd;
+	int users;
+	struct list_head list;
+};
+
+static struct {
+	int fd;
+	int users;
+} uml_vfio_container = { .fd = -1 };
+static DEFINE_MUTEX(uml_vfio_container_mtx);
+
+static LIST_HEAD(uml_vfio_groups);
+static DEFINE_MUTEX(uml_vfio_groups_mtx);
+
+static LIST_HEAD(uml_vfio_devices);
+static DEFINE_MUTEX(uml_vfio_devices_mtx);
+
+static int uml_vfio_set_container(int group_fd)
+{
+	int err;
+
+	guard(mutex)(&uml_vfio_container_mtx);
+
+	err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd);
+	if (err)
+		return err;
+
+	uml_vfio_container.users++;
+	if (uml_vfio_container.users > 1)
+		return 0;
+
+	err = uml_vfio_user_setup_iommu(uml_vfio_container.fd);
+	if (err) {
+		uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+		uml_vfio_container.users--;
+	}
+	return err;
+}
+
+static void uml_vfio_unset_container(int group_fd)
+{
+	guard(mutex)(&uml_vfio_container_mtx);
+
+	uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
+	uml_vfio_container.users--;
+}
+
+static int uml_vfio_open_group(int group_id)
+{
+	struct uml_vfio_group *group;
+	int err;
+
+	guard(mutex)(&uml_vfio_groups_mtx);
+
+	list_for_each_entry(group, &uml_vfio_groups, list) {
+		if (group->id == group_id) {
+			group->users++;
+			return group->fd;
+		}
+	}
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	group->fd = uml_vfio_user_open_group(group_id);
+	if (group->fd < 0) {
+		err = group->fd;
+		goto free_group;
+	}
+
+	err = uml_vfio_set_container(group->fd);
+	if (err)
+		goto close_group;
+
+	group->id = group_id;
+	group->users = 1;
+
+	list_add(&group->list, &uml_vfio_groups);
+
+	return group->fd;
+
+close_group:
+	os_close_file(group->fd);
+free_group:
+	kfree(group);
+	return err;
+}
+
+static int uml_vfio_release_group(int group_fd)
+{
+	struct uml_vfio_group *group;
+
+	guard(mutex)(&uml_vfio_groups_mtx);
+
+	list_for_each_entry(group, &uml_vfio_groups, list) {
+		if (group->fd == group_fd) {
+			group->users--;
+			if (group->users == 0) {
+				uml_vfio_unset_container(group_fd);
+				os_close_file(group_fd);
+				list_del(&group->list);
+				kfree(group);
+			}
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
+{
+	struct uml_vfio_intr_ctx *ctx = opaque;
+	struct uml_vfio_device *dev = ctx->dev;
+	int index = ctx - dev->intr_ctx;
+	int irqfd = dev->udev.irqfd[index];
+	int irq = dev->msix_data[index];
+	uint64_t v;
+	int r;
+
+	do {
+		r = os_read_file(irqfd, &v, sizeof(v));
+		if (r == sizeof(v))
+			generic_handle_irq(irq);
+	} while (r == sizeof(v) || r == -EINTR);
+	WARN(r != -EAGAIN, "read returned %d\n", r);
+
+	return IRQ_HANDLED;
+}
+
+static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index)
+{
+	struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+	int err, irqfd;
+
+	if (ctx->irq >= 0)
+		return 0;
+
+	irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
+	if (irqfd < 0)
+		return irqfd;
+
+	ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
+				  uml_vfio_interrupt, 0,
+				  "vfio-uml", ctx);
+	if (ctx->irq < 0) {
+		err = ctx->irq;
+		goto deactivate;
+	}
+
+	err = add_sigio_fd(irqfd);
+	if (err)
+		goto free_irq;
+
+	return 0;
+
+free_irq:
+	um_free_irq(ctx->irq, ctx);
+	ctx->irq = -1;
+deactivate:
+	uml_vfio_user_deactivate_irq(&dev->udev, index);
+	return err;
+}
+
+static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
+{
+	struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
+
+	if (ctx->irq >= 0) {
+		ignore_sigio_fd(dev->udev.irqfd[index]);
+		um_free_irq(ctx->irq, ctx);
+		uml_vfio_user_deactivate_irq(&dev->udev, index);
+		ctx->irq = -1;
+	}
+	return 0;
+}
+
+static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
+				    unsigned int offset, int size,
+				    unsigned long val)
+{
+	/*
+	 * Here, we handle only the operations we care about,
+	 * ignoring the rest.
+	 */
+	if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
+		switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
+		case PCI_MSIX_FLAGS_ENABLE:
+		case 0:
+			return uml_vfio_user_update_irqs(&dev->udev);
+		}
+	}
+	return 0;
+}
+
+static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
+				      unsigned int offset, int size,
+				      unsigned long val)
+{
+	int index;
+
+	/*
+	 * Here, we handle only the operations we care about,
+	 * ignoring the rest.
+	 */
+	offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;
+
+	if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
+		return 0;
+
+	index = offset / PCI_MSIX_ENTRY_SIZE;
+	if (index >= dev->udev.irq_count)
+		return -EINVAL;
+
+	dev->msix_data[index] = val;
+
+	return val ? uml_vfio_activate_irq(dev, index) :
+		uml_vfio_deactivate_irq(dev, index);
+}
+
+static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev,
+					      unsigned int offset, int size)
+{
+	u8 data[8];
+
+	memset(data, 0xff, sizeof(data));
+
+	if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size))
+		return ULONG_MAX;
+
+	switch (size) {
+	case 1:
+		return data[0];
+	case 2:
+		return le16_to_cpup((void *)data);
+	case 4:
+		return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+	case 8:
+		return le64_to_cpup((void *)data);
+#endif
+	default:
+		return ULONG_MAX;
+	}
+}
+
+static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev,
+					    unsigned int offset, int size)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+
+	return __uml_vfio_cfgspace_read(dev, offset, size);
+}
+
+static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev,
+				      unsigned int offset, int size,
+				      unsigned long val)
+{
+	u8 data[8];
+
+	switch (size) {
+	case 1:
+		data[0] = (u8)val;
+		break;
+	case 2:
+		put_unaligned_le16(val, (void *)data);
+		break;
+	case 4:
+		put_unaligned_le32(val, (void *)data);
+		break;
+#ifdef CONFIG_64BIT
+	case 8:
+		put_unaligned_le64(val, (void *)data);
+		break;
+#endif
+	}
+
+	WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size));
+}
+
+static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
+				    unsigned int offset, int size,
+				    unsigned long val)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+
+	if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
+	    offset + size > dev->msix_cap)
+		WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val));
+
+	__uml_vfio_cfgspace_write(dev, offset, size, val);
+}
+
+static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar,
+				   void *buffer, unsigned int offset, int size)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+
+	memset(buffer, 0xff, size);
+	uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size);
+}
+
+static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar,
+				       unsigned int offset, int size)
+{
+	u8 data[8];
+
+	uml_vfio_bar_copy_from(pdev, bar, data, offset, size);
+
+	switch (size) {
+	case 1:
+		return data[0];
+	case 2:
+		return le16_to_cpup((void *)data);
+	case 4:
+		return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+	case 8:
+		return le64_to_cpup((void *)data);
+#endif
+	default:
+		return ULONG_MAX;
+	}
+}
+
+static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar,
+				 unsigned int offset, const void *buffer,
+				 int size)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+
+	uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size);
+}
+
+static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar,
+			       unsigned int offset, int size,
+			       unsigned long val)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+	u8 data[8];
+
+	if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
+	    offset < dev->msix_offset + dev->msix_size)
+		WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));
+
+	switch (size) {
+	case 1:
+		data[0] = (u8)val;
+		break;
+	case 2:
+		put_unaligned_le16(val, (void *)data);
+		break;
+	case 4:
+		put_unaligned_le32(val, (void *)data);
+		break;
+#ifdef CONFIG_64BIT
+	case 8:
+		put_unaligned_le64(val, (void *)data);
+		break;
+#endif
+	}
+
+	uml_vfio_bar_copy_to(pdev, bar, offset, data, size);
+}
+
+static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar,
+			     unsigned int offset, u8 value, int size)
+{
+	struct uml_vfio_device *dev = to_vdev(pdev);
+	int i;
+
+	for (i = 0; i < size; i++)
+		uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1);
+}
+
+static const struct um_pci_ops uml_vfio_um_pci_ops = {
+	.cfgspace_read	= uml_vfio_cfgspace_read,
+	.cfgspace_write	= uml_vfio_cfgspace_write,
+	.bar_read	= uml_vfio_bar_read,
+	.bar_write	= uml_vfio_bar_write,
+	.bar_copy_from	= uml_vfio_bar_copy_from,
+	.bar_copy_to	= uml_vfio_bar_copy_to,
+	.bar_set	= uml_vfio_bar_set,
+};
+
+static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
+{
+	u8 id, pos;
+	u16 ent;
+	int ttl = 48; /* PCI_FIND_CAP_TTL */
+
+	pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos));
+
+	while (pos && ttl--) {
+		ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent));
+
+		id = ent & 0xff;
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+
+		pos = ent >> 8;
+	}
+
+	return 0;
+}
+
+static int uml_vfio_read_msix_table(struct uml_vfio_device *dev)
+{
+	unsigned int off;
+	u16 flags;
+	u32 tbl;
+
+	off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (!off)
+		return -ENOTSUPP;
+
+	dev->msix_cap = off;
+
+	tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl));
+	flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags));
+
+	dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR;
+	dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET;
+	dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE;
+
+	dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL);
+	if (!dev->msix_data)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void uml_vfio_open_device(struct uml_vfio_device *dev)
+{
+	struct uml_vfio_intr_ctx *ctx;
+	int err, group_id, i;
+
+	group_id = uml_vfio_user_get_group_id(dev->name);
+	if (group_id < 0) {
+		pr_err("Failed to get group id (%s), error %d\n",
+		       dev->name, group_id);
+		goto free_dev;
+	}
+
+	dev->group = uml_vfio_open_group(group_id);
+	if (dev->group < 0) {
+		pr_err("Failed to open group %d (%s), error %d\n",
+		       group_id, dev->name, dev->group);
+		goto free_dev;
+	}
+
+	err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name);
+	if (err) {
+		pr_err("Failed to setup device (%s), error %d\n",
+		       dev->name, err);
+		goto release_group;
+	}
+
+	err = uml_vfio_read_msix_table(dev);
+	if (err) {
+		pr_err("Failed to read MSI-X table (%s), error %d\n",
+		       dev->name, err);
+		goto teardown_udev;
+	}
+
+	dev->intr_ctx = kmalloc_array(dev->udev.irq_count,
+				      sizeof(struct uml_vfio_intr_ctx),
+				      GFP_KERNEL);
+	if (!dev->intr_ctx) {
+		pr_err("Failed to allocate interrupt context (%s)\n",
+		       dev->name);
+		goto free_msix;
+	}
+
+	for (i = 0; i < dev->udev.irq_count; i++) {
+		ctx = &dev->intr_ctx[i];
+		ctx->dev = dev;
+		ctx->irq = -1;
+	}
+
+	dev->pdev.ops = &uml_vfio_um_pci_ops;
+
+	err = um_pci_device_register(&dev->pdev);
+	if (err) {
+		pr_err("Failed to register UM PCI device (%s), error %d\n",
+		       dev->name, err);
+		goto free_intr_ctx;
+	}
+
+	return;
+
+free_intr_ctx:
+	kfree(dev->intr_ctx);
+free_msix:
+	kfree(dev->msix_data);
+teardown_udev:
+	uml_vfio_user_teardown_device(&dev->udev);
+release_group:
+	uml_vfio_release_group(dev->group);
+free_dev:
+	list_del(&dev->list);
+	kfree(dev->name);
+	kfree(dev);
+}
+
+static void uml_vfio_release_device(struct uml_vfio_device *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->udev.irq_count; i++)
+		uml_vfio_deactivate_irq(dev, i);
+	uml_vfio_user_update_irqs(&dev->udev);
+
+	um_pci_device_unregister(&dev->pdev);
+	kfree(dev->intr_ctx);
+	kfree(dev->msix_data);
+	uml_vfio_user_teardown_device(&dev->udev);
+	uml_vfio_release_group(dev->group);
+	list_del(&dev->list);
+	kfree(dev->name);
+	kfree(dev);
+}
+
+static struct uml_vfio_device *uml_vfio_find_device(const char *device)
+{
+	struct uml_vfio_device *dev;
+
+	list_for_each_entry(dev, &uml_vfio_devices, list) {
+		if (!strcmp(dev->name, device))
+			return dev;
+	}
+	return NULL;
+}
+
+static struct uml_vfio_device *uml_vfio_add_device(const char *device)
+{
+	struct uml_vfio_device *dev;
+	int fd;
+
+	guard(mutex)(&uml_vfio_devices_mtx);
+
+	if (uml_vfio_container.fd < 0) {
+		fd = uml_vfio_user_open_container();
+		if (fd < 0)
+			return ERR_PTR(fd);
+		uml_vfio_container.fd = fd;
+	}
+
+	if (uml_vfio_find_device(device))
+		return ERR_PTR(-EEXIST);
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev->name = kstrdup(device, GFP_KERNEL);
+	if (!dev->name) {
+		kfree(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	list_add_tail(&dev->list, &uml_vfio_devices);
+	return dev;
+}
+
+static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+	struct uml_vfio_device *dev;
+
+	dev = uml_vfio_add_device(device);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+	return 0;
+}
+
+static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	return 0;
+}
+
+static const struct kernel_param_ops uml_vfio_cmdline_param_ops = {
+	.set = uml_vfio_cmdline_set,
+	.get = uml_vfio_cmdline_get,
+};
+
+device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400);
+__uml_help(uml_vfio_cmdline_param_ops,
+"vfio_uml.device=<domain:bus:slot.function>\n"
+"    Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n"
+"    capable devices are supported, and it is assumed that drivers will\n"
+"    use MSI-X. This parameter can be specified multiple times to pass\n"
+"    through multiple PCI devices to UML.\n\n"
+);
+
+static int uml_vfio_mc_config(char *str, char **error_out)
+{
+	struct uml_vfio_device *dev;
+
+	if (*str != '=') {
+		*error_out = "Invalid config";
+		return -EINVAL;
+	}
+	str += 1;
+
+	dev = uml_vfio_add_device(str);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+	uml_vfio_open_device(dev);
+	return 0;
+}
+
+static int uml_vfio_mc_id(char **str, int *start_out, int *end_out)
+{
+	return -EOPNOTSUPP;
+}
+
+static int uml_vfio_mc_remove(int n, char **error_out)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct mc_device uml_vfio_mc = {
+	.list           = LIST_HEAD_INIT(uml_vfio_mc.list),
+	.name           = "vfio_uml.device",
+	.config         = uml_vfio_mc_config,
+	.get_config     = NULL,
+	.id             = uml_vfio_mc_id,
+	.remove         = uml_vfio_mc_remove,
+};
+
+static int __init uml_vfio_init(void)
+{
+	struct uml_vfio_device *dev, *n;
+
+	sigio_broken();
+
+	/* If the opening fails, the device will be released. */
+	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+		uml_vfio_open_device(dev);
+
+	mconsole_register_dev(&uml_vfio_mc);
+
+	return 0;
+}
+late_initcall(uml_vfio_init);
+
+static void __exit uml_vfio_exit(void)
+{
+	struct uml_vfio_device *dev, *n;
+
+	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
+		uml_vfio_release_device(dev);
+
+	if (uml_vfio_container.fd >= 0)
+		os_close_file(uml_vfio_container.fd);
+}
+module_exit(uml_vfio_exit);
diff --git a/arch/um/drivers/vfio_user.c b/arch/um/drivers/vfio_user.c
new file mode 100644
index 000000000000..6a45d8e14582
--- /dev/null
+++ b/arch/um/drivers/vfio_user.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <linux/limits.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+#include <as-layout.h>
+#include <um_malloc.h>
+
+#include "vfio_user.h"
+
+int uml_vfio_user_open_container(void)
+{
+	int r, fd;
+
+	fd = open("/dev/vfio/vfio", O_RDWR);
+	if (fd < 0)
+		return -errno;
+
+	r = ioctl(fd, VFIO_GET_API_VERSION);
+	if (r != VFIO_API_VERSION) {
+		r = r < 0 ? -errno : -EINVAL;
+		goto error;
+	}
+
+	r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
+	if (r <= 0) {
+		r = r < 0 ? -errno : -EINVAL;
+		goto error;
+	}
+
+	return fd;
+
+error:
+	close(fd);
+	return r;
+}
+
+int uml_vfio_user_setup_iommu(int container)
+{
+	/*
+	 * This is a bit tricky. See the big comment in
+	 * vhost_user_set_mem_table() in virtio_uml.c.
+	 */
+	unsigned long reserved = uml_reserved - uml_physmem;
+	struct vfio_iommu_type1_dma_map dma_map = {
+		.argsz = sizeof(dma_map),
+		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+		.vaddr = uml_reserved,
+		.iova = reserved,
+		.size = physmem_size - reserved,
+	};
+
+	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
+		return -errno;
+
+	if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
+		return -errno;
+
+	return 0;
+}
+
+int uml_vfio_user_get_group_id(const char *device)
+{
+	char *path, *buf, *end;
+	const char *name;
+	int r;
+
+	path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
+
+	buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
+	if (!buf) {
+		r = -ENOMEM;
+		goto free_path;
+	}
+
+	r = readlink(path, buf, PATH_MAX);
+	if (r < 0) {
+		r = -errno;
+		goto free_buf;
+	}
+	buf[r] = '\0';
+
+	name = basename(buf);
+
+	r = strtoul(name, &end, 10);
+	if (*end != '\0' || end == name) {
+		r = -EINVAL;
+		goto free_buf;
+	}
+
+free_buf:
+	kfree(buf);
+free_path:
+	kfree(path);
+	return r;
+}
+
+int uml_vfio_user_open_group(int group_id)
+{
+	char *path;
+	int fd;
+
+	path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	sprintf(path, "/dev/vfio/%d", group_id);
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		fd = -errno;
+		goto out;
+	}
+
+out:
+	kfree(path);
+	return fd;
+}
+
+int uml_vfio_user_set_container(int container, int group)
+{
+	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
+		return -errno;
+	return 0;
+}
+
+int uml_vfio_user_unset_container(int container, int group)
+{
+	if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
+		return -errno;
+	return 0;
+}
+
+static int vfio_set_irqs(int device, int start, int count, int *irqfd)
+{
+	struct vfio_irq_set *irq_set;
+	int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
+	int err = 0;
+
+	irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
+	if (!irq_set)
+		return -ENOMEM;
+
+	irq_set->argsz = argsz;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = start;
+	irq_set->count = count;
+	memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
+
+	if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+out:
+	kfree(irq_set);
+	return err;
+}
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+			       int group, const char *device)
+{
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
+	int err, i;
+
+	dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
+	if (dev->device < 0)
+		return -errno;
+
+	if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
+		err = -errno;
+		goto close_device;
+	}
+
+	dev->num_regions = device_info.num_regions;
+	if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
+		dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
+
+	dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
+				  UM_GFP_KERNEL);
+	if (!dev->region) {
+		err = -ENOMEM;
+		goto close_device;
+	}
+
+	for (i = 0; i < dev->num_regions; i++) {
+		struct vfio_region_info region = {
+			.argsz = sizeof(region),
+			.index = i,
+		};
+		if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
+			err = -errno;
+			goto free_region;
+		}
+		dev->region[i].size = region.size;
+		dev->region[i].offset = region.offset;
+	}
+
+	/* Only MSI-X is supported currently. */
+	irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
+	if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
+		err = -errno;
+		goto free_region;
+	}
+
+	dev->irq_count = irq_info.count;
+
+	dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
+	if (!dev->irqfd) {
+		err = -ENOMEM;
+		goto free_region;
+	}
+
+	memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
+
+	err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+	if (err)
+		goto free_irqfd;
+
+	return 0;
+
+free_irqfd:
+	kfree(dev->irqfd);
+free_region:
+	kfree(dev->region);
+close_device:
+	close(dev->device);
+	return err;
+}
+
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
+{
+	kfree(dev->irqfd);
+	kfree(dev->region);
+	close(dev->device);
+}
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
+{
+	int irqfd;
+
+	irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (irqfd < 0)
+		return -errno;
+
+	dev->irqfd[index] = irqfd;
+	return irqfd;
+}
+
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
+{
+	close(dev->irqfd[index]);
+	dev->irqfd[index] = -1;
+}
+
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
+{
+	return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
+}
+
+static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
+			    uint64_t offset, void *buf, uint64_t size)
+{
+	if (index >= dev->num_regions || offset + size > dev->region[index].size)
+		return -EINVAL;
+
+	if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+		return -errno;
+
+	return 0;
+}
+
+static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
+			     uint64_t offset, const void *buf, uint64_t size)
+{
+	if (index >= dev->num_regions || offset + size > dev->region[index].size)
+		return -EINVAL;
+
+	if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
+		return -errno;
+
+	return 0;
+}
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+				unsigned int offset, void *buf, int size)
+{
+	return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				offset, buf, size);
+}
+
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+				 unsigned int offset, const void *buf, int size)
+{
+	return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
+				 offset, buf, size);
+}
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+			   unsigned int offset, void *buf, int size)
+{
+	return vfio_region_read(dev, bar, offset, buf, size);
+}
+
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+			    unsigned int offset, const void *buf, int size)
+{
+	return vfio_region_write(dev, bar, offset, buf, size);
+}
diff --git a/arch/um/drivers/vfio_user.h b/arch/um/drivers/vfio_user.h
new file mode 100644
index 000000000000..75535e05059b
--- /dev/null
+++ b/arch/um/drivers/vfio_user.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_VFIO_USER_H
+#define __UM_VFIO_USER_H
+
+struct uml_vfio_user_device {
+	int device;
+
+	struct {
+		uint64_t size;
+		uint64_t offset;
+	} *region;
+	int num_regions;
+
+	int32_t *irqfd;
+	int irq_count;
+};
+
+int uml_vfio_user_open_container(void);
+int uml_vfio_user_setup_iommu(int container);
+
+int uml_vfio_user_get_group_id(const char *device);
+int uml_vfio_user_open_group(int group_id);
+int uml_vfio_user_set_container(int container, int group);
+int uml_vfio_user_unset_container(int container, int group);
+
+int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
+			       int group, const char *device);
+void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index);
+void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index);
+int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev);
+
+int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
+				unsigned int offset, void *buf, int size);
+int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
+				 unsigned int offset, const void *buf, int size);
+
+int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
+			   unsigned int offset, void *buf, int size);
+int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
+			    unsigned int offset, const void *buf, int size);
+
+#endif /* __UM_VFIO_USER_H */
diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
new file mode 100644
index 000000000000..fcfa3b7e021b
--- /dev/null
+++ b/arch/um/drivers/vhost_user.h
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Vhost-user protocol */
+
+#ifndef __VHOST_USER_H__
+#define __VHOST_USER_H__
+
+/* Message flags */
+#define VHOST_USER_FLAG_REPLY		BIT(2)
+#define VHOST_USER_FLAG_NEED_REPLY	BIT(3)
+/* Feature bits */
+#define VHOST_USER_F_PROTOCOL_FEATURES	30
+/* Protocol feature bits */
+#define VHOST_USER_PROTOCOL_F_MQ			0
+#define VHOST_USER_PROTOCOL_F_REPLY_ACK			3
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ			5
+#define VHOST_USER_PROTOCOL_F_CONFIG			9
+#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS	14
+/* Vring state index masks */
+#define VHOST_USER_VRING_INDEX_MASK	0xff
+#define VHOST_USER_VRING_POLL_MASK	BIT(8)
+
+/* Supported version */
+#define VHOST_USER_VERSION		1
+/* Supported transport features */
+#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
+/* Supported protocol features */
+#define VHOST_USER_SUPPORTED_PROTOCOL_F	(BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \
+					 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+					 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
+					 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \
+					 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS))
+
+enum vhost_user_request {
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SEND_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_SET_VRING_ENDIAN = 23,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
+	VHOST_USER_VRING_KICK = 35,
+};
+
+enum vhost_user_slave_request {
+	VHOST_USER_SLAVE_IOTLB_MSG = 1,
+	VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
+	VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+	VHOST_USER_SLAVE_VRING_CALL = 4,
+};
+
+struct vhost_user_header {
+	/*
+	 * Use enum vhost_user_request for outgoing messages,
+	 * uses enum vhost_user_slave_request for incoming ones.
+	 */
+	u32 request;
+	u32 flags;
+	u32 size;
+} __packed;
+
+struct vhost_user_config {
+	u32 offset;
+	u32 size;
+	u32 flags;
+	u8 payload[]; /* Variable length */
+} __packed;
+
+struct vhost_user_vring_state {
+	u32 index;
+	u32 num;
+} __packed;
+
+struct vhost_user_vring_addr {
+	u32 index;
+	u32 flags;
+	u64 desc, used, avail, log;
+} __packed;
+
+struct vhost_user_mem_region {
+	u64 guest_addr;
+	u64 size;
+	u64 user_addr;
+	u64 mmap_offset;
+} __packed;
+
+struct vhost_user_mem_regions {
+	u32 num;
+	u32 padding;
+	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
+} __packed;
+
+union vhost_user_payload {
+	u64 integer;
+	struct vhost_user_config config;
+	struct vhost_user_vring_state vring_state;
+	struct vhost_user_vring_addr vring_addr;
+	struct vhost_user_mem_regions mem_regions;
+};
+
+struct vhost_user_msg {
+	struct vhost_user_header header;
+	union vhost_user_payload payload;
+} __packed;
+
+#endif
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
new file mode 100644
index 000000000000..557d93aea00a
--- /dev/null
+++ b/arch/um/drivers/virt-pci.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/logic_iomem.h>
+#include <linux/of_platform.h>
+#include <linux/irqchip/irq-msi-lib.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <linux/unaligned.h>
+#include <irq_kern.h>
+
+#include "virt-pci.h"
+
+#define MAX_DEVICES 8
+#define MAX_MSI_VECTORS 32
+#define CFG_SPACE_SIZE 4096
+
+struct um_pci_device_reg {
+	struct um_pci_device *dev;
+	void __iomem *iomem;
+};
+
+static struct pci_host_bridge *bridge;
+static DEFINE_MUTEX(um_pci_mtx);
+static struct um_pci_device *um_pci_platform_device;
+static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
+static struct fwnode_handle *um_pci_fwnode;
+static struct irq_domain *um_pci_inner_domain;
+static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
+
+static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
+					  int size)
+{
+	struct um_pci_device_reg *reg = priv;
+	struct um_pci_device *dev = reg->dev;
+
+	if (!dev)
+		return ULONG_MAX;
+
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		break;
+	default:
+		WARN(1, "invalid config space read size %d\n", size);
+		return ULONG_MAX;
+	}
+
+	return dev->ops->cfgspace_read(dev, offset, size);
+}
+
+static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
+				  unsigned long val)
+{
+	struct um_pci_device_reg *reg = priv;
+	struct um_pci_device *dev = reg->dev;
+
+	if (!dev)
+		return;
+
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		break;
+	default:
+		WARN(1, "invalid config space write size %d\n", size);
+		return;
+	}
+
+	dev->ops->cfgspace_write(dev, offset, size, val);
+}
+
+static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
+	.read = um_pci_cfgspace_read,
+	.write = um_pci_cfgspace_write,
+};
+
+static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
+				     int size)
+{
+	u8 *resptr = priv;
+	struct um_pci_device *dev = container_of(resptr - *resptr,
+						 struct um_pci_device,
+						 resptr[0]);
+	u8 bar = *resptr;
+
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		break;
+	default:
+		WARN(1, "invalid bar read size %d\n", size);
+		return ULONG_MAX;
+	}
+
+	return dev->ops->bar_read(dev, bar, offset, size);
+}
+
+static void um_pci_bar_write(void *priv, unsigned int offset, int size,
+			     unsigned long val)
+{
+	u8 *resptr = priv;
+	struct um_pci_device *dev = container_of(resptr - *resptr,
+						 struct um_pci_device,
+						 resptr[0]);
+	u8 bar = *resptr;
+
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		break;
+	default:
+		WARN(1, "invalid bar write size %d\n", size);
+		return;
+	}
+
+	dev->ops->bar_write(dev, bar, offset, size, val);
+}
+
+static void um_pci_bar_copy_from(void *priv, void *buffer,
+				 unsigned int offset, int size)
+{
+	u8 *resptr = priv;
+	struct um_pci_device *dev = container_of(resptr - *resptr,
+						 struct um_pci_device,
+						 resptr[0]);
+	u8 bar = *resptr;
+
+	dev->ops->bar_copy_from(dev, bar, buffer, offset, size);
+}
+
+static void um_pci_bar_copy_to(void *priv, unsigned int offset,
+			       const void *buffer, int size)
+{
+	u8 *resptr = priv;
+	struct um_pci_device *dev = container_of(resptr - *resptr,
+						 struct um_pci_device,
+						 resptr[0]);
+	u8 bar = *resptr;
+
+	dev->ops->bar_copy_to(dev, bar, offset, buffer, size);
+}
+
+static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
+{
+	u8 *resptr = priv;
+	struct um_pci_device *dev = container_of(resptr - *resptr,
+						 struct um_pci_device,
+						 resptr[0]);
+	u8 bar = *resptr;
+
+	dev->ops->bar_set(dev, bar, offset, value, size);
+}
+
+static const struct logic_iomem_ops um_pci_device_bar_ops = {
+	.read = um_pci_bar_read,
+	.write = um_pci_bar_write,
+	.set = um_pci_bar_set,
+	.copy_from = um_pci_bar_copy_from,
+	.copy_to = um_pci_bar_copy_to,
+};
+
+static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
+				    int where)
+{
+	struct um_pci_device_reg *dev;
+	unsigned int busn = bus->number;
+
+	if (busn > 0)
+		return NULL;
+
+	/* not allowing functions for now ... */
+	if (devfn % 8)
+		return NULL;
+
+	if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
+		return NULL;
+
+	dev = &um_pci_devices[devfn / 8];
+	if (!dev)
+		return NULL;
+
+	return (void __iomem *)((unsigned long)dev->iomem + where);
+}
+
+static struct pci_ops um_pci_ops = {
+	.map_bus = um_pci_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static void um_pci_rescan(void)
+{
+	pci_lock_rescan_remove();
+	pci_rescan_bus(bridge->bus);
+	pci_unlock_rescan_remove();
+}
+
+#ifdef CONFIG_OF
+/* Copied from arch/x86/kernel/devicetree.c */
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+	struct device_node *np;
+
+	for_each_node_by_type(np, "pci") {
+		const void *prop;
+		unsigned int bus_min;
+
+		prop = of_get_property(np, "bus-range", NULL);
+		if (!prop)
+			continue;
+		bus_min = be32_to_cpup(prop);
+		if (bus->number == bus_min)
+			return np;
+	}
+	return NULL;
+}
+#endif
+
+static struct resource virt_cfgspace_resource = {
+	.name = "PCI config space",
+	.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
+	.end = 0xf0000000 - 1,
+	.flags = IORESOURCE_MEM,
+};
+
+static long um_pci_map_cfgspace(unsigned long offset, size_t size,
+				const struct logic_iomem_ops **ops,
+				void **priv)
+{
+	if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
+		return -EINVAL;
+
+	if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
+		*ops = &um_pci_device_cfgspace_ops;
+		*priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
+		return 0;
+	}
+
+	WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
+	return -ENOENT;
+}
+
+static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
+	.map = um_pci_map_cfgspace,
+};
+
+static struct resource virt_iomem_resource = {
+	.name = "PCI iomem",
+	.start = 0xf0000000,
+	.end = 0xffffffff,
+	.flags = IORESOURCE_MEM,
+};
+
+struct um_pci_map_iomem_data {
+	unsigned long offset;
+	size_t size;
+	const struct logic_iomem_ops **ops;
+	void **priv;
+	long ret;
+};
+
+static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
+{
+	struct um_pci_map_iomem_data *data = _data;
+	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
+	struct um_pci_device *dev;
+	int i;
+
+	if (!reg->dev)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
+		struct resource *r = &pdev->resource[i];
+
+		if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
+			continue;
+
+		/*
+		 * must be the whole or part of the resource,
+		 * not allowed to only overlap
+		 */
+		if (data->offset < r->start || data->offset > r->end)
+			continue;
+		if (data->offset + data->size - 1 > r->end)
+			continue;
+
+		dev = reg->dev;
+		*data->ops = &um_pci_device_bar_ops;
+		dev->resptr[i] = i;
+		*data->priv = &dev->resptr[i];
+		data->ret = data->offset - r->start;
+
+		/* no need to continue */
+		return 1;
+	}
+
+	return 0;
+}
+
+static long um_pci_map_iomem(unsigned long offset, size_t size,
+			     const struct logic_iomem_ops **ops,
+			     void **priv)
+{
+	struct um_pci_map_iomem_data data = {
+		/* we want the full address here */
+		.offset = offset + virt_iomem_resource.start,
+		.size = size,
+		.ops = ops,
+		.priv = priv,
+		.ret = -ENOENT,
+	};
+
+	pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);
+	return data.ret;
+}
+
+static const struct logic_iomem_region_ops um_pci_iomem_ops = {
+	.map = um_pci_map_iomem,
+};
+
+static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	/*
+	 * This is a very low address and not actually valid 'physical' memory
+	 * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
+	 * legitimately written to by the device in any other way.
+	 * We use the (virtual) IRQ number here as the message to simplify the
+	 * code that receives the message, where for now we simply trust the
+	 * device to send the correct message.
+	 */
+	msg->address_hi = 0;
+	msg->address_lo = 0xa0000;
+	msg->data = data->irq;
+}
+
+static struct irq_chip um_pci_msi_bottom_irq_chip = {
+	.name = "UM virtual MSI",
+	.irq_compose_msi_msg = um_pci_compose_msi_msg,
+};
+
+static int um_pci_inner_domain_alloc(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs,
+				     void *args)
+{
+	unsigned long bit;
+
+	WARN_ON(nr_irqs != 1);
+
+	mutex_lock(&um_pci_mtx);
+	bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
+	if (bit >= MAX_MSI_VECTORS) {
+		mutex_unlock(&um_pci_mtx);
+		return -ENOSPC;
+	}
+
+	set_bit(bit, um_pci_msi_used);
+	mutex_unlock(&um_pci_mtx);
+
+	irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
+			    domain->host_data, handle_simple_irq,
+			    NULL, NULL);
+
+	return 0;
+}
+
+static void um_pci_inner_domain_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	mutex_lock(&um_pci_mtx);
+
+	if (!test_bit(d->hwirq, um_pci_msi_used))
+		pr_err("trying to free unused MSI#%lu\n", d->hwirq);
+	else
+		__clear_bit(d->hwirq, um_pci_msi_used);
+
+	mutex_unlock(&um_pci_mtx);
+}
+
+static const struct irq_domain_ops um_pci_inner_domain_ops = {
+	.select = msi_lib_irq_domain_select,
+	.alloc = um_pci_inner_domain_alloc,
+	.free = um_pci_inner_domain_free,
+};
+
+#define UM_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS		| \
+				   MSI_FLAG_USE_DEF_CHIP_OPS		| \
+				   MSI_FLAG_NO_AFFINITY)
+#define UM_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK		| \
+				    MSI_FLAG_PCI_MSIX)
+
+static const struct msi_parent_ops um_pci_msi_parent_ops = {
+	.required_flags		= UM_PCI_MSI_FLAGS_REQUIRED,
+	.supported_flags	= UM_PCI_MSI_FLAGS_SUPPORTED,
+	.bus_select_token	= DOMAIN_BUS_NEXUS,
+	.bus_select_mask	= MATCH_PCI_MSI,
+	.prefix			= "UM-virtual-",
+	.init_dev_msi_info	= msi_lib_init_dev_msi_info,
+};
+
+static struct resource busn_resource = {
+	.name	= "PCI busn",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUS,
+};
+
+static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
+
+	if (WARN_ON(!reg->dev))
+		return -EINVAL;
+
+	/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
+	return reg->dev->irq;
+}
+
+void *pci_root_bus_fwnode(struct pci_bus *bus)
+{
+	return um_pci_fwnode;
+}
+
+static long um_pci_map_platform(unsigned long offset, size_t size,
+				const struct logic_iomem_ops **ops,
+				void **priv)
+{
+	if (!um_pci_platform_device)
+		return -ENOENT;
+
+	*ops = &um_pci_device_bar_ops;
+	*priv = &um_pci_platform_device->resptr[0];
+
+	return offset;
+}
+
+static const struct logic_iomem_region_ops um_pci_platform_ops = {
+	.map = um_pci_map_platform,
+};
+
+static struct resource virt_platform_resource = {
+	.name = "platform",
+	.start = 0x10000000,
+	.end = 0x1fffffff,
+	.flags = IORESOURCE_MEM,
+};
+
+int um_pci_device_register(struct um_pci_device *dev)
+{
+	int i, free = -1;
+	int err = 0;
+
+	mutex_lock(&um_pci_mtx);
+	for (i = 0; i < MAX_DEVICES; i++) {
+		if (um_pci_devices[i].dev)
+			continue;
+		free = i;
+		break;
+	}
+
+	if (free < 0) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	dev->irq = irq_alloc_desc(numa_node_id());
+	if (dev->irq < 0) {
+		err = dev->irq;
+		goto out;
+	}
+
+	um_pci_devices[free].dev = dev;
+
+out:
+	mutex_unlock(&um_pci_mtx);
+	if (!err)
+		um_pci_rescan();
+	return err;
+}
+
+void um_pci_device_unregister(struct um_pci_device *dev)
+{
+	int i;
+
+	mutex_lock(&um_pci_mtx);
+	for (i = 0; i < MAX_DEVICES; i++) {
+		if (um_pci_devices[i].dev != dev)
+			continue;
+		um_pci_devices[i].dev = NULL;
+		irq_free_desc(dev->irq);
+		break;
+	}
+	mutex_unlock(&um_pci_mtx);
+
+	if (i < MAX_DEVICES) {
+		struct pci_dev *pci_dev;
+
+		pci_dev = pci_get_slot(bridge->bus, i);
+		if (pci_dev)
+			pci_stop_and_remove_bus_device_locked(pci_dev);
+	}
+}
+
+int um_pci_platform_device_register(struct um_pci_device *dev)
+{
+	guard(mutex)(&um_pci_mtx);
+	if (um_pci_platform_device)
+		return -EBUSY;
+	um_pci_platform_device = dev;
+	return 0;
+}
+
+void um_pci_platform_device_unregister(struct um_pci_device *dev)
+{
+	guard(mutex)(&um_pci_mtx);
+	if (um_pci_platform_device == dev)
+		um_pci_platform_device = NULL;
+}
+
+static int __init um_pci_init(void)
+{
+	int err, i;
+
+	WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
+				       &um_pci_cfgspace_ops));
+	WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
+				       &um_pci_iomem_ops));
+	WARN_ON(logic_iomem_add_region(&virt_platform_resource,
+				       &um_pci_platform_ops));
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
+	if (!um_pci_fwnode) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	struct irq_domain_info info = {
+		.fwnode		= um_pci_fwnode,
+		.ops		= &um_pci_inner_domain_ops,
+		.size		= MAX_MSI_VECTORS,
+	};
+
+	um_pci_inner_domain = msi_create_parent_irq_domain(&info, &um_pci_msi_parent_ops);
+	if (!um_pci_inner_domain) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	pci_add_resource(&bridge->windows, &virt_iomem_resource);
+	pci_add_resource(&bridge->windows, &busn_resource);
+	bridge->ops = &um_pci_ops;
+	bridge->map_irq = um_pci_map_irq;
+
+	for (i = 0; i < MAX_DEVICES; i++) {
+		resource_size_t start;
+
+		start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
+		um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
+		if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
+
+	err = pci_host_probe(bridge);
+	if (err)
+		goto free;
+
+	return 0;
+
+free:
+	if (um_pci_inner_domain)
+		irq_domain_remove(um_pci_inner_domain);
+	if (um_pci_fwnode)
+		irq_domain_free_fwnode(um_pci_fwnode);
+	if (bridge) {
+		pci_free_resource_list(&bridge->windows);
+		pci_free_host_bridge(bridge);
+	}
+	return err;
+}
+device_initcall(um_pci_init);
+
+static void __exit um_pci_exit(void)
+{
+	irq_domain_remove(um_pci_inner_domain);
+	pci_free_resource_list(&bridge->windows);
+	pci_free_host_bridge(bridge);
+}
+module_exit(um_pci_exit);
diff --git a/arch/um/drivers/virt-pci.h b/arch/um/drivers/virt-pci.h
new file mode 100644
index 000000000000..b20d1475d1eb
--- /dev/null
+++ b/arch/um/drivers/virt-pci.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_VIRT_PCI_H
+#define __UM_VIRT_PCI_H
+
+#include <linux/pci.h>
+
+struct um_pci_device {
+	const struct um_pci_ops *ops;
+
+	/* for now just standard BARs */
+	u8 resptr[PCI_STD_NUM_BARS];
+
+	int irq;
+};
+
+struct um_pci_ops {
+	unsigned long (*cfgspace_read)(struct um_pci_device *dev,
+				       unsigned int offset, int size);
+	void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset,
+			       int size, unsigned long val);
+
+	unsigned long (*bar_read)(struct um_pci_device *dev, int bar,
+				  unsigned int offset, int size);
+	void (*bar_write)(struct um_pci_device *dev, int bar,
+			  unsigned int offset, int size, unsigned long val);
+
+	void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer,
+			      unsigned int offset, int size);
+	void (*bar_copy_to)(struct um_pci_device *dev, int bar,
+			    unsigned int offset, const void *buffer, int size);
+	void (*bar_set)(struct um_pci_device *dev, int bar,
+			unsigned int offset, u8 value, int size);
+};
+
+int um_pci_device_register(struct um_pci_device *dev);
+void um_pci_device_unregister(struct um_pci_device *dev);
+
+int um_pci_platform_device_register(struct um_pci_device *dev);
+void um_pci_platform_device_unregister(struct um_pci_device *dev);
+
+#endif /* __UM_VIRT_PCI_H */
diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c
new file mode 100644
index 000000000000..f9b4b6f7582c
--- /dev/null
+++ b/arch/um/drivers/virtio_pcidev.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/logic_iomem.h>
+#include <linux/of_platform.h>
+#include <linux/irqdomain.h>
+#include <linux/virtio_pcidev.h>
+#include <linux/virtio-uml.h>
+#include <linux/delay.h>
+#include <linux/msi.h>
+#include <linux/unaligned.h>
+#include <irq_kern.h>
+
+#include "virt-pci.h"
+
+#define to_virtio_pcidev(_pdev) \
+	container_of(_pdev, struct virtio_pcidev_device, pdev)
+
+/* for MSI-X we have a 32-bit payload */
+#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
+#define NUM_IRQ_MSGS	10
+
+struct virtio_pcidev_message_buffer {
+	struct virtio_pcidev_msg hdr;
+	u8 data[8];
+};
+
+struct virtio_pcidev_device {
+	struct um_pci_device pdev;
+	struct virtio_device *vdev;
+
+	struct virtqueue *cmd_vq, *irq_vq;
+
+#define VIRTIO_PCIDEV_WRITE_BUFS	20
+	struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1];
+	void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1];
+	DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS);
+
+#define VIRTIO_PCIDEV_STAT_WAITING	0
+	unsigned long status;
+
+	bool platform;
+};
+
+static unsigned int virtio_pcidev_max_delay_us = 40000;
+module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644);
+
+static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted)
+{
+	int i;
+
+	for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) {
+		if (!test_and_set_bit(i, dev->used_bufs))
+			return i;
+	}
+
+	*posted = false;
+	return VIRTIO_PCIDEV_WRITE_BUFS;
+}
+
+static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf)
+{
+	int i;
+
+	if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) {
+		kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]);
+		dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL;
+		return;
+	}
+
+	for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) {
+		if (buf == &dev->bufs[i]) {
+			kfree(dev->extra_ptrs[i]);
+			dev->extra_ptrs[i] = NULL;
+			WARN_ON(!test_and_clear_bit(i, dev->used_bufs));
+			return;
+		}
+	}
+
+	WARN_ON(1);
+}
+
+static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev,
+				  struct virtio_pcidev_msg *cmd,
+				  unsigned int cmd_size,
+				  const void *extra, unsigned int extra_size,
+				  void *out, unsigned int out_size)
+{
+	struct scatterlist out_sg, extra_sg, in_sg;
+	struct scatterlist *sgs_list[] = {
+		[0] = &out_sg,
+		[1] = extra ? &extra_sg : &in_sg,
+		[2] = extra ? &in_sg : NULL,
+	};
+	struct virtio_pcidev_message_buffer *buf;
+	int delay_count = 0;
+	bool bounce_out;
+	int ret, len;
+	int buf_idx;
+	bool posted;
+
+	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
+		return -EINVAL;
+
+	switch (cmd->op) {
+	case VIRTIO_PCIDEV_OP_CFG_WRITE:
+	case VIRTIO_PCIDEV_OP_MMIO_WRITE:
+	case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
+		/* in PCI, writes are posted, so don't wait */
+		posted = !out;
+		WARN_ON(!posted);
+		break;
+	default:
+		posted = false;
+		break;
+	}
+
+	bounce_out = !posted && cmd_size <= sizeof(*cmd) &&
+		     out && out_size <= sizeof(buf->data);
+
+	buf_idx = virtio_pcidev_get_buf(dev, &posted);
+	buf = &dev->bufs[buf_idx];
+	memcpy(buf, cmd, cmd_size);
+
+	if (posted && extra && extra_size > sizeof(buf) - cmd_size) {
+		dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size,
+						   GFP_ATOMIC);
+
+		if (!dev->extra_ptrs[buf_idx]) {
+			virtio_pcidev_free_buf(dev, buf);
+			return -ENOMEM;
+		}
+		extra = dev->extra_ptrs[buf_idx];
+	} else if (extra && extra_size <= sizeof(buf) - cmd_size) {
+		memcpy((u8 *)buf + cmd_size, extra, extra_size);
+		cmd_size += extra_size;
+		extra_size = 0;
+		extra = NULL;
+		cmd = (void *)buf;
+	} else {
+		cmd = (void *)buf;
+	}
+
+	sg_init_one(&out_sg, cmd, cmd_size);
+	if (extra)
+		sg_init_one(&extra_sg, extra, extra_size);
+	/* allow stack for small buffers */
+	if (bounce_out)
+		sg_init_one(&in_sg, buf->data, out_size);
+	else if (out)
+		sg_init_one(&in_sg, out, out_size);
+
+	/* add to internal virtio queue */
+	ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
+				extra ? 2 : 1,
+				out ? 1 : 0,
+				cmd, GFP_ATOMIC);
+	if (ret) {
+		virtio_pcidev_free_buf(dev, buf);
+		return ret;
+	}
+
+	if (posted) {
+		virtqueue_kick(dev->cmd_vq);
+		return 0;
+	}
+
+	/* kick and poll for getting a response on the queue */
+	set_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status);
+	virtqueue_kick(dev->cmd_vq);
+	ret = 0;
+
+	while (1) {
+		void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
+
+		if (completed == buf)
+			break;
+
+		if (completed)
+			virtio_pcidev_free_buf(dev, completed);
+
+		if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
+			      ++delay_count > virtio_pcidev_max_delay_us,
+			      "um virt-pci delay: %d", delay_count)) {
+			ret = -EIO;
+			break;
+		}
+		udelay(1);
+	}
+	clear_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status);
+
+	if (bounce_out)
+		memcpy(out, buf->data, out_size);
+
+	virtio_pcidev_free_buf(dev, buf);
+
+	return ret;
+}
+
+static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev,
+						 unsigned int offset, int size)
+{
+	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
+	struct virtio_pcidev_msg hdr = {
+		.op = VIRTIO_PCIDEV_OP_CFG_READ,
+		.size = size,
+		.addr = offset,
+	};
+	/* max 8, we might not use it all */
+	u8 data[8];
+
+	memset(data, 0xff, sizeof(data));
+
+	/* size has been checked in um_pci_cfgspace_read() */
+	if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size))
+		return ULONG_MAX;
+
+	switch (size) {
+	case 1:
+		return data[0];
+	case 2:
+		return le16_to_cpup((void *)data);
+	case 4:
+		return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+	case 8:
+		return le64_to_cpup((void *)data);
+#endif
+	default:
+		return ULONG_MAX;
+	}
+}
+
+static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev,
+					 unsigned int offset, int size,
+					 unsigned long val)
+{
+	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
+	struct {
+		struct virtio_pcidev_msg hdr;
+		/* maximum size - we may only use parts of it */
+		u8 data[8];
+	} msg = {
+		.hdr = {
+			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
+			.size = size,
+			.addr = offset,
+		},
+	};
+
+	/* size has been checked in um_pci_cfgspace_write() */
+	switch (size) {
+	case 1:
+		msg.data[0] = (u8)val;
+		break;
+	case 2:
+		put_unaligned_le16(val, (void *)msg.data);
+		break;
+	case 4:
+		put_unaligned_le32(val, (void *)msg.data);
+		break;
+#ifdef CONFIG_64BIT
+	case 8:
+		put_unaligned_le64(val, (void *)msg.data);
+		break;
+#endif
+	}
+
+	WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
+}
+
+static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev,
+					int bar, void *buffer,
+					unsigned int offset, int size)
+{
+	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
+	struct virtio_pcidev_msg hdr = {
+		.op = VIRTIO_PCIDEV_OP_MMIO_READ,
+		.bar = bar,
+		.size = size,
+		.addr = offset,
+	};
+
+	memset(buffer, 0xff, size);
+
+	virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
+}
+
+static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar,
+					    unsigned int offset, int size)
+{
+	/* 8 is maximum size - we may only use parts of it */
+	u8 data[8];
+
+	/* size has been checked in um_pci_bar_read() */
+	virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size);
+
+	switch (size) {
+	case 1:
+		return data[0];
+	case 2:
+		return le16_to_cpup((void *)data);
+	case 4:
+		return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+	case 8:
+		return le64_to_cpup((void *)data);
+#endif
+	default:
+		return ULONG_MAX;
+	}
+}
+
+static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev,
+				      int bar, unsigned int offset,
+				      const void *buffer, int size)
+{
+	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
+	struct virtio_pcidev_msg hdr = {
+		.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
+		.bar = bar,
+		.size = size,
+		.addr = offset,
+	};
+
+	virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
+}
+
+static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar,
+				    unsigned int offset, int size,
+				    unsigned long val)
+{
+	/* maximum size - we may only use parts of it */
+	u8 data[8];
+
+	/* size has been checked in um_pci_bar_write() */
+	switch (size) {
+	case 1:
+		data[0] = (u8)val;
+		break;
+	case 2:
+		put_unaligned_le16(val, (void *)data);
+		break;
+	case 4:
+		put_unaligned_le32(val, (void *)data);
+		break;
+#ifdef CONFIG_64BIT
+	case 8:
+		put_unaligned_le64(val, (void *)data);
+		break;
+#endif
+	}
+
+	virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size);
+}
+
+static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar,
+				  unsigned int offset, u8 value, int size)
+{
+	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
+	struct {
+		struct virtio_pcidev_msg hdr;
+		u8 data;
+	} msg = {
+		.hdr = {
+			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
+			.bar = bar,
+			.size = size,
+			.addr = offset,
+		},
+		.data = value,
+	};
+
+	virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
+}
+
+static const struct um_pci_ops virtio_pcidev_um_pci_ops = {
+	.cfgspace_read	= virtio_pcidev_cfgspace_read,
+	.cfgspace_write	= virtio_pcidev_cfgspace_write,
+	.bar_read	= virtio_pcidev_bar_read,
+	.bar_write	= virtio_pcidev_bar_write,
+	.bar_copy_from	= virtio_pcidev_bar_copy_from,
+	.bar_copy_to	= virtio_pcidev_bar_copy_to,
+	.bar_set	= virtio_pcidev_bar_set,
+};
+
+static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
+{
+	struct scatterlist sg[1];
+
+	sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
+	if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
+		kfree(buf);
+	else if (kick)
+		virtqueue_kick(vq);
+}
+
+static void virtio_pcidev_handle_irq_message(struct virtqueue *vq,
+					     struct virtio_pcidev_msg *msg)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pcidev_device *dev = vdev->priv;
+
+	if (!dev->pdev.irq)
+		return;
+
+	/* we should properly chain interrupts, but on ARCH=um we don't care */
+
+	switch (msg->op) {
+	case VIRTIO_PCIDEV_OP_INT:
+		generic_handle_irq(dev->pdev.irq);
+		break;
+	case VIRTIO_PCIDEV_OP_MSI:
+		/* our MSI message is just the interrupt number */
+		if (msg->size == sizeof(u32))
+			generic_handle_irq(le32_to_cpup((void *)msg->data));
+		else
+			generic_handle_irq(le16_to_cpup((void *)msg->data));
+		break;
+	case VIRTIO_PCIDEV_OP_PME:
+		/* nothing to do - we already woke up due to the message */
+		break;
+	default:
+		dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
+		break;
+	}
+}
+
+static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pcidev_device *dev = vdev->priv;
+	void *cmd;
+	int len;
+
+	if (test_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status))
+		return;
+
+	while ((cmd = virtqueue_get_buf(vq, &len)))
+		virtio_pcidev_free_buf(dev, cmd);
+}
+
+static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq)
+{
+	struct virtio_pcidev_msg *msg;
+	int len;
+
+	while ((msg = virtqueue_get_buf(vq, &len))) {
+		if (len >= sizeof(*msg))
+			virtio_pcidev_handle_irq_message(vq, msg);
+
+		/* recycle the message buffer */
+		virtio_pcidev_irq_vq_addbuf(vq, msg, true);
+	}
+}
+
+static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev)
+{
+	struct virtqueue_info vqs_info[] = {
+		{ "cmd", virtio_pcidev_cmd_vq_cb },
+		{ "irq", virtio_pcidev_irq_vq_cb },
+	};
+	struct virtqueue *vqs[2];
+	int err, i;
+
+	err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL);
+	if (err)
+		return err;
+
+	dev->cmd_vq = vqs[0];
+	dev->irq_vq = vqs[1];
+
+	virtio_device_ready(dev->vdev);
+
+	for (i = 0; i < NUM_IRQ_MSGS; i++) {
+		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
+
+		if (msg)
+			virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false);
+	}
+
+	virtqueue_kick(dev->irq_vq);
+
+	return 0;
+}
+
+static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev,
+						   struct virtio_pcidev_device *dev)
+{
+	um_pci_platform_device_unregister(&dev->pdev);
+
+	virtio_reset_device(vdev);
+	vdev->config->del_vqs(vdev);
+
+	kfree(dev);
+}
+
+static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev,
+					       struct virtio_pcidev_device *dev)
+{
+	int err;
+
+	dev->platform = true;
+
+	err = virtio_pcidev_init_vqs(dev);
+	if (err)
+		goto err_free;
+
+	err = um_pci_platform_device_register(&dev->pdev);
+	if (err)
+		goto err_reset;
+
+	err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
+	if (err)
+		goto err_unregister;
+
+	return 0;
+
+err_unregister:
+	um_pci_platform_device_unregister(&dev->pdev);
+err_reset:
+	virtio_reset_device(vdev);
+	vdev->config->del_vqs(vdev);
+err_free:
+	kfree(dev);
+	return err;
+}
+
+static int virtio_pcidev_virtio_probe(struct virtio_device *vdev)
+{
+	struct virtio_pcidev_device *dev;
+	int err;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->vdev = vdev;
+	vdev->priv = dev;
+
+	dev->pdev.ops = &virtio_pcidev_um_pci_ops;
+
+	if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
+		return virtio_pcidev_virtio_platform_probe(vdev, dev);
+
+	err = virtio_pcidev_init_vqs(dev);
+	if (err)
+		goto err_free;
+
+	err = um_pci_device_register(&dev->pdev);
+	if (err)
+		goto err_reset;
+
+	device_set_wakeup_enable(&vdev->dev, true);
+
+	/*
+	 * In order to do suspend-resume properly, don't allow VQs
+	 * to be suspended.
+	 */
+	virtio_uml_set_no_vq_suspend(vdev, true);
+
+	return 0;
+
+err_reset:
+	virtio_reset_device(vdev);
+	vdev->config->del_vqs(vdev);
+err_free:
+	kfree(dev);
+	return err;
+}
+
+static void virtio_pcidev_virtio_remove(struct virtio_device *vdev)
+{
+	struct virtio_pcidev_device *dev = vdev->priv;
+
+	if (dev->platform) {
+		of_platform_depopulate(&vdev->dev);
+		__virtio_pcidev_virtio_platform_remove(vdev, dev);
+		return;
+	}
+
+	device_set_wakeup_enable(&vdev->dev, false);
+
+	um_pci_device_unregister(&dev->pdev);
+
+	/* Stop all virtqueues */
+	virtio_reset_device(vdev);
+	dev->cmd_vq = NULL;
+	dev->irq_vq = NULL;
+	vdev->config->del_vqs(vdev);
+
+	kfree(dev);
+}
+
+static void virtio_pcidev_virtio_shutdown(struct virtio_device *vdev)
+{
+	/* nothing to do, we just don't want queue shutdown */
+}
+
+static struct virtio_device_id id_table[] = {
+	{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+static struct virtio_driver virtio_pcidev_virtio_driver = {
+	.driver.name = "virtio-pci",
+	.id_table = id_table,
+	.probe = virtio_pcidev_virtio_probe,
+	.remove = virtio_pcidev_virtio_remove,
+	.shutdown = virtio_pcidev_virtio_shutdown,
+};
+
+static int __init virtio_pcidev_init(void)
+{
+	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
+		 "No virtio device ID configured for PCI - no PCI support\n"))
+		return 0;
+
+	return register_virtio_driver(&virtio_pcidev_virtio_driver);
+}
+late_initcall(virtio_pcidev_init);
+
+static void __exit virtio_pcidev_exit(void)
+{
+	unregister_virtio_driver(&virtio_pcidev_virtio_driver);
+}
+module_exit(virtio_pcidev_exit);
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
new file mode 100644
index 000000000000..6cf1152a1a4e
--- /dev/null
+++ b/arch/um/drivers/virtio_uml.c
@@ -0,0 +1,1495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio vhost-user driver
+ *
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This driver allows virtio devices to be used over a vhost-user socket.
+ *
+ * Guest devices can be instantiated by kernel module or command line
+ * parameters. One device will be created for each parameter. Syntax:
+ *
+ *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
+ * where:
+ *		<socket>	:= vhost-user socket path to connect
+ *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
+ *		<platform_id>	:= (optional) platform device id
+ *
+ * example:
+ *		virtio_uml.device=/var/uml.socket:1
+ *
+ * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
+ */
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/string_choices.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/time-internal.h>
+#include <linux/virtio-uml.h>
+#include <shared/as-layout.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+#include "vhost_user.h"
+
+#define MAX_SUPPORTED_QUEUE_SIZE	256
+
+#define to_virtio_uml_device(_vdev) \
+	container_of(_vdev, struct virtio_uml_device, vdev)
+
+struct virtio_uml_platform_data {
+	u32 virtio_device_id;
+	const char *socket_path;
+	struct work_struct conn_broken_wk;
+	struct platform_device *pdev;
+};
+
+struct virtio_uml_device {
+	struct virtio_device vdev;
+	struct platform_device *pdev;
+	struct virtio_uml_platform_data *pdata;
+
+	raw_spinlock_t sock_lock;
+	int sock, req_fd, irq;
+	u64 features;
+	u64 protocol_features;
+	u64 max_vqs;
+	u8 status;
+	u8 registered:1;
+	u8 suspended:1;
+	u8 no_vq_suspend:1;
+
+	u8 config_changed_irq:1;
+	uint64_t vq_irq_vq_map;
+	int recv_rc;
+};
+
+struct virtio_uml_vq_info {
+	int kick_fd, call_fd;
+	char name[32];
+	bool suspended;
+};
+
+#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
+
+/* Vhost-user protocol */
+
+static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
+			    const int *fds, unsigned int fds_num)
+{
+	int rc;
+
+	do {
+		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
+		if (rc > 0) {
+			buf += rc;
+			len -= rc;
+			fds = NULL;
+			fds_num = 0;
+		}
+	} while (len && (rc >= 0 || rc == -EINTR));
+
+	if (rc < 0)
+		return rc;
+	return 0;
+}
+
+static int full_read(int fd, void *buf, int len, bool abortable)
+{
+	int rc;
+
+	if (!len)
+		return 0;
+
+	do {
+		rc = os_read_file(fd, buf, len);
+		if (rc > 0) {
+			buf += rc;
+			len -= rc;
+		}
+	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
+
+	if (rc < 0)
+		return rc;
+	if (rc == 0)
+		return -ECONNRESET;
+	return 0;
+}
+
+static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
+{
+	return full_read(fd, msg, sizeof(msg->header), true);
+}
+
+static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+			   int fd, struct vhost_user_msg *msg,
+			   size_t max_payload_size, bool wait)
+{
+	size_t size;
+	int rc;
+
+	/*
+	 * In virtio time-travel mode, we're handling all the vhost-user
+	 * FDs by polling them whenever appropriate. However, we may get
+	 * into a situation where we're sending out an interrupt message
+	 * to a device (e.g. a net device) and need to handle a simulation
+	 * time message while doing so, e.g. one that tells us to update
+	 * our idea of how long we can run without scheduling.
+	 *
+	 * Thus, we need to not just read() from the given fd, but need
+	 * to also handle messages for the simulation time - this function
+	 * does that for us while waiting for the given fd to be readable.
+	 */
+	if (wait)
+		time_travel_wait_readable(fd);
+
+	rc = vhost_user_recv_header(fd, msg);
+
+	if (rc)
+		return rc;
+	size = msg->header.size;
+	if (size > max_payload_size)
+		return -EPROTO;
+	return full_read(fd, &msg->payload, size, false);
+}
+
+static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
+				   int rc)
+{
+	struct virtio_uml_platform_data *pdata = vu_dev->pdata;
+
+	if (rc != -ECONNRESET)
+		return;
+
+	if (!vu_dev->registered)
+		return;
+
+	vu_dev->registered = 0;
+
+	schedule_work(&pdata->conn_broken_wk);
+}
+
+static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
+				struct vhost_user_msg *msg,
+				size_t max_payload_size)
+{
+	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
+				 max_payload_size, true);
+
+	if (rc) {
+		vhost_user_check_reset(vu_dev, rc);
+		return rc;
+	}
+
+	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
+		return -EPROTO;
+
+	return 0;
+}
+
+static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
+			       u64 *value)
+{
+	struct vhost_user_msg msg;
+	int rc = vhost_user_recv_resp(vu_dev, &msg,
+				      sizeof(msg.payload.integer));
+
+	if (rc)
+		return rc;
+	if (msg.header.size != sizeof(msg.payload.integer))
+		return -EPROTO;
+	*value = msg.payload.integer;
+	return 0;
+}
+
+static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
+			       struct vhost_user_msg *msg,
+			       size_t max_payload_size)
+{
+	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
+				 max_payload_size, false);
+
+	if (rc)
+		return rc;
+
+	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
+			VHOST_USER_VERSION)
+		return -EPROTO;
+
+	return 0;
+}
+
+static int vhost_user_send(struct virtio_uml_device *vu_dev,
+			   bool need_response, struct vhost_user_msg *msg,
+			   int *fds, size_t num_fds)
+{
+	size_t size = sizeof(msg->header) + msg->header.size;
+	unsigned long flags;
+	bool request_ack;
+	int rc;
+
+	msg->header.flags |= VHOST_USER_VERSION;
+
+	/*
+	 * The need_response flag indicates that we already need a response,
+	 * e.g. to read the features. In these cases, don't request an ACK as
+	 * it is meaningless. Also request an ACK only if supported.
+	 */
+	request_ack = !need_response;
+	if (!(vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
+		request_ack = false;
+
+	if (request_ack)
+		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
+
+	raw_spin_lock_irqsave(&vu_dev->sock_lock, flags);
+	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
+	if (rc < 0)
+		goto out;
+
+	if (request_ack) {
+		uint64_t status;
+
+		rc = vhost_user_recv_u64(vu_dev, &status);
+		if (rc)
+			goto out;
+
+		if (status) {
+			vu_err(vu_dev, "slave reports error: %llu\n", status);
+			rc = -EIO;
+			goto out;
+		}
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
+	return rc;
+}
+
+static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
+				      bool need_response, u32 request)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+	};
+
+	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
+}
+
+static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
+					 u32 request, int fd)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+	};
+
+	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
+}
+
+static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
+			       u32 request, u64 value)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.integer),
+		.payload.integer = value,
+	};
+
+	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
+}
+
+static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
+{
+	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
+}
+
+static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
+				   u64 *features)
+{
+	int rc = vhost_user_send_no_payload(vu_dev, true,
+					    VHOST_USER_GET_FEATURES);
+
+	if (rc)
+		return rc;
+	return vhost_user_recv_u64(vu_dev, features);
+}
+
+static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
+				   u64 features)
+{
+	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
+}
+
+static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
+					    u64 *protocol_features)
+{
+	int rc = vhost_user_send_no_payload(vu_dev, true,
+			VHOST_USER_GET_PROTOCOL_FEATURES);
+
+	if (rc)
+		return rc;
+	return vhost_user_recv_u64(vu_dev, protocol_features);
+}
+
+static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
+					    u64 protocol_features)
+{
+	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
+				   protocol_features);
+}
+
+static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev,
+				    u64 *queue_num)
+{
+	int rc = vhost_user_send_no_payload(vu_dev, true,
+			VHOST_USER_GET_QUEUE_NUM);
+
+	if (rc)
+		return rc;
+	return vhost_user_recv_u64(vu_dev, queue_num);
+}
+
+static void vhost_user_reply(struct virtio_uml_device *vu_dev,
+			     struct vhost_user_msg *msg, int response)
+{
+	struct vhost_user_msg reply = {
+		.payload.integer = response,
+	};
+	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
+	int rc;
+
+	reply.header = msg->header;
+	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
+	reply.header.flags |= VHOST_USER_FLAG_REPLY;
+	reply.header.size = sizeof(reply.payload.integer);
+
+	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
+
+	if (rc)
+		vu_err(vu_dev,
+		       "sending reply to slave request failed: %d (size %zu)\n",
+		       rc, size);
+}
+
+static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
+				       struct time_travel_event *ev)
+{
+	struct virtqueue *vq;
+	int response = 1;
+	struct {
+		struct vhost_user_msg msg;
+		u8 extra_payload[512];
+	} msg;
+	int rc;
+	irqreturn_t irq_rc = IRQ_NONE;
+
+	while (1) {
+		rc = vhost_user_recv_req(vu_dev, &msg.msg,
+					 sizeof(msg.msg.payload) +
+					 sizeof(msg.extra_payload));
+		if (rc)
+			break;
+
+		switch (msg.msg.header.request) {
+		case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
+			vu_dev->config_changed_irq = true;
+			response = 0;
+			break;
+		case VHOST_USER_SLAVE_VRING_CALL:
+			virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+				if (vq->index == msg.msg.payload.vring_state.index) {
+					response = 0;
+					vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
+					break;
+				}
+			}
+			break;
+		case VHOST_USER_SLAVE_IOTLB_MSG:
+			/* not supported - VIRTIO_F_ACCESS_PLATFORM */
+		case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
+			/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
+		default:
+			vu_err(vu_dev, "unexpected slave request %d\n",
+			       msg.msg.header.request);
+		}
+
+		if (ev && !vu_dev->suspended)
+			time_travel_add_irq_event(ev);
+
+		if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
+			vhost_user_reply(vu_dev, &msg.msg, response);
+		irq_rc = IRQ_HANDLED;
+	}
+	/* mask EAGAIN as we try non-blocking read until socket is empty */
+	vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc;
+	return irq_rc;
+}
+
+static irqreturn_t vu_req_interrupt(int irq, void *data)
+{
+	struct virtio_uml_device *vu_dev = data;
+	irqreturn_t ret = IRQ_HANDLED;
+
+	if (!um_irq_timetravel_handler_used())
+		ret = vu_req_read_message(vu_dev, NULL);
+
+	if (vu_dev->recv_rc) {
+		vhost_user_check_reset(vu_dev, vu_dev->recv_rc);
+	} else if (vu_dev->vq_irq_vq_map) {
+		struct virtqueue *vq;
+
+		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+			if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
+				vring_interrupt(0 /* ignored */, vq);
+		}
+		vu_dev->vq_irq_vq_map = 0;
+	} else if (vu_dev->config_changed_irq) {
+		virtio_config_changed(&vu_dev->vdev);
+		vu_dev->config_changed_irq = false;
+	}
+
+	return ret;
+}
+
+static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
+					  struct time_travel_event *ev)
+{
+	vu_req_read_message(data, ev);
+}
+
+static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
+{
+	int rc, req_fds[2];
+
+	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
+	rc = os_pipe(req_fds, true, true);
+	if (rc < 0)
+		return rc;
+	vu_dev->req_fd = req_fds[0];
+
+	rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
+			       vu_req_interrupt, IRQF_SHARED,
+			       vu_dev->pdev->name, vu_dev,
+			       vu_req_interrupt_comm_handler);
+	if (rc < 0)
+		goto err_close;
+
+	vu_dev->irq = rc;
+
+	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
+					   req_fds[1]);
+	if (rc)
+		goto err_free_irq;
+
+	goto out;
+
+err_free_irq:
+	um_free_irq(vu_dev->irq, vu_dev);
+err_close:
+	os_close_file(req_fds[0]);
+out:
+	/* Close unused write end of request fds */
+	os_close_file(req_fds[1]);
+	return rc;
+}
+
+static int vhost_user_init(struct virtio_uml_device *vu_dev)
+{
+	int rc = vhost_user_set_owner(vu_dev);
+
+	if (rc)
+		return rc;
+	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
+	if (rc)
+		return rc;
+
+	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
+		rc = vhost_user_get_protocol_features(vu_dev,
+				&vu_dev->protocol_features);
+		if (rc)
+			return rc;
+		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
+		rc = vhost_user_set_protocol_features(vu_dev,
+				vu_dev->protocol_features);
+		if (rc)
+			return rc;
+	}
+
+	if (vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+		rc = vhost_user_init_slave_req(vu_dev);
+		if (rc)
+			return rc;
+	}
+
+	if (vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) {
+		rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs);
+		if (rc)
+			return rc;
+	} else {
+		vu_dev->max_vqs = U64_MAX;
+	}
+
+	return 0;
+}
+
+static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
+				  u32 offset, void *buf, u32 len)
+{
+	u32 cfg_size = offset + len;
+	struct vhost_user_msg *msg;
+	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
+	size_t msg_size = sizeof(msg->header) + payload_size;
+	int rc;
+
+	if (!(vu_dev->protocol_features &
+	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+		return;
+
+	msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!msg)
+		return;
+	msg->header.request = VHOST_USER_GET_CONFIG;
+	msg->header.size = payload_size;
+	msg->payload.config.offset = 0;
+	msg->payload.config.size = cfg_size;
+
+	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
+	if (rc) {
+		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
+		       rc);
+		goto free;
+	}
+
+	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
+	if (rc) {
+		vu_err(vu_dev,
+		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
+		       rc);
+		goto free;
+	}
+
+	if (msg->header.size != payload_size ||
+	    msg->payload.config.size != cfg_size) {
+		rc = -EPROTO;
+		vu_err(vu_dev,
+		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
+		       msg->header.size, payload_size,
+		       msg->payload.config.size, cfg_size);
+		goto free;
+	}
+	memcpy(buf, msg->payload.config.payload + offset, len);
+
+free:
+	kfree(msg);
+}
+
+static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
+				  u32 offset, const void *buf, u32 len)
+{
+	struct vhost_user_msg *msg;
+	size_t payload_size = sizeof(msg->payload.config) + len;
+	size_t msg_size = sizeof(msg->header) + payload_size;
+	int rc;
+
+	if (!(vu_dev->protocol_features &
+	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+		return;
+
+	msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!msg)
+		return;
+	msg->header.request = VHOST_USER_SET_CONFIG;
+	msg->header.size = payload_size;
+	msg->payload.config.offset = offset;
+	msg->payload.config.size = len;
+	memcpy(msg->payload.config.payload, buf, len);
+
+	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
+	if (rc)
+		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
+		       rc);
+
+	kfree(msg);
+}
+
+static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
+				      struct vhost_user_mem_region *region_out)
+{
+	unsigned long long mem_offset;
+	int rc = phys_mapping(addr, &mem_offset);
+
+	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
+		return -EFAULT;
+	*fd_out = rc;
+	region_out->guest_addr = addr;
+	region_out->user_addr = addr;
+	region_out->size = size;
+	region_out->mmap_offset = mem_offset;
+
+	/* Ensure mapping is valid for the entire region */
+	rc = phys_mapping(addr + size - 1, &mem_offset);
+	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
+		 addr + size - 1, rc, *fd_out))
+		return -EFAULT;
+	return 0;
+}
+
+static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
+{
+	struct vhost_user_msg msg = {
+		.header.request = VHOST_USER_SET_MEM_TABLE,
+		.header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]),
+		.payload.mem_regions.num = 1,
+	};
+	unsigned long reserved = uml_reserved - uml_physmem;
+	int fds[2];
+	int rc;
+
+	/*
+	 * This is a bit tricky, see also the comment with setup_physmem().
+	 *
+	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
+	 * but the code and data we *already* have is omitted. To us, this
+	 * is no difference, since they both become part of our address
+	 * space and memory consumption. To somebody looking in from the
+	 * outside, however, it is different because the part of our memory
+	 * consumption that's already part of the binary (code/data) is not
+	 * mapped from the file, so it's not visible to another mmap from
+	 * the file descriptor.
+	 *
+	 * Thus, don't advertise this space to the vhost-user slave. This
+	 * means that the slave will likely abort or similar when we give
+	 * it an address from the hidden range, since it's not marked as
+	 * a valid address, but at least that way we detect the issue and
+	 * don't just have the slave read an all-zeroes buffer from the
+	 * shared memory file, or write something there that we can never
+	 * see (depending on the direction of the virtqueue traffic.)
+	 *
+	 * Since we usually don't want to use .text for virtio buffers,
+	 * this effectively means that you cannot use
+	 *  1) global variables, which are in the .bss and not in the shm
+	 *     file-backed memory
+	 *  2) the stack in some processes, depending on where they have
+	 *     their stack (or maybe only no interrupt stack?)
+	 *
+	 * The stack is already not typically valid for DMA, so this isn't
+	 * much of a restriction, but global variables might be encountered.
+	 *
+	 * It might be possible to fix it by copying around the data that's
+	 * between bss_start and where we map the file now, but it's not
+	 * something that you typically encounter with virtio drivers, so
+	 * it didn't seem worthwhile.
+	 */
+	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
+					&fds[0],
+					&msg.payload.mem_regions.regions[0]);
+
+	if (rc < 0)
+		return rc;
+
+	return vhost_user_send(vu_dev, false, &msg, fds,
+			       msg.payload.mem_regions.num);
+}
+
+static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
+				      u32 request, u32 index, u32 num)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.vring_state),
+		.payload.vring_state.index = index,
+		.payload.vring_state.num = num,
+	};
+
+	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
+}
+
+static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
+				    u32 index, u32 num)
+{
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
+					  index, num);
+}
+
+static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
+				     u32 index, u32 offset)
+{
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
+					  index, offset);
+}
+
+static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
+				     u32 index, u64 desc, u64 used, u64 avail,
+				     u64 log)
+{
+	struct vhost_user_msg msg = {
+		.header.request = VHOST_USER_SET_VRING_ADDR,
+		.header.size = sizeof(msg.payload.vring_addr),
+		.payload.vring_addr.index = index,
+		.payload.vring_addr.desc = desc,
+		.payload.vring_addr.used = used,
+		.payload.vring_addr.avail = avail,
+		.payload.vring_addr.log = log,
+	};
+
+	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
+}
+
+static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
+				   u32 request, int index, int fd)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.integer),
+		.payload.integer = index,
+	};
+
+	if (index & ~VHOST_USER_VRING_INDEX_MASK)
+		return -EINVAL;
+	if (fd < 0) {
+		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
+		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
+	}
+	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
+}
+
+static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
+				     int index, int fd)
+{
+	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
+				       index, fd);
+}
+
+static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
+				     int index, int fd)
+{
+	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
+				       index, fd);
+}
+
+static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
+				       u32 index, bool enable)
+{
+	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
+		return 0;
+
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
+					  index, enable);
+}
+
+
+/* Virtio interface */
+
+static bool vu_notify(struct virtqueue *vq)
+{
+	struct virtio_uml_vq_info *info = vq->priv;
+	const uint64_t n = 1;
+	int rc;
+
+	if (info->suspended)
+		return true;
+
+	time_travel_propagate_time();
+
+	if (info->kick_fd < 0) {
+		struct virtio_uml_device *vu_dev;
+
+		vu_dev = to_virtio_uml_device(vq->vdev);
+
+		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
+						  vq->index, 0) == 0;
+	}
+
+	do {
+		rc = os_write_file(info->kick_fd, &n, sizeof(n));
+	} while (rc == -EINTR);
+	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
+}
+
+static irqreturn_t vu_interrupt(int irq, void *opaque)
+{
+	struct virtqueue *vq = opaque;
+	struct virtio_uml_vq_info *info = vq->priv;
+	uint64_t n;
+	int rc;
+	irqreturn_t ret = IRQ_NONE;
+
+	do {
+		rc = os_read_file(info->call_fd, &n, sizeof(n));
+		if (rc == sizeof(n))
+			ret |= vring_interrupt(irq, vq);
+	} while (rc == sizeof(n) || rc == -EINTR);
+	WARN(rc != -EAGAIN, "read returned %d\n", rc);
+	return ret;
+}
+
+
+static void vu_get(struct virtio_device *vdev, unsigned offset,
+		   void *buf, unsigned len)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vhost_user_get_config(vu_dev, offset, buf, len);
+}
+
+static void vu_set(struct virtio_device *vdev, unsigned offset,
+		   const void *buf, unsigned len)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vhost_user_set_config(vu_dev, offset, buf, len);
+}
+
+static u8 vu_get_status(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->status;
+}
+
+static void vu_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vu_dev->status = status;
+}
+
+static void vu_reset(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vu_dev->status = 0;
+}
+
+static void vu_del_vq(struct virtqueue *vq)
+{
+	struct virtio_uml_vq_info *info = vq->priv;
+
+	if (info->call_fd >= 0) {
+		struct virtio_uml_device *vu_dev;
+
+		vu_dev = to_virtio_uml_device(vq->vdev);
+
+		um_free_irq(vu_dev->irq, vq);
+		os_close_file(info->call_fd);
+	}
+
+	if (info->kick_fd >= 0)
+		os_close_file(info->kick_fd);
+
+	vring_del_virtqueue(vq);
+	kfree(info);
+}
+
+static void vu_del_vqs(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	struct virtqueue *vq, *n;
+	u64 features;
+
+	/* Note: reverse order as a workaround to a decoding bug in snabb */
+	list_for_each_entry_reverse(vq, &vdev->vqs, list)
+		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
+
+	/* Ensure previous messages have been processed */
+	WARN_ON(vhost_user_get_features(vu_dev, &features));
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		vu_del_vq(vq);
+}
+
+static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
+			       struct virtqueue *vq)
+{
+	struct virtio_uml_vq_info *info = vq->priv;
+	int call_fds[2];
+	int rc, irq;
+
+	/* no call FD needed/desired in this case */
+	if (vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
+	    vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+		info->call_fd = -1;
+		return 0;
+	}
+
+	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
+	rc = os_pipe(call_fds, true, true);
+	if (rc < 0)
+		return rc;
+
+	info->call_fd = call_fds[0];
+	irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
+			     vu_interrupt, IRQF_SHARED, info->name, vq);
+	if (irq < 0) {
+		rc = irq;
+		goto close_both;
+	}
+
+	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
+	if (rc)
+		goto release_irq;
+
+	vu_dev->irq = irq;
+
+	goto out;
+
+release_irq:
+	um_free_irq(irq, vq);
+close_both:
+	os_close_file(call_fds[0]);
+out:
+	/* Close (unused) write end of call fds */
+	os_close_file(call_fds[1]);
+
+	return rc;
+}
+
+static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
+				     unsigned index, vq_callback_t *callback,
+				     const char *name, bool ctx)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	struct platform_device *pdev = vu_dev->pdev;
+	struct virtio_uml_vq_info *info;
+	struct virtqueue *vq;
+	int num = MAX_SUPPORTED_QUEUE_SIZE;
+	int rc;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto error_kzalloc;
+	}
+	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
+		 pdev->id, name);
+
+	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
+				    ctx, vu_notify, callback, info->name);
+	if (!vq) {
+		rc = -ENOMEM;
+		goto error_create;
+	}
+	vq->priv = info;
+	vq->num_max = num;
+	num = virtqueue_get_vring_size(vq);
+
+	if (vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
+		info->kick_fd = -1;
+	} else {
+		rc = os_eventfd(0, 0);
+		if (rc < 0)
+			goto error_kick;
+		info->kick_fd = rc;
+	}
+
+	rc = vu_setup_vq_call_fd(vu_dev, vq);
+	if (rc)
+		goto error_call;
+
+	rc = vhost_user_set_vring_num(vu_dev, index, num);
+	if (rc)
+		goto error_setup;
+
+	rc = vhost_user_set_vring_base(vu_dev, index, 0);
+	if (rc)
+		goto error_setup;
+
+	rc = vhost_user_set_vring_addr(vu_dev, index,
+				       virtqueue_get_desc_addr(vq),
+				       virtqueue_get_used_addr(vq),
+				       virtqueue_get_avail_addr(vq),
+				       (u64) -1);
+	if (rc)
+		goto error_setup;
+
+	return vq;
+
+error_setup:
+	if (info->call_fd >= 0) {
+		um_free_irq(vu_dev->irq, vq);
+		os_close_file(info->call_fd);
+	}
+error_call:
+	if (info->kick_fd >= 0)
+		os_close_file(info->kick_fd);
+error_kick:
+	vring_del_virtqueue(vq);
+error_create:
+	kfree(info);
+error_kzalloc:
+	return ERR_PTR(rc);
+}
+
+static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       struct virtqueue_info vqs_info[],
+		       struct irq_affinity *desc)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	int i, queue_idx = 0, rc;
+	struct virtqueue *vq;
+
+	/* not supported for now */
+	if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs,
+		 "%d VQs requested, only up to 64 or %lld supported\n",
+		 nvqs, vu_dev->max_vqs))
+		return -EINVAL;
+
+	rc = vhost_user_set_mem_table(vu_dev);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < nvqs; ++i) {
+		struct virtqueue_info *vqi = &vqs_info[i];
+
+		if (!vqi->name) {
+			vqs[i] = NULL;
+			continue;
+		}
+
+		vqs[i] = vu_setup_vq(vdev, queue_idx++, vqi->callback,
+				     vqi->name, vqi->ctx);
+		if (IS_ERR(vqs[i])) {
+			rc = PTR_ERR(vqs[i]);
+			goto error_setup;
+		}
+	}
+
+	list_for_each_entry(vq, &vdev->vqs, list) {
+		struct virtio_uml_vq_info *info = vq->priv;
+
+		if (info->kick_fd >= 0) {
+			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
+						       info->kick_fd);
+			if (rc)
+				goto error_setup;
+		}
+
+		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
+		if (rc)
+			goto error_setup;
+	}
+
+	return 0;
+
+error_setup:
+	vu_del_vqs(vdev);
+	return rc;
+}
+
+static u64 vu_get_features(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->features;
+}
+
+static int vu_finalize_features(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
+
+	vring_transport_features(vdev);
+	vu_dev->features = vdev->features | supported;
+
+	return vhost_user_set_features(vu_dev, vu_dev->features);
+}
+
+static const char *vu_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->pdev->name;
+}
+
+static const struct virtio_config_ops virtio_uml_config_ops = {
+	.get = vu_get,
+	.set = vu_set,
+	.get_status = vu_get_status,
+	.set_status = vu_set_status,
+	.reset = vu_reset,
+	.find_vqs = vu_find_vqs,
+	.del_vqs = vu_del_vqs,
+	.get_features = vu_get_features,
+	.finalize_features = vu_finalize_features,
+	.bus_name = vu_bus_name,
+};
+
+static void virtio_uml_release_dev(struct device *d)
+{
+	struct virtio_device *vdev =
+			container_of(d, struct virtio_device, dev);
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	time_travel_propagate_time();
+
+	/* might not have been opened due to not negotiating the feature */
+	if (vu_dev->req_fd >= 0) {
+		um_free_irq(vu_dev->irq, vu_dev);
+		os_close_file(vu_dev->req_fd);
+	}
+
+	os_close_file(vu_dev->sock);
+	kfree(vu_dev);
+}
+
+void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+				  bool no_vq_suspend)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
+		return;
+
+	vu_dev->no_vq_suspend = no_vq_suspend;
+	dev_info(&vdev->dev, "%s VQ suspend\n", str_disabled_enabled(no_vq_suspend));
+}
+
+static void vu_of_conn_broken(struct work_struct *wk)
+{
+	struct virtio_uml_platform_data *pdata;
+	struct virtio_uml_device *vu_dev;
+
+	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+
+	vu_dev = platform_get_drvdata(pdata->pdev);
+
+	virtio_break_device(&vu_dev->vdev);
+
+	/*
+	 * We can't remove the device from the devicetree so the only thing we
+	 * can do is warn.
+	 */
+	WARN_ON(1);
+}
+
+/* Platform device */
+
+static struct virtio_uml_platform_data *
+virtio_uml_create_pdata(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct virtio_uml_platform_data *pdata;
+	int ret;
+
+	if (!np)
+		return ERR_PTR(-EINVAL);
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken);
+	pdata->pdev = pdev;
+
+	ret = of_property_read_string(np, "socket-path", &pdata->socket_path);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = of_property_read_u32(np, "virtio-device-id",
+				   &pdata->virtio_device_id);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return pdata;
+}
+
+static int virtio_uml_probe(struct platform_device *pdev)
+{
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+	struct virtio_uml_device *vu_dev;
+	int rc;
+
+	if (!pdata) {
+		pdata = virtio_uml_create_pdata(pdev);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+	}
+
+	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
+	if (!vu_dev)
+		return -ENOMEM;
+
+	vu_dev->pdata = pdata;
+	vu_dev->vdev.dev.parent = &pdev->dev;
+	vu_dev->vdev.dev.release = virtio_uml_release_dev;
+	vu_dev->vdev.config = &virtio_uml_config_ops;
+	vu_dev->vdev.id.device = pdata->virtio_device_id;
+	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
+	vu_dev->pdev = pdev;
+	vu_dev->req_fd = -1;
+	vu_dev->irq = UM_IRQ_ALLOC;
+
+	time_travel_propagate_time();
+
+	do {
+		rc = os_connect_socket(pdata->socket_path);
+	} while (rc == -EINTR);
+	if (rc < 0)
+		goto error_free;
+	vu_dev->sock = rc;
+
+	raw_spin_lock_init(&vu_dev->sock_lock);
+
+	rc = vhost_user_init(vu_dev);
+	if (rc)
+		goto error_init;
+
+	platform_set_drvdata(pdev, vu_dev);
+
+	device_set_wakeup_capable(&vu_dev->vdev.dev, true);
+
+	rc = register_virtio_device(&vu_dev->vdev);
+	if (rc) {
+		put_device(&vu_dev->vdev.dev);
+		return rc;
+	}
+	vu_dev->registered = 1;
+	return 0;
+
+error_init:
+	os_close_file(vu_dev->sock);
+error_free:
+	kfree(vu_dev);
+	return rc;
+}
+
+static void virtio_uml_remove(struct platform_device *pdev)
+{
+	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
+
+	unregister_virtio_device(&vu_dev->vdev);
+}
+
+/* Command line device list */
+
+static void vu_cmdline_release_dev(struct device *d)
+{
+}
+
+static struct device vu_cmdline_parent = {
+	.init_name = "virtio-uml-cmdline",
+	.release = vu_cmdline_release_dev,
+};
+
+static bool vu_cmdline_parent_registered;
+static int vu_cmdline_id;
+
+static int vu_unregister_cmdline_device(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+
+	kfree(pdata->socket_path);
+	platform_device_unregister(pdev);
+	return 0;
+}
+
+static void vu_conn_broken(struct work_struct *wk)
+{
+	struct virtio_uml_platform_data *pdata;
+	struct virtio_uml_device *vu_dev;
+
+	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
+
+	vu_dev = platform_get_drvdata(pdata->pdev);
+
+	virtio_break_device(&vu_dev->vdev);
+
+	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
+}
+
+static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+	const char *ids = strchr(device, ':');
+	unsigned int virtio_device_id;
+	int processed, consumed, err;
+	char *socket_path;
+	struct virtio_uml_platform_data pdata, *ppdata;
+	struct platform_device *pdev;
+
+	if (!ids || ids == device)
+		return -EINVAL;
+
+	processed = sscanf(ids, ":%u%n:%d%n",
+			   &virtio_device_id, &consumed,
+			   &vu_cmdline_id, &consumed);
+
+	if (processed < 1 || ids[consumed])
+		return -EINVAL;
+
+	if (!vu_cmdline_parent_registered) {
+		err = device_register(&vu_cmdline_parent);
+		if (err) {
+			pr_err("Failed to register parent device!\n");
+			put_device(&vu_cmdline_parent);
+			return err;
+		}
+		vu_cmdline_parent_registered = true;
+	}
+
+	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
+	if (!socket_path)
+		return -ENOMEM;
+
+	pdata.virtio_device_id = (u32) virtio_device_id;
+	pdata.socket_path = socket_path;
+
+	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
+		vu_cmdline_id, virtio_device_id, socket_path);
+
+	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
+					     vu_cmdline_id++, &pdata,
+					     sizeof(pdata));
+	err = PTR_ERR_OR_ZERO(pdev);
+	if (err)
+		goto free;
+
+	ppdata = pdev->dev.platform_data;
+	ppdata->pdev = pdev;
+	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
+
+	return 0;
+
+free:
+	kfree(socket_path);
+	return err;
+}
+
+static int vu_cmdline_get_device(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+	char *buffer = data;
+	unsigned int len = strlen(buffer);
+
+	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
+		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
+	return 0;
+}
+
+static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	buffer[0] = '\0';
+	if (vu_cmdline_parent_registered)
+		device_for_each_child(&vu_cmdline_parent, buffer,
+				      vu_cmdline_get_device);
+	return strlen(buffer) + 1;
+}
+
+static const struct kernel_param_ops vu_cmdline_param_ops = {
+	.set = vu_cmdline_set,
+	.get = vu_cmdline_get,
+};
+
+device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
+__uml_help(vu_cmdline_param_ops,
+"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
+"    Configure a virtio device over a vhost-user socket.\n"
+"    See virtio_ids.h for a list of possible virtio device id values.\n"
+"    Optionally use a specific platform_device id.\n\n"
+);
+
+
+static void vu_unregister_cmdline_devices(void)
+{
+	if (vu_cmdline_parent_registered) {
+		device_for_each_child(&vu_cmdline_parent, NULL,
+				      vu_unregister_cmdline_device);
+		device_unregister(&vu_cmdline_parent);
+		vu_cmdline_parent_registered = false;
+	}
+}
+
+/* Platform driver */
+
+static const struct of_device_id virtio_uml_match[] = {
+	{ .compatible = "virtio,uml", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, virtio_uml_match);
+
+static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
+
+	if (!vu_dev->no_vq_suspend) {
+		struct virtqueue *vq;
+
+		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+			struct virtio_uml_vq_info *info = vq->priv;
+
+			info->suspended = true;
+			vhost_user_set_vring_enable(vu_dev, vq->index, false);
+		}
+	}
+
+	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
+		vu_dev->suspended = true;
+		return 0;
+	}
+
+	return irq_set_irq_wake(vu_dev->irq, 1);
+}
+
+static int virtio_uml_resume(struct platform_device *pdev)
+{
+	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
+
+	if (!vu_dev->no_vq_suspend) {
+		struct virtqueue *vq;
+
+		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+			struct virtio_uml_vq_info *info = vq->priv;
+
+			info->suspended = false;
+			vhost_user_set_vring_enable(vu_dev, vq->index, true);
+		}
+	}
+
+	vu_dev->suspended = false;
+
+	if (!device_may_wakeup(&vu_dev->vdev.dev))
+		return 0;
+
+	return irq_set_irq_wake(vu_dev->irq, 0);
+}
+
+static struct platform_driver virtio_uml_driver = {
+	.probe = virtio_uml_probe,
+	.remove = virtio_uml_remove,
+	.driver = {
+		.name = "virtio-uml",
+		.of_match_table = virtio_uml_match,
+	},
+	.suspend = virtio_uml_suspend,
+	.resume = virtio_uml_resume,
+};
+
+static int __init virtio_uml_init(void)
+{
+	return platform_driver_register(&virtio_uml_driver);
+}
+
+static void __exit virtio_uml_exit(void)
+{
+	platform_driver_unregister(&virtio_uml_driver);
+	vu_unregister_cmdline_devices();
+}
+
+module_init(virtio_uml_init);
+module_exit(virtio_uml_exit);
+__uml_exitcall(virtio_uml_exit);
+
+MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
+MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 20e30be44795..d05918e422f9 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
@@ -18,6 +18,7 @@
 struct xterm_chan {
 	int pid;
 	int helper_pid;
+	int chan_fd;
 	char *title;
 	int device;
 	int raw;
@@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts)
 		return NULL;
 	*data = ((struct xterm_chan) { .pid 		= -1,
 				       .helper_pid 	= -1,
+				       .chan_fd		= -1,
 				       .device 		= device,
 				       .title 		= opts->xterm_title,
 				       .raw  		= opts->raw } );
@@ -40,7 +42,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts)
 }
 
 /* Only changed by xterm_setup, which is a setup */
-static char *terminal_emulator = "xterm";
+static char *terminal_emulator = CONFIG_XTERM_CHAN_DEFAULT_EMULATOR;
 static char *title_switch = "-T";
 static char *exec_switch = "-e";
 
@@ -77,8 +79,9 @@ __uml_setup("xterm=", xterm_setup,
 "    respectively.  The title switch must have the form '<switch> title',\n"
 "    not '<switch>=title'.  Similarly, the exec switch must have the form\n"
 "    '<switch> command arg1 arg2 ...'.\n"
-"    The default values are 'xterm=xterm,-T,-e'.  Values for gnome-terminal\n"
-"    are 'xterm=gnome-terminal,-t,-x'.\n\n"
+"    The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR
+     ",-T,-e'.\n"
+"    Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n"
 );
 
 static int xterm_open(int input, int output, int primary, void *d,
@@ -94,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d,
 	if (access(argv[4], X_OK) < 0)
 		argv[4] = "port-helper";
 
-	/*
-	 * Check that DISPLAY is set, this doesn't guarantee the xterm
-	 * will work but w/o it we can be pretty sure it won't.
-	 */
-	if (getenv("DISPLAY") == NULL) {
-		printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n");
+	/* Ensure we are running on Xorg or Wayland. */
+	if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) {
+		printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n");
 		return -ENODEV;
 	}
 
@@ -149,10 +149,11 @@ static int xterm_open(int input, int output, int primary, void *d,
 		goto out_kill;
 	}
 
+	data->chan_fd = fd;
 	new = xterm_fd(fd, &data->helper_pid);
 	if (new < 0) {
 		err = new;
-		printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n",
+		printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n",
 		       -err);
 		goto out_kill;
 	}
@@ -206,6 +207,8 @@ static void xterm_close(int fd, void *d)
 		os_kill_process(data->helper_pid, 0);
 	data->helper_pid = -1;
 
+	if (data->chan_fd != -1)
+		os_close_file(data->chan_fd);
 	os_close_file(fd);
 }
 
diff --git a/arch/um/drivers/xterm.h b/arch/um/drivers/xterm.h
index 56b9c4aba423..5968da3a6aba 100644
--- a/arch/um/drivers/xterm.h
+++ b/arch/um/drivers/xterm.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __XTERM_H__
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index e8f9957bfbf6..3971252cb1a6 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/slab.h>
@@ -9,6 +9,7 @@
 #include <asm/irq.h>
 #include <irq_kern.h>
 #include <os.h>
+#include "xterm.h"
 
 struct xterm_wait {
 	struct completion ready;
@@ -20,12 +21,19 @@ struct xterm_wait {
 static irqreturn_t xterm_interrupt(int irq, void *data)
 {
 	struct xterm_wait *xterm = data;
-	int fd;
+	int fd = -1, n_fds = 1;
+	ssize_t ret;
 
-	fd = os_rcv_fd(xterm->fd, &xterm->pid);
-	if (fd == -EAGAIN)
+	ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds,
+			    &xterm->pid, sizeof(xterm->pid));
+	if (ret == -EAGAIN)
 		return IRQ_NONE;
 
+	if (ret < 0)
+		fd = ret;
+	else if (ret != sizeof(xterm->pid))
+		fd = -EMSGSIZE;
+
 	xterm->new_fd = fd;
 	complete(&xterm->ready);
 
@@ -51,7 +59,7 @@ int xterm_fd(int socket, int *pid_out)
 
 	err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt,
 			     IRQF_SHARED, "xterm", data);
-	if (err) {
+	if (err < 0) {
 		printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, "
 		       "err = %d\n",  err);
 		ret = err;
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b30f34a79882..1b9b82bbe322 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,5 +1,28 @@
-generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
-generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
-generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
-generic-y += switch_to.h clkdev.h
+# SPDX-License-Identifier: GPL-2.0
+generic-y += bug.h
+generic-y += compat.h
+generic-y += device.h
+generic-y += dma-mapping.h
+generic-y += emergency-restart.h
+generic-y += exec.h
+generic-y += ftrace.h
+generic-y += hw_irq.h
+generic-y += irq_regs.h
+generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += mcs_spinlock.h
+generic-y += mmiowb.h
+generic-y += module.h
+generic-y += module.lds.h
+generic-y += parport.h
+generic-y += percpu.h
+generic-y += preempt.h
+generic-y += runtime-const.h
+generic-y += softirq_stack.h
+generic-y += switch_to.h
+generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += kprobes.h
+generic-y += mm_hooks.h
+generic-y += vga.h
+generic-y += video.h
diff --git a/arch/um/include/asm/a.out-core.h b/arch/um/include/asm/a.out-core.h
deleted file mode 100644
index 995643b18309..000000000000
--- a/arch/um/include/asm/a.out-core.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef __UM_A_OUT_CORE_H
-#define __UM_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-
-#include <linux/user.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *u)
-{
-}
-
-#endif /* __KERNEL__ */
-#endif /* __UM_A_OUT_CORE_H */
diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h
new file mode 100644
index 000000000000..24e16c979c51
--- /dev/null
+++ b/arch/um/include/asm/archrandom.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_ARCHRANDOM_H__
+#define __ASM_UM_ARCHRANDOM_H__
+
+#include <linux/types.h>
+
+/* This is from <os.h>, but better not to #include that in a global header here. */
+ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+	ssize_t ret;
+
+	ret = os_getrandom(v, max_longs * sizeof(*v), 0);
+	if (ret < 0)
+		return 0;
+	return ret / sizeof(*v);
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+	return 0;
+}
+
+#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..408b31d59127
--- /dev/null
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -0,0 +1,6 @@
+#include <asm-generic/asm-prototypes.h>
+#include <asm/checksum.h>
+
+#ifdef CONFIG_UML_X86
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/um/include/asm/bpf_perf_event.h b/arch/um/include/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..287221342d2c
--- /dev/null
+++ b/arch/um/include/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * asm-generic/bpf_perf_event.h is part of the uapi headers, but since
+ * arch/um has no uapi of its on, we can't use the "generic-y"
+ * Kbuild rule to generate the wrapper
+ */
+
+#include <asm-generic/bpf_perf_event.h>
diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h
deleted file mode 100644
index 6a72e240d5fc..000000000000
--- a/arch/um/include/asm/bugs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_BUGS_H
-#define __UM_BUGS_H
-
-void check_bugs(void);
-
-#endif
diff --git a/arch/um/include/asm/cache.h b/arch/um/include/asm/cache.h
index 19e1bdd67416..5c156273484b 100644
--- a/arch/um/include/asm/cache.h
+++ b/arch/um/include/asm/cache.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_CACHE_H
 #define __UM_CACHE_H
 
diff --git a/arch/um/include/asm/cacheflush.h b/arch/um/include/asm/cacheflush.h
new file mode 100644
index 000000000000..4c9858cd36ec
--- /dev/null
+++ b/arch/um/include/asm/cacheflush.h
@@ -0,0 +1,9 @@
+#ifndef __UM_ASM_CACHEFLUSH_H
+#define __UM_ASM_CACHEFLUSH_H
+
+#include <asm/tlbflush.h>
+#define flush_cache_vmap flush_tlb_kernel_range
+#define flush_cache_vunmap flush_tlb_kernel_range
+
+#include <asm-generic/cacheflush.h>
+#endif /* __UM_ASM_CACHEFLUSH_H */
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 1dd5bd8a8c59..fd481ac371de 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #include <asm-generic/vmlinux.lds.h>
 
   .fini      : { *(.fini)    } =0x9090
@@ -8,14 +9,13 @@
   _sdata = .;
   PROVIDE (sdata = .);
 
-  RODATA
+  RO_DATA(4096)
 
   .unprotected : { *(.unprotected) }
   . = ALIGN(4096);
   PROVIDE (_unprotected_end = .);
 
   . = ALIGN(4096);
-  .note : { *(.note.*) }
   EXCEPTION_TABLE(0)
 
   BUG_TABLE
@@ -52,14 +52,6 @@
 	CON_INITCALL
   }
 
-  .uml.initcall.init : {
-	__uml_initcall_start = .;
-	*(.uml.initcall.init)
-	__uml_initcall_end = .;
-  }
-
-  SECURITY_INIT
-
   .exitcall : {
 	__exitcall_begin = .;
 	*(.exitcall.exit)
@@ -81,7 +73,7 @@
   .altinstr_replacement : { *(.altinstr_replacement) }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
+  .exit.text : { EXIT_TEXT }
   .exit.data : { *(.exit.data) }
 
   .preinit_array : {
@@ -91,6 +83,8 @@
   }
   .init_array : {
 	__init_array_start = .;
+	*(.kasan_init)
+	*(.init_array.*)
 	*(.init_array)
 	__init_array_end = .;
   }
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
new file mode 100644
index 000000000000..4354f6984271
--- /dev/null
+++ b/arch/um/include/asm/cpufeature.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CPUFEATURE_H
+#define _ASM_UM_CPUFEATURE_H
+
+#include <asm/processor.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
+
+#include <asm/asm.h>
+#include <linux/bitops.h>
+
+extern const char * const x86_cap_flags[NCAPINTS*32];
+extern const char * const x86_power_flags[32];
+#define X86_CAP_FMT "%s"
+#define x86_cap_flag(flag) x86_cap_flags[flag]
+
+/*
+ * In order to save room, we index into this array by doing
+ * X86_BUG_<name> - NCAPINTS*32.
+ */
+extern const char * const x86_bug_flags[NBUGINTS*32];
+
+#define test_cpu_cap(c, bit)						\
+	 test_bit(bit, (unsigned long *)((c)->x86_capability))
+
+/*
+ * There are 32 bits/features in each mask word.  The high bits
+ * (selected with (bit>>5) give us the word number and the low 5
+ * bits give us the bit/feature number inside the word.
+ * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
+ * see if it is set in the mask word.
+ */
+#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit)	\
+	(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
+
+#define cpu_has(c, bit)							\
+	 test_cpu_cap(c, bit)
+
+#define this_cpu_has(bit)						\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
+	 x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
+
+/*
+ * This macro is for detection of features which need kernel
+ * infrastructure to be used.  It may *not* directly test the CPU
+ * itself.  Use the cpu_has() family if you want true runtime
+ * testing of CPU features, like in hypervisor code where you are
+ * supporting a possible guest feature where host support for it
+ * is not relevant.
+ */
+#define cpu_feature_enabled(bit)	\
+	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
+
+#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
+
+#define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+
+#define setup_force_cpu_cap(bit) do { \
+	set_cpu_cap(&boot_cpu_data, bit);	\
+	set_bit(bit, (unsigned long *)cpu_caps_set);	\
+} while (0)
+
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
+/*
+ * Static testing of CPU features. Used the same as boot_cpu_has(). It
+ * statically patches the target code for additional performance. Use
+ * static_cpu_has() only in fast paths, where every cycle counts. Which
+ * means that the boot_cpu_has() variant is already fast enough for the
+ * majority of cases and you should stick to using it as it is generally
+ * only two instructions: a RIP-relative MOV and a TEST.
+ */
+static __always_inline bool _static_cpu_has(u16 bit)
+{
+	asm goto("1: jmp 6f\n"
+		 "2:\n"
+		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+			 "((5f-4f) - (2b-1b)),0x90\n"
+		 "3:\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 4f - .\n"		/* repl offset */
+		 " .word %P[always]\n"		/* always replace */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 5f - 4f\n"		/* repl len */
+		 " .byte 3b - 2b\n"		/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_replacement,\"ax\"\n"
+		 "4: jmp %l[t_no]\n"
+		 "5:\n"
+		 ".previous\n"
+		 ".section .altinstructions,\"a\"\n"
+		 " .long 1b - .\n"		/* src offset */
+		 " .long 0\n"			/* no replacement */
+		 " .word %P[feature]\n"		/* feature bit */
+		 " .byte 3b - 1b\n"		/* src len */
+		 " .byte 0\n"			/* repl len */
+		 " .byte 0\n"			/* pad len */
+		 ".previous\n"
+		 ".section .altinstr_aux,\"ax\"\n"
+		 "6:\n"
+		 " testb %[bitnum],%[cap_byte]\n"
+		 " jnz %l[t_yes]\n"
+		 " jmp %l[t_no]\n"
+		 ".previous\n"
+		 : : [feature]  "i" (bit),
+		     [always]   "i" (X86_FEATURE_ALWAYS),
+		     [bitnum]   "i" (1 << (bit & 7)),
+		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+		 : : t_yes, t_no);
+t_yes:
+	return true;
+t_no:
+	return false;
+}
+
+#define static_cpu_has(bit)					\
+(								\
+	__builtin_constant_p(boot_cpu_has(bit)) ?		\
+		boot_cpu_has(bit) :				\
+		_static_cpu_has(bit)				\
+)
+
+#define cpu_has_bug(c, bit)		cpu_has(c, (bit))
+#define set_cpu_bug(c, bit)		set_cpu_cap(c, (bit))
+
+#define static_cpu_has_bug(bit)		static_cpu_has((bit))
+#define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
+#define boot_cpu_set_bug(bit)		set_cpu_cap(&boot_cpu_data, (bit))
+
+#define MAX_CPU_FEATURES		(NCAPINTS * 32)
+#define cpu_have_feature		boot_cpu_has
+
+#define CPU_FEATURE_TYPEFMT		"x86,ven%04Xfam%04Xmod%04X"
+#define CPU_FEATURE_TYPEVAL		boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
+					boot_cpu_data.x86_model
+
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
+#endif /* _ASM_UM_CPUFEATURE_H */
diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h
new file mode 100644
index 000000000000..159a29b3d4cc
--- /dev/null
+++ b/arch/um/include/asm/current.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+#include <linux/threads.h>
+
+#ifndef __ASSEMBLER__
+
+#include <shared/smp.h>
+
+struct task_struct;
+extern struct task_struct *cpu_tasks[NR_CPUS];
+
+static __always_inline struct task_struct *get_current(void)
+{
+	return cpu_tasks[uml_curr_cpu()];
+}
+
+#define current get_current()
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_CURRENT_H */
diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h
new file mode 100644
index 000000000000..e79b2ab6f40c
--- /dev/null
+++ b/arch/um/include/asm/delay.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_DELAY_H
+#define __UM_DELAY_H
+#include <asm-generic/delay.h>
+#include <linux/time-internal.h>
+
+static inline void um_ndelay(unsigned long nsecs)
+{
+	if (time_travel_mode == TT_MODE_INFCPU ||
+	    time_travel_mode == TT_MODE_EXTERNAL) {
+		time_travel_ndelay(nsecs);
+		return;
+	}
+	ndelay(nsecs);
+}
+#undef ndelay
+#define ndelay(n) um_ndelay(n)
+
+static inline void um_udelay(unsigned long usecs)
+{
+	if (time_travel_mode == TT_MODE_INFCPU ||
+	    time_travel_mode == TT_MODE_EXTERNAL) {
+		time_travel_ndelay(1000 * usecs);
+		return;
+	}
+	udelay(usecs);
+}
+#undef udelay
+#define udelay(n) um_udelay(n)
+#endif /* __UM_DELAY_H */
diff --git a/arch/um/include/asm/dma.h b/arch/um/include/asm/dma.h
index f88c5860520b..fdc53642c718 100644
--- a/arch/um/include/asm/dma.h
+++ b/arch/um/include/asm/dma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_DMA_H
 #define __UM_DMA_H
 
diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h
deleted file mode 100644
index 21a423bae5e8..000000000000
--- a/arch/um/include/asm/fixmap.h
+++ /dev/null
@@ -1,98 +0,0 @@
-#ifndef __UM_FIXMAP_H
-#define __UM_FIXMAP_H
-
-#include <asm/processor.h>
-#include <asm/kmap_types.h>
-#include <asm/archparam.h>
-#include <asm/page.h>
-#include <linux/threads.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process. We allocate these special  addresses
- * from the end of virtual memory (0xfffff000) backwards.
- * Also this lets us do fail-safe vmalloc(), we
- * can guarantee that these special addresses and
- * vmalloc()-ed addresses never overlap.
- *
- * these 'compile-time allocated' memory buffers are
- * fixed-size 4k pages. (or larger if used with an increment
- * highger than 1) use fixmap_set(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-
-/*
- * on UP currently we will have no trace of the fixmap mechanizm,
- * no page table allocations, etc. This might change in the
- * future, say framebuffers for the console driver(s) could be
- * fix-mapped?
- */
-enum fixed_addresses {
-#ifdef CONFIG_HIGHMEM
-	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
-	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
-	__end_of_fixed_addresses
-};
-
-extern void __set_fixmap (enum fixed_addresses idx,
-			  unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys) \
-		__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-/*
- * used by vmalloc.c.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
- */
-
-#define FIXADDR_TOP	(TASK_SIZE - 2 * PAGE_SIZE)
-#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
-
-#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)      ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-	/*
-	 * this branch gets completely eliminated after inlining,
-	 * except when someone tries to use fixaddr indices in an
-	 * illegal way. (such as mixing up address types or using
-	 * out-of-range indices).
-	 *
-	 * If it doesn't get removed, the linker will complain
-	 * loudly with a reasonably clear error message..
-	 */
-	if (idx >= __end_of_fixed_addresses)
-		__this_fixmap_does_not_exist();
-
-        return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
-      BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-      return __virt_to_fix(vaddr);
-}
-
-#endif
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
new file mode 100644
index 000000000000..3abf67c83c40
--- /dev/null
+++ b/arch/um/include/asm/fpu/api.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_UM_FPU_API_H
+#define _ASM_UM_FPU_API_H
+
+#include <linux/types.h>
+
+/* Copyright (c) 2020 Cambridge Greys Ltd
+ * Copyright (c) 2020 Red Hat Inc.
+ * A set of "dummy" defines to allow the direct inclusion
+ * of x86 optimized copy, xor, etc routines into the
+ * UML code tree. */
+
+#define kernel_fpu_begin() (void)0
+#define kernel_fpu_end() (void)0
+
+static inline bool irq_fpu_usable(void)
+{
+	return true;
+}
+
+
+#endif
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
new file mode 100644
index 000000000000..780aa6bfc050
--- /dev/null
+++ b/arch/um/include/asm/futex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_FUTEX_H
+#define _ASM_UM_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval);
+
+#endif
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
new file mode 100644
index 000000000000..8de71752a9b8
--- /dev/null
+++ b/arch/um/include/asm/hardirq.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_HARDIRQ_H
+#define __ASM_UM_HARDIRQ_H
+
+#include <linux/cache.h>
+#include <linux/threads.h>
+
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
+
+typedef struct {
+	unsigned int __softirq_pending;
+#if IS_ENABLED(CONFIG_SMP)
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
+
+#include <linux/irq.h>
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+	pr_crit("unexpected IRQ trap at vector %02x\n", irq);
+}
+
+#endif /* __ASM_UM_HARDIRQ_H */
diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h
new file mode 100644
index 000000000000..9ea42cc746d9
--- /dev/null
+++ b/arch/um/include/asm/io.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_IO_H
+#define _ASM_UM_IO_H
+#include <linux/types.h>
+
+/* get emulated iomem (if desired) */
+#include <asm-generic/logic_io.h>
+
+#ifndef ioremap
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+	return NULL;
+}
+#endif /* ioremap */
+
+#ifndef iounmap
+#define iounmap iounmap
+static inline void iounmap(void __iomem *addr)
+{
+}
+#endif /* iounmap */
+
+#include <asm-generic/io.h>
+
+#endif
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index 4a2037f8204b..36dbedd1af48 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -1,23 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_IRQ_H
 #define __UM_IRQ_H
 
 #define TIMER_IRQ		0
 #define UMN_IRQ			1
-#define CONSOLE_IRQ		2
-#define CONSOLE_WRITE_IRQ	3
-#define UBD_IRQ			4
-#define UM_ETH_IRQ		5
-#define SSL_IRQ			6
-#define SSL_WRITE_IRQ		7
-#define ACCEPT_IRQ		8
-#define MCONSOLE_IRQ		9
-#define WINCH_IRQ		10
-#define SIGIO_WRITE_IRQ 	11
-#define TELNETD_IRQ 		12
-#define XTERM_IRQ 		13
-#define RANDOM_IRQ 		14
-
-#define LAST_IRQ RANDOM_IRQ
-#define NR_IRQS (LAST_IRQ + 1)
+#define UBD_IRQ			2
+#define UM_ETH_IRQ		3
+#define ACCEPT_IRQ		4
+#define MCONSOLE_IRQ		5
+#define WINCH_IRQ		6
+#define SIGIO_WRITE_IRQ 	7
+#define TELNETD_IRQ 		8
+#define XTERM_IRQ 		9
+#define RANDOM_IRQ 		10
+#define SIGCHLD_IRQ		11
 
+#ifdef CONFIG_UML_NET_VECTOR
+
+#define VECTOR_BASE_IRQ		(SIGCHLD_IRQ + 1)
+#define VECTOR_IRQ_SPACE	8
+
+#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+
+#else
+
+#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
+
+#endif
+
+#define UM_LAST_SIGNAL_IRQ	64
+/* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */
+#ifdef CONFIG_PCI_MSI
+#define NR_IRQS			(UM_LAST_SIGNAL_IRQ + 64)
+#else
+#define NR_IRQS			UM_LAST_SIGNAL_IRQ
+#endif /* CONFIG_PCI_MSI */
+
+#include <asm-generic/irq.h>
 #endif
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index c780d8a16773..31e49e0894c5 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -1,42 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H
 
-extern int get_signals(void);
-extern int set_signals(int enable);
-extern void block_signals(void);
-extern void unblock_signals(void);
+int um_get_signals(void);
+int um_set_signals(int enable);
+void block_signals(void);
+void unblock_signals(void);
 
+#define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
-	return get_signals();
+	return um_get_signals();
 }
 
+#define arch_local_irq_restore arch_local_irq_restore
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-	set_signals(flags);
+	um_set_signals(flags);
 }
 
+#define arch_local_irq_enable arch_local_irq_enable
 static inline void arch_local_irq_enable(void)
 {
 	unblock_signals();
 }
 
+#define arch_local_irq_disable arch_local_irq_disable
 static inline void arch_local_irq_disable(void)
 {
 	block_signals();
 }
 
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags;
-	flags = arch_local_save_flags();
-	arch_local_irq_disable();
-	return flags;
-}
+#define ARCH_IRQ_DISABLED	0
 
-static inline bool arch_irqs_disabled(void)
-{
-	return arch_local_save_flags() == 0;
-}
+#include <asm-generic/irqflags.h>
 
 #endif
diff --git a/arch/um/include/asm/kasan.h b/arch/um/include/asm/kasan.h
new file mode 100644
index 000000000000..81bcdc0f962e
--- /dev/null
+++ b/arch/um/include/asm/kasan.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_KASAN_H
+#define __ASM_UM_KASAN_H
+
+#include <linux/init.h>
+#include <linux/const.h>
+
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+/* used in kasan_mem_to_shadow to divide by 8 */
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+#ifdef CONFIG_X86_64
+#define KASAN_HOST_USER_SPACE_END_ADDR 0x00007fffffffffffUL
+/* KASAN_SHADOW_SIZE is the size of total address space divided by 8 */
+#define KASAN_SHADOW_SIZE ((KASAN_HOST_USER_SPACE_END_ADDR + 1) >> \
+			KASAN_SHADOW_SCALE_SHIFT)
+#else
+#error "KASAN_SHADOW_SIZE is not defined for this sub-architecture"
+#endif /* CONFIG_X86_64 */
+
+#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET)
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+
+#ifdef CONFIG_KASAN
+void kasan_init(void);
+#else
+static inline void kasan_init(void) { }
+#endif /* CONFIG_KASAN */
+
+#endif /* __ASM_UM_KASAN_H */
diff --git a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h
deleted file mode 100644
index 2e0a6b1d8300..000000000000
--- a/arch/um/include/asm/kmap_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_KMAP_TYPES_H
-#define __UM_KMAP_TYPES_H
-
-/* No more #include "asm/arch/kmap_types.h" ! */
-
-#define KM_TYPE_NR 14
-
-#endif
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index da705448590f..07d48738b402 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -1,24 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __ARCH_UM_MMU_H
 #define __ARCH_UM_MMU_H
 
+#include "linux/types.h"
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
 #include <mm_id.h>
-#include <asm/mm_context.h>
 
 typedef struct mm_context {
 	struct mm_id id;
-	struct uml_arch_mm_context arch;
-	struct page *stub_pages[2];
-} mm_context_t;
+	struct mutex turnstile;
+
+	struct list_head list;
 
-extern void __switch_mm(struct mm_id * mm_idp);
+	/* Address range in need of a TLB sync */
+	spinlock_t sync_tlb_lock;
+	unsigned long sync_tlb_range_from;
+	unsigned long sync_tlb_range_to;
+} mm_context_t;
 
-/* Avoid tangled inclusion with asm/ldt.h */
-extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
-extern void free_ldt(struct mm_context *mm);
+#define INIT_MM_CONTEXT(mm)						\
+	.context = {							\
+		.turnstile = __MUTEX_INITIALIZER(mm.context.turnstile),	\
+		.sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock), \
+	}
 
 #endif
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index aa4a743dc4ab..c727e56ba116 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -1,58 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_MMU_CONTEXT_H
 #define __UM_MMU_CONTEXT_H
 
 #include <linux/sched.h>
-#include <asm/mmu.h>
-
-extern void uml_setup_stubs(struct mm_struct *mm);
-extern void arch_exit_mmap(struct mm_struct *mm);
-
-#define deactivate_mm(tsk,mm)	do { } while (0)
-
-extern void force_flush_all(void);
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
 
-static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
-{
-	/*
-	 * This is called by fs/exec.c and sys_unshare()
-	 * when the new ->mm is used for the first time.
-	 */
-	__switch_mm(&new->context.id);
-	down_write(&new->mmap_sem);
-	uml_setup_stubs(new);
-	up_write(&new->mmap_sem);
-}
+#include <asm/mm_hooks.h>
+#include <asm/mmu.h>
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
 			     struct task_struct *tsk)
 {
-	unsigned cpu = smp_processor_id();
-
-	if(prev != next){
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		if(next != &init_mm)
-			__switch_mm(&next->context.id);
-	}
-}
-
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
-{
-	uml_setup_stubs(mm);
-}
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, 
-				  struct task_struct *tsk)
-{
 }
 
+#define init_new_context init_new_context
 extern int init_new_context(struct task_struct *task, struct mm_struct *mm);
 
+#define destroy_context destroy_context
 extern void destroy_context(struct mm_struct *mm);
 
+#include <asm-generic/mmu_context.h>
+
 #endif
diff --git a/arch/um/include/asm/msi.h b/arch/um/include/asm/msi.h
new file mode 100644
index 000000000000..c8c6c381f394
--- /dev/null
+++ b/arch/um/include/asm/msi.h
@@ -0,0 +1 @@
+#include <asm-generic/msi.h>
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 5ff53d9185f7..2d363460d896 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
  * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
  */
 
 #ifndef __UM_PAGE_H
@@ -9,15 +9,13 @@
 
 #include <linux/const.h>
 
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT	12
-#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
+#include <vdso/page.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 struct page;
 
+#include <linux/pfn.h>
 #include <linux/types.h>
 #include <asm/vm-flags.h>
 
@@ -31,56 +29,35 @@ struct page;
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT)
-
-typedef struct { unsigned long pte_low, pte_high; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
-#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32))
-
-#define pte_get_bits(pte, bits) ((pte).pte_low & (bits))
-#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits))
-#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits))
-#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \
-			      smp_wmb(); \
-			      (to).pte_low = (from).pte_low; })
-#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high)
-#define pte_set_val(pte, phys, prot) \
-	({ (pte).pte_high = (phys) >> 32; \
-	   (pte).pte_low = (phys) | pgprot_val(prot); })
 
+#if CONFIG_PGTABLE_LEVELS > 2
+
+typedef struct { unsigned long pmd; } pmd_t;
 #define pmd_val(x)	((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
 
-typedef unsigned long long pfn_t;
-typedef unsigned long long phys_t;
-
-#else
+#if CONFIG_PGTABLE_LEVELS > 3
 
-typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x) ((pud_t) { (x) } )
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
-typedef struct { unsigned long pmd; } pmd_t;
-#define pmd_val(x)	((x).pmd)
-#define __pmd(x) ((pmd_t) { (x) } )
-#endif
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
 #define pte_val(x)	((x).pte)
 
-
 #define pte_get_bits(p, bits) ((p).pte & (bits))
 #define pte_set_bits(p, bits) ((p).pte |= (bits))
 #define pte_clear_bits(p, bits) ((p).pte &= ~(bits))
 #define pte_copy(to, from) ((to).pte = (from).pte)
-#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE))
+#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEEDSYNC))
 #define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot))
 
-typedef unsigned long pfn_t;
 typedef unsigned long phys_t;
 
-#endif
-
 typedef struct { unsigned long pgprot; } pgprot_t;
 
 typedef struct page *pgtable_t;
@@ -106,17 +83,17 @@ extern unsigned long uml_physmem;
  * casting is the right thing, but 32-bit UML can't have 64-bit virtual
  * addresses
  */
-#define __pa(virt) to_phys((void *) (unsigned long) (virt))
-#define __va(phys) to_virt((unsigned long) (phys))
+#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt))
+#define __va(phys) uml_to_virt((unsigned long) (phys))
 
-#define phys_to_pfn(p) ((pfn_t) ((p) >> PAGE_SHIFT))
-#define pfn_to_phys(pfn) ((phys_t) ((pfn) << PAGE_SHIFT))
+#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) PFN_PHYS(pfn)
 
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
+
 #endif	/* __UM_PAGE_H */
diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h
new file mode 100644
index 000000000000..238d2e7faff8
--- /dev/null
+++ b/arch/um/include/asm/pci.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_UM_PCI_H
+#define __ASM_UM_PCI_H
+#include <linux/types.h>
+#include <asm/io.h>
+
+/* Generic PCI */
+#include <asm-generic/pci.h>
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * This is a bit of an annoying hack, and it assumes we only have
+ * the virt-pci (if anything). Which is true, but still.
+ */
+void *pci_root_bus_fwnode(struct pci_bus *bus);
+#define pci_root_bus_fwnode	pci_root_bus_fwnode
+#endif
+
+#endif  /* __ASM_UM_PCI_H */
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index bf90b2aa2002..826ec44b58cd 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGALLOC_H
@@ -10,6 +10,8 @@
 
 #include <linux/mm.h>
 
+#include <asm-generic/pgalloc.h>
+
 #define pmd_populate_kernel(mm, pmd, pte) \
 	set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
 
@@ -17,45 +19,27 @@
 	set_pmd(pmd, __pmd(_PAGE_TABLE +			\
 		((unsigned long long)page_to_pfn(pte) <<	\
 			(unsigned long long) PAGE_SHIFT)))
-#define pmd_pgtable(pmd) pmd_page(pmd)
 
 /*
  * Allocate and free page tables.
  */
 extern pgd_t *pgd_alloc(struct mm_struct *);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long) pte);
-}
+#define __pte_free_tlb(tlb, pte, address)	\
+	tlb_remove_ptdesc((tlb), page_ptdesc(pte))
 
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-	pgtable_page_dtor(pte);
-	__free_page(pte);
-}
+#if CONFIG_PGTABLE_LEVELS > 2
 
-#define __pte_free_tlb(tlb,pte, address)		\
-do {							\
-	pgtable_page_dtor(pte);				\
-	tlb_remove_page((tlb),(pte));			\
-} while (0)
+#define __pmd_free_tlb(tlb, pmd, address)	\
+	tlb_remove_ptdesc((tlb), virt_to_ptdesc(pmd))
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
+#if CONFIG_PGTABLE_LEVELS > 3
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
-	free_page((unsigned long)pmd);
-}
+#define __pud_free_tlb(tlb, pud, address)	\
+	tlb_remove_ptdesc((tlb), virt_to_ptdesc(pud))
 
-#define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
-
-#define check_pgt_cache()	do { } while (0)
+#endif
 
 #endif
 
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index f534b73e753e..14ec16f92ce4 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -1,8 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGTABLE_2LEVEL_H
@@ -23,7 +23,6 @@
 #define PTRS_PER_PTE	1024
 #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
 #define PTRS_PER_PGD	1024
-#define FIRST_USER_ADDRESS	0
 
 #define pte_ERROR(e) \
         printk("%s:%d: bad pte %p(%08lx).\n", __FILE__, __LINE__, &(e), \
@@ -32,22 +31,12 @@
         printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \
 	       pgd_val(e))
 
-static inline int pgd_newpage(pgd_t pgd)	{ return 0; }
+static inline int pgd_needsync(pgd_t pgd)	{ return 0; }
 static inline void pgd_mkuptodate(pgd_t pgd)	{ }
 
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 
 #define pte_pfn(x) phys_to_pfn(pte_val(x))
-#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
 
-/*
- * Bits 0 through 4 are taken
- */
-#define PTE_FILE_MAX_BITS	27
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> 5)
-
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE })
-
 #endif
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
deleted file mode 100644
index 0032f9212e74..000000000000
--- a/arch/um/include/asm/pgtable-3level.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright 2003 PathScale Inc
- * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
- */
-
-#ifndef __UM_PGTABLE_3LEVEL_H
-#define __UM_PGTABLE_3LEVEL_H
-
-#include <asm-generic/pgtable-nopud.h>
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-
-#ifdef CONFIG_64BIT
-#define PGDIR_SHIFT	30
-#else
-#define PGDIR_SHIFT	31
-#endif
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/* PMD_SHIFT determines the size of the area a second-level page table can
- * map
- */
-
-#define PMD_SHIFT	21
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
-/*
- * entries per page directory level
- */
-
-#define PTRS_PER_PTE 512
-#ifdef CONFIG_64BIT
-#define PTRS_PER_PMD 512
-#define PTRS_PER_PGD 512
-#else
-#define PTRS_PER_PMD 1024
-#define PTRS_PER_PGD 1024
-#endif
-
-#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS	0
-
-#define pte_ERROR(e) \
-        printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \
-	       pte_val(e))
-#define pmd_ERROR(e) \
-        printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
-	       pmd_val(e))
-#define pgd_ERROR(e) \
-        printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
-	       pgd_val(e))
-
-#define pud_none(x)	(!(pud_val(x) & ~_PAGE_NEWPAGE))
-#define	pud_bad(x)	((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
-#define pud_present(x)	(pud_val(x) & _PAGE_PRESENT)
-#define pud_populate(mm, pud, pmd) \
-	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
-
-#ifdef CONFIG_64BIT
-#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
-#else
-#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
-#endif
-
-static inline int pgd_newpage(pgd_t pgd)
-{
-	return(pgd_val(pgd) & _PAGE_NEWPAGE);
-}
-
-static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
-
-#ifdef CONFIG_64BIT
-#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
-#else
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
-#endif
-
-struct mm_struct;
-extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
-
-static inline void pud_clear (pud_t *pud)
-{
-	set_pud(pud, __pud(_PAGE_NEWPAGE));
-}
-
-#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \
-			pmd_index(address))
-
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return phys_to_pfn(pte_val(pte));
-}
-
-static inline pte_t pfn_pte(pfn_t page_nr, pgprot_t pgprot)
-{
-	pte_t pte;
-	phys_t phys = pfn_to_phys(page_nr);
-
-	pte_set_val(pte, phys, pgprot);
-	return pte;
-}
-
-static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot)
-{
-	return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
-}
-
-/*
- * Bits 0 through 3 are taken in the low part of the pte,
- * put the 32 bits of offset into the high part.
- */
-#define PTE_FILE_MAX_BITS	32
-
-#ifdef CONFIG_64BIT
-
-#define pte_to_pgoff(p) ((p).pte >> 32)
-
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE })
-
-#else
-
-#define pte_to_pgoff(pte) ((pte).pte_high)
-
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
-
-#endif
-
-#endif
-
diff --git a/arch/um/include/asm/pgtable-4level.h b/arch/um/include/asm/pgtable-4level.h
new file mode 100644
index 000000000000..7a271b7b83d2
--- /dev/null
+++ b/arch/um/include/asm/pgtable-4level.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2003 PathScale Inc
+ * Derived from include/asm-i386/pgtable.h
+ */
+
+#ifndef __UM_PGTABLE_4LEVEL_H
+#define __UM_PGTABLE_4LEVEL_H
+
+#include <asm-generic/pgtable-nop4d.h>
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+
+#define PGDIR_SHIFT	39
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* PUD_SHIFT determines the size of the area a third-level page table can
+ * map
+ */
+
+#define PUD_SHIFT	30
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PMD_SHIFT determines the size of the area a second-level page table can
+ * map
+ */
+
+#define PMD_SHIFT	21
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/*
+ * entries per page directory level
+ */
+
+#define PTRS_PER_PTE 512
+#define PTRS_PER_PMD 512
+#define PTRS_PER_PUD 512
+#define PTRS_PER_PGD 512
+
+#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
+
+#define pte_ERROR(e) \
+        printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pte_val(e))
+#define pmd_ERROR(e) \
+        printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pmd_val(e))
+#define pud_ERROR(e) \
+        printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pud_val(e))
+#define pgd_ERROR(e) \
+        printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pgd_val(e))
+
+#define pud_none(x)	(!(pud_val(x) & ~_PAGE_NEEDSYNC))
+#define	pud_bad(x)	((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pud_present(x)	(pud_val(x) & _PAGE_PRESENT)
+#define pud_populate(mm, pud, pmd) \
+	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
+
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+
+#define p4d_none(x)	(!(p4d_val(x) & ~_PAGE_NEEDSYNC))
+#define	p4d_bad(x)	((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define p4d_present(x)	(p4d_val(x) & _PAGE_PRESENT)
+#define p4d_populate(mm, p4d, pud) \
+	set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud)))
+
+#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval))
+
+
+static inline int pgd_needsync(pgd_t pgd)
+{
+	return pgd_val(pgd) & _PAGE_NEEDSYNC;
+}
+
+static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEEDSYNC; }
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+static inline void pud_clear (pud_t *pud)
+{
+	set_pud(pud, __pud(_PAGE_NEEDSYNC));
+}
+
+static inline void p4d_clear (p4d_t *p4d)
+{
+	set_p4d(p4d, __p4d(_PAGE_NEEDSYNC));
+}
+
+#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
+#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK))
+
+#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK)
+#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK))
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return phys_to_pfn(pte_val(pte));
+}
+
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+	return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
+}
+
+#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index bf974f712af7..3b42b0f45bf6 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -1,31 +1,35 @@
-/* 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgtable.h
- * Licensed under the GPL
  */
 
 #ifndef __UM_PGTABLE_H
 #define __UM_PGTABLE_H
 
-#include <asm/fixmap.h>
+#include <asm/page.h>
+#include <linux/mm_types.h>
 
 #define _PAGE_PRESENT	0x001
-#define _PAGE_NEWPAGE	0x002
-#define _PAGE_NEWPROT	0x004
+#define _PAGE_NEEDSYNC	0x002
 #define _PAGE_RW	0x020
 #define _PAGE_USER	0x040
 #define _PAGE_ACCESSED	0x080
 #define _PAGE_DIRTY	0x100
 /* If _PAGE_PRESENT is clear, we use these: */
-#define _PAGE_FILE	0x008	/* nonlinear file mapping, saved PTE; unset:swap */
 #define _PAGE_PROTNONE	0x010	/* if the user mapped it with PROT_NONE;
 				   pte_present gives true */
 
-#ifdef CONFIG_3_LEVEL_PGTABLES
-#include <asm/pgtable-3level.h>
-#else
+/* We borrow bit 10 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE	0x400
+
+#if CONFIG_PGTABLE_LEVELS == 4
+#include <asm/pgtable-4level.h>
+#elif CONFIG_PGTABLE_LEVELS == 2
 #include <asm/pgtable-2level.h>
+#else
+#error "Unsupported number of page table levels"
 #endif
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
@@ -33,8 +37,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* zero page used for uninitialized stuff */
 extern unsigned long *empty_zero_page;
 
-#define pgtable_cache_init() do ; while (0)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
@@ -43,19 +45,15 @@ extern unsigned long *empty_zero_page;
  * area for the same reason. ;)
  */
 
-extern unsigned long end_iomem;
+#ifndef COMPILE_OFFSETS
+#include <as-layout.h> /* for high_physmem */
+#endif
 
 #define VMALLOC_OFFSET	(__va_space)
-#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
-#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK)
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
-#else
-# define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
-#endif
+#define VMALLOC_START	((high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+#define VMALLOC_END	(TASK_SIZE-2*PAGE_SIZE)
 #define MODULES_VADDR	VMALLOC_START
 #define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -75,23 +73,6 @@ extern unsigned long end_iomem;
  * Also, write permissions imply read permissions. This is the closest we can
  * get..
  */
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY
-#define __P101	PAGE_READONLY
-#define __P110	PAGE_COPY
-#define __P111	PAGE_COPY
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY
-#define __S101	PAGE_READONLY
-#define __S110	PAGE_SHARED
-#define __S111	PAGE_SHARED
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -99,20 +80,24 @@ extern unsigned long end_iomem;
  */
 #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
 
-#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
+#define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC))
 
-#define pmd_none(x)	(!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
+#define pmd_none(x)	(!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC))
 #define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp)	do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
+#define pmd_clear(xp)	do { pmd_val(*(xp)) = _PAGE_NEEDSYNC; } while (0)
 
-#define pmd_newpage(x)  (pmd_val(x) & _PAGE_NEWPAGE)
-#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE)
+#define pmd_needsync(x)   (pmd_val(x) & _PAGE_NEEDSYNC)
+#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEEDSYNC)
 
-#define pud_newpage(x)  (pud_val(x) & _PAGE_NEWPAGE)
-#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE)
+#define pud_needsync(x)   (pud_val(x) & _PAGE_NEEDSYNC)
+#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEEDSYNC)
 
+#define p4d_needsync(x)   (p4d_val(x) & _PAGE_NEEDSYNC)
+#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEEDSYNC)
+
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
 #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
 
 #define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -135,7 +120,7 @@ static inline int pte_none(pte_t pte)
  * Undefined behaviour if not..
  */
 static inline int pte_read(pte_t pte)
-{ 
+{
 	return((pte_get_bits(pte, _PAGE_USER)) &&
 	       !(pte_get_bits(pte, _PAGE_PROTNONE)));
 }
@@ -151,14 +136,6 @@ static inline int pte_write(pte_t pte)
 	       !(pte_get_bits(pte, _PAGE_PROTNONE)));
 }
 
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
-	return pte_get_bits(pte, _PAGE_FILE);
-}
-
 static inline int pte_dirty(pte_t pte)
 {
 	return pte_get_bits(pte, _PAGE_DIRTY);
@@ -169,19 +146,9 @@ static inline int pte_young(pte_t pte)
 	return pte_get_bits(pte, _PAGE_ACCESSED);
 }
 
-static inline int pte_newpage(pte_t pte)
+static inline int pte_needsync(pte_t pte)
 {
-	return pte_get_bits(pte, _PAGE_NEWPAGE);
-}
-
-static inline int pte_newprot(pte_t pte)
-{ 
-	return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
-}
-
-static inline int pte_special(pte_t pte)
-{
-	return 0;
+	return pte_get_bits(pte, _PAGE_NEEDSYNC);
 }
 
 /*
@@ -190,38 +157,32 @@ static inline int pte_special(pte_t pte)
  * =================================
  */
 
-static inline pte_t pte_mknewprot(pte_t pte)
-{
-	pte_set_bits(pte, _PAGE_NEWPROT);
-	return(pte);
-}
-
 static inline pte_t pte_mkclean(pte_t pte)
 {
 	pte_clear_bits(pte, _PAGE_DIRTY);
 	return(pte);
 }
 
-static inline pte_t pte_mkold(pte_t pte)	
-{ 
+static inline pte_t pte_mkold(pte_t pte)
+{
 	pte_clear_bits(pte, _PAGE_ACCESSED);
 	return(pte);
 }
 
 static inline pte_t pte_wrprotect(pte_t pte)
-{ 
+{
 	pte_clear_bits(pte, _PAGE_RW);
-	return(pte_mknewprot(pte)); 
+	return pte;
 }
 
 static inline pte_t pte_mkread(pte_t pte)
-{ 
+{
 	pte_set_bits(pte, _PAGE_USER);
-	return(pte_mknewprot(pte)); 
+	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
-{ 
+{
 	pte_set_bits(pte, _PAGE_DIRTY);
 	return(pte);
 }
@@ -232,28 +193,21 @@ static inline pte_t pte_mkyoung(pte_t pte)
 	return(pte);
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)	
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte_set_bits(pte, _PAGE_RW);
-	return(pte_mknewprot(pte)); 
+	return pte;
 }
 
-static inline pte_t pte_mkuptodate(pte_t pte)	
+static inline pte_t pte_mkuptodate(pte_t pte)
 {
-	pte_clear_bits(pte, _PAGE_NEWPAGE);
-	if(pte_present(pte))
-		pte_clear_bits(pte, _PAGE_NEWPROT);
-	return(pte); 
+	pte_clear_bits(pte, _PAGE_NEEDSYNC);
+	return pte;
 }
 
-static inline pte_t pte_mknewpage(pte_t pte)
-{
-	pte_set_bits(pte, _PAGE_NEWPAGE);
-	return(pte);
-}
-
-static inline pte_t pte_mkspecial(pte_t pte)
+static inline pte_t pte_mkneedsync(pte_t pte)
 {
+	pte_set_bits(pte, _PAGE_NEEDSYNC);
 	return(pte);
 }
 
@@ -261,115 +215,124 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
 {
 	pte_copy(*pteptr, pteval);
 
-	/* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so
-	 * fix_range knows to unmap it.  _PAGE_NEWPROT is specific to
-	 * mapped pages.
+	/* If it's a swap entry, it needs to be marked _PAGE_NEEDSYNC so
+	 * update_pte_range knows to unmap it.
 	 */
 
-	*pteptr = pte_mknewpage(*pteptr);
-	if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr);
+	*pteptr = pte_mkneedsync(*pteptr);
+}
+
+#define PFN_PTE_SHIFT		PAGE_SHIFT
+
+static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
+				    unsigned long end)
+{
+	guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
+	if (!mm->context.sync_tlb_range_to) {
+		mm->context.sync_tlb_range_from = start;
+		mm->context.sync_tlb_range_to = end;
+	} else {
+		if (start < mm->context.sync_tlb_range_from)
+			mm->context.sync_tlb_range_from = start;
+		if (end > mm->context.sync_tlb_range_to)
+			mm->context.sync_tlb_range_to = end;
+	}
+}
+
+#define set_ptes set_ptes
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte, int nr)
+{
+	/* Basically the default implementation */
+	size_t length = nr * PAGE_SIZE;
+
+	for (;;) {
+		set_pte(ptep, pte);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
+	}
+
+	um_tlb_mark_sync(mm, addr, addr + length);
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
-	return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE);
+	return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEEDSYNC);
 }
 
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-
-#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
 #define __virt_to_page(virt) phys_to_page(__pa(virt))
-#define page_to_phys(page) pfn_to_phys((pfn_t) page_to_pfn(page))
 #define virt_to_page(addr) __virt_to_page((const unsigned long) addr)
 
-#define mk_pte(page, pgprot) \
-	({ pte_t pte;					\
-							\
-	pte_set_val(pte, page_to_phys(page), (pgprot));	\
-	if (pte_present(pte))				\
-		pte_mknewprot(pte_mknewpage(pte));	\
-	pte;})
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	pte_t pte;
+
+	pte_set_val(pte, pfn_to_phys(pfn), pgprot);
+
+	return pte;
+}
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot);
-	return pte; 
+	return pte;
 }
 
 /*
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
- *
- * this macro returns the index of the entry in the pgd page which would
- * control the given virtual address
- */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-
-/*
- * pgd_offset() returns a (pgd_t *)
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
- */
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
-
-/*
- * a shortcut which implies the use of the kernel's pgd, instead
- * of a process's
- */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
-/*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
  *
  * this macro returns the index of the entry in the pmd page which would
  * control the given virtual address
  */
 #define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-
-#define pmd_page_vaddr(pmd) \
-	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
-/*
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
- *
- * this macro returns the index of the entry in the pte page which would
- * control the given virtual address
- */
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, address) \
-	((pte_t *) pmd_page_vaddr(*(dir)) +  pte_index(address))
-#define pte_offset_map(dir, address) \
-	((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
-#define pte_unmap(pte) do { } while (0)
 
 struct mm_struct;
 extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 
-#define update_mmu_cache(vma,address,ptep) do ; while (0)
+#define update_mmu_cache(vma,address,ptep) do {} while (0)
+#define update_mmu_cache_range(vmf, vma, address, ptep, nr) do {} while (0)
 
-/* Encode and de-code a swap entry */
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *   <--------------- offset ----------------> E < type -> 0 0 0 1 0
+ *
+ *   E is the exclusive marker that is not stored in swap entries.
+ *   _PAGE_NEEDSYNC (bit 1) is always set to 1 in set_pte().
+ */
 #define __swp_type(x)			(((x).val >> 5) & 0x1f)
 #define __swp_offset(x)			((x).val >> 11)
 
 #define __swp_entry(type, offset) \
-	((swp_entry_t) { ((type) << 5) | ((offset) << 11) })
+	((swp_entry_t) { (((type) & 0x1f) << 5) | ((offset) << 11) })
 #define __pte_to_swp_entry(pte) \
 	((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#define kern_addr_valid(addr) (1)
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+	return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE);
+}
 
-#include <asm-generic/pgtable.h>
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	pte_set_bits(pte, _PAGE_SWP_EXCLUSIVE);
+	return pte;
+}
 
-/* Clear a kernel PTE and flush it from the TLB */
-#define kpte_clear_flush(ptep, vaddr)		\
-do {						\
-	pte_clear(&init_mm, (vaddr), (ptep));	\
-	__flush_tlb_one((vaddr));		\
-} while (0)
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	pte_clear_bits(pte, _PAGE_SWP_EXCLUSIVE);
+	return pte;
+}
 
 #endif
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index c03cd5a02364..7854d51b6639 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_PROCESSOR_GENERIC_H
@@ -11,65 +11,40 @@ struct pt_regs;
 struct task_struct;
 
 #include <asm/ptrace.h>
-#include <registers.h>
 #include <sysdep/archsetjmp.h>
 
 #include <linux/prefetch.h>
 
+#include <asm/cpufeatures.h>
+
 struct mm_struct;
 
 struct thread_struct {
-	struct task_struct *saved_task;
-	struct pt_regs regs;
-	int singlestep_syscall;
-	void *fault_addr;
-	jmp_buf *fault_catcher;
+	struct pt_regs *segv_regs;
 	struct task_struct *prev_sched;
-	unsigned long temp_stack;
 	struct arch_thread arch;
 	jmp_buf switch_buf;
-	int mm_count;
 	struct {
-		int op;
-		union {
-			struct {
-				int pid;
-			} fork, exec;
-			struct {
-				int (*proc)(void *);
-				void *arg;
-			} thread;
-			struct {
-				void (*proc)(void *);
-				void *arg;
-			} cb;
-		} u;
+		struct {
+			int (*proc)(void *);
+			void *arg;
+		} thread;
 	} request;
+
+	void *segv_continue;
+
+	/* Contains variable sized FP registers */
+	struct pt_regs regs;
 };
 
 #define INIT_THREAD \
 { \
 	.regs		   	= EMPTY_REGS,	\
-	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
-	.temp_stack		= 0, \
 	.arch			= INIT_ARCH_THREAD, \
-	.request		= { 0 } \
-}
-
-static inline void release_thread(struct task_struct *task)
-{
+	.request		= { } \
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
-static inline void mm_copy_segments(struct mm_struct *from_mm,
-				    struct mm_struct *new_mm)
-{
-}
-
-#define init_stack	(init_thread_union.stack)
-
 /*
  * User space process size: 3GB (default).
  */
@@ -96,23 +71,18 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry,
 
 struct cpuinfo_um {
 	unsigned long loops_per_jiffy;
-	int ipi_pipe[2];
+	int cache_alignment;
+	union {
+		__u32		x86_capability[NCAPINTS + NBUGINTS];
+		unsigned long	x86_capability_alignment;
+	};
 };
 
 extern struct cpuinfo_um boot_cpu_data;
 
-#define my_cpu_data		cpu_data[smp_processor_id()]
-
-#ifdef CONFIG_SMP
-extern struct cpuinfo_um cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
-#else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
-#endif
-
+#define cache_line_size()	(boot_cpu_data.cache_alignment)
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #endif
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index cb9b3c47ca8e..86d74f9d33cf 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -1,14 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_PTRACE_GENERIC_H
 #define __UM_PTRACE_GENERIC_H
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
-#include <asm/ptrace-abi.h>
 #include <sysdep/ptrace.h>
 
 struct pt_regs {
@@ -28,6 +27,8 @@ struct pt_regs {
 
 #define instruction_pointer(regs) PT_REGS_IP(regs)
 
+#define PTRACE_OLDSETOPTIONS 21
+
 struct task_struct;
 
 extern long subarch_ptrace(struct task_struct *child, long request,
@@ -35,9 +36,12 @@ extern long subarch_ptrace(struct task_struct *child, long request,
 extern unsigned long getreg(struct task_struct *child, int regno);
 extern int putreg(struct task_struct *child, int regno, unsigned long value);
 
-extern int arch_copy_tls(struct task_struct *new);
+extern int poke_user(struct task_struct *child, long addr, long data);
+extern int peek_user(struct task_struct *child, long addr, long data);
+
+extern int arch_set_tls(struct task_struct *new, unsigned long tls);
 extern void clear_flushed_tls(struct task_struct *task);
-extern void syscall_trace_enter(struct pt_regs *regs);
+extern int syscall_trace_enter(struct pt_regs *regs);
 extern void syscall_trace_leave(struct pt_regs *regs);
 
 #endif
diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h
new file mode 100644
index 000000000000..a3c1fb6ed6ad
--- /dev/null
+++ b/arch/um/include/asm/sections.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_SECTIONS_H
+#define __UM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char __binary_start[];
+extern char __syscall_stub_start[], __syscall_stub_end[];
+
+#endif
diff --git a/arch/um/include/asm/setup.h b/arch/um/include/asm/setup.h
index 99f086301f4c..80ada899f254 100644
--- a/arch/um/include/asm/setup.h
+++ b/arch/um/include/asm/setup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef SETUP_H_INCLUDED
 #define SETUP_H_INCLUDED
 
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index e4507938d8cf..be1743a6ff3c 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -1,32 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_SMP_H
 #define __UM_SMP_H
 
-#ifdef CONFIG_SMP
+#if IS_ENABLED(CONFIG_SMP)
 
-#include <linux/bitops.h>
-#include <asm/current.h>
 #include <linux/cpumask.h>
+#include <shared/smp.h>
 
-#define raw_smp_processor_id() (current_thread->cpu)
+#define raw_smp_processor_id() uml_curr_cpu()
 
-#define cpu_logical_map(n) (n)
-#define cpu_number_map(n) (n)
-extern int hard_smp_processor_id(void);
-#define NO_PROC_ID -1
+void arch_smp_send_reschedule(int cpu);
 
-extern int ncpus;
+void arch_send_call_function_single_ipi(int cpu);
 
+void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
-static inline void smp_cpus_done(unsigned int maxcpus)
-{
-}
-
-extern struct task_struct *idle_threads[NR_CPUS];
-
-#else
-
-#define hard_smp_processor_id()		0
-
-#endif
+#endif /* CONFIG_SMP */
 
 #endif
diff --git a/arch/um/include/asm/stacktrace.h b/arch/um/include/asm/stacktrace.h
new file mode 100644
index 000000000000..436b55952c3a
--- /dev/null
+++ b/arch/um/include/asm/stacktrace.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UML_STACKTRACE_H
+#define _ASM_UML_STACKTRACE_H
+
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+
+struct stack_frame {
+	struct stack_frame *next_frame;
+	unsigned long return_address;
+};
+
+struct stacktrace_ops {
+	void (*address)(void *data, unsigned long address, int reliable);
+};
+
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs)
+{
+	if (!task || task == current)
+		return segv_regs ? PT_REGS_BP(segv_regs) : current_bp();
+	return KSTK_EBP(task);
+}
+#else
+static inline unsigned long
+get_frame_pointer(struct task_struct *task, struct pt_regs *segv_regs)
+{
+	return 0;
+}
+#endif
+
+static inline unsigned long
+*get_stack_pointer(struct task_struct *task, struct pt_regs *segv_regs)
+{
+	if (!task || task == current)
+		return segv_regs ? (unsigned long *)PT_REGS_SP(segv_regs) : current_sp();
+	return (unsigned long *)KSTK_ESP(task);
+}
+
+void dump_trace(struct task_struct *tsk, const struct stacktrace_ops *ops, void *data);
+
+#endif /* _ASM_UML_STACKTRACE_H */
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
new file mode 100644
index 000000000000..bcd73bcfe577
--- /dev/null
+++ b/arch/um/include/asm/syscall-generic.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Access to user system call parameters and results
+ *
+ * See asm-generic/syscall.h for function descriptions.
+ *
+ * Copyright (C) 2015 Mickaël Salaün <mic@digikod.net>
+ */
+
+#ifndef __UM_SYSCALL_GENERIC_H
+#define __UM_SYSCALL_GENERIC_H
+
+#include <asm/ptrace.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <sysdep/ptrace.h>
+
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+{
+
+	return PT_REGS_SYSCALL_NR(regs);
+}
+
+static inline void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr)
+{
+	PT_REGS_SYSCALL_NR(regs) = nr;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	const long error = regs_return_value(regs);
+
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs_return_value(regs);
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	PT_REGS_SET_SYSCALL_RETURN(regs, (long) error ?: val);
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	const struct uml_pt_regs *r = &regs->regs;
+
+	*args++ = UPT_SYSCALL_ARG1(r);
+	*args++ = UPT_SYSCALL_ARG2(r);
+	*args++ = UPT_SYSCALL_ARG3(r);
+	*args++ = UPT_SYSCALL_ARG4(r);
+	*args++ = UPT_SYSCALL_ARG5(r);
+	*args   = UPT_SYSCALL_ARG6(r);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 const unsigned long *args)
+{
+	struct uml_pt_regs *r = &regs->regs;
+
+	UPT_SYSCALL_ARG1(r) = *args++;
+	UPT_SYSCALL_ARG2(r) = *args++;
+	UPT_SYSCALL_ARG3(r) = *args++;
+	UPT_SYSCALL_ARG4(r) = *args++;
+	UPT_SYSCALL_ARG5(r) = *args++;
+	UPT_SYSCALL_ARG6(r) = *args;
+}
+
+/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
+
+#endif	/* __UM_SYSCALL_GENERIC_H */
diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h
deleted file mode 100644
index c8d332b56b98..000000000000
--- a/arch/um/include/asm/sysrq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __UM_SYSRQ_H
-#define __UM_SYSRQ_H
-
-struct task_struct;
-extern void show_trace(struct task_struct* task, unsigned long *stack);
-
-#endif
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 2c8eeb2df8b4..7a6f4dc99fa1 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -1,80 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_THREAD_INFO_H
 #define __UM_THREAD_INFO_H
 
-#ifndef __ASSEMBLY__
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
+#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE)
+
+#ifndef __ASSEMBLER__
 
 #include <asm/types.h>
 #include <asm/page.h>
-#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <sysdep/ptrace_user.h>
 
 struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;  /* 0 => preemptable,
 						   <0 => BUG */
-	mm_segment_t		addr_limit;	/* thread address space:
-					 	   0-0xBFFFFFFF for user
-						   0-0xFFFFFFFF for kernel */
-	struct restart_block    restart_block;
-	struct thread_info	*real_thread;    /* Points to non-IRQ stack */
 };
 
 #define INIT_THREAD_INFO(tsk)			\
 {						\
-	.task =		&tsk,			\
-	.exec_domain =	&default_exec_domain,	\
 	.flags =		0,		\
 	.cpu =		0,			\
 	.preempt_count = INIT_PREEMPT_COUNT,	\
-	.addr_limit =	KERNEL_DS,		\
-	.restart_block =  {			\
-		.fn =  do_no_restart_syscall,	\
-	},					\
-	.real_thread = NULL,			\
 }
 
-#define init_thread_info	(init_thread_union.thread_info)
-#define init_stack		(init_thread_union.stack)
-
-#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE)
-/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	unsigned long mask = THREAD_SIZE - 1;
-	void *p;
-
-	asm volatile ("" : "=r" (p) : "0" (&ti));
-	ti = (struct thread_info *) (((unsigned long)p) & ~mask);
-	return ti;
-}
-
-#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
-
 #endif
 
-#define PREEMPT_ACTIVE		0x10000000
-
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_NOTIFY_SIGNAL	3	/* signal notifications exist */
 #define TIF_RESTART_BLOCK	4
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_SYSCALL_AUDIT	6
 #define TIF_RESTORE_SIGMASK	7
 #define TIF_NOTIFY_RESUME	8
+#define TIF_SECCOMP		9	/* secure computing */
+#define TIF_SINGLESTEP		10	/* single stepping userspace */
+#define TIF_SYSCALL_TRACEPOINT	11	/* syscall tracepoint instrumentation */
+
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
+				 _TIF_NOTIFY_RESUME)
 
 #endif
diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h
index 0f4ada08f748..9f27176adb26 100644
--- a/arch/um/include/asm/timex.h
+++ b/arch/um/include/asm/timex.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_TIMEX_H
 #define __UM_TIMEX_H
 
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
-	return 0;
-}
-
 #define CLOCK_TICK_RATE (HZ)
 
+#include <asm-generic/timex.h>
+
 #endif
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 4febacd1a8a1..0422467bda5b 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -1,120 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UM_TLB_H
 #define __UM_TLB_H
 
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/percpu.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
-/* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.
- */
-struct mmu_gather {
-	struct mm_struct	*mm;
-	unsigned int		need_flush; /* Really unmapped some ptes? */
-	unsigned long		start;
-	unsigned long		end;
-	unsigned int		fullmm; /* non-zero means full mm flush */
-};
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
-					  unsigned long address)
-{
-	if (tlb->start > address)
-		tlb->start = address;
-	if (tlb->end < address + PAGE_SIZE)
-		tlb->end = address + PAGE_SIZE;
-}
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
-	tlb->need_flush = 0;
-
-	tlb->start = TASK_SIZE;
-	tlb->end = 0;
-
-	if (tlb->fullmm) {
-		tlb->start = 0;
-		tlb->end = TASK_SIZE;
-	}
-}
-
-static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
-{
-	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
-
-	init_tlb_gather(tlb);
-}
-
-extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-			       unsigned long end);
-
-static inline void
-tlb_flush_mmu(struct mmu_gather *tlb)
-{
-	if (!tlb->need_flush)
-		return;
+#include <linux/mm.h>
 
-	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-	init_tlb_gather(tlb);
-}
-
-/* tlb_finish_mmu
- *	Called at the end of the shootdown operation to free up any resources
- *	that were required.
- */
-static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-	tlb_flush_mmu(tlb);
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-}
-
-/* tlb_remove_page
- *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)),
- *	while handling the additional races in SMP caused by other CPUs
- *	caching valid mappings in their TLBs.
- */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
-	tlb->need_flush = 1;
-	free_page_and_swap_cache(page);
-	return 1; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
-	__tlb_remove_page(tlb, page);
-}
-
-/**
- * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
- *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate.   This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
- */
-#define tlb_remove_tlb_entry(tlb, ptep, address)		\
-	do {							\
-		tlb->need_flush = 1;				\
-		__tlb_remove_tlb_entry(tlb, ptep, address);	\
-	} while (0)
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-
-#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
-
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-
-#define tlb_migrate_finish(mm) do {} while (0)
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm-generic/tlb.h>
 
 #endif
diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h
index 614f2c091178..13a3009942be 100644
--- a/arch/um/include/asm/tlbflush.h
+++ b/arch/um/include/asm/tlbflush.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __UM_TLBFLUSH_H
@@ -9,23 +9,51 @@
 #include <linux/mm.h>
 
 /*
- * TLB flushing:
+ * In UML, we need to sync the TLB over by using mmap/munmap syscalls from
+ * the process handling the MM (which can be the kernel itself).
+ *
+ * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
+ * we catch all PTE transitions where memory that was unusable becomes usable.
+ * While with flush_tlb_* we can track any memory that becomes unusable and
+ * even if a higher layer of the page table was modified.
+ *
+ * So, we simply track updates using both methods and mark the memory area to
+ * be synced later on. The only special case is that flush_tlb_kern_* needs to
+ * be executed immediately as there is no good synchronization point in that
+ * case. In contrast, in the set_ptes case we can wait for the next kernel
+ * segfault before we do the synchornization.
  *
- *  - flush_tlb() flushes the current mm struct TLBs
  *  - flush_tlb_all() flushes all processes TLBs
  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
  *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_kernel_vm() flushes the kernel vm area
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
  */
 
+extern int um_tlb_sync(struct mm_struct *mm);
+
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 
-			    unsigned long end);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
-extern void flush_tlb_kernel_vm(void);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-extern void __flush_tlb_one(unsigned long addr);
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long address)
+{
+	um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	um_tlb_mark_sync(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	um_tlb_mark_sync(&init_mm, start, end);
+
+	/* Kernel needs to be synced immediately */
+	um_tlb_sync(&init_mm);
+}
 
 #endif
diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
index 3f22fbf7ca1d..0df9ea4abda8 100644
--- a/arch/um/include/asm/uaccess.h
+++ b/arch/um/include/asm/uaccess.h
@@ -1,178 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
+ * Copyright (C) 2015 Richard Weinberger (richard@nod.at)
  */
 
 #ifndef __UM_UACCESS_H
 #define __UM_UACCESS_H
 
-/* thread_info has a mm_segment_t in it, so put the definition up here */
-typedef struct {
-	unsigned long seg;
-} mm_segment_t;
-
-#include <linux/thread_info.h>
-#include <linux/errno.h>
-#include <asm/processor.h>
 #include <asm/elf.h>
-
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not.  If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-
-#define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
-#define USER_DS		MAKE_MM_SEG(TASK_SIZE)
-
-#define get_ds()	(KERNEL_DS)
-#define get_fs()	(current_thread_info()->addr_limit)
-#define set_fs(x)	(current_thread_info()->addr_limit = (x))
-
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#include <linux/unaligned.h>
+#include <sysdep/faultinfo.h>
 
 #define __under_task_size(addr, size) \
 	(((unsigned long) (addr) < TASK_SIZE) && \
 	 (((unsigned long) (addr) + (size)) < TASK_SIZE))
 
-#define __access_ok_vsyscall(type, addr, size) \
-	 ((type == VERIFY_READ) && \
-	  ((unsigned long) (addr) >= FIXADDR_USER_START) && \
-	  ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
-	  ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))
-
 #define __addr_range_nowrap(addr, size) \
 	((unsigned long) (addr) <= ((unsigned long) (addr) + (size)))
 
-#define access_ok(type, addr, size) \
-	(__addr_range_nowrap(addr, size) && \
-	 (__under_task_size(addr, size) || \
-	  __access_ok_vsyscall(type, addr, size) || \
-	  segment_eq(get_fs(), KERNEL_DS)))
-
-extern int copy_from_user(void *to, const void __user *from, int n);
-extern int copy_to_user(void __user *to, const void *from, int n);
-
-/*
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-
-extern int strncpy_from_user(char *dst, const char __user *src, int count);
+extern unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+extern unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __clear_user(void __user *mem, unsigned long len);
+static inline int __access_ok(const void __user *ptr, unsigned long size);
 
-/*
- * __clear_user: - Zero a block of memory in user space, with less checking.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.  Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int __clear_user(void __user *mem, int len);
-
-/*
- * clear_user: - Zero a block of memory in user space.
- * @to:   Destination address, in user space.
- * @n:    Number of bytes to zero.
- *
- * Zero a block of memory in user space.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-extern int clear_user(void __user *mem, int len);
+/* Teach asm-generic/uaccess.h that we have C functions for these. */
+#define __access_ok __access_ok
+#define __clear_user __clear_user
 
-/*
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- * @n:   The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-extern int strnlen_user(const void __user *str, int len);
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
 
-#define __copy_from_user(to, from, n) copy_from_user(to, from, n)
+#include <asm-generic/uaccess.h>
 
-#define __copy_to_user(to, from, n) copy_to_user(to, from, n)
-
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
-
-#define __get_user(x, ptr) \
-({ \
-	const __typeof__(*(ptr)) __user *__private_ptr = (ptr);	\
-	__typeof__(x) __private_val;			\
-	int __private_ret = -EFAULT;			\
-	(x) = (__typeof__(*(__private_ptr)))0;				\
-	if (__copy_from_user((__force void *)&__private_val, (__private_ptr),\
-			     sizeof(*(__private_ptr))) == 0) {		\
-		(x) = (__typeof__(*(__private_ptr))) __private_val;	\
-		__private_ret = 0;					\
-	}								\
-	__private_ret;							\
-}) 
-
-#define get_user(x, ptr) \
-({ \
-        const __typeof__((*(ptr))) __user *private_ptr = (ptr); \
-        (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \
-	 __get_user(x, private_ptr) : ((x) = (__typeof__(*ptr))0, -EFAULT)); \
-})
-
-#define __put_user(x, ptr) \
-({ \
-        __typeof__(*(ptr)) __user *__private_ptr = ptr; \
-        __typeof__(*(__private_ptr)) __private_val; \
-        int __private_ret = -EFAULT; \
-        __private_val = (__typeof__(*(__private_ptr))) (x); \
-        if (__copy_to_user((__private_ptr), &__private_val, \
-			   sizeof(*(__private_ptr))) == 0) { \
-		__private_ret = 0; \
-	} \
-        __private_ret; \
-})
-
-#define put_user(x, ptr) \
-({ \
-        __typeof__(*(ptr)) __user *private_ptr = (ptr); \
-        (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \
-	 __put_user(x, private_ptr) : -EFAULT); \
-})
-
-#define strlen_user(str) strnlen_user(str, ~0U >> 1)
-
-struct exception_table_entry
+static inline int __access_ok(const void __user *ptr, unsigned long size)
 {
-        unsigned long insn;
-	unsigned long fixup;
-};
+	unsigned long addr = (unsigned long)ptr;
+	return __addr_range_nowrap(addr, size) && __under_task_size(addr, size);
+}
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	int __faulted;							\
+									\
+	___backtrack_faulted(__faulted);				\
+	if (__faulted) {						\
+		*((type *)dst) = (type) 0;				\
+		goto err_label;						\
+	}								\
+	*((type *)dst) = get_unaligned((type *)(src));			\
+	barrier();							\
+	current->thread.segv_continue = NULL;				\
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	int __faulted;							\
+									\
+	___backtrack_faulted(__faulted);				\
+	if (__faulted)							\
+		goto err_label;						\
+	put_unaligned(*((type *)src), (type *)(dst));			\
+	barrier();							\
+	current->thread.segv_continue = NULL;				\
+} while (0)
 
 #endif
diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h
new file mode 100644
index 000000000000..7ffa5437b761
--- /dev/null
+++ b/arch/um/include/asm/unwind.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_UML_UNWIND_H
+#define _ASM_UML_UNWIND_H
+
+static inline void
+unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+		   void *orc, size_t orc_size) {}
+
+#endif /* _ASM_UML_UNWIND_H */
diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h
new file mode 100644
index 000000000000..9a7b9ed93733
--- /dev/null
+++ b/arch/um/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_UM_VMALLOC_H
+#define _ASM_UM_VMALLOC_H
+
+#endif /* _ASM_UM_VMALLOC_H */
diff --git a/arch/um/include/asm/vmlinux.lds.h b/arch/um/include/asm/vmlinux.lds.h
new file mode 100644
index 000000000000..149494ae78ea
--- /dev/null
+++ b/arch/um/include/asm/vmlinux.lds.h
@@ -0,0 +1,2 @@
+#include <asm/thread_info.h>
+#include <asm-generic/vmlinux.lds.h>
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
new file mode 100644
index 000000000000..647fae200c5d
--- /dev/null
+++ b/arch/um/include/asm/xor.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_XOR_H
+#define _ASM_UM_XOR_H
+
+#ifdef CONFIG_64BIT
+#undef CONFIG_X86_32
+#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64))
+#else
+#define CONFIG_X86_32 1
+#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs))
+#endif
+
+#include <asm/cpufeature.h>
+#include <../../x86/include/asm/xor.h>
+#include <linux/time-internal.h>
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#undef XOR_SELECT_TEMPLATE
+/* pick an arbitrary one - measuring isn't possible with inf-cpu */
+#define XOR_SELECT_TEMPLATE(x)	\
+	(time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x)
+#endif
+
+#endif
diff --git a/arch/um/include/linux/smp-internal.h b/arch/um/include/linux/smp-internal.h
new file mode 100644
index 000000000000..1dbcbc23f9c9
--- /dev/null
+++ b/arch/um/include/linux/smp-internal.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_SMP_INTERNAL_H
+#define __UM_SMP_INTERNAL_H
+
+#if IS_ENABLED(CONFIG_SMP)
+
+void prefill_possible_map(void);
+
+#else /* !CONFIG_SMP */
+
+static inline void prefill_possible_map(void) { }
+
+#endif /* CONFIG_SMP */
+
+extern char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
+
+#endif /* __UM_SMP_INTERNAL_H */
diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h
new file mode 100644
index 000000000000..c274eb5ad55e
--- /dev/null
+++ b/arch/um/include/linux/time-internal.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#ifndef __TIMER_INTERNAL_H__
+#define __TIMER_INTERNAL_H__
+#include <linux/list.h>
+#include <asm/bug.h>
+#include <shared/timetravel.h>
+
+#define TIMER_MULTIPLIER 256
+#define TIMER_MIN_DELTA  500
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+struct time_travel_event {
+	unsigned long long time;
+	void (*fn)(struct time_travel_event *d);
+	struct list_head list;
+	bool pending, onstack;
+};
+
+void time_travel_sleep(void);
+
+static inline void
+time_travel_set_event_fn(struct time_travel_event *e,
+			 void (*fn)(struct time_travel_event *d))
+{
+	e->fn = fn;
+}
+
+void __time_travel_propagate_time(void);
+
+static inline void time_travel_propagate_time(void)
+{
+	if (time_travel_mode == TT_MODE_EXTERNAL)
+		__time_travel_propagate_time();
+}
+
+void __time_travel_wait_readable(int fd);
+
+static inline void time_travel_wait_readable(int fd)
+{
+	if (time_travel_mode == TT_MODE_EXTERNAL)
+		__time_travel_wait_readable(fd);
+}
+
+void time_travel_add_irq_event(struct time_travel_event *e);
+void time_travel_add_event_rel(struct time_travel_event *e,
+			       unsigned long long delay_ns);
+bool time_travel_del_event(struct time_travel_event *e);
+#else
+struct time_travel_event {
+};
+
+static inline void time_travel_sleep(void)
+{
+}
+
+/* this is a macro so the event/function need not exist */
+#define time_travel_set_event_fn(e, fn) do {} while (0)
+
+static inline void time_travel_propagate_time(void)
+{
+}
+
+static inline void time_travel_wait_readable(int fd)
+{
+}
+
+static inline void time_travel_add_irq_event(struct time_travel_event *e)
+{
+	WARN_ON(1);
+}
+
+/*
+ * not inlines so the data structure need not exist,
+ * cause linker failures
+ */
+extern void time_travel_not_configured(void);
+#define time_travel_add_event_rel(...) time_travel_not_configured()
+#define time_travel_del_event(...) time_travel_not_configured()
+#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
+
+extern unsigned long tt_extra_sched_jiffies;
+
+/*
+ * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used,
+ * which is intentional since we really shouldn't link it in that case.
+ */
+void time_travel_ndelay(unsigned long nsec);
+
+int um_setup_timer(void);
+
+#endif /* __TIMER_INTERNAL_H__ */
diff --git a/arch/um/include/linux/virtio-uml.h b/arch/um/include/linux/virtio-uml.h
new file mode 100644
index 000000000000..2f652fa90f04
--- /dev/null
+++ b/arch/um/include/linux/virtio-uml.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#ifndef __VIRTIO_UML_H__
+#define __VIRTIO_UML_H__
+
+void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+				  bool no_vq_suspend);
+
+#endif /* __VIRTIO_UML_H__ */
diff --git a/arch/um/include/shared/aio.h b/arch/um/include/shared/aio.h
deleted file mode 100644
index 423bae9153f8..000000000000
--- a/arch/um/include/shared/aio.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2004 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef AIO_H__
-#define AIO_H__
-
-enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP };
-
-struct aio_thread_reply {
-	void *data;
-	int err;
-};
-
-struct aio_context {
-	int reply_fd;
-	struct aio_context *next;
-};
-
-#define INIT_AIO_CONTEXT { .reply_fd	= -1, \
-			   .next	= NULL }
-
-extern int submit_aio(enum aio_type type, int fd, char *buf, int len,
-		      unsigned long long offset, int reply_fd,
-                      struct aio_context *aio);
-
-#endif
diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h
index 4f46abda060d..cc398a21ad96 100644
--- a/arch/um/include/shared/arch.h
+++ b/arch/um/include/shared/arch.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __ARCH_H__
@@ -12,4 +12,6 @@ extern void arch_check_bugs(void);
 extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
 extern void arch_examine_signal(int sig, struct uml_pt_regs *regs);
 
+void mc_set_rip(void *_mc, void *target);
+
 #endif
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 694c792bab4e..02ef258e3395 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __START_H__
@@ -20,48 +20,37 @@
  * 'UL' and other type specifiers unilaterally.  We
  * use the following macros to deal with this.
  */
+#define STUB_START stub_start
+#define STUB_CODE STUB_START
+#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
+#define STUB_DATA_PAGES 2
+#define STUB_SIZE ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE)
+#define STUB_END (STUB_START + STUB_SIZE)
 
-#ifdef __ASSEMBLY__
-#define _UML_AC(X, Y)	(Y)
-#else
-#define __UML_AC(X, Y)	(X(Y))
-#define _UML_AC(X, Y)	__UML_AC(X, Y)
-#endif
-
-#define STUB_START _UML_AC(, 0x100000)
-#define STUB_CODE _UML_AC((unsigned long), STUB_START)
-#define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE)
-#define STUB_END _UML_AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE)
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <sysdep/ptrace.h>
 
-struct cpu_task {
-	int pid;
-	void *task;
-};
+struct task_struct;
+extern struct task_struct *cpu_tasks[];
 
-extern struct cpu_task cpu_tasks[];
+extern unsigned long long physmem_size;
 
-extern unsigned long low_physmem;
 extern unsigned long high_physmem;
 extern unsigned long uml_physmem;
 extern unsigned long uml_reserved;
 extern unsigned long end_vm;
 extern unsigned long start_vm;
-extern unsigned long long highmem;
 
-extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end;
-extern unsigned long _unprotected_end;
 extern unsigned long brk_start;
 
-extern unsigned long host_task_size;
+extern unsigned long stub_start;
 
-extern int linux_main(int argc, char **argv);
+extern int linux_main(int argc, char **argv, char **envp);
+extern void uml_finishsetup(void);
 
 struct siginfo;
-extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *);
+extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *, void *);
 
 #endif
 
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
deleted file mode 100644
index c92306809029..000000000000
--- a/arch/um/include/shared/common-offsets.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* for use by sys-$SUBARCH/kernel-offsets.c */
-
-DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
-
-DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
-DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
-DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
-DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
-
-DEFINE(UM_ELF_CLASS, ELF_CLASS);
-DEFINE(UM_ELFCLASS32, ELFCLASS32);
-DEFINE(UM_ELFCLASS64, ELFCLASS64);
-
-DEFINE(UM_NR_CPUS, NR_CPUS);
-
-DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
-DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
-
-/* For crypto assembler code. */
-DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
-
-DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
-
-DEFINE(UM_HZ, HZ);
-
-DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC);
-DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
-DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
-
-#ifdef CONFIG_PRINTK
-DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK);
-#endif
-#ifdef CONFIG_NO_HZ_COMMON
-DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON);
-#endif
-#ifdef CONFIG_UML_X86
-DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86);
-#endif
-#ifdef CONFIG_64BIT
-DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
-#endif
diff --git a/arch/um/include/shared/elf_user.h b/arch/um/include/shared/elf_user.h
index 53516b637272..fd461ee40c05 100644
--- a/arch/um/include/shared/elf_user.h
+++ b/arch/um/include/shared/elf_user.h
@@ -1,7 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2004 Fujitsu Siemens Computers GmbH
  * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
- * Licensed under the GPL
  */
 
 #ifndef __ELF_USER_H__
diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h
index e584e40ee832..ed952ac661ca 100644
--- a/arch/um/include/shared/frame_kern.h
+++ b/arch/um/include/shared/frame_kern.h
@@ -1,19 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __FRAME_KERN_H_
 #define __FRAME_KERN_H_
 
-extern int setup_signal_stack_sc(unsigned long stack_top, int sig, 
-				 struct k_sigaction *ka,
-				 struct pt_regs *regs, 
-				 sigset_t *mask);
-extern int setup_signal_stack_si(unsigned long stack_top, int sig, 
-				 struct k_sigaction *ka,
-				 struct pt_regs *regs, siginfo_t *info, 
-				 sigset_t *mask);
+extern int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
+				 struct pt_regs *regs, sigset_t *mask);
+extern int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
+				 struct pt_regs *regs, sigset_t *mask);
 
 #endif
 
diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
index b3906f860a87..1a659e2e8cc3 100644
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_UML_INIT_H
 #define _LINUX_UML_INIT_H
 
@@ -40,39 +41,19 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-#ifndef __KERNEL__
-#ifndef __section
-# define __section(S) __attribute__ ((__section__(#S)))
-#endif
-
-#if __GNUC__ == 3
-
-#if __GNUC_MINOR__ >= 3
-# define __used			__attribute__((__used__))
-#else
-# define __used			__attribute__((__unused__))
-#endif
-
-#else
-#if __GNUC__ == 4
-# define __used			__attribute__((__used__))
-#endif
-#endif
+#include <linux/compiler_types.h>
 
-#else
-#include <linux/compiler.h>
-#endif
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init		__section(.init.text)
-#define __initdata	__section(.init.data)
-#define __exitdata	__section(.exit.data)
-#define __exit_call	__used __section(.exitcall.exit)
+#define __init		__section(".init.text")
+#define __initdata	__section(".init.data")
+#define __exitdata	__section(".exit.data")
+#define __exit_call	__used __section(".exitcall.exit")
 
 #ifdef MODULE
-#define __exit		__section(.exit.text)
+#define __exit		__section(".exit.text")
 #else
-#define __exit		__used __section(.exit.text)
+#define __exit		__used __section(".exit.text")
 #endif
 
 #endif
@@ -83,14 +64,10 @@ struct uml_param {
         int (*setup_func)(char *, int *);
 };
 
-extern initcall_t __uml_initcall_start, __uml_initcall_end;
 extern initcall_t __uml_postsetup_start, __uml_postsetup_end;
 extern const char *__uml_help_start, *__uml_help_end;
 #endif
 
-#define __uml_initcall(fn)					  	\
-	static initcall_t __uml_initcall_##fn __uml_init_call = fn
-
 #define __uml_exitcall(fn)						\
 	static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn
 
@@ -125,13 +102,12 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
  * Mark functions and data as being only used at initialization
  * or exit time.
  */
-#define __uml_init_setup	__used __section(.uml.setup.init)
-#define __uml_setup_help	__used __section(.uml.help.init)
-#define __uml_init_call		__used __section(.uml.initcall.init)
-#define __uml_postsetup_call	__used __section(.uml.postsetup.init)
-#define __uml_exit_call		__used __section(.uml.exitcall.exit)
+#define __uml_init_setup	__used __section(".uml.setup.init")
+#define __uml_setup_help	__used __section(".uml.help.init")
+#define __uml_postsetup_call	__used __section(".uml.postsetup.init")
+#define __uml_exit_call		__used __section(".uml.exitcall.exit")
 
-#ifndef __KERNEL__
+#ifdef __UM_HOST__
 
 #define __define_initcall(level,fn) \
 	static initcall_t __initcall_##fn __used \
@@ -144,7 +120,7 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
 
 #define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define __init_call	__used __section(.initcall.init)
+#define __init_call	__used __section(".initcall.init")
 
 #endif
 
diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h
index e05bd667de15..44357fa6ee29 100644
--- a/arch/um/include/shared/irq_kern.h
+++ b/arch/um/include/shared/irq_kern.h
@@ -1,18 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __IRQ_KERN_H__
 #define __IRQ_KERN_H__
 
 #include <linux/interrupt.h>
+#include <linux/time-internal.h>
 #include <asm/ptrace.h>
+#include "irq_user.h"
 
-extern int um_request_irq(unsigned int irq, int fd, int type,
-			  irq_handler_t handler,
-			  unsigned long irqflags,  const char * devname,
-			  void *dev_id);
-void um_free_irq(unsigned int irq, void *dev);
+#define UM_IRQ_ALLOC	-1
+
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+		   irq_handler_t handler, unsigned long irqflags,
+		   const char *devname, void *dev_id);
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+/**
+ * um_request_irq_tt - request an IRQ with timetravel handler
+ *
+ * @irq: the IRQ number, or %UM_IRQ_ALLOC
+ * @fd: The file descriptor to request an IRQ for
+ * @type: read or write
+ * @handler: the (generic style) IRQ handler
+ * @irqflags: Linux IRQ flags
+ * @devname: name for this to show
+ * @dev_id: data pointer to pass to the IRQ handler
+ * @timetravel_handler: the timetravel interrupt handler, invoked with the IRQ
+ *	number, fd, dev_id and time-travel event pointer.
+ *
+ * Returns: The interrupt number assigned or a negative error.
+ *
+ * Note that the timetravel handler is invoked only if the time_travel_mode is
+ * %TT_MODE_EXTERNAL, and then it is invoked even while the system is suspended!
+ * This function must call time_travel_add_irq_event() for the event passed with
+ * an appropriate delay, before sending an ACK on the socket it was invoked for.
+ *
+ * If this was called while the system is suspended, then adding the event will
+ * cause the system to resume.
+ *
+ * Since this function will almost certainly have to handle the FD's condition,
+ * a read will consume the message, and after that it is up to the code using
+ * it to pass such a message to the @handler in whichever way it can.
+ *
+ * If time_travel_mode is not %TT_MODE_EXTERNAL the @timetravel_handler will
+ * not be invoked at all and the @handler must handle the FD becoming
+ * readable (or writable) instead. Use um_irq_timetravel_handler_used() to
+ * distinguish these cases.
+ *
+ * See virtio_uml.c for an example.
+ */
+int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
+		      irq_handler_t handler, unsigned long irqflags,
+		      const char *devname, void *dev_id,
+		      void (*timetravel_handler)(int, int, void *,
+						 struct time_travel_event *));
+#else
+static inline
+int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
+		      irq_handler_t handler, unsigned long irqflags,
+		      const char *devname, void *dev_id,
+		      void (*timetravel_handler)(int, int, void *,
+						 struct time_travel_event *))
+{
+	return um_request_irq(irq, fd, type, handler, irqflags,
+			      devname, dev_id);
+}
 #endif
 
+static inline bool um_irq_timetravel_handler_used(void)
+{
+	return time_travel_mode == TT_MODE_EXTERNAL;
+}
+
+void um_free_irq(int irq, void *dev_id);
+void free_irqs(void);
+#endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df5633053957..746abc24a5d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __IRQ_USER_H__
@@ -8,24 +8,20 @@
 
 #include <sysdep/ptrace.h>
 
-struct irq_fd {
-	struct irq_fd *next;
-	void *id;
-	int fd;
-	int type;
-	int irq;
-	int events;
-	int current_events;
+enum um_irq_type {
+	IRQ_READ,
+	IRQ_WRITE,
+	NUM_IRQ_TYPES,
 };
 
-enum { IRQ_READ, IRQ_WRITE };
-
 struct siginfo;
-extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
+extern void sigio_handler(int sig, struct siginfo *unused_si,
+			  struct uml_pt_regs *regs, void *mc);
+extern void sigchld_handler(int sig, struct siginfo *unused_si,
+			   struct uml_pt_regs *regs, void *mc);
+void sigio_run_timetravel_handlers(void);
 extern void free_irq_by_fd(int fd);
-extern void reactivate_fd(int fd, int irqnum);
 extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
-extern int activate_ipi(int fd, int pid);
 
 #endif
diff --git a/arch/um/include/shared/kern.h b/arch/um/include/shared/kern.h
index 6cd01240bbf0..3a9c75a8413c 100644
--- a/arch/um/include/shared/kern.h
+++ b/arch/um/include/shared/kern.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __KERN_H__
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 83a91f976330..38321188c04c 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __KERN_UTIL_H__
@@ -13,31 +13,30 @@ struct siginfo;
 
 extern int uml_exitcode;
 
-extern int ncpus;
 extern int kmalloc_ok;
 
-#define UML_ROUND_UP(addr) \
-	((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
 extern unsigned long alloc_stack(int order, int atomic);
 extern void free_stack(unsigned long stack, int order);
 
-extern int do_signal(void);
+struct pt_regs;
+extern void do_signal(struct pt_regs *regs);
 extern void interrupt_end(void);
-extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs);
+extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
+			 void *mc);
 
 extern unsigned long segv(struct faultinfo fi, unsigned long ip,
-			  int is_user, struct uml_pt_regs *regs);
+			  int is_user, struct uml_pt_regs *regs,
+			  void *mc);
 extern int handle_page_fault(unsigned long address, unsigned long ip,
 			     int is_write, int is_user, int *code_out);
 
 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
-extern int smp_sigio_handler(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
-extern int is_syscall(unsigned long addr);
 
 extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 
+extern void uml_pm_wake(void);
+
 extern int start_uml(void);
 extern void paging_init(void);
 
@@ -48,22 +47,25 @@ extern void do_uml_exitcalls(void);
  * Are we disallowed to sleep? Used to choose between GFP_KERNEL and
  * GFP_ATOMIC.
  */
-extern int __cant_sleep(void);
+extern int __uml_cant_sleep(void);
 extern int get_current_pid(void);
 extern int copy_from_user_proc(void *to, void *from, int size);
-extern int cpu(void);
 extern char *uml_strdup(const char *string);
+int uml_need_resched(void);
 
 extern unsigned long to_irq_stack(unsigned long *mask_out);
 extern unsigned long from_irq_stack(int nested);
 
-extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
-extern int singlestepping(void *t);
+extern int singlestepping(void);
 
-extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
-extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
-extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
+extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+			 void *mc);
+extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+		  void *mc);
 extern void fatal_sigsegv(void) __attribute__ ((noreturn));
 
+void um_idle_sleep(void);
+
+void kasan_map_memory(void *start, size_t len);
 
 #endif
diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
index 9bdddf4c405b..c53e43d980c8 100644
--- a/arch/um/include/shared/longjmp.h
+++ b/arch/um/include/shared/longjmp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __UML_LONGJMP_H
 #define __UML_LONGJMP_H
 
@@ -11,13 +12,12 @@ extern void longjmp(jmp_buf, int);
 	longjmp(*buf, val);	\
 } while(0)
 
-#define UML_SETJMP(buf) ({ \
-	int n;	   \
-	volatile int enable;	\
-	enable = get_signals(); \
-	n = setjmp(*buf); \
-	if(n != 0) \
-		set_signals(enable); \
+#define UML_SETJMP(buf) ({				\
+	int n, enable;					\
+	enable = um_get_signals();			\
+	n = setjmp(*buf);				\
+	if(n != 0)					\
+		um_set_signals_trace(enable);		\
 	n; })
 
 #endif
diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h
index 5cd40e99e8d5..98aacd544108 100644
--- a/arch/um/include/shared/mem.h
+++ b/arch/um/include/shared/mem.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __MEM_H__
@@ -9,12 +9,12 @@
 extern int phys_mapping(unsigned long phys, unsigned long long *offset_out);
 
 extern unsigned long uml_physmem;
-static inline unsigned long to_phys(void *virt)
+static inline unsigned long uml_to_phys(void *virt)
 {
 	return(((unsigned long) virt) - uml_physmem);
 }
 
-static inline void *to_virt(unsigned long phys)
+static inline void *uml_to_virt(unsigned long phys)
 {
 	return((void *) uml_physmem + phys);
 }
diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h
index 46384acd547b..8a5b72872ff8 100644
--- a/arch/um/include/shared/mem_user.h
+++ b/arch/um/include/shared/mem_user.h
@@ -32,30 +32,10 @@
 #ifndef _MEM_USER_H
 #define _MEM_USER_H
 
-struct iomem_region {
-	struct iomem_region *next;
-	char *driver;
-	int fd;
-	int size;
-	unsigned long phys;
-	unsigned long virt;
-};
-
-extern struct iomem_region *iomem_regions;
-extern int iomem_size;
-
 #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1))
 
-extern int init_mem_user(void);
-extern void setup_memory(void *entry);
-extern unsigned long find_iomem(char *driver, unsigned long *len_out);
-extern int init_maps(unsigned long physmem, unsigned long iomem,
-		     unsigned long highmem);
-extern unsigned long get_vm(unsigned long len);
 extern void setup_physmem(unsigned long start, unsigned long usable,
-			  unsigned long len, unsigned long long highmem);
-extern void add_iomem(char *name, int fd, unsigned long size);
-extern unsigned long phys_offset(unsigned long phys);
+			  unsigned long len);
 extern void map_memory(unsigned long virt, unsigned long phys,
 		       unsigned long len, int r, int w, int x);
 
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
deleted file mode 100644
index 012ac87d4900..000000000000
--- a/arch/um/include/shared/net_kern.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_NET_KERN_H
-#define __UM_NET_KERN_H
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
-#include <linux/socket.h>
-#include <linux/list.h>
-#include <linux/workqueue.h>
-
-struct uml_net {
-	struct list_head list;
-	struct net_device *dev;
-	struct platform_device pdev;
-	int index;
-};
-
-struct uml_net_private {
-	struct list_head list;
-	spinlock_t lock;
-	struct net_device *dev;
-	struct timer_list tl;
-
-	struct work_struct work;
-	int fd;
-	unsigned char mac[ETH_ALEN];
-	int max_packet;
-	unsigned short (*protocol)(struct sk_buff *);
-	int (*open)(void *);
-	void (*close)(int, void *);
-	void (*remove)(void *);
-	int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
-	int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-
-	void (*add_address)(unsigned char *, unsigned char *, void *);
-	void (*delete_address)(unsigned char *, unsigned char *, void *);
-	char user[0];
-};
-
-struct net_kern_info {
-	void (*init)(struct net_device *, void *);
-	unsigned short (*protocol)(struct sk_buff *);
-	int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
-	int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
-};
-
-struct transport {
-	struct list_head list;
-	const char *name;
-	int (* const setup)(char *, char **, void *);
-	const struct net_user_info *user;
-	const struct net_kern_info *kern;
-	const int private_size;
-	const int setup_size;
-};
-
-extern struct net_device *ether_init(int);
-extern unsigned short ether_protocol(struct sk_buff *);
-extern int tap_setup_common(char *str, char *type, char **dev_name,
-			    char **mac_out, char **gate_addr);
-extern void register_transport(struct transport *new);
-extern unsigned short eth_protocol(struct sk_buff *skb);
-
-#endif
diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h
deleted file mode 100644
index 3dabbe128e40..000000000000
--- a/arch/um/include/shared/net_user.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_NET_USER_H__
-#define __UM_NET_USER_H__
-
-#define ETH_ADDR_LEN (6)
-#define ETH_HEADER_ETHERTAP (16)
-#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */
-#define ETH_MAX_PACKET (1500)
-
-#define UML_NET_VERSION (4)
-
-struct net_user_info {
-	int (*init)(void *, void *);
-	int (*open)(void *);
-	void (*close)(int, void *);
-	void (*remove)(void *);
-	void (*add_address)(unsigned char *, unsigned char *, void *);
-	void (*delete_address)(unsigned char *, unsigned char *, void *);
-	int max_packet;
-	int mtu;
-};
-
-extern void ether_user_init(void *data, void *dev);
-extern void iter_addresses(void *d, void (*cb)(unsigned char *,
-					       unsigned char *, void *),
-			   void *arg);
-
-extern void *get_output_buffer(int *len_out);
-extern void free_output_buffer(void *buffer);
-
-extern int tap_open_common(void *dev, char *gate_addr);
-extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr);
-
-extern void read_output(int fd, char *output_out, int len);
-
-extern int net_read(int fd, void *buf, int len);
-extern int net_recvfrom(int fd, void *buf, int len);
-extern int net_write(int fd, void *buf, int len);
-extern int net_send(int fd, void *buf, int len);
-extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len);
-
-extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg);
-
-extern char *split_if_spec(char *str, ...);
-
-extern int dev_netmask(void *d, void *m);
-
-#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 95feaa47a2fb..b26e94292fc1 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -1,15 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
+ * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __OS_H__
 #define __OS_H__
 
-#include <stdarg.h>
 #include <irq_user.h>
 #include <longjmp.h>
 #include <mm_id.h>
+/* This is to get size_t */
+#ifndef __UM_HOST__
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#endif
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
@@ -34,6 +41,8 @@
 #define OS_LIB_PATH	"/usr/lib/"
 #endif
 
+#define OS_SENDMSG_MAX_FDS 8
+
 /*
  * types taken from stat_file() in hostfs_user.c
  * (if they are wrong here, they are wrong there...).
@@ -134,15 +143,17 @@ extern int os_access(const char *file, int mode);
 extern int os_set_exec_close(int fd);
 extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
 extern int os_get_ifname(int fd, char *namebuf);
-extern int os_set_slip(int fd);
 extern int os_mode_fd(int fd, int mode);
 
 extern int os_seek_file(int fd, unsigned long long offset);
 extern int os_open_file(const char *file, struct openflags flags, int mode);
 extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
+extern int os_sync_file(int fd);
 extern int os_file_size(const char *file, unsigned long long *size_out);
-extern int os_file_modtime(const char *file, unsigned long *modtime);
+extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset);
+extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset);
+extern int os_file_modtime(const char *file, long long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
 extern int os_set_fd_async(int fd);
 extern int os_clear_fd_async(int fd);
@@ -150,45 +161,48 @@ extern int os_set_fd_block(int fd, int blocking);
 extern int os_accept_connection(int fd);
 extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
 extern int os_shutdown_socket(int fd, int r, int w);
+extern int os_dup_file(int fd);
 extern void os_close_file(int fd);
-extern int os_rcv_fd(int fd, int *helper_pid_out);
-extern int create_unix_socket(char *file, int len, int close_on_exec);
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+		      void *data, size_t data_len);
 extern int os_connect_socket(const char *name);
 extern int os_file_type(char *file);
 extern int os_file_mode(const char *file, struct openflags *mode_out);
 extern int os_lock_file(int fd, int excl);
 extern void os_flush_stdout(void);
-extern int os_stat_filesystem(char *path, long *bsize_out,
-			      long long *blocks_out, long long *bfree_out,
-			      long long *bavail_out, long long *files_out,
-			      long long *ffree_out, void *fsid_out,
-			      int fsid_size, long *namelen_out,
-			      long *spare_out);
-extern int os_change_dir(char *dir);
-extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
+extern int os_falloc_zeroes(int fd, unsigned long long offset, int count);
+extern int os_eventfd(unsigned int initval, int flags);
+extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
+			  const int *fds, unsigned int fds_num);
+int os_poll(unsigned int n, const int *fds);
+void *os_mmap_rw_shared(int fd, size_t size);
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size);
 
 /* start_up.c */
 extern void os_early_checks(void);
-extern void can_do_skas(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
+extern void get_host_cpu_features(
+	void (*flags_helper_func)(char *line),
+	void (*cache_helper_func)(char *line));
 
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
 
+/* tlb.c */
+extern void report_enomem(void);
+
 /* process.c */
-extern unsigned long os_process_pc(int pid);
-extern int os_process_parent(int pid);
-extern void os_stop_process(int pid);
+pid_t os_reap_child(void);
+extern void os_alarm_process(int pid);
 extern void os_kill_process(int pid, int reap_child);
 extern void os_kill_ptraced_process(int pid, int reap_child);
-extern long os_ptrace_ldt(long pid, long addr, long data);
 
 extern int os_getpid(void);
-extern int os_getpgrp(void);
 
 extern void init_new_thread_signals(void);
 
@@ -199,7 +213,11 @@ extern int os_protect_memory(void *addr, unsigned long len,
 extern int os_unmap_memory(void *addr, int len);
 extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
-extern void os_flush_stdout(void);
+
+void os_set_pdeathsig(void);
+
+int os_futex_wait(void *uaddr, unsigned int val);
+int os_futex_wake(void *uaddr);
 
 /* execvp.c */
 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
@@ -209,6 +227,11 @@ extern int run_helper_thread(int (*proc)(void *), void *arg,
 			     unsigned int flags, unsigned long *stack_out);
 extern int helper_wait(int pid);
 
+struct os_helper_thread;
+int os_run_helper_thread(struct os_helper_thread **td_out,
+			 void *(*routine)(void *), void *arg);
+void os_kill_helper_thread(struct os_helper_thread *td);
+void os_fix_helper_thread_signals(void);
 
 /* umid.c */
 extern int umid_file_name(char *name, char *buf, int len);
@@ -216,54 +239,60 @@ extern int set_umid(char *name);
 extern char *get_umid(void);
 
 /* signal.c */
-extern void timer_init(void);
+extern void timer_set_signal_handler(void);
 extern void set_sigstack(void *sig_stack, int size);
-extern void remove_sigstack(void);
 extern void set_handler(int sig);
+extern void send_sigio_to_self(void);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
-extern int get_signals(void);
-extern int set_signals(int enable);
+extern int um_get_signals(void);
+extern int um_set_signals(int enable);
+extern int um_set_signals_trace(int enable);
+extern void deliver_alarm(void);
+extern void register_pm_wake_signal(void);
+extern void block_signals_hard(void);
+extern void unblock_signals_hard(void);
+extern void mark_sigio_pending(void);
 
 /* util.c */
 extern void stack_protections(unsigned long address);
 extern int raw(int fd);
 extern void setup_machinename(char *machine_out);
 extern void setup_hostinfo(char *buf, int len);
+extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
 extern void os_dump_core(void) __attribute__ ((noreturn));
 extern void um_early_printk(const char *s, unsigned int n);
+extern void os_fix_helper_signals(void);
+extern void os_info(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
+extern void os_warn(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
 
 /* time.c */
-extern void idle_sleep(unsigned long long nsecs);
-extern int set_interval(void);
-extern int timer_one_shot(int ticks);
-extern long long disable_timer(void);
-extern void uml_idle_timer(void);
+void os_idle_prepare(void);
+extern void os_idle_sleep(void);
+extern int os_timer_create(void);
+extern int os_timer_set_interval(int cpu, unsigned long long nsecs);
+extern int os_timer_one_shot(int cpu, unsigned long long nsecs);
+extern void os_timer_disable(int cpu);
+extern long long os_persistent_clock_emulation(void);
 extern long long os_nsecs(void);
 
 /* skas/mem.c */
-extern long run_syscall_stub(struct mm_id * mm_idp,
-			     int syscall, unsigned long *args, long expected,
-			     void **addr, int done);
-extern long syscall_stub_data(struct mm_id * mm_idp,
-			      unsigned long *data, int data_count,
-			      void **addr, void **stub_addr);
-extern int map(struct mm_id * mm_idp, unsigned long virt,
-	       unsigned long len, int prot, int phys_fd,
-	       unsigned long long offset, int done, void **data);
-extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-		 int done, void **data);
-extern int protect(struct mm_id * mm_idp, unsigned long addr,
-		   unsigned long len, unsigned int prot, int done, void **data);
+int syscall_stub_flush(struct mm_id *mm_idp);
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp);
+void syscall_stub_dump_error(struct mm_id *mm_idp);
+
+int map(struct mm_id *mm_idp, unsigned long virt,
+	unsigned long len, int prot, int phys_fd,
+	unsigned long long offset);
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
 
 /* skas/process.c */
 extern int is_skas_winch(int pid, int fd, void *data);
-extern int start_userspace(unsigned long stub_stack);
-extern int copy_context_skas0(unsigned long stack, int pid);
+extern int start_userspace(struct mm_id *mm_id);
 extern void userspace(struct uml_pt_regs *regs);
-extern int map_stub_pages(int fd, unsigned long code, unsigned long data,
-			  unsigned long stack);
 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
 extern void switch_threads(jmp_buf *me, jmp_buf *you);
 extern int start_idle_thread(void *stack, jmp_buf *switch_buf);
@@ -273,29 +302,58 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
-extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
-extern void os_free_irq_later(struct irq_fd *active_fds,
-		int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
+extern int os_waiting_for_events_epoll(void);
+extern void *os_epoll_get_data_pointer(int index);
+extern int os_epoll_triggered(int index, int events);
+extern int os_event_mask(enum um_irq_type irq_type);
+extern int os_setup_epoll(void);
+extern int os_add_epoll_fd(int events, int fd, void *data);
+extern int os_mod_epoll_fd(int events, int fd, void *data);
+extern int os_del_epoll_fd(int fd);
 extern void os_set_ioignore(void);
+extern void os_close_epoll_fd(void);
+extern void um_irqs_suspend(void);
+extern void um_irqs_resume(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
 extern int ignore_sigio_fd(int fd);
-extern void maybe_sigio_broken(int fd, int read);
-extern void sigio_broken(int fd, int read);
-
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+extern void maybe_sigio_broken(int fd);
+extern void sigio_broken(void);
+/*
+ * unlocked versions for IRQ controller code.
+ *
+ * This is safe because it's used at suspend/resume and nothing
+ * else is running.
+ */
+extern int __add_sigio_fd(int fd);
+extern int __ignore_sigio_fd(int fd);
 
 /* tty.c */
 extern int get_pty(void);
 
-/* sys-$ARCH/task_size.c */
-extern unsigned long os_get_top_address(void);
+long syscall(long number, ...);
+
+/* irqflags tracing */
+extern void block_signals_trace(void);
+extern void unblock_signals_trace(void);
+extern void um_trace_signals_on(void);
+extern void um_trace_signals_off(void);
+
+/* time-travel */
+extern void deliver_time_travel_irqs(void);
+
+/* smp.c */
+#if IS_ENABLED(CONFIG_SMP)
+void os_init_smp(void);
+int os_start_cpu_thread(int cpu);
+void os_start_secondary(void *arg, jmp_buf *switch_buf);
+int os_send_ipi(int cpu, int vector);
+void os_local_ipi_enable(void);
+void os_local_ipi_disable(void);
+#else /* !CONFIG_SMP */
+static inline void os_local_ipi_enable(void) { }
+static inline void os_local_ipi_disable(void) { }
+#endif /* CONFIG_SMP */
 
 #endif
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 56b2f284b108..8a705d8f96ce 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __PTRACE_USER_H__
@@ -12,45 +12,4 @@
 extern int ptrace_getregs(long pid, unsigned long *regs_out);
 extern int ptrace_setregs(long pid, unsigned long *regs_in);
 
-/* syscall emulation path in ptrace */
-
-#ifndef PTRACE_SYSEMU
-#define PTRACE_SYSEMU 31
-#endif
-#ifndef PTRACE_SYSEMU_SINGLESTEP
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#endif
-
-/* On architectures, that started to support PTRACE_O_TRACESYSGOOD
- * in linux 2.4, there are two different definitions of
- * PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200.
- * For binary compatibility, 2.6 also supports the old "21", named
- * PTRACE_OLDSETOPTION. On these architectures, UML always must use
- * "21", to ensure the kernel runs on 2.4 and 2.6 host without
- * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
- * We also want to be able to build the kernel on 2.4, which doesn't
- * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
- * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
- *
- * On architectures, that start to support PTRACE_O_TRACESYSGOOD on
- * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
- * supported by the host kernel. In that case, our trick lets us use
- * the new 0x4200 with the name PTRACE_OLDSETOPTIONS.
- */
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS
-#endif
-
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
-#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \
-	(((int[3][3] ) { \
-		{ PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \
-		{ PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \
-		{ PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \
-		  PTRACE_SYSEMU_SINGLESTEP } }) \
-		[sysemu_mode][singlestep_mode])
-
 #endif
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index f5b76355ad71..7d81b2339a48 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -1,23 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2004 PathScale, Inc
- * Licensed under the GPL
  */
 
 #ifndef __REGISTERS_H
 #define __REGISTERS_H
 
 #include <sysdep/ptrace.h>
-#include <sysdep/archsetjmp.h>
 
-extern int save_fp_registers(int pid, unsigned long *fp_regs);
-extern int restore_fp_registers(int pid, unsigned long *fp_regs);
-extern int save_fpx_registers(int pid, unsigned long *fp_regs);
-extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
-extern int save_registers(int pid, struct uml_pt_regs *regs);
-extern int restore_registers(int pid, struct uml_pt_regs *regs);
-extern int init_registers(int pid);
+extern int init_pid_registers(int pid);
 extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
-extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
 extern int get_fp_registers(int pid, unsigned long *regs);
 extern int put_fp_registers(int pid, unsigned long *regs);
 
diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h
index 434f1a9ae4b3..c6c2edce1f6d 100644
--- a/arch/um/include/shared/sigio.h
+++ b/arch/um/include/shared/sigio.h
@@ -1,13 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __SIGIO_H__
 #define __SIGIO_H__
 
-extern int write_sigio_irq(int fd);
-extern int register_sigio_fd(int fd);
 extern void sigio_lock(void);
 extern void sigio_unlock(void);
 
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index 48dd0989ddaa..fb96c0bd8222 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -1,17 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __MM_ID_H
 #define __MM_ID_H
 
+#include <linux/compiler_types.h>
+
+#define STUB_MAX_FDS 4
+
 struct mm_id {
-	union {
-		int mm_fd;
-		int pid;
-	} u;
+	int pid;
 	unsigned long stack;
+	int syscall_data_len;
+
+	/* Only used with SECCOMP mode */
+	int sock;
+	int syscall_fd_num;
+	int syscall_fd_map[STUB_MAX_FDS];
 };
 
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile);
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile);
+
+void notify_mm_kill(int pid);
+
 #endif
diff --git a/arch/um/include/shared/skas/proc_mm.h b/arch/um/include/shared/skas/proc_mm.h
deleted file mode 100644
index 902809209603..000000000000
--- a/arch/um/include/shared/skas/proc_mm.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_PROC_MM_H
-#define __SKAS_PROC_MM_H
-
-#define MM_MMAP 54
-#define MM_MUNMAP 55
-#define MM_MPROTECT 56
-#define MM_COPY_SEGMENTS 57
-
-struct mm_mmap {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-struct mm_munmap {
-	unsigned long addr;
-	unsigned long len;
-};
-
-struct mm_mprotect {
-	unsigned long addr;
-	unsigned long len;
-	unsigned int prot;
-};
-
-struct proc_mm_op {
-	int op;
-	union {
-		struct mm_mmap mmap;
-		struct mm_munmap munmap;
-	        struct mm_mprotect mprotect;
-		int copy_segments;
-	} u;
-};
-
-#endif
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index c45df961c874..2237ffedec75 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __SKAS_H
@@ -8,15 +8,14 @@
 
 #include <sysdep/ptrace.h>
 
-extern int userspace_pid[];
-extern int proc_mm, ptrace_faultinfo, ptrace_ldt;
-extern int skas_needs_stub;
+extern int using_seccomp;
 
-extern int user_thread(unsigned long stack, int flags);
 extern void new_thread_handler(void);
 extern void handle_syscall(struct uml_pt_regs *regs);
-extern int new_mm(unsigned long stack);
-extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
+extern struct mm_id *current_mm_id(void);
+extern void current_mm_sync(void);
+void initial_jmpbuf_lock(void);
+void initial_jmpbuf_unlock(void);
 
 #endif
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index f6ed92c3727d..27db38e95df9 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -1,18 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
+
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #ifndef __STUB_DATA_H
 #define __STUB_DATA_H
 
-#include <sys/time.h>
+#include <linux/compiler_types.h>
+#include <as-layout.h>
+#include <sysdep/tls.h>
+#include <sysdep/stub-data.h>
+#include <mm_id.h>
+
+#define FUTEX_IN_CHILD 0
+#define FUTEX_IN_KERN 1
+
+struct stub_init_data {
+	int seccomp;
+
+	unsigned long stub_start;
+
+	int stub_code_fd;
+	unsigned long stub_code_offset;
+	int stub_data_fd;
+	unsigned long stub_data_offset;
+
+	unsigned long signal_handler;
+	unsigned long signal_restorer;
+};
+
+#define STUB_NEXT_SYSCALL(s) \
+	((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
+
+enum stub_syscall_type {
+	STUB_SYSCALL_UNSET = 0,
+	STUB_SYSCALL_MMAP,
+	STUB_SYSCALL_MUNMAP,
+};
+
+struct stub_syscall {
+	struct {
+		unsigned long addr;
+		unsigned long length;
+		unsigned long offset;
+		int fd;
+		int prot;
+	} mem;
+
+	enum stub_syscall_type syscall;
+};
 
 struct stub_data {
-	long offset;
-	int fd;
-	struct itimerval timer;
 	long err;
+
+	int syscall_data_len;
+	/* 128 leaves enough room for additional fields in the struct */
+	struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+
+	/* data shared with signal handler (only used in seccomp mode) */
+	short restart_wait;
+	unsigned int futex;
+	int signal;
+	unsigned short si_offset;
+	unsigned short mctx_offset;
+
+	/* seccomp architecture specific state restore */
+	struct stub_data_arch arch_data;
+
+	/* Stack for our signal handlers and for calling into . */
+	unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
 };
 
 #endif
diff --git a/arch/um/include/shared/skas_ptrace.h b/arch/um/include/shared/skas_ptrace.h
deleted file mode 100644
index 630a9c92b93c..000000000000
--- a/arch/um/include/shared/skas_ptrace.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_PTRACE_H
-#define __SKAS_PTRACE_H
-
-#define PTRACE_FAULTINFO 52
-#define PTRACE_SWITCH_MM 55
-
-#include <sysdep/skas_ptrace.h>
-
-#endif
diff --git a/arch/um/include/shared/smp.h b/arch/um/include/shared/smp.h
new file mode 100644
index 000000000000..06e3faa95091
--- /dev/null
+++ b/arch/um/include/shared/smp.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_SHARED_SMP_H
+#define __UM_SHARED_SMP_H
+
+#if IS_ENABLED(CONFIG_SMP)
+
+extern int uml_ncpus;
+
+int uml_curr_cpu(void);
+void uml_start_secondary(void *opaque);
+void uml_ipi_handler(int vector);
+
+#else /* !CONFIG_SMP */
+
+#define uml_ncpus 1
+#define uml_curr_cpu() 0
+
+#endif /* CONFIG_SMP */
+
+#endif /* __UM_SHARED_SMP_H */
diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h
new file mode 100644
index 000000000000..7c2b277b7eb0
--- /dev/null
+++ b/arch/um/include/shared/timetravel.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019-2021 Intel Corporation
+ */
+#ifndef _UM_TIME_TRAVEL_H_
+#define _UM_TIME_TRAVEL_H_
+
+enum time_travel_mode {
+	TT_MODE_OFF,
+	TT_MODE_BASIC,
+	TT_MODE_INFCPU,
+	TT_MODE_EXTERNAL,
+};
+
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+extern enum time_travel_mode time_travel_mode;
+extern int time_travel_should_print_bc_msg;
+#else
+#define time_travel_mode TT_MODE_OFF
+#define time_travel_should_print_bc_msg 0
+#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
+
+void _time_travel_print_bc_msg(void);
+static inline void time_travel_print_bc_msg(void)
+{
+	if (time_travel_should_print_bc_msg)
+		_time_travel_print_bc_msg();
+}
+
+#endif /* _UM_TIME_TRAVEL_H_ */
diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h
index 6395fef6b69b..815dd03e8707 100644
--- a/arch/um/include/shared/um_malloc.h
+++ b/arch/um/include/shared/um_malloc.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
- * Licensed under the GPL
  */
 
 #ifndef __UM_MALLOC_H__
@@ -11,8 +11,9 @@
 extern void *uml_kmalloc(int size, int flags);
 extern void kfree(const void *ptr);
 
-extern void *vmalloc(unsigned long size);
-extern void vfree(void *ptr);
+extern void *vmalloc_noprof(unsigned long size);
+#define vmalloc(...)		vmalloc_noprof(__VA_ARGS__)
+extern void vfree(const void *ptr);
 
 #endif /* __UM_MALLOC_H__ */
 
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index cef068563336..139eb78a4767 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -1,6 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #ifndef __USER_H__
@@ -16,11 +16,12 @@
  */
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
-/* This is to get size_t */
-#ifdef __KERNEL__
+/* This is to get size_t and NULL */
+#ifndef __UM_HOST__
 #include <linux/types.h>
 #else
 #include <stddef.h>
+#include <sys/types.h>
 #endif
 
 extern void panic(const char *fmt, ...)
@@ -37,19 +38,29 @@ extern void panic(const char *fmt, ...)
 #define UM_KERN_DEBUG	KERN_DEBUG
 #define UM_KERN_CONT	KERN_CONT
 
-#ifdef UML_CONFIG_PRINTK
-extern int printk(const char *fmt, ...)
+#if IS_ENABLED(CONFIG_PRINTK)
+#define printk(...) _printk(__VA_ARGS__)
+extern int _printk(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+extern void print_hex_dump(const char *level, const char *prefix_str,
+			   int prefix_type, int rowsize, int groupsize,
+			   const void *buf, size_t len, _Bool ascii);
 #else
 static inline int printk(const char *fmt, ...)
 {
 	return 0;
 }
+static inline void print_hex_dump(const char *level, const char *prefix_str,
+				  int prefix_type, int rowsize, int groupsize,
+				  const void *buf, size_t len, _Bool ascii)
+{
+}
 #endif
 
 extern int in_aton(char *str);
-extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *, const char *, size_t);
+extern size_t sized_strscpy(char *, const char *, size_t);
+#define strscpy(dst, src)	sized_strscpy(dst, src, sizeof(dst))
 
 /* Copied from linux/compiler-gcc.h since we can't include it directly */
 #define barrier() __asm__ __volatile__("": : :"memory")
diff --git a/arch/um/include/uapi/asm/Kbuild b/arch/um/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..f66554cd5c45
--- /dev/null
+++ b/arch/um/include/uapi/asm/Kbuild
@@ -0,0 +1 @@
+# SPDX-License-Identifier: GPL-2.0
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index babe21826e3e..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -1,30 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux,intel}.com)
-# Licensed under the GPL
 #
 
+# Don't instrument UML-specific code; without this, we may crash when
+# accessing the instrumentation buffer for the first time from the
+# kernel.
+KCOV_INSTRUMENT                := n
+
 CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START)		\
                         -DELF_ARCH=$(LDS_ELF_ARCH)	\
                         -DELF_FORMAT=$(LDS_ELF_FORMAT)	\
 			$(LDS_EXTRA)
-extra-y := vmlinux.lds
-clean-files :=
+always-$(KBUILD_BUILTIN) := vmlinux.lds
 
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
-	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
-	um_arch.o umid.o skas/
+	signal.o sysrq.o time.o tlb.o trap.o \
+	um_arch.o umid.o kmsg_dump.o capflags.o skas/
+obj-y += load_file.o
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
-obj-$(CONFIG_GCOV)	+= gmon_syms.o
+obj-$(CONFIG_OF) += dtb.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
 
 USER_OBJS := config.o
 
-include arch/um/scripts/Makefile.rules
+include $(srctree)/arch/um/scripts/Makefile.rules
 
-targets := config.c config.tmp
+targets := config.c config.tmp capflags.c
 
 # Be careful with the below Sed code - sed is pitfall-rich!
 # We use sed to lower build requirements, for "embedded" builders for instance.
@@ -39,6 +46,15 @@ quiet_cmd_quote1 = QUOTE   $@
 $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
 	$(call if_changed,quote2)
 
+quiet_cmd_mkcapflags = MKCAP   $@
+      cmd_mkcapflags = $(CONFIG_SHELL) $(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
+
+cpufeature = $(src)/../../x86/include/asm/cpufeatures.h
+vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h
+
+$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE
+	$(call if_changed,mkcapflags)
+
 quiet_cmd_quote2 = QUOTE   $@
       cmd_quote2 = sed -e '/CONFIG/{'          \
 		  -e 's/"CONFIG"//'            \
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index 1fb12235ab9c..d620b6f6de9b 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1 +1,49 @@
-#include <sysdep/kernel-offsets.h>
+/* SPDX-License-Identifier: GPL-2.0 */
+#define COMPILE_OFFSETS
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/crypto.h>
+#include <linux/kbuild.h>
+#include <linux/audit.h>
+#include <linux/fs.h>
+#include <asm/mman.h>
+#include <asm/seccomp.h>
+#include <asm/extable.h>
+
+/* workaround for a warning with -Wmissing-prototypes */
+void foo(void);
+
+void foo(void)
+{
+	DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
+
+	DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
+	DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
+	DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
+
+	DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
+	DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
+
+	DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+	DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
+	DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+	DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+	DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
+
+	DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE);
+	DEFINE(HOSTFS_ATTR_UID, ATTR_UID);
+	DEFINE(HOSTFS_ATTR_GID, ATTR_GID);
+	DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE);
+	DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME);
+	DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME);
+	DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME);
+	DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET);
+	DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET);
+
+	DEFINE(ALT_INSTR_SIZE, sizeof(struct alt_instr));
+	DEFINE(EXTABLE_SIZE,   sizeof(struct exception_table_entry));
+}
diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
index 972bf1659564..3ece3c3b31cc 100644
--- a/arch/um/kernel/config.c.in
+++ b/arch/um/kernel/config.c.in
@@ -1,6 +1,6 @@
-/* 
+// SPDX-License-Identifier: GPL-2.0
+/*
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c
new file mode 100644
index 000000000000..47cd3d869fb2
--- /dev/null
+++ b/arch/um/kernel/dtb.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <init.h>
+
+#include "um_arch.h"
+
+static char *dtb __initdata;
+
+void uml_dtb_init(void)
+{
+	long long size;
+	void *area;
+
+	area = uml_load_file(dtb, &size);
+	if (area) {
+		if (!early_init_dt_scan(area, __pa(area))) {
+			pr_err("invalid DTB %s\n", dtb);
+			memblock_free(area, size);
+			return;
+		}
+
+		early_init_fdt_scan_reserved_mem();
+	}
+
+	unflatten_device_tree();
+}
+
+static int __init uml_dtb_setup(char *line, int *add)
+{
+	*add = 0;
+	dtb = line;
+	return 0;
+}
+
+__uml_setup("dtb=", uml_dtb_setup,
+"dtb=<file>\n"
+"    Boot the kernel with the devicetree blob from the specified file.\n\n"
+);
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index adde088aeeff..a36b7918a011 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -1,4 +1,4 @@
-#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
 #include <asm/page.h>
 
 OUTPUT_FORMAT(ELF_FORMAT)
@@ -6,6 +6,12 @@ OUTPUT_ARCH(ELF_ARCH)
 ENTRY(_start)
 jiffies = jiffies_64;
 
+VERSION {
+  {
+    local: *;
+  };
+}
+
 SECTIONS
 {
   PROVIDE (__executable_start = START);
@@ -69,6 +75,8 @@ SECTIONS
     TEXT_TEXT
     SCHED_TEXT
     LOCK_TEXT
+    IRQENTRY_TEXT
+    SOFTIRQENTRY_TEXT
     *(.fixup)
     *(.stub .text.* .gnu.linkonce.t.*)
     /* .gnu.warning sections are handled specially by elf32.em.  */
@@ -100,12 +108,14 @@ SECTIONS
      be empty, which isn't pretty.  */
   . = ALIGN(32 / 8);
   .preinit_array     : { *(.preinit_array) }
-  .init_array     : { *(.init_array) }
+  .init_array     : {
+    *(.kasan_init)
+    *(.init_array.*)
+    *(.init_array)
+  }
   .fini_array     : { *(.fini_array) }
   .data           : {
     INIT_TASK_DATA(KERNEL_STACK_SIZE)
-    . = ALIGN(KERNEL_STACK_SIZE);
-    *(.data..init_irqstack)
     DATA_DATA
     *(.data.* .gnu.linkonce.d.*)
     SORT(CONSTRUCTORS)
@@ -161,8 +171,11 @@ SECTIONS
   PROVIDE (end = .);
 
   STABS_DEBUG
-
   DWARF_DEBUG
+  ELF_DETAILS
 
   DISCARDS
 }
+
+ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE,
+       "STUB code must not be larger than one page");
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
index 4a0800bc37b2..c350c2331bbe 100644
--- a/arch/um/kernel/early_printk.c
+++ b/arch/um/kernel/early_printk.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2011 Richard Weinberger <richrd@nod.at>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 0d7103c9eff3..13812fa97eee 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -1,50 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/ptrace.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
 #include <linux/slab.h>
 #include <asm/current.h>
 #include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <as-layout.h>
 #include <mem_user.h>
+#include <registers.h>
 #include <skas.h>
 #include <os.h>
 
 void flush_thread(void)
 {
-	void *data = NULL;
-	int ret;
-
 	arch_flush_thread(&current->thread.arch);
 
-	ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
-	ret = ret || unmap(&current->mm->context.id, STUB_END,
-			   host_task_size - STUB_END, 1, &data);
-	if (ret) {
-		printk(KERN_ERR "flush_thread - clearing address space failed, "
-		       "err = %d\n", ret);
-		force_sig(SIGKILL, current);
-	}
 	get_safe_registers(current_pt_regs()->regs.gp,
 			   current_pt_regs()->regs.fp);
-
-	__switch_mm(&current->mm->context.id);
 }
 
 void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 {
 	PT_REGS_IP(regs) = eip;
 	PT_REGS_SP(regs) = esp;
-	current->ptrace &= ~PT_DTRACE;
-#ifdef SUBARCH_EXECVE1
-	SUBARCH_EXECVE1(regs->regs);
-#endif
+	clear_thread_flag(TIF_SINGLESTEP);
 }
 EXPORT_SYMBOL(start_thread);
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 829df49dee99..43edc2aa57e4 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/ctype.h>
@@ -10,7 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * If read and write race, the read will still atomically read a valid
@@ -40,9 +40,11 @@ static ssize_t exitcode_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	char *end, buf[sizeof("nnnnn\0")];
+	size_t size;
 	int tmp;
 
-	if (copy_from_user(buf, buffer, count))
+	size = min(count, sizeof(buf));
+	if (copy_from_user(buf, buffer, size))
 		return -EFAULT;
 
 	tmp = simple_strtol(buf, &end, 0);
@@ -53,20 +55,19 @@ static ssize_t exitcode_proc_write(struct file *file,
 	return count;
 }
 
-static const struct file_operations exitcode_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= exitcode_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= exitcode_proc_write,
+static const struct proc_ops exitcode_proc_ops = {
+	.proc_open	= exitcode_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+	.proc_write	= exitcode_proc_write,
 };
 
 static int make_proc_exitcode(void)
 {
 	struct proc_dir_entry *ent;
 
-	ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops);
+	ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops);
 	if (ent == NULL) {
 		printk(KERN_WARNING "make_proc_exitcode : Failed to register "
 		       "/proc/exitcode\n");
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
deleted file mode 100644
index 1bf61266da8e..000000000000
--- a/arch/um/kernel/gmon_syms.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/module.h>
-
-extern void __bb_init_func(void *)  __attribute__((weak));
-EXPORT_SYMBOL(__bb_init_func);
diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c
index 74ddb44288a3..84d536908775 100644
--- a/arch/um/kernel/gprof_syms.c
+++ b/arch/um/kernel/gprof_syms.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 55cead809b18..99dba827461c 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -1,46 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <asm/types.h>
 #include <init.h>
 #include <os.h>
 
+#include "um_arch.h"
+
 /* Changed by uml_initrd_setup, which is a setup */
 static char *initrd __initdata = NULL;
-static int load_initrd(char *filename, void *buf, int size);
 
-static int __init read_initrd(void)
+int __init read_initrd(void)
 {
+	unsigned long long size;
 	void *area;
-	long long size;
-	int err;
-
-	if (initrd == NULL)
-		return 0;
-
-	err = os_file_size(initrd, &size);
-	if (err)
-		return 0;
-
-	/*
-	 * This is necessary because alloc_bootmem craps out if you
-	 * ask for no memory.
-	 */
-	if (size == 0) {
-		printk(KERN_ERR "\"%s\" is a zero-size initrd\n", initrd);
-		return 0;
-	}
 
-	area = alloc_bootmem(size);
-	if (area == NULL)
+	if (!initrd)
 		return 0;
 
-	if (load_initrd(initrd, area, size) == -1)
+	area = uml_load_file(initrd, &size);
+	if (!area)
 		return 0;
 
 	initrd_start = (unsigned long) area;
@@ -48,10 +32,9 @@ static int __init read_initrd(void)
 	return 0;
 }
 
-__uml_postsetup(read_initrd);
-
 static int __init uml_initrd_setup(char *line, int *add)
 {
+	*add = 0;
 	initrd = line;
 	return 0;
 }
@@ -61,25 +44,3 @@ __uml_setup("initrd=", uml_initrd_setup,
 "    This is used to boot UML from an initrd image.  The argument is the\n"
 "    name of the file containing the image.\n\n"
 );
-
-static int load_initrd(char *filename, void *buf, int size)
-{
-	int fd, n;
-
-	fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
-	if (fd < 0) {
-		printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename,
-		       -fd);
-		return -1;
-	}
-	n = os_read_file(fd, buf, size);
-	if (n != size) {
-		printk(KERN_ERR "Read of %d bytes from '%s' failed, "
-		       "err = %d\n", size,
-		       filename, -n);
-		return -1;
-	}
-
-	os_close_file(fd);
-	return 0;
-}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 36e12f0cefd5..f4b13f15a9c1 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,6 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */
@@ -16,245 +18,424 @@
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <irq_user.h>
+#include <irq_kern.h>
+#include <linux/time-internal.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x)		(&per_cpu(irq_stat, x))
+
+/* When epoll triggers we do not know why it did so
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_reg array for each fd -
+ * one entry per IRQ type
+ */
+struct irq_reg {
+	void *id;
+	int irq;
+	/* it's cheaper to store this than to query it */
+	int events;
+	bool active;
+	bool pending;
+	bool wakeup;
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+	bool pending_event;
+	void (*timetravel_handler)(int, int, void *,
+				   struct time_travel_event *);
+	struct time_travel_event event;
+#endif
+};
+
+struct irq_entry {
+	struct list_head list;
+	int fd;
+	struct irq_reg reg[NUM_IRQ_TYPES];
+	bool suspended;
+	bool sigio_workaround;
+};
+
+static DEFINE_RAW_SPINLOCK(irq_lock);
+static LIST_HEAD(active_fds);
+static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
+static bool irqs_suspended;
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static bool irqs_pending;
+#endif
+
+static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
+{
 /*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified.  IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler.  That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+ * irq->active guards against reentry
+ * irq->pending accumulates pending requests
+ * if pending is raised the irq_handler is re-run
+ * until pending is cleared
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
+	if (irq->active) {
+		irq->active = false;
 
-extern void free_irqs(void);
+		do {
+			irq->pending = false;
+			do_IRQ(irq->irq, regs);
+		} while (irq->pending);
+
+		irq->active = true;
+	} else {
+		irq->pending = true;
+	}
+}
 
-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+static void irq_event_handler(struct time_travel_event *ev)
 {
-	struct irq_fd *irq_fd;
-	int n;
+	struct irq_reg *reg = container_of(ev, struct irq_reg, event);
 
-	if (smp_sigio_handler())
+	/* do nothing if suspended; just cause a wakeup and mark as pending */
+	if (irqs_suspended) {
+		irqs_pending = true;
+		reg->pending_event = true;
 		return;
+	}
 
-	while (1) {
-		n = os_waiting_for_events(active_fds);
-		if (n <= 0) {
-			if (n == -EINTR)
-				continue;
-			else break;
-		}
+	generic_handle_irq(reg->irq);
+}
+
+static bool irq_do_timetravel_handler(struct irq_entry *entry,
+				      enum um_irq_type t)
+{
+	struct irq_reg *reg = &entry->reg[t];
+
+	if (!reg->timetravel_handler)
+		return false;
+
+	/*
+	 * Handle all messages - we might get multiple even while
+	 * interrupts are already suspended, due to suspend order
+	 * etc. Note that time_travel_add_irq_event() will not add
+	 * an event twice, if it's pending already "first wins".
+	 */
+	reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
+	if (!reg->event.pending)
+		return false;
+
+	return true;
+}
+
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+	struct irq_entry *entry;
+
+	if (!irqs_pending || timetravel_handlers_only)
+		return;
+
+	irqs_pending = false;
+
+	list_for_each_entry(entry, &active_fds, list) {
+		enum um_irq_type t;
+
+		for (t = 0; t < NUM_IRQ_TYPES; t++) {
+			struct irq_reg *reg = &entry->reg[t];
+
+			/*
+			 * Any timetravel_handler was invoked already, just
+			 * directly run the IRQ.
+			 */
+			if (reg->pending_event) {
+				irq_enter();
+				generic_handle_irq(reg->irq);
+				irq_exit();
+				reg->pending_event = false;
 			}
 		}
 	}
-
-	free_irqs();
+}
+#else
+static bool irq_do_timetravel_handler(struct irq_entry *entry,
+				      enum um_irq_type t)
+{
+	return false;
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+static void irq_do_pending_events(bool timetravel_handlers_only)
+{
+}
+#endif
 
-static int activate_fd(int irq, int fd, int type, void *dev_id)
+static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
+			      struct uml_pt_regs *regs,
+			      bool timetravel_handlers_only)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
-	unsigned long flags;
-	int events, err, n;
+	struct irq_reg *reg = &entry->reg[t];
 
-	err = os_set_fd_async(fd);
-	if (err < 0)
-		goto out;
+	if (!reg->events)
+		return;
 
-	err = -ENOMEM;
-	new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
-	if (new_fd == NULL)
-		goto out;
+	if (os_epoll_triggered(idx, reg->events) <= 0)
+		return;
 
-	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
-
-	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
-			goto out_unlock;
-		}
+	if (irq_do_timetravel_handler(entry, t))
+		return;
+
+	/*
+	 * If we're called to only run time-travel handlers then don't
+	 * actually proceed but mark sigio as pending (if applicable).
+	 * For suspend/resume, timetravel_handlers_only may be true
+	 * despite time-travel not being configured and used.
+	 */
+	if (timetravel_handlers_only) {
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+		reg->pending_event = true;
+		irqs_pending = true;
+		mark_sigio_pending();
+#endif
+		return;
 	}
 
-	if (type == IRQ_WRITE)
-		fd = -1;
+	irq_io_loop(reg, regs);
+}
 
-	tmp_pfd = NULL;
-	n = 0;
+static void _sigio_handler(struct uml_pt_regs *regs,
+			   bool timetravel_handlers_only)
+{
+	struct irq_entry *irq_entry;
+	int n, i;
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
-
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
-		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
+	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
+		return;
 
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
+	/* Flush out pending events that were ignored due to time-travel. */
+	if (!irqs_suspended)
+		irq_do_pending_events(timetravel_handlers_only);
 
-		spin_lock_irqsave(&irq_lock, flags);
-	}
+	while (1) {
+		/* This is now lockless - epoll keeps back-referencesto the irqs
+		 * which have trigger it so there is no need to walk the irq
+		 * list and lock it every time. We avoid locking by turning off
+		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
+		 * we do any changes to the actual data structures
+		 */
+		n = os_waiting_for_events_epoll();
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
+		if (n <= 0) {
+			if (n == -EINTR)
+				continue;
+			else
+				break;
+		}
 
-	spin_unlock_irqrestore(&irq_lock, flags);
+		for (i = 0; i < n ; i++) {
+			enum um_irq_type t;
 
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+			irq_entry = os_epoll_get_data_pointer(i);
 
-	return 0;
+			for (t = 0; t < NUM_IRQ_TYPES; t++)
+				sigio_reg_handler(i, irq_entry, t, regs,
+						  timetravel_handlers_only);
+		}
+	}
 
- out_unlock:
-	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
-	kfree(new_fd);
- out:
-	return err;
+	if (!timetravel_handlers_only)
+		free_irqs();
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+		   void *mc)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	preempt_disable();
+	_sigio_handler(regs, irqs_suspended);
+	preempt_enable();
 }
 
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
-
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-	struct irq_and_dev *data = d;
+	struct irq_entry *walk;
 
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	lockdep_assert_held(&irq_lock);
+
+	list_for_each_entry(walk, &active_fds, list) {
+		if (walk->fd == fd)
+			return walk;
+	}
+
+	return NULL;
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
+static void remove_irq_entry(struct irq_entry *to_free, bool remove)
 {
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
+	if (!to_free)
+		return;
 
-	free_irq_by_cb(same_irq_and_dev, &data);
+	if (remove)
+		os_del_epoll_fd(to_free->fd);
+	list_del(&to_free->list);
 }
 
-static int same_fd(struct irq_fd *irq, void *fd)
+static bool update_irq_entry(struct irq_entry *entry)
 {
-	return (irq->fd == *((int *)fd));
+	enum um_irq_type i;
+	int events = 0;
+
+	for (i = 0; i < NUM_IRQ_TYPES; i++)
+		events |= entry->reg[i].events;
+
+	if (events) {
+		/* will modify (instead of add) if needed */
+		os_add_epoll_fd(events, entry->fd, entry);
+		return true;
+	}
+
+	os_del_epoll_fd(entry->fd);
+	return false;
 }
 
-void free_irq_by_fd(int fd)
+static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry)
 {
-	free_irq_by_cb(same_fd, &fd);
+	if (update_irq_entry(entry))
+		return NULL;
+	remove_irq_entry(entry, false);
+	return entry;
 }
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
+		       void (*timetravel_handler)(int, int, void *,
+						  struct time_travel_event *))
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
+	struct irq_entry *irq_entry, *to_free = NULL;
+	int err, events = os_event_mask(type);
+	unsigned long flags;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
+	err = os_set_fd_async(fd);
+	if (err < 0)
 		goto out;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	irq_entry = get_irq_entry_by_fd(fd);
+	if (irq_entry) {
+already:
+		/* cannot register the same FD twice with the same type */
+		if (WARN_ON(irq_entry->reg[type].events)) {
+			err = -EALREADY;
+			goto out_unlock;
+		}
+
+		/* temporarily disable to avoid IRQ-side locking */
+		os_del_epoll_fd(fd);
+	} else {
+		struct irq_entry *new;
+
+		/* don't restore interrupts */
+		raw_spin_unlock(&irq_lock);
+		new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
+		if (!new) {
+			local_irq_restore(flags);
+			return -ENOMEM;
+		}
+		raw_spin_lock(&irq_lock);
+		irq_entry = get_irq_entry_by_fd(fd);
+		if (irq_entry) {
+			to_free = new;
+			goto already;
+		}
+		irq_entry = new;
+		irq_entry->fd = fd;
+		list_add_tail(&irq_entry->list, &active_fds);
+		maybe_sigio_broken(fd);
 	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
+
+	irq_entry->reg[type].id = dev_id;
+	irq_entry->reg[type].irq = irq;
+	irq_entry->reg[type].active = true;
+	irq_entry->reg[type].events = events;
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+	if (um_irq_timetravel_handler_used()) {
+		irq_entry->reg[type].timetravel_handler = timetravel_handler;
+		irq_entry->reg[type].event.fn = irq_event_handler;
 	}
-	*index_out = i;
- out:
-	return irq;
+#endif
+
+	WARN_ON(!update_irq_entry(irq_entry));
+	err = 0;
+out_unlock:
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+out:
+	kfree(to_free);
+	return err;
+}
+
+/*
+ * Remove the entry or entries for a specific FD, if you
+ * don't want to remove all the possible entries then use
+ * um_free_irq() or deactivate_fd() instead.
+ */
+void free_irq_by_fd(int fd)
+{
+	struct irq_entry *to_free;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	to_free = get_irq_entry_by_fd(fd);
+	remove_irq_entry(to_free, true);
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+	kfree(to_free);
 }
+EXPORT_SYMBOL(free_irq_by_fd);
 
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
+	struct irq_entry *entry, *to_free = NULL;
 	unsigned long flags;
-	int i;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
-	}
-	os_set_pollfd(i, irq->fd);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	list_for_each_entry(entry, &active_fds, list) {
+		enum um_irq_type i;
 
-	add_sigio_fd(fd);
+		for (i = 0; i < NUM_IRQ_TYPES; i++) {
+			struct irq_reg *reg = &entry->reg[i];
+
+			if (!reg->events)
+				continue;
+			if (reg->irq != irq)
+				continue;
+			if (reg->id != dev)
+				continue;
+
+			os_del_epoll_fd(entry->fd);
+			reg->events = 0;
+			to_free = update_or_remove_irq_entry(entry);
+			goto out;
+		}
+	}
+out:
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+	kfree(to_free);
 }
 
 void deactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *entry;
 	unsigned long flags;
-	int i;
+	enum um_irq_type i;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	os_del_epoll_fd(fd);
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	entry = get_irq_entry_by_fd(fd);
+	if (!entry)
+		goto out;
+
+	for (i = 0; i < NUM_IRQ_TYPES; i++) {
+		if (!entry->reg[i].events)
+			continue;
+		if (entry->reg[i].irq == irqnum)
+			entry->reg[i].events = 0;
 	}
 
-	os_set_pollfd(i, -1);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	entry = update_or_remove_irq_entry(entry);
+out:
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+	kfree(entry);
 
 	ignore_sigio_fd(fd);
 }
@@ -268,17 +449,18 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
-	int err;
+	struct irq_entry *entry;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
-	}
-	/* If there is a signal already queued, after unblocking ignore it */
+	/* Stop IO. The IRQ loop has no lock so this is our
+	 * only way of making sure we are safe to dispose
+	 * of all IRQ handlers
+	 */
 	os_set_ioignore();
 
+	/* we can no longer call kfree() here so just deactivate */
+	list_for_each_entry(entry, &active_fds, list)
+		os_del_epoll_fd(entry->fd);
+	os_close_epoll_fd();
 	return 0;
 }
 
@@ -297,31 +479,180 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
 	return 1;
 }
 
-void um_free_irq(unsigned int irq, void *dev)
+void um_free_irq(int irq, void *dev)
 {
+	if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
+		 "freeing invalid irq %d", irq))
+		return;
+
 	free_irq_by_irq_and_dev(irq, dev);
 	free_irq(irq, dev);
+	clear_bit(irq, irqs_allocated);
 }
 EXPORT_SYMBOL(um_free_irq);
 
-int um_request_irq(unsigned int irq, int fd, int type,
-		   irq_handler_t handler,
-		   unsigned long irqflags, const char * devname,
-		   void *dev_id)
+static int
+_um_request_irq(int irq, int fd, enum um_irq_type type,
+		irq_handler_t handler, unsigned long irqflags,
+		const char *devname, void *dev_id,
+		void (*timetravel_handler)(int, int, void *,
+					   struct time_travel_event *))
 {
 	int err;
 
+	if (irq == UM_IRQ_ALLOC) {
+		int i;
+
+		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
+			if (!test_and_set_bit(i, irqs_allocated)) {
+				irq = i;
+				break;
+			}
+		}
+	}
+
+	if (irq < 0)
+		return -ENOSPC;
+
 	if (fd != -1) {
-		err = activate_fd(irq, fd, type, dev_id);
+		err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
 		if (err)
-			return err;
+			goto error;
 	}
 
-	return request_irq(irq, handler, irqflags, devname, dev_id);
+	err = request_irq(irq, handler, irqflags, devname, dev_id);
+	if (err < 0)
+		goto error;
+
+	return irq;
+error:
+	clear_bit(irq, irqs_allocated);
+	return err;
 }
 
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+		   irq_handler_t handler, unsigned long irqflags,
+		   const char *devname, void *dev_id)
+{
+	return _um_request_irq(irq, fd, type, handler, irqflags,
+			       devname, dev_id, NULL);
+}
 EXPORT_SYMBOL(um_request_irq);
-EXPORT_SYMBOL(reactivate_fd);
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
+		      irq_handler_t handler, unsigned long irqflags,
+		      const char *devname, void *dev_id,
+		      void (*timetravel_handler)(int, int, void *,
+						 struct time_travel_event *))
+{
+	return _um_request_irq(irq, fd, type, handler, irqflags,
+			       devname, dev_id, timetravel_handler);
+}
+EXPORT_SYMBOL(um_request_irq_tt);
+
+void sigio_run_timetravel_handlers(void)
+{
+	_sigio_handler(NULL, true);
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+void um_irqs_suspend(void)
+{
+	struct irq_entry *entry;
+	unsigned long flags;
+
+	irqs_suspended = true;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	list_for_each_entry(entry, &active_fds, list) {
+		enum um_irq_type t;
+		bool clear = true;
+
+		for (t = 0; t < NUM_IRQ_TYPES; t++) {
+			if (!entry->reg[t].events)
+				continue;
+
+			/*
+			 * For the SIGIO_WRITE_IRQ, which is used to handle the
+			 * SIGIO workaround thread, we need special handling:
+			 * enable wake for it itself, but below we tell it about
+			 * any FDs that should be suspended.
+			 */
+			if (entry->reg[t].wakeup ||
+			    entry->reg[t].irq == SIGIO_WRITE_IRQ
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+			    || entry->reg[t].timetravel_handler
+#endif
+			    ) {
+				clear = false;
+				break;
+			}
+		}
+
+		if (clear) {
+			entry->suspended = true;
+			os_clear_fd_async(entry->fd);
+			entry->sigio_workaround =
+				!__ignore_sigio_fd(entry->fd);
+		}
+	}
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+}
+
+void um_irqs_resume(void)
+{
+	struct irq_entry *entry;
+	unsigned long flags;
+
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	list_for_each_entry(entry, &active_fds, list) {
+		if (entry->suspended) {
+			int err = os_set_fd_async(entry->fd);
+
+			WARN(err < 0, "os_set_fd_async returned %d\n", err);
+			entry->suspended = false;
+
+			if (entry->sigio_workaround) {
+				err = __add_sigio_fd(entry->fd);
+				WARN(err < 0, "add_sigio_returned %d\n", err);
+			}
+		}
+	}
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+
+	irqs_suspended = false;
+	send_sigio_to_self();
+}
+
+static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
+{
+	struct irq_entry *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&irq_lock, flags);
+	list_for_each_entry(entry, &active_fds, list) {
+		enum um_irq_type t;
+
+		for (t = 0; t < NUM_IRQ_TYPES; t++) {
+			if (!entry->reg[t].events)
+				continue;
+
+			if (entry->reg[t].irq != d->irq)
+				continue;
+			entry->reg[t].wakeup = on;
+			goto unlock;
+		}
+	}
+unlock:
+	raw_spin_unlock_irqrestore(&irq_lock, flags);
+	return 0;
+}
+#else
+#define normal_irq_set_wake NULL
+#endif
 
 /*
  * irq_chip must define at least enable/disable and ack when
@@ -331,139 +662,67 @@ static void dummy(struct irq_data *d)
 {
 }
 
-/* This is used for everything else than the timer. */
+/* This is used for everything other than the timer. */
 static struct irq_chip normal_irq_type = {
 	.name = "SIGIO",
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
+	.irq_mask = dummy,
+	.irq_unmask = dummy,
+	.irq_set_wake = normal_irq_set_wake,
 };
 
-static struct irq_chip SIGVTALRM_irq_type = {
-	.name = "SIGVTALRM",
+static struct irq_chip alarm_irq_type = {
+	.name = "SIGALRM",
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
+	.irq_mask = dummy,
+	.irq_unmask = dummy,
 };
 
 void __init init_IRQ(void)
 {
 	int i;
 
-	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
+	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
 
-	for (i = 1; i < NR_IRQS; i++)
+	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	/* Initialize EPOLL Loop */
+	os_setup_epoll();
 }
 
-/*
- * IRQ stack entry and exit:
- *
- * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
- * and switch over to the IRQ stack after some preparation.  We use
- * sigaltstack to receive signals on a separate stack from the start.
- * These two functions make sure the rest of the kernel won't be too
- * upset by being on a different stack.  The IRQ stack has a
- * thread_info structure at the bottom so that current et al continue
- * to work.
- *
- * to_irq_stack copies the current task's thread_info to the IRQ stack
- * thread_info and sets the tasks's stack to point to the IRQ stack.
- *
- * from_irq_stack copies the thread_info struct back (flags may have
- * been modified) and resets the task's stack pointer.
- *
- * Tricky bits -
- *
- * What happens when two signals race each other?  UML doesn't block
- * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
- * could arrive while a previous one is still setting up the
- * thread_info.
- *
- * There are three cases -
- *     The first interrupt on the stack - sets up the thread_info and
- * handles the interrupt
- *     A nested interrupt interrupting the copying of the thread_info -
- * can't handle the interrupt, as the stack is in an unknown state
- *     A nested interrupt not interrupting the copying of the
- * thread_info - doesn't do any setup, just handles the interrupt
- *
- * The first job is to figure out whether we interrupted stack setup.
- * This is done by xchging the signal mask with thread_info->pending.
- * If the value that comes back is zero, then there is no setup in
- * progress, and the interrupt can be handled.  If the value is
- * non-zero, then there is stack setup in progress.  In order to have
- * the interrupt handled, we leave our signal in the mask, and it will
- * be handled by the upper handler after it has set up the stack.
- *
- * Next is to figure out whether we are the outer handler or a nested
- * one.  As part of setting up the stack, thread_info->real_thread is
- * set to non-NULL (and is reset to NULL on exit).  This is the
- * nesting indicator.  If it is non-NULL, then the stack is already
- * set up and the handler can run.
- */
-
-static unsigned long pending_mask;
-
-unsigned long to_irq_stack(unsigned long *mask_out)
+int __init arch_probe_nr_irqs(void)
 {
-	struct thread_info *ti;
-	unsigned long mask, old;
-	int nested;
-
-	mask = xchg(&pending_mask, *mask_out);
-	if (mask != 0) {
-		/*
-		 * If any interrupts come in at this point, we want to
-		 * make sure that their bits aren't lost by our
-		 * putting our bit in.  So, this loop accumulates bits
-		 * until xchg returns the same value that we put in.
-		 * When that happens, there were no new interrupts,
-		 * and pending_mask contains a bit for each interrupt
-		 * that came in.
-		 */
-		old = *mask_out;
-		do {
-			old |= mask;
-			mask = xchg(&pending_mask, old);
-		} while (mask != old);
-		return 1;
-	}
-
-	ti = current_thread_info();
-	nested = (ti->real_thread != NULL);
-	if (!nested) {
-		struct task_struct *task;
-		struct thread_info *tti;
-
-		task = cpu_tasks[ti->cpu].task;
-		tti = task_thread_info(task);
-
-		*ti = *tti;
-		ti->real_thread = tti;
-		task->stack = ti;
-	}
-
-	mask = xchg(&pending_mask, 0);
-	*mask_out |= mask | nested;
-	return 0;
+	return NR_IRQS;
 }
 
-unsigned long from_irq_stack(int nested)
+void sigchld_handler(int sig, struct siginfo *unused_si,
+		     struct uml_pt_regs *regs, void *mc)
 {
-	struct thread_info *ti, *to;
-	unsigned long mask;
+	do_IRQ(SIGCHLD_IRQ, regs);
+}
 
-	ti = current_thread_info();
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+	int cpu;
 
-	pending_mask = 1;
+	seq_printf(p, "%*s: ", prec, "RES");
+	for_each_online_cpu(cpu)
+		seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+	seq_puts(p, "  Rescheduling interrupts\n");
 
-	to = ti->real_thread;
-	current->stack = to;
-	ti->real_thread = NULL;
-	*to = *ti;
+	seq_printf(p, "%*s: ", prec, "CAL");
+	for_each_online_cpu(cpu)
+		seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+	seq_puts(p, "  Function call interrupts\n");
+#endif
 
-	mask = xchg(&pending_mask, 0);
-	return mask & ~1;
+	return 0;
 }
-
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
new file mode 100644
index 000000000000..fc0f543d1d8e
--- /dev/null
+++ b/arch/um/kernel/kmsg_dump.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmsg_dump.h>
+#include <linux/spinlock.h>
+#include <linux/console.h>
+#include <linux/string.h>
+#include <shared/init.h>
+#include <shared/kern.h>
+#include <os.h>
+
+static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
+				struct kmsg_dump_detail *detail)
+{
+	static struct kmsg_dump_iter iter;
+	static DEFINE_SPINLOCK(lock);
+	static char line[1024];
+	struct console *con;
+	unsigned long flags;
+	size_t len = 0;
+	int cookie;
+
+	/*
+	 * If no consoles are available to output crash information, dump
+	 * the kmsg buffer to stdout.
+	 */
+
+	cookie = console_srcu_read_lock();
+	for_each_console_srcu(con) {
+		/*
+		 * The ttynull console and disabled consoles are ignored
+		 * since they cannot output. All other consoles are
+		 * expected to output the crash information.
+		 */
+		if (strcmp(con->name, "ttynull") != 0 &&
+		    console_is_usable(con, console_srcu_read_flags(con), true)) {
+			break;
+		}
+	}
+	console_srcu_read_unlock(cookie);
+	if (con)
+		return;
+
+	if (!spin_trylock_irqsave(&lock, flags))
+		return;
+
+	kmsg_dump_rewind(&iter);
+
+	printf("kmsg_dump:\n");
+	while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) {
+		line[len] = '\0';
+		printf("%s", line);
+	}
+
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+static struct kmsg_dumper kmsg_dumper = {
+	.dump = kmsg_dumper_stdout
+};
+
+static int __init kmsg_dumper_stdout_init(void)
+{
+	return kmsg_dump_register(&kmsg_dumper);
+}
+
+__uml_postsetup(kmsg_dumper_stdout_init);
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 543c04756939..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -1,13 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
 #include <os.h>
 
-EXPORT_SYMBOL(set_signals);
-EXPORT_SYMBOL(get_signals);
+EXPORT_SYMBOL(um_get_signals);
+EXPORT_SYMBOL(um_set_signals);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
@@ -33,12 +33,16 @@ EXPORT_SYMBOL(os_shutdown_socket);
 EXPORT_SYMBOL(os_create_unix_socket);
 EXPORT_SYMBOL(os_connect_socket);
 EXPORT_SYMBOL(os_accept_connection);
-EXPORT_SYMBOL(os_rcv_fd);
+EXPORT_SYMBOL(os_rcv_fd_msg);
 EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);
 EXPORT_SYMBOL(os_makedev);
+EXPORT_SYMBOL(os_eventfd);
+EXPORT_SYMBOL(os_sendmsg_fds);
 
 EXPORT_SYMBOL(add_sigio_fd);
 EXPORT_SYMBOL(ignore_sigio_fd);
 EXPORT_SYMBOL(sigio_broken);
+
+EXPORT_SYMBOL(syscall);
diff --git a/arch/um/kernel/load_file.c b/arch/um/kernel/load_file.c
new file mode 100644
index 000000000000..cb9d178ab7d8
--- /dev/null
+++ b/arch/um/kernel/load_file.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+#include <linux/memblock.h>
+#include <os.h>
+
+#include "um_arch.h"
+
+static int __init __uml_load_file(const char *filename, void *buf, int size)
+{
+	int fd, n;
+
+	fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+	if (fd < 0) {
+		printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename,
+		       -fd);
+		return -1;
+	}
+	n = os_read_file(fd, buf, size);
+	if (n != size) {
+		printk(KERN_ERR "Read of %d bytes from '%s' failed, "
+		       "err = %d\n", size,
+		       filename, -n);
+		return -1;
+	}
+
+	os_close_file(fd);
+	return 0;
+}
+
+void *uml_load_file(const char *filename, unsigned long long *size)
+{
+	void *area;
+	int err;
+
+	*size = 0;
+
+	if (!filename)
+		return NULL;
+
+	err = os_file_size(filename, size);
+	if (err)
+		return NULL;
+
+	if (*size == 0) {
+		printk(KERN_ERR "\"%s\" is empty\n", filename);
+		return NULL;
+	}
+
+	area = memblock_alloc_or_panic(*size, SMP_CACHE_BYTES);
+
+	if (__uml_load_file(filename, area, *size)) {
+		memblock_free(area, *size);
+		return NULL;
+	}
+
+	return area;
+}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 7ddb64baf327..39c4a7e21c6f 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -1,29 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
-#include <linux/highmem.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
-#include <asm/fixmap.h>
+#include <linux/init.h>
+#include <asm/sections.h>
 #include <asm/page.h>
+#include <asm/pgalloc.h>
 #include <as-layout.h>
 #include <init.h>
 #include <kern.h>
 #include <kern_util.h>
 #include <mem_user.h>
 #include <os.h>
+#include <um_malloc.h>
+#include <linux/sched/task.h>
+#include <linux/kasan.h>
+
+#ifdef CONFIG_KASAN
+void __init kasan_init(void)
+{
+	/*
+	 * kasan_map_memory will map all of the required address space and
+	 * the host machine will allocate physical memory as necessary.
+	 */
+	kasan_map_memory((void *)KASAN_SHADOW_START, KASAN_SHADOW_SIZE);
+	init_task.kasan_depth = 0;
+	/*
+	 * Since kasan_init() is called before main(),
+	 * KASAN is initialized but the enablement is deferred after
+	 * jump_label_init(). See arch_mm_preinit().
+	 */
+}
+
+static void (*kasan_init_ptr)(void)
+__section(".kasan_init") __used
+= kasan_init;
+#endif
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
 EXPORT_SYMBOL(empty_zero_page);
-/* allocated in paging_init and unchanged thereafter */
-static unsigned long *empty_bad_page = NULL;
 
 /*
  * Initialized during boot, and readonly for initializing page tables
@@ -32,202 +55,47 @@ static unsigned long *empty_bad_page = NULL;
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 /* Initialized at boot time, and readonly after that */
-unsigned long long highmem;
 int kmalloc_ok = 0;
 
 /* Used during early boot */
 static unsigned long brk_end;
 
-#ifdef CONFIG_HIGHMEM
-static void setup_highmem(unsigned long highmem_start,
-			  unsigned long highmem_len)
+void __init arch_mm_preinit(void)
 {
-	unsigned long highmem_pfn;
-	int i;
+	/* Safe to call after jump_label_init(). Enables KASAN. */
+	kasan_init_generic();
 
-	highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT;
-	for (i = 0; i < highmem_len >> PAGE_SHIFT; i++)
-		free_highmem_page(&mem_map[highmem_pfn + i]);
-}
-#endif
-
-void __init mem_init(void)
-{
 	/* clear the zero-page */
 	memset(empty_zero_page, 0, PAGE_SIZE);
 
 	/* Map in the area just after the brk now that kmalloc is about
 	 * to be turned on.
 	 */
-	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
+	brk_end = PAGE_ALIGN((unsigned long) sbrk(0));
 	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-	free_bootmem(__pa(brk_end), uml_reserved - brk_end);
+	memblock_free((void *)brk_end, uml_reserved - brk_end);
 	uml_reserved = brk_end;
-
-	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
-	max_low_pfn = totalram_pages;
-#ifdef CONFIG_HIGHMEM
-	setup_highmem(end_iomem, highmem);
-#endif
-	max_pfn = totalram_pages;
-	mem_init_print_info(NULL);
-	kmalloc_ok = 1;
-}
-
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static void __init one_page_table_init(pmd_t *pmd)
-{
-	if (pmd_none(*pmd)) {
-		pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-		set_pmd(pmd, __pmd(_KERNPG_TABLE +
-					   (unsigned long) __pa(pte)));
-		if (pte != pte_offset_kernel(pmd, 0))
-			BUG();
-	}
-}
-
-static void __init one_md_table_init(pud_t *pud)
-{
-#ifdef CONFIG_3_LEVEL_PGTABLES
-	pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-	set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
-	if (pmd_table != pmd_offset(pud, 0))
-		BUG();
-#endif
-}
-
-static void __init fixrange_init(unsigned long start, unsigned long end,
-				 pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	int i, j;
-	unsigned long vaddr;
-
-	vaddr = start;
-	i = pgd_index(vaddr);
-	j = pmd_index(vaddr);
-	pgd = pgd_base + i;
-
-	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
-		pud = pud_offset(pgd, vaddr);
-		if (pud_none(*pud))
-			one_md_table_init(pud);
-		pmd = pmd_offset(pud, vaddr);
-		for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
-			one_page_table_init(pmd);
-			vaddr += PMD_SIZE;
-		}
-		j = 0;
-	}
-}
-
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-#define kmap_get_fixmap_pte(vaddr)					\
-	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\
-				     (vaddr)), (vaddr))
-
-static void __init kmap_init(void)
-{
-	unsigned long kmap_vstart;
-
-	/* cache the first kmap pte */
-	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
-}
-
-static void __init init_highmem(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long vaddr;
-
-	/*
-	 * Permanent kmaps:
-	 */
-	vaddr = PKMAP_BASE;
-	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
-
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	pud = pud_offset(pgd, vaddr);
-	pmd = pmd_offset(pud, vaddr);
-	pte = pte_offset_kernel(pmd, vaddr);
-	pkmap_page_table = pte;
-
-	kmap_init();
+	min_low_pfn = PFN_UP(__pa(uml_reserved));
+	max_pfn = max_low_pfn;
 }
-#endif /* CONFIG_HIGHMEM */
 
-static void __init fixaddr_user_init( void)
+void __init mem_init(void)
 {
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
-	long size = FIXADDR_USER_END - FIXADDR_USER_START;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	phys_t p;
-	unsigned long v, vaddr = FIXADDR_USER_START;
-
-	if (!size)
-		return;
-
-	fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
-	v = (unsigned long) alloc_bootmem_low_pages(size);
-	memcpy((void *) v , (void *) FIXADDR_USER_START, size);
-	p = __pa(v);
-	for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
-		      p += PAGE_SIZE) {
-		pgd = swapper_pg_dir + pgd_index(vaddr);
-		pud = pud_offset(pgd, vaddr);
-		pmd = pmd_offset(pud, vaddr);
-		pte = pte_offset_kernel(pmd, vaddr);
-		pte_set_val(*pte, p, PAGE_READONLY);
-	}
-#endif
+	kmalloc_ok = 1;
 }
 
 void __init paging_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES], vaddr;
-	int i;
+	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 
-	empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
-	empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
-	for (i = 0; i < ARRAY_SIZE(zones_size); i++)
-		zones_size[i] = 0;
+	empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE,
+							       PAGE_SIZE);
+	if (!empty_zero_page)
+		panic("%s: Failed to allocate %lu bytes align=%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
 
-	zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) -
-		(uml_physmem >> PAGE_SHIFT);
-#ifdef CONFIG_HIGHMEM
-	zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT;
-#endif
-	free_area_init(zones_size);
-
-	/*
-	 * Fixed mappings, only the page table structure has to be
-	 * created - mappings will be set by set_fixmap():
-	 */
-	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir);
-
-	fixaddr_user_init();
-
-#ifdef CONFIG_HIGHMEM
-	init_highmem();
-#endif
+	max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT;
+	free_area_init(max_zone_pfn);
 }
 
 /*
@@ -239,64 +107,49 @@ void free_initmem(void)
 {
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-	free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
-
 /* Allocate and free page tables. */
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+	pgd_t *pgd = __pgd_alloc(mm, 0);
 
-	if (pgd) {
-		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+	if (pgd)
 		memcpy(pgd + USER_PTRS_PER_PGD,
 		       swapper_pg_dir + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-	}
-	return pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_page((unsigned long) pgd);
-}
-
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
-	pte_t *pte;
-
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-	return pte;
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *pte;
-
-	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-	if (pte)
-		pgtable_page_ctor(pte);
-	return pte;
-}
-
-#ifdef CONFIG_3_LEVEL_PGTABLES
-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
 
-	if (pmd)
-		memset(pmd, 0, PAGE_SIZE);
-
-	return pmd;
+	return pgd;
 }
-#endif
 
 void *uml_kmalloc(int size, int flags)
 {
 	return kmalloc(size, flags);
 }
+
+static const pgprot_t protection_map[16] = {
+	[VM_NONE]					= PAGE_NONE,
+	[VM_READ]					= PAGE_READONLY,
+	[VM_WRITE]					= PAGE_COPY,
+	[VM_WRITE | VM_READ]				= PAGE_COPY,
+	[VM_EXEC]					= PAGE_READONLY,
+	[VM_EXEC | VM_READ]				= PAGE_READONLY,
+	[VM_EXEC | VM_WRITE]				= PAGE_COPY,
+	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY,
+	[VM_SHARED]					= PAGE_NONE,
+	[VM_SHARED | VM_READ]				= PAGE_READONLY,
+	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
+	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
+	[VM_SHARED | VM_EXEC]				= PAGE_READONLY,
+	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READONLY,
+	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED,
+	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED
+};
+DECLARE_VM_GET_PAGE_PROT
+
+void mark_rodata_ro(void)
+{
+	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
+	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
+
+	os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0);
+}
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index f116db15d402..ae6ca373c261 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -1,16 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <asm/page.h>
+#include <asm/sections.h>
 #include <as-layout.h>
 #include <init.h>
 #include <kern.h>
+#include <kern_util.h>
 #include <mem_user.h>
 #include <os.h>
 
@@ -20,43 +22,6 @@ static int physmem_fd = -1;
 unsigned long high_physmem;
 EXPORT_SYMBOL(high_physmem);
 
-extern unsigned long long physmem_size;
-
-int __init init_maps(unsigned long physmem, unsigned long iomem,
-		     unsigned long highmem)
-{
-	struct page *p, *map;
-	unsigned long phys_len, phys_pages, highmem_len, highmem_pages;
-	unsigned long iomem_len, iomem_pages, total_len, total_pages;
-	int i;
-
-	phys_pages = physmem >> PAGE_SHIFT;
-	phys_len = phys_pages * sizeof(struct page);
-
-	iomem_pages = iomem >> PAGE_SHIFT;
-	iomem_len = iomem_pages * sizeof(struct page);
-
-	highmem_pages = highmem >> PAGE_SHIFT;
-	highmem_len = highmem_pages * sizeof(struct page);
-
-	total_pages = phys_pages + iomem_pages + highmem_pages;
-	total_len = phys_len + iomem_len + highmem_len;
-
-	map = alloc_bootmem_low_pages(total_len);
-	if (map == NULL)
-		return -ENOMEM;
-
-	for (i = 0; i < total_pages; i++) {
-		p = &map[i];
-		memset(p, 0, sizeof(struct page));
-		SetPageReserved(p);
-		INIT_LIST_HEAD(&p->lru);
-	}
-
-	max_mapnr = total_pages;
-	return 0;
-}
-
 void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 		int r, int w, int x)
 {
@@ -75,25 +40,46 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 	}
 }
 
-extern int __syscall_stub_start;
-
+/**
+ * setup_physmem() - Setup physical memory for UML
+ * @start:	Start address of the physical kernel memory,
+ *		i.e start address of the executable image.
+ * @reserve_end:	end address of the physical kernel memory.
+ * @len:	Length of total physical memory that should be mapped/made
+ *		available, in bytes.
+ *
+ * Creates an unlinked temporary file of size (len) and memory maps
+ * it on the last executable image address (uml_reserved).
+ *
+ * The offset is needed as the length of the total physical memory
+ * (len) includes the size of the memory used be the executable image,
+ * but the mapped-to address is the last address of the executable image
+ * (uml_reserved == end address of executable image).
+ *
+ * The memory mapped memory of the temporary file is used as backing memory
+ * of all user space processes/kernel tasks.
+ */
 void __init setup_physmem(unsigned long start, unsigned long reserve_end,
-			  unsigned long len, unsigned long long highmem)
+			  unsigned long len)
 {
 	unsigned long reserve = reserve_end - start;
-	int pfn = PFN_UP(__pa(reserve_end));
-	int delta = (len - reserve) >> PAGE_SHIFT;
-	int err, offset, bootmap_size;
+	unsigned long map_size = len - reserve;
+	int err;
+
+	if (len <= reserve) {
+		os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
+			reserve, len);
+		exit(1);
+	}
 
-	physmem_fd = create_mem_file(len + highmem);
+	physmem_fd = create_mem_file(len);
 
-	offset = uml_reserved - uml_physmem;
-	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
-			    len - offset, 1, 1, 1);
+	err = os_map_memory((void *) reserve_end, physmem_fd, reserve,
+			    map_size, 1, 1, 1);
 	if (err < 0) {
-		printf("setup_physmem - mapping %ld bytes of memory at 0x%p "
-		       "failed - errno = %d\n", len - offset,
-		       (void *) uml_reserved, err);
+		os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p "
+			"failed - errno = %d\n", map_size,
+			(void *) reserve_end, err);
 		exit(1);
 	}
 
@@ -101,12 +87,14 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 	 * Special kludge - This page will be mapped in to userspace processes
 	 * from physmem_fd, so it needs to be written out there.
 	 */
-	os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
-	os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+	os_seek_file(physmem_fd, __pa(__syscall_stub_start));
+	os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
+
+	memblock_add(__pa(start), len);
+	memblock_reserve(__pa(start), reserve);
 
-	bootmap_size = init_bootmem(pfn, pfn + delta);
-	free_bootmem(__pa(reserve_end) + bootmap_size,
-		     len - bootmap_size - reserve);
+	min_low_pfn = PFN_UP(__pa(reserve_end));
+	max_low_pfn = min_low_pfn + (map_size >> PAGE_SHIFT);
 }
 
 int phys_mapping(unsigned long phys, unsigned long long *offset_out)
@@ -117,30 +105,16 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 		fd = physmem_fd;
 		*offset_out = phys;
 	}
-	else if (phys < __pa(end_iomem)) {
-		struct iomem_region *region = iomem_regions;
-
-		while (region != NULL) {
-			if ((phys >= region->phys) &&
-			    (phys < region->phys + region->size)) {
-				fd = region->fd;
-				*offset_out = phys - region->phys;
-				break;
-			}
-			region = region->next;
-		}
-	}
-	else if (phys < __pa(end_iomem) + highmem) {
-		fd = physmem_fd;
-		*offset_out = phys - iomem_size;
-	}
 
 	return fd;
 }
+EXPORT_SYMBOL(phys_mapping);
 
 static int __init uml_mem_setup(char *line, int *add)
 {
 	char *retptr;
+
+	*add = 0;
 	physmem_size = memparse(line,&retptr);
 	return 0;
 }
@@ -153,63 +127,3 @@ __uml_setup("mem=", uml_mem_setup,
 "    be more, and the excess, if it's ever used, will just be swapped out.\n"
 "	Example: mem=64M\n\n"
 );
-
-extern int __init parse_iomem(char *str, int *add);
-
-__uml_setup("iomem=", parse_iomem,
-"iomem=<name>,<file>\n"
-"    Configure <file> as an IO memory region named <name>.\n\n"
-);
-
-/*
- * This list is constructed in parse_iomem and addresses filled in in
- * setup_iomem, both of which run during early boot.  Afterwards, it's
- * unchanged.
- */
-struct iomem_region *iomem_regions;
-
-/* Initialized in parse_iomem and unchanged thereafter */
-int iomem_size;
-
-unsigned long find_iomem(char *driver, unsigned long *len_out)
-{
-	struct iomem_region *region = iomem_regions;
-
-	while (region != NULL) {
-		if (!strcmp(region->driver, driver)) {
-			*len_out = region->size;
-			return region->virt;
-		}
-
-		region = region->next;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(find_iomem);
-
-static int setup_iomem(void)
-{
-	struct iomem_region *region = iomem_regions;
-	unsigned long iomem_start = high_physmem + PAGE_SIZE;
-	int err;
-
-	while (region != NULL) {
-		err = os_map_memory((void *) iomem_start, region->fd, 0,
-				    region->size, 1, 1, 0);
-		if (err)
-			printk(KERN_ERR "Mapping iomem region for driver '%s' "
-			       "failed, errno = %d\n", region->driver, -err);
-		else {
-			region->virt = iomem_start;
-			region->phys = __pa(region->virt);
-		}
-
-		iomem_start += region->size + PAGE_SIZE;
-		region = region->next;
-	}
-
-	return 0;
-}
-
-__initcall(setup_iomem);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index bbcef522bcb1..63b38a3f73f7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -1,7 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
  */
 
 #include <linux/stddef.h>
@@ -13,44 +15,38 @@
 #include <linux/proc_fs.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
+#include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
 #include <linux/seq_file.h>
 #include <linux/tick.h>
 #include <linux/threads.h>
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
 #include <asm/current.h>
-#include <asm/pgtable.h>
 #include <asm/mmu_context.h>
-#include <asm/uaccess.h>
+#include <asm/switch_to.h>
+#include <asm/exec.h>
+#include <linux/uaccess.h>
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
 #include <skas.h>
+#include <registers.h>
+#include <linux/time-internal.h>
+#include <linux/elfcore.h>
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
  * cares about its entry, so it's OK if another processor is modifying its
  * entry.
  */
-struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
-
-static inline int external_pid(void)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return userspace_pid[0];
-}
-
-int pid_to_processor_id(int pid)
-{
-	int i;
-
-	for (i = 0; i < ncpus; i++) {
-		if (cpu_tasks[i].pid == pid)
-			return i;
-	}
-	return -1;
-}
+struct task_struct *cpu_tasks[NR_CPUS] = {
+	[0 ... NR_CPUS - 1] = &init_task,
+};
+EXPORT_SYMBOL(cpu_tasks);
 
 void free_stack(unsigned long stack, int order)
 {
@@ -71,46 +67,35 @@ unsigned long alloc_stack(int order, int atomic)
 
 static inline void set_current(struct task_struct *task)
 {
-	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
-		{ external_pid(), task });
+	cpu_tasks[task_thread_info(task)->cpu] = task;
 }
 
-extern void arch_switch_to(struct task_struct *to);
-
-void *__switch_to(struct task_struct *from, struct task_struct *to)
+struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to)
 {
 	to->thread.prev_sched = from;
 	set_current(to);
 
-	do {
-		current->thread.saved_task = NULL;
-
-		switch_threads(&from->thread.switch_buf,
-			       &to->thread.switch_buf);
-
-		arch_switch_to(current);
-
-		if (current->thread.saved_task)
-			show_regs(&(current->thread.regs));
-		to = current->thread.saved_task;
-		from = current;
-	} while (current->thread.saved_task);
+	switch_threads(&from->thread.switch_buf, &to->thread.switch_buf);
+	arch_switch_to(current);
 
 	return current->thread.prev_sched;
 }
 
 void interrupt_end(void)
 {
-	if (need_resched())
-		schedule();
-	if (test_thread_flag(TIF_SIGPENDING))
-		do_signal();
-	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
-		tracehook_notify_resume(&current->thread.regs);
-}
-
-void exit_thread(void)
-{
+	struct pt_regs *regs = &current->thread.regs;
+	unsigned long thread_flags;
+
+	thread_flags = read_thread_flags();
+	while (thread_flags & _TIF_WORK_MASK) {
+		if (thread_flags & _TIF_NEED_RESCHED)
+			schedule();
+		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+			do_signal(regs);
+		if (thread_flags & _TIF_NOTIFY_RESUME)
+			resume_user_mode_work(regs);
+		thread_flags = read_thread_flags();
+	}
 }
 
 int get_current_pid(void)
@@ -124,28 +109,26 @@ int get_current_pid(void)
  */
 void new_thread_handler(void)
 {
-	int (*fn)(void *), n;
+	int (*fn)(void *);
 	void *arg;
 
 	if (current->thread.prev_sched != NULL)
 		schedule_tail(current->thread.prev_sched);
 	current->thread.prev_sched = NULL;
 
-	fn = current->thread.request.u.thread.proc;
-	arg = current->thread.request.u.thread.arg;
+	fn = current->thread.request.thread.proc;
+	arg = current->thread.request.thread.arg;
 
 	/*
 	 * callback returns only if the kernel thread execs a process
 	 */
-	n = fn(arg);
+	fn(arg);
 	userspace(&current->thread.regs.regs);
 }
 
 /* Called magically, see new_thread_handler above */
-void fork_handler(void)
+static void fork_handler(void)
 {
-	force_flush_all();
-
 	schedule_tail(current->thread.prev_sched);
 
 	/*
@@ -160,16 +143,17 @@ void fork_handler(void)
 	userspace(&current->thread.regs.regs);
 }
 
-int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct * p)
+int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
 {
+	u64 clone_flags = args->flags;
+	unsigned long sp = args->stack;
+	unsigned long tls = args->tls;
 	void (*handler)(void);
-	int kthread = current->flags & PF_KTHREAD;
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
 
-	if (!kthread) {
+	if (!args->fn) {
 	  	memcpy(&p->thread.regs.regs, current_pt_regs(),
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -181,21 +165,21 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
 	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
-		p->thread.request.u.thread.proc = (int (*)(void *))sp;
-		p->thread.request.u.thread.arg = (void *)arg;
+		p->thread.request.thread.proc = args->fn;
+		p->thread.request.thread.arg = args->fn_arg;
 		handler = new_thread_handler;
 	}
 
 	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
 
-	if (!kthread) {
+	if (!args->fn) {
 		clear_flushed_tls(p);
 
 		/*
 		 * Set a new TLS for the child thread?
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			ret = arch_copy_tls(p);
+			ret = arch_set_tls(p, tls);
 	}
 
 	return ret;
@@ -203,34 +187,50 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 void initial_thread_cb(void (*proc)(void *), void *arg)
 {
-	int save_kmalloc_ok = kmalloc_ok;
-
-	kmalloc_ok = 0;
 	initial_thread_cb_skas(proc, arg);
-	kmalloc_ok = save_kmalloc_ok;
+}
+
+int arch_dup_task_struct(struct task_struct *dst,
+			 struct task_struct *src)
+{
+	/* init_task is not dynamically sized (missing FPU state) */
+	if (unlikely(src == &init_task)) {
+		memcpy(dst, src, sizeof(init_task));
+		memset((void *)dst + sizeof(init_task), 0,
+		       arch_task_struct_size - sizeof(init_task));
+	} else {
+		memcpy(dst, src, arch_task_struct_size);
+	}
+
+	return 0;
+}
+
+void um_idle_sleep(void)
+{
+	if (time_travel_mode != TT_MODE_OFF)
+		time_travel_sleep();
+	else
+		os_idle_sleep();
 }
 
 void arch_cpu_idle(void)
 {
-	unsigned long long nsecs;
+	um_idle_sleep();
+}
 
-	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
-	nsecs = disable_timer();
-	idle_sleep(nsecs);
-	local_irq_enable();
+void arch_cpu_idle_prepare(void)
+{
+	os_idle_prepare();
 }
 
-int __cant_sleep(void) {
+int __uml_cant_sleep(void) {
 	return in_atomic() || irqs_disabled() || in_interrupt();
 	/* Is in_interrupt() really needed? */
 }
 
-int user_context(unsigned long sp)
+int uml_need_resched(void)
 {
-	unsigned long stack;
-
-	stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
-	return stack != (unsigned long) current_thread_info();
+	return need_resched();
 }
 
 extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
@@ -250,127 +250,20 @@ char *uml_strdup(const char *string)
 }
 EXPORT_SYMBOL(uml_strdup);
 
-int copy_to_user_proc(void __user *to, void *from, int size)
-{
-	return copy_to_user(to, from, size);
-}
-
 int copy_from_user_proc(void *to, void __user *from, int size)
 {
 	return copy_from_user(to, from, size);
 }
 
-int clear_user_proc(void __user *buf, int size)
-{
-	return clear_user(buf, size);
-}
-
-int strlen_user_proc(char __user *str)
-{
-	return strlen_user(str);
-}
-
-int smp_sigio_handler(void)
-{
-#ifdef CONFIG_SMP
-	int cpu = current_thread_info()->cpu;
-	IPI_handler(cpu);
-	if (cpu != 0)
-		return 1;
-#endif
-	return 0;
-}
-
-int cpu(void)
+int singlestepping(void)
 {
-	return current_thread_info()->cpu;
-}
-
-static atomic_t using_sysemu = ATOMIC_INIT(0);
-int sysemu_supported;
-
-void set_using_sysemu(int value)
-{
-	if (value > sysemu_supported)
-		return;
-	atomic_set(&using_sysemu, value);
-}
-
-int get_using_sysemu(void)
-{
-	return atomic_read(&using_sysemu);
-}
-
-static int sysemu_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%d\n", get_using_sysemu());
-	return 0;
-}
-
-static int sysemu_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sysemu_proc_show, NULL);
-}
-
-static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
-				 size_t count, loff_t *pos)
-{
-	char tmp[2];
-
-	if (copy_from_user(tmp, buf, 1))
-		return -EFAULT;
-
-	if (tmp[0] >= '0' && tmp[0] <= '2')
-		set_using_sysemu(tmp[0] - '0');
-	/* We use the first char, but pretend to write everything */
-	return count;
-}
-
-static const struct file_operations sysemu_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= sysemu_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= sysemu_proc_write,
-};
-
-int __init make_proc_sysemu(void)
-{
-	struct proc_dir_entry *ent;
-	if (!sysemu_supported)
-		return 0;
-
-	ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops);
-
-	if (ent == NULL)
-	{
-		printk(KERN_WARNING "Failed to register /proc/sysemu\n");
-		return 0;
-	}
-
-	return 0;
-}
-
-late_initcall(make_proc_sysemu);
-
-int singlestepping(void * t)
-{
-	struct task_struct *task = t ? t : current;
-
-	if (!(task->ptrace & PT_DTRACE))
-		return 0;
-
-	if (task->thread.singlestep_syscall)
-		return 1;
-
-	return 2;
+	return test_thread_flag(TIF_SINGLESTEP);
 }
 
 /*
  * Only x86 and x86_64 have an arch_align_stack().
  * All other arches have "#define arch_align_stack(x) (x)"
- * in their asm/system.h
+ * in their asm/exec.h
  * As this is included in UML from asm-um/system-generic.h,
  * we can use it to behave as the subarch does.
  */
@@ -378,19 +271,16 @@ int singlestepping(void * t)
 unsigned long arch_align_stack(unsigned long sp)
 {
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
+		sp -= get_random_u32_below(8192);
 	return sp & ~0xf;
 }
 #endif
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long stack_page, sp, ip;
 	bool seen_sched = 0;
 
-	if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING))
-		return 0;
-
 	stack_page = (unsigned long) task_stack_page(p);
 	/* Bail if the process has no kernel stack for some reason */
 	if (stack_page == 0)
@@ -417,11 +307,3 @@ unsigned long get_wchan(struct task_struct *p)
 
 	return 0;
 }
-
-int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
-{
-	int cpu = current_thread_info()->cpu;
-
-	return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
-}
-
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 694d551c8899..fdbb37b5c399 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -1,21 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/audit.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
-#include <linux/tracehook.h>
-#include <asm/uaccess.h>
-#include <skas_ptrace.h>
-
+#include <linux/uaccess.h>
+#include <asm/ptrace-abi.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
 
 void user_enable_single_step(struct task_struct *child)
 {
-	child->ptrace |= PT_DTRACE;
-	child->thread.singlestep_syscall = 0;
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
 	SUBARCH_SET_SINGLESTEPPING(child, 1);
@@ -24,8 +23,7 @@ void user_enable_single_step(struct task_struct *child)
 
 void user_disable_single_step(struct task_struct *child)
 {
-	child->ptrace &= ~PT_DTRACE;
-	child->thread.singlestep_syscall = 0;
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
 	SUBARCH_SET_SINGLESTEPPING(child, 0);
@@ -40,9 +38,6 @@ void ptrace_disable(struct task_struct *child)
 	user_disable_single_step(child);
 }
 
-extern int peek_user(struct task_struct * child, long addr, long data);
-extern int poke_user(struct task_struct * child, long addr, long data);
-
 long arch_ptrace(struct task_struct *child, long request,
 		 unsigned long addr, unsigned long data)
 {
@@ -68,7 +63,7 @@ long arch_ptrace(struct task_struct *child, long request,
 
 #ifdef PTRACE_GETREGS
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
-		if (!access_ok(VERIFY_WRITE, p, MAX_REG_OFFSET)) {
+		if (!access_ok(p, MAX_REG_OFFSET)) {
 			ret = -EIO;
 			break;
 		}
@@ -83,7 +78,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #ifdef PTRACE_SETREGS
 	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
 		unsigned long tmp = 0;
-		if (!access_ok(VERIFY_READ, p, MAX_REG_OFFSET)) {
+		if (!access_ok(p, MAX_REG_OFFSET)) {
 			ret = -EIO;
 			break;
 		}
@@ -104,35 +99,6 @@ long arch_ptrace(struct task_struct *child, long request,
 		ret = ptrace_set_thread_area(child, addr, vp);
 		break;
 
-	case PTRACE_FAULTINFO: {
-		/*
-		 * Take the info from thread->arch->faultinfo,
-		 * but transfer max. sizeof(struct ptrace_faultinfo).
-		 * On i386, ptrace_faultinfo is smaller!
-		 */
-		ret = copy_to_user(p, &child->thread.arch.faultinfo,
-				   sizeof(struct ptrace_faultinfo)) ?
-			-EIO : 0;
-		break;
-	}
-
-#ifdef PTRACE_LDT
-	case PTRACE_LDT: {
-		struct ptrace_ldt ldt;
-
-		if (copy_from_user(&ldt, p, sizeof(ldt))) {
-			ret = -EIO;
-			break;
-		}
-
-		/*
-		 * This one is confusing, so just punt and return -EIO for
-		 * now
-		 */
-		ret = -EIO;
-		break;
-	}
-#endif
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		if (ret == -EIO)
@@ -143,39 +109,33 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
-		  int error_code)
+static void send_sigtrap(struct uml_pt_regs *regs, int error_code)
 {
-	struct siginfo info;
-
-	memset(&info, 0, sizeof(info));
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_BRKPT;
-
-	/* User-mode eip? */
-	info.si_addr = UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL;
-
 	/* Send us the fake SIGTRAP */
-	force_sig_info(SIGTRAP, &info, tsk);
+	force_sig_fault(SIGTRAP, TRAP_BRKPT,
+			/* User-mode eip? */
+			UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL);
 }
 
 /*
- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
+ * XXX Check TIF_SINGLESTEP for singlestepping check and
  * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
  */
-void syscall_trace_enter(struct pt_regs *regs)
+int syscall_trace_enter(struct pt_regs *regs)
 {
-	audit_syscall_entry(HOST_AUDIT_ARCH,
-			    UPT_SYSCALL_NR(&regs->regs),
+	audit_syscall_entry(UPT_SYSCALL_NR(&regs->regs),
 			    UPT_SYSCALL_ARG1(&regs->regs),
 			    UPT_SYSCALL_ARG2(&regs->regs),
 			    UPT_SYSCALL_ARG3(&regs->regs),
 			    UPT_SYSCALL_ARG4(&regs->regs));
 
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_enter(regs, UPT_SYSCALL_NR(&regs->regs));
+
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
+		return 0;
 
-	tracehook_report_syscall_entry(regs);
+	return ptrace_report_syscall_entry(regs);
 }
 
 void syscall_trace_leave(struct pt_regs *regs)
@@ -185,13 +145,16 @@ void syscall_trace_leave(struct pt_regs *regs)
 	audit_syscall_exit(regs);
 
 	/* Fake a debug trap */
-	if (ptraced & PT_DTRACE)
-		send_sigtrap(current, &regs->regs, 0);
+	if (test_thread_flag(TIF_SINGLESTEP))
+		send_sigtrap(&regs->regs, 0);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_exit(regs, PT_REGS_SYSCALL_RET(regs));
 
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		return;
 
-	tracehook_report_syscall_exit(regs, 0);
+	ptrace_report_syscall_exit(regs, 0);
 	/* force do_signal() --> is_syscall() */
 	if (ptraced & PT_PTRACED)
 		set_thread_flag(TIF_SIGPENDING);
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index ced8903921ae..680bce4bd8fa 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -1,42 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
 /* 
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/oom.h>
+#include <linux/reboot.h>
 #include <kern_util.h>
 #include <os.h>
 #include <skas.h>
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 static void kill_off_processes(void)
 {
-	if (proc_mm)
-		/*
-		 * FIXME: need to loop over userspace_pids
-		 */
-		os_kill_ptraced_process(userspace_pid[0], 1);
-	else {
-		struct task_struct *p;
-		int pid;
+	struct task_struct *p;
+	int pid;
 
-		read_lock(&tasklist_lock);
-		for_each_process(p) {
-			struct task_struct *t;
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		struct task_struct *t;
 
-			t = find_lock_task_mm(p);
-			if (!t)
-				continue;
-			pid = t->mm->context.id.u.pid;
-			task_unlock(t);
-			os_kill_ptraced_process(pid, 1);
-		}
-		read_unlock(&tasklist_lock);
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		pid = t->mm->context.id.pid;
+		task_unlock(t);
+		os_kill_ptraced_process(pid, 1);
 	}
+	read_unlock(&tasklist_lock);
 }
 
 void uml_cleanup(void)
@@ -62,3 +59,18 @@ void machine_halt(void)
 {
 	machine_power_off();
 }
+
+static int sys_power_off_handler(struct sys_off_data *data)
+{
+	machine_power_off();
+	return 0;
+}
+
+static int register_power_off(void)
+{
+	register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+				 SYS_OFF_PRIO_DEFAULT,
+				 sys_power_off_handler, NULL);
+	return 0;
+}
+__initcall(register_power_off);
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index b5e0cbb34382..4fc04742048a 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
- * Licensed under the GPL
  */
 
 #include <linux/interrupt.h>
@@ -8,42 +8,15 @@
 #include <os.h>
 #include <sigio.h>
 
-/* Protected by sigio_lock() called from write_sigio_workaround */
-static int sigio_irq_fd = -1;
-
-static irqreturn_t sigio_interrupt(int irq, void *data)
-{
-	char c;
-
-	os_read_file(sigio_irq_fd, &c, sizeof(c));
-	reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
-	return IRQ_HANDLED;
-}
-
-int write_sigio_irq(int fd)
-{
-	int err;
-
-	err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
-			     0, "write sigio", NULL);
-	if (err) {
-		printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
-		       "err = %d\n", err);
-		return -1;
-	}
-	sigio_irq_fd = fd;
-	return 0;
-}
-
 /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
-static DEFINE_SPINLOCK(sigio_spinlock);
+static DEFINE_MUTEX(sigio_mutex);
 
 void sigio_lock(void)
 {
-	spin_lock(&sigio_spinlock);
+	mutex_lock(&sigio_mutex);
 }
 
 void sigio_unlock(void)
 {
-	spin_unlock(&sigio_spinlock);
+	mutex_unlock(&sigio_mutex);
 }
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 3e831b3fd07b..a56b44522766 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -1,32 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/ftrace.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
 #include <frame_kern.h>
 #include <kern_util.h>
+#include <os.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
 
+void block_signals_trace(void)
+{
+	block_signals();
+	if (current_thread_info())
+		trace_hardirqs_off();
+}
+
+void unblock_signals_trace(void)
+{
+	if (current_thread_info())
+		trace_hardirqs_on();
+	unblock_signals();
+}
+
+void um_trace_signals_on(void)
+{
+	if (current_thread_info())
+		trace_hardirqs_on();
+}
+
+void um_trace_signals_off(void)
+{
+	if (current_thread_info())
+		trace_hardirqs_off();
+}
+
 /*
  * OK, we're invoking a handler
  */
-static void handle_signal(struct pt_regs *regs, unsigned long signr,
-			 struct k_sigaction *ka, siginfo_t *info)
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
 	sigset_t *oldset = sigmask_to_save();
 	int singlestep = 0;
 	unsigned long sp;
 	int err;
 
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+	if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
 		singlestep = 1;
 
 	/* Did we come from a system call? */
@@ -39,11 +66,11 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 			break;
 
 		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
+			if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
 				PT_REGS_SYSCALL_RET(regs) = -EINTR;
 				break;
 			}
-		/* fallthrough */
+			fallthrough;
 		case -ERESTARTNOINTR:
 			PT_REGS_RESTART_SYSCALL(regs);
 			PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs);
@@ -52,32 +79,28 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	}
 
 	sp = PT_REGS_SP(regs);
-	if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+	if ((ksig->ka.sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
 		sp = current->sas_ss_sp + current->sas_ss_size;
 
 #ifdef CONFIG_ARCH_HAS_SC_SIGNALS
-	if (!(ka->sa.sa_flags & SA_SIGINFO))
-		err = setup_signal_stack_sc(sp, signr, ka, regs, oldset);
+	if (!(ksig->ka.sa.sa_flags & SA_SIGINFO))
+		err = setup_signal_stack_sc(sp, ksig, regs, oldset);
 	else
 #endif
-		err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset);
+		err = setup_signal_stack_si(sp, ksig, regs, oldset);
 
-	if (err)
-		force_sigsegv(signr, current);
-	else
-		signal_delivered(signr, info, ka, regs, singlestep);
+	signal_setup_done(err, ksig, singlestep);
 }
 
-static int kern_do_signal(struct pt_regs *regs)
+void do_signal(struct pt_regs *regs)
 {
-	struct k_sigaction ka_copy;
-	siginfo_t info;
-	int sig, handled_sig = 0;
+	struct ksignal ksig;
+	int handled_sig = 0;
 
-	while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {
+	while (get_signal(&ksig)) {
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
-		handle_signal(regs, sig, &ka_copy, &info);
+		handle_signal(&ksig, regs);
 	}
 
 	/* Did we come from a system call? */
@@ -98,27 +121,9 @@ static int kern_do_signal(struct pt_regs *regs)
 	}
 
 	/*
-	 * This closes a way to execute a system call on the host.  If
-	 * you set a breakpoint on a system call instruction and singlestep
-	 * from it, the tracing thread used to PTRACE_SINGLESTEP the process
-	 * rather than PTRACE_SYSCALL it, allowing the system call to execute
-	 * on the host.  The tracing thread will check this flag and
-	 * PTRACE_SYSCALL if necessary.
-	 */
-	if (current->ptrace & PT_DTRACE)
-		current->thread.singlestep_syscall =
-			is_syscall(PT_REGS_IP(&current->thread.regs));
-
-	/*
 	 * if there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
 	if (!handled_sig)
 		restore_saved_sigmask();
-	return handled_sig;
-}
-
-int do_signal(void)
-{
-	return kern_do_signal(&current->thread.regs);
 }
diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore
new file mode 100644
index 000000000000..c3409ced0f38
--- /dev/null
+++ b/arch/um/kernel/skas/.gitignore
@@ -0,0 +1,2 @@
+stub_exe
+stub_exe.dbg
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 0b76d8869c94..3384be42691f 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -1,15 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
 #
 
-obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+obj-y := stub.o mmu.o process.o syscall.o uaccess.o \
+	 stub_exe_embed.o
 
-# clone.o is in the stub, so it can't be built with profiling
+# Stub executable
+
+stub_exe_objs-y := stub_exe.o
+
+stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F)
+
+# Object file containing the ELF executable
+$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe
+
+$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE
+	$(call if_changed,stub_exe)
+
+$(obj)/stub_exe: OBJCOPYFLAGS := -S
+$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE
+	$(call if_changed,objcopy)
+
+quiet_cmd_stub_exe = STUB_EXE $@
+      cmd_stub_exe = $(CC) -nostdlib -o $@ \
+			   $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \
+			   $(filter %.o,$^)
+
+STUB_EXE_LDFLAGS = -Wl,-n -static
+
+targets += stub_exe.dbg stub_exe $(stub_exe_objs-y)
+
+# end
+
+# stub.o is in the stub, so it can't be built with profiling
 # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
 # disable it
 
-CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := clone.o
+CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
+CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING)
+
+# Clang will call memset() from __builtin_alloca() when stack variable
+# initialization is enabled, which is used in stub_exe.c.
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
+UNPROFILE_OBJS := stub.o stub_exe.o
+KCOV_INSTRUMENT := n
 
-include arch/um/scripts/Makefile.rules
+include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
deleted file mode 100644
index 289771dadf81..000000000000
--- a/arch/um/kernel/skas/clone.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <signal.h>
-#include <sched.h>
-#include <asm/unistd.h>
-#include <sys/time.h>
-#include <as-layout.h>
-#include <ptrace_user.h>
-#include <stub-data.h>
-#include <sysdep/stub.h>
-
-/*
- * This is in a separate file because it needs to be compiled with any
- * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
- *
- * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
- * on some systems.
- */
-
-void __attribute__ ((__section__ (".__syscall_stub")))
-stub_clone_handler(void)
-{
-	struct stub_data *data = (struct stub_data *) STUB_DATA;
-	long err;
-
-	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-			    STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
-	if (err != 0)
-		goto out;
-
-	err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
-	if (err)
-		goto out;
-
-	err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL,
-			    (long) &data->timer, 0);
-	if (err)
-		goto out;
-
-	remap_stack(data->fd, data->offset);
-	goto done;
-
- out:
-	/*
-	 * save current result.
-	 * Parent: pid;
-	 * child: retcode of mmap already saved and it jumps around this
-	 * assignment
-	 */
-	data->err = err;
- done:
-	trap_myself();
-}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index ff03067a3b14..00957788591b 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -1,178 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
+
+#include <shared/irq_kern.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mmu_context.h>
 #include <as-layout.h>
 #include <os.h>
 #include <skas.h>
+#include <stub-data.h>
 
-extern int __syscall_stub_start;
+/* Ensure the stub_data struct covers the allocated area */
+static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
 
-static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
-			 unsigned long kernel)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	pgd = pgd_offset(mm, proc);
-	pud = pud_alloc(mm, pgd, proc);
-	if (!pud)
-		goto out;
+static spinlock_t mm_list_lock;
+static struct list_head mm_list;
 
-	pmd = pmd_alloc(mm, pud, proc);
-	if (!pmd)
-		goto out_pmd;
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile)
+{
+	struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
 
-	pte = pte_alloc_map(mm, NULL, pmd, proc);
-	if (!pte)
-		goto out_pte;
+	mutex_lock(&ctx->turnstile);
+}
 
-	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
-	*pte = pte_mkread(*pte);
-	return 0;
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile)
+{
+	struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
 
- out_pte:
-	pmd_free(mm, pmd);
- out_pmd:
-	pud_free(mm, pud);
- out:
-	return -ENOMEM;
+	mutex_unlock(&ctx->turnstile);
 }
 
 int init_new_context(struct task_struct *task, struct mm_struct *mm)
 {
- 	struct mm_context *from_mm = NULL;
-	struct mm_context *to_mm = &mm->context;
+	struct mm_id *new_id = &mm->context.id;
 	unsigned long stack = 0;
 	int ret = -ENOMEM;
 
-	if (skas_needs_stub) {
-		stack = get_zeroed_page(GFP_KERNEL);
-		if (stack == 0)
-			goto out;
-	}
+	mutex_init(&mm->context.turnstile);
+	spin_lock_init(&mm->context.sync_tlb_lock);
 
-	to_mm->id.stack = stack;
-	if (current->mm != NULL && current->mm != &init_mm)
-		from_mm = &current->mm->context;
+	stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
+	if (stack == 0)
+		goto out;
 
-	if (proc_mm) {
-		ret = new_mm(stack);
-		if (ret < 0) {
-			printk(KERN_ERR "init_new_context_skas - "
-			       "new_mm failed, errno = %d\n", ret);
-			goto out_free;
-		}
-		to_mm->id.u.mm_fd = ret;
-	}
-	else {
-		if (from_mm)
-			to_mm->id.u.pid = copy_context_skas0(stack,
-							     from_mm->id.u.pid);
-		else to_mm->id.u.pid = start_userspace(stack);
-
-		if (to_mm->id.u.pid < 0) {
-			ret = to_mm->id.u.pid;
-			goto out_free;
-		}
+	new_id->stack = stack;
+	new_id->syscall_data_len = 0;
+	new_id->syscall_fd_num = 0;
+
+	scoped_guard(spinlock_irqsave, &mm_list_lock) {
+		/* Insert into list, used for lookups when the child dies */
+		list_add(&mm->context.list, &mm_list);
 	}
 
-	ret = init_new_ldt(to_mm, from_mm);
-	if (ret < 0) {
-		printk(KERN_ERR "init_new_context_skas - init_ldt"
-		       " failed, errno = %d\n", ret);
+	ret = start_userspace(new_id);
+	if (ret < 0)
 		goto out_free;
-	}
+
+	/* Ensure the new MM is clean and nothing unwanted is mapped */
+	unmap(new_id, 0, STUB_START);
 
 	return 0;
 
  out_free:
-	if (to_mm->id.stack != 0)
-		free_page(to_mm->id.stack);
+	free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
  out:
 	return ret;
 }
 
-void uml_setup_stubs(struct mm_struct *mm)
+void destroy_context(struct mm_struct *mm)
 {
-	int err, ret;
+	struct mm_context *mmu = &mm->context;
 
-	if (!skas_needs_stub)
+	/*
+	 * If init_new_context wasn't called, this will be
+	 * zero, resulting in a kill(0), which will result in the
+	 * whole UML suddenly dying.  Also, cover negative and
+	 * 1 cases, since they shouldn't happen either.
+	 *
+	 * Negative cases happen if the child died unexpectedly.
+	 */
+	if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
+		printk(KERN_ERR "corrupt mm_context - pid = %d\n",
+		       mmu->id.pid);
 		return;
+	}
 
-	ret = init_stub_pte(mm, STUB_CODE,
-			    (unsigned long) &__syscall_stub_start);
-	if (ret)
-		goto out;
-
-	ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
-	if (ret)
-		goto out;
-
-	mm->context.stub_pages[0] = virt_to_page(&__syscall_stub_start);
-	mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack);
+	scoped_guard(spinlock_irqsave, &mm_list_lock)
+		list_del(&mm->context.list);
 
-	/* dup_mmap already holds mmap_sem */
-	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
-				      VM_READ | VM_MAYREAD | VM_EXEC |
-				      VM_MAYEXEC | VM_DONTCOPY,
-				      mm->context.stub_pages);
-	if (err) {
-		printk(KERN_ERR "install_special_mapping returned %d\n", err);
-		goto out;
+	if (mmu->id.pid > 0) {
+		os_kill_ptraced_process(mmu->id.pid, 1);
+		mmu->id.pid = -1;
 	}
-	return;
 
-out:
-	force_sigsegv(SIGSEGV, current);
+	if (using_seccomp && mmu->id.sock)
+		os_close_file(mmu->id.sock);
+
+	free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
 }
 
-void arch_exit_mmap(struct mm_struct *mm)
+static irqreturn_t mm_sigchld_irq(int irq, void* dev)
 {
-	pte_t *pte;
+	struct mm_context *mm_context;
+	pid_t pid;
 
-	pte = virt_to_pte(mm, STUB_CODE);
-	if (pte != NULL)
-		pte_clear(mm, STUB_CODE, pte);
+	guard(spinlock)(&mm_list_lock);
 
-	pte = virt_to_pte(mm, STUB_DATA);
-	if (pte == NULL)
-		return;
+	while ((pid = os_reap_child()) > 0) {
+		/*
+		* A child died, check if we have an MM with the PID. This is
+		* only relevant in SECCOMP mode (as ptrace will fail anyway).
+		*
+		* See wait_stub_done_seccomp for more details.
+		*/
+		list_for_each_entry(mm_context, &mm_list, list) {
+			if (mm_context->id.pid == pid) {
+				struct stub_data *stub_data;
+				printk("Unexpectedly lost MM child! Affected tasks will segfault.");
+
+				/* Marks the MM as dead */
+				mm_context->id.pid = -1;
+
+				stub_data = (void *)mm_context->id.stack;
+				stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+				os_futex_wake(&stub_data->futex);
+#endif
+
+				/*
+				 * NOTE: Currently executing syscalls by
+				 * affected tasks may finish normally.
+				 */
+				break;
+			}
+		}
+	}
 
-	pte_clear(mm, STUB_DATA, pte);
+	return IRQ_HANDLED;
 }
 
-void destroy_context(struct mm_struct *mm)
+static int __init init_child_tracking(void)
 {
-	struct mm_context *mmu = &mm->context;
+	int err;
 
-	if (proc_mm)
-		os_close_file(mmu->id.u.mm_fd);
-	else {
-		/*
-		 * If init_new_context wasn't called, this will be
-		 * zero, resulting in a kill(0), which will result in the
-		 * whole UML suddenly dying.  Also, cover negative and
-		 * 1 cases, since they shouldn't happen either.
-		 */
-		if (mmu->id.u.pid < 2) {
-			printk(KERN_ERR "corrupt mm_context - pid = %d\n",
-			       mmu->id.u.pid);
-			return;
-		}
-		os_kill_ptraced_process(mmu->id.u.pid, 1);
-	}
+	spin_lock_init(&mm_list_lock);
+	INIT_LIST_HEAD(&mm_list);
 
-	if (skas_needs_stub)
-		free_page(mmu->id.stack);
+	err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
+	if (err < 0)
+		panic("Failed to register SIGCHLD IRQ: %d", err);
 
-	free_ldt(mmu);
+	return 0;
 }
+early_initcall(init_child_tracking)
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 4da11b3c8ddb..4a7673b0261a 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -1,73 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/task.h>
+#include <linux/smp-internal.h>
+
+#include <asm/tlbflush.h>
+
 #include <as-layout.h>
 #include <kern.h>
 #include <os.h>
 #include <skas.h>
-
-int new_mm(unsigned long stack)
-{
-	int fd, err;
-
-	fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0);
-	if (fd < 0)
-		return fd;
-
-	if (skas_needs_stub) {
-		err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack);
-		if (err) {
-			os_close_file(fd);
-			return err;
-		}
-	}
-
-	return fd;
-}
+#include <kern_util.h>
 
 extern void start_kernel(void);
 
 static int __init start_kernel_proc(void *unused)
 {
-	int pid;
-
-	block_signals();
-	pid = os_getpid();
+	block_signals_trace();
 
-	cpu_tasks[0].pid = pid;
-	cpu_tasks[0].task = current;
-#ifdef CONFIG_SMP
-	init_cpu_online(get_cpu_mask(0));
-#endif
 	start_kernel();
 	return 0;
 }
 
-extern int userspace_pid[];
-
-extern char cpu0_irqstack[];
+char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
 
 int __init start_uml(void)
 {
-	stack_protections((unsigned long) &cpu0_irqstack);
-	set_sigstack(cpu0_irqstack, THREAD_SIZE);
-	if (proc_mm) {
-		userspace_pid[0] = start_userspace(0);
-		if (userspace_pid[0] < 0) {
-			printf("start_uml - start_userspace returned %d\n",
-			       userspace_pid[0]);
-			exit(1);
-		}
-	}
+	stack_protections((unsigned long) &cpu_irqstacks[0]);
+	set_sigstack(cpu_irqstacks[0], THREAD_SIZE);
 
 	init_new_thread_signals();
 
-	init_task.thread.request.u.thread.proc = start_kernel_proc;
-	init_task.thread.request.u.thread.arg = NULL;
+	init_task.thread.request.thread.proc = start_kernel_proc;
+	init_task.thread.request.thread.arg = NULL;
 	return start_idle_thread(task_stack_page(&init_task),
 				 &init_task.thread.switch_buf);
 }
@@ -79,3 +49,31 @@ unsigned long current_stub_stack(void)
 
 	return current->mm->context.id.stack;
 }
+
+struct mm_id *current_mm_id(void)
+{
+	if (current->mm == NULL)
+		return NULL;
+
+	return &current->mm->context.id;
+}
+
+void current_mm_sync(void)
+{
+	if (current->mm == NULL)
+		return;
+
+	um_tlb_sync(current->mm);
+}
+
+static DEFINE_SPINLOCK(initial_jmpbuf_spinlock);
+
+void initial_jmpbuf_lock(void)
+{
+	spin_lock_irq(&initial_jmpbuf_spinlock);
+}
+
+void initial_jmpbuf_unlock(void)
+{
+	spin_unlock_irq(&initial_jmpbuf_spinlock);
+}
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
new file mode 100644
index 000000000000..67cab46a602c
--- /dev/null
+++ b/arch/um/kernel/skas/stub.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ */
+
+#include <sysdep/stub.h>
+
+#include <linux/futex.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+/*
+ * Known security issues
+ *
+ * Userspace can jump to this address to execute *any* syscall that is
+ * permitted by the stub. As we will return afterwards, it can do
+ * whatever it likes, including:
+ * - Tricking the kernel into handing out the memory FD
+ * - Using this memory FD to read/write all physical memory
+ * - Running in parallel to the kernel processing a syscall
+ *   (possibly creating data races?)
+ * - Blocking e.g. SIGALRM to avoid time based scheduling
+ *
+ * To avoid this, the permitted location for each syscall needs to be
+ * checked for in the SECCOMP filter (which is reasonably simple). Also,
+ * more care will need to go into considerations how the code might be
+ * tricked by using a prepared stack (or even modifying the stack from
+ * another thread in case SMP support is added).
+ *
+ * As for the SIGALRM, the best counter measure will be to check in the
+ * kernel that the process is reporting back the SIGALRM in a timely
+ * fashion.
+ */
+static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
+{
+	struct stub_data *d = get_stub_data();
+	int i;
+	unsigned long res;
+	int fd;
+
+	for (i = 0; i < d->syscall_data_len; i++) {
+		struct stub_syscall *sc = &d->syscall_data[i];
+
+		switch (sc->syscall) {
+		case STUB_SYSCALL_MMAP:
+			if (fd_map)
+				fd = fd_map[sc->mem.fd];
+			else
+				fd = sc->mem.fd;
+
+			res = stub_syscall6(STUB_MMAP_NR,
+					    sc->mem.addr, sc->mem.length,
+					    sc->mem.prot,
+					    MAP_SHARED | MAP_FIXED,
+					    fd, sc->mem.offset);
+			if (res != sc->mem.addr) {
+				d->err = res;
+				d->syscall_data_len = i;
+				return -1;
+			}
+			break;
+		case STUB_SYSCALL_MUNMAP:
+			res = stub_syscall2(__NR_munmap,
+					    sc->mem.addr, sc->mem.length);
+			if (res) {
+				d->err = res;
+				d->syscall_data_len = i;
+				return -1;
+			}
+			break;
+		default:
+			d->err = -95; /* EOPNOTSUPP */
+			d->syscall_data_len = i;
+			return -1;
+		}
+	}
+
+	d->err = 0;
+	d->syscall_data_len = 0;
+
+	return 0;
+}
+
+void __section(".__syscall_stub")
+stub_syscall_handler(void)
+{
+	syscall_handler(NULL);
+
+	trap_myself();
+}
+
+void __section(".__syscall_stub")
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
+	struct stub_data *d = get_stub_data();
+	char rcv_data;
+	union {
+		char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
+		struct cmsghdr align;
+	} ctrl = {};
+	struct iovec iov = {
+		.iov_base = &rcv_data,
+		.iov_len = 1,
+	};
+	struct msghdr msghdr = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = &ctrl,
+		.msg_controllen = sizeof(ctrl),
+	};
+	ucontext_t *uc = p;
+	struct cmsghdr *fd_msg;
+	int *fd_map;
+	int num_fds;
+	long res;
+
+	d->signal = sig;
+	d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+	d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
+
+restart_wait:
+	d->futex = FUTEX_IN_KERN;
+	do {
+		res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+				    FUTEX_WAKE, 1);
+	} while (res == -EINTR);
+
+	do {
+		res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+				    FUTEX_WAIT, FUTEX_IN_KERN, 0);
+	} while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+	if (res < 0 && res != -EAGAIN)
+		stub_syscall1(__NR_exit_group, 1);
+
+	if (d->syscall_data_len) {
+		/* Read passed FDs (if any) */
+		do {
+			res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
+		} while (res == -EINTR);
+
+		/* We should never have a receive error (other than -EAGAIN) */
+		if (res < 0 && res != -EAGAIN)
+			stub_syscall1(__NR_exit_group, 1);
+
+		/* Receive the FDs */
+		num_fds = 0;
+		fd_msg = msghdr.msg_control;
+		fd_map = (void *)&CMSG_DATA(fd_msg);
+		if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
+			num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+		/* Try running queued syscalls. */
+		res = syscall_handler(fd_map);
+
+		while (num_fds)
+			stub_syscall2(__NR_close, fd_map[--num_fds], 0);
+	} else {
+		res = 0;
+	}
+
+	if (res < 0 || d->restart_wait) {
+		/* Report SIGSYS if we restart. */
+		d->signal = SIGSYS;
+		d->restart_wait = 0;
+
+		goto restart_wait;
+	}
+
+	/* Restore arch dependent state that is not part of the mcontext */
+	stub_seccomp_restore_state(&d->arch_data);
+
+	/* Return so that the host modified mcontext is restored. */
+}
+
+void __section(".__syscall_stub")
+stub_signal_restorer(void)
+{
+	/* We must not have anything on the stack when doing rt_sigreturn */
+	stub_syscall0(__NR_rt_sigreturn);
+}
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
new file mode 100644
index 000000000000..cbafaa684e66
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -0,0 +1,230 @@
+#include <sys/ptrace.h>
+#include <sys/prctl.h>
+#include <sys/fcntl.h>
+#include <asm/unistd.h>
+#include <sysdep/stub.h>
+#include <stub-data.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <generated/asm-offsets.h>
+
+void _start(void);
+
+noinline static void real_init(void)
+{
+	struct stub_init_data init_data;
+	unsigned long res;
+	struct {
+		void  *ss_sp;
+		int    ss_flags;
+		size_t ss_size;
+	} stack = {
+		.ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+	};
+	struct {
+		void *sa_handler_;
+		unsigned long sa_flags;
+		void *sa_restorer;
+		unsigned long long sa_mask;
+	} sa = {
+		/* Need to set SA_RESTORER (but the handler never returns) */
+		.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
+	};
+
+	/* set a nice name */
+	stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace");
+
+	/* Make sure this process dies if the kernel dies */
+	stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
+
+	/* Needed in SECCOMP mode (and safe to do anyway) */
+	stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
+	/* read information from STDIN and close it */
+	res = stub_syscall3(__NR_read, 0,
+			    (unsigned long)&init_data, sizeof(init_data));
+	if (res != sizeof(init_data))
+		stub_syscall1(__NR_exit, 10);
+
+	/* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
+	if (!init_data.seccomp)
+		stub_syscall1(__NR_close, 0);
+	else
+		stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
+
+	/* map stub code + data */
+	res = stub_syscall6(STUB_MMAP_NR,
+			    init_data.stub_start, UM_KERN_PAGE_SIZE,
+			    PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED,
+			    init_data.stub_code_fd, init_data.stub_code_offset);
+	if (res != init_data.stub_start)
+		stub_syscall1(__NR_exit, 11);
+
+	res = stub_syscall6(STUB_MMAP_NR,
+			    init_data.stub_start + UM_KERN_PAGE_SIZE,
+			    STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
+			    PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+			    init_data.stub_data_fd, init_data.stub_data_offset);
+	if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
+		stub_syscall1(__NR_exit, 12);
+
+	/* In SECCOMP mode, we only need the signalling FD from now on */
+	if (init_data.seccomp) {
+		res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
+		if (res != 0)
+			stub_syscall1(__NR_exit, 13);
+	}
+
+	/* setup signal stack inside stub data */
+	stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
+	stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
+
+	/* register signal handlers */
+	sa.sa_handler_ = (void *) init_data.signal_handler;
+	sa.sa_restorer = (void *) init_data.signal_restorer;
+	if (!init_data.seccomp) {
+		/* In ptrace mode, the SIGSEGV handler never returns */
+		sa.sa_mask = 0;
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 14);
+	} else {
+		/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
+		sa.sa_mask = ~0ULL;
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 15);
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 16);
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 17);
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 18);
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGILL,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 19);
+
+		res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
+				    (unsigned long)&sa, 0, sizeof(sa.sa_mask));
+		if (res != 0)
+			stub_syscall1(__NR_exit, 20);
+	}
+
+	/*
+	 * If in seccomp mode, install the SECCOMP filter and trigger a syscall.
+	 * Otherwise set PTRACE_TRACEME and do a SIGSTOP.
+	 */
+	if (init_data.seccomp) {
+		struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+			/* [0] Load upper 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 (offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+			/* [1] Jump forward 3 instructions if the upper address is not identical */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
+#endif
+			/* [2] Load lower 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 (offsetof(struct seccomp_data, instruction_pointer))),
+
+			/* [3] Mask out lower bits */
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+			/* [4] Jump to [6] if the lower bits are not on the expected page */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
+
+			/* [5] Trap call, allow */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+
+			/* [6,7] Check architecture */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 offsetof(struct seccomp_data, arch)),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
+				 UM_SECCOMP_ARCH_NATIVE, 1, 0),
+
+			/* [8] Kill (for architecture check) */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+			/* [9] Load syscall number */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 offsetof(struct seccomp_data, nr)),
+
+			/* [10-16] Check against permitted syscalls */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
+				 7, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
+				 6, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
+				 5, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
+				 4, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
+				 3, 0),
+#ifdef __i386__
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
+				 2, 0),
+#else
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
+				 2, 0),
+#endif
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
+				 1, 0),
+
+			/* [17] Not one of the permitted syscalls */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
+
+			/* [18] Permitted call for the stub */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+		};
+		struct sock_fprog prog = {
+			.len = sizeof(filter) / sizeof(filter[0]),
+			.filter = filter,
+		};
+
+		if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+				  SECCOMP_FILTER_FLAG_TSYNC,
+				  (unsigned long)&prog) != 0)
+			stub_syscall1(__NR_exit, 21);
+
+		/* Fall through, the exit syscall will cause SIGSYS */
+	} else {
+		stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+
+		stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
+	}
+
+	stub_syscall1(__NR_exit, 30);
+
+	__builtin_unreachable();
+}
+
+__attribute__((naked)) void _start(void)
+{
+	/*
+	 * Since the stack after exec() starts at the top-most address,
+	 * but that's exactly where we also want to map the stub data
+	 * and code, this must:
+	 *  - push the stack by 1 code and STUB_DATA_PAGES data pages
+	 *  - call real_init()
+	 * This way, real_init() can use the stack normally, while the
+	 * original stack further down (higher address) will become
+	 * inaccessible after the mmap() calls above.
+	 */
+	stub_start(real_init);
+}
diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S
new file mode 100644
index 000000000000..6d8914fbe8f1
--- /dev/null
+++ b/arch/um/kernel/skas/stub_exe_embed.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+__INITDATA
+
+SYM_DATA_START(stub_exe_start)
+	.incbin "arch/um/kernel/skas/stub_exe"
+SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end)
+
+__FINIT
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index c0681e097432..ba7494f9bfe4 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -1,40 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
+#include <linux/seccomp.h>
 #include <kern_util.h>
 #include <sysdep/ptrace.h>
-#include <sysdep/syscalls.h>
-
-extern int syscall_table_size;
-#define NR_SYSCALLS (syscall_table_size / sizeof(void *))
+#include <sysdep/ptrace_user.h>
+#include <linux/time-internal.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+#include <asm/delay.h>
 
 void handle_syscall(struct uml_pt_regs *r)
 {
 	struct pt_regs *regs = container_of(r, struct pt_regs, regs);
-	long result;
 	int syscall;
 
-	syscall_trace_enter(regs);
+	/* Initialize the syscall number and default return value. */
+	UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
+	PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
+
+	if (syscall_trace_enter(regs))
+		goto out;
+
+	/* Do the seccomp check after ptrace; failures should be fast. */
+	if (secure_computing() == -1)
+		goto out;
+
+	syscall = UPT_SYSCALL_NR(r);
 
 	/*
-	 * This should go in the declaration of syscall, but when I do that,
-	 * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
-	 * children at all, sometimes hanging when bash doesn't see the first
-	 * ls exit.
-	 * The assembly looks functionally the same to me.  This is
-	 *     gcc version 4.0.1 20050727 (Red Hat 4.0.1-5)
-	 * in case it's a compiler bug.
+	 * If no time passes, then sched_yield may not actually yield, causing
+	 * broken spinlock implementations in userspace (ASAN) to hang for long
+	 * periods of time.
 	 */
-	syscall = UPT_SYSCALL_NR(r);
-	if ((syscall >= NR_SYSCALLS) || (syscall < 0))
-		result = -ENOSYS;
-	else result = EXECUTE_SYSCALL(syscall, regs);
+	if ((time_travel_mode == TT_MODE_INFCPU ||
+	     time_travel_mode == TT_MODE_EXTERNAL) &&
+	    syscall == __NR_sched_yield)
+		tt_extra_sched_jiffies += 1;
+
+	if (syscall >= 0 && syscall < __NR_syscalls) {
+		unsigned long ret;
+
+		ret = (*sys_call_table[syscall])(UPT_SYSCALL_ARG1(&regs->regs),
+						 UPT_SYSCALL_ARG2(&regs->regs),
+						 UPT_SYSCALL_ARG3(&regs->regs),
+						 UPT_SYSCALL_ARG4(&regs->regs),
+						 UPT_SYSCALL_ARG5(&regs->regs),
+						 UPT_SYSCALL_ARG6(&regs->regs));
+
+		PT_REGS_SET_SYSCALL_RETURN(regs, ret);
 
-	PT_REGS_SET_SYSCALL_RETURN(regs, result);
+		/*
+		 * An error value here can be some form of -ERESTARTSYS
+		 * and then we'd just loop. Make any error syscalls take
+		 * some time, so that it won't just loop if something is
+		 * not ready, and hopefully other things will make some
+		 * progress.
+		 */
+		if (IS_ERR_VALUE(ret) &&
+		    (time_travel_mode == TT_MODE_INFCPU ||
+		     time_travel_mode == TT_MODE_EXTERNAL)) {
+			um_udelay(1);
+			schedule();
+		}
+	}
 
+out:
 	syscall_trace_leave(regs);
 }
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 1d3e0c17340b..198269e384c4 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/err.h>
@@ -10,13 +10,14 @@
 #include <linux/sched.h>
 #include <asm/current.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <kern_util.h>
+#include <asm/futex.h>
 #include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -27,7 +28,11 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_present(*pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
 	if (!pud_present(*pud))
 		return NULL;
 
@@ -59,38 +64,38 @@ static pte_t *maybe_map(unsigned long virt, int is_write)
 static int do_op_one_page(unsigned long addr, int len, int is_write,
 		 int (*op)(unsigned long addr, int len, void *arg), void *arg)
 {
-	jmp_buf buf;
 	struct page *page;
 	pte_t *pte;
-	int n, faulted;
+	int n;
 
 	pte = maybe_map(addr, is_write);
 	if (pte == NULL)
 		return -1;
 
 	page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	addr = (unsigned long) page_address(page) +
+		(addr & ~PAGE_MASK);
+#else
 	addr = (unsigned long) kmap_atomic(page) +
 		(addr & ~PAGE_MASK);
+#endif
+	n = (*op)(addr, len, arg);
 
-	current->thread.fault_catcher = &buf;
-
-	faulted = UML_SETJMP(&buf);
-	if (faulted == 0)
-		n = (*op)(addr, len, arg);
-	else
-		n = -1;
-
-	current->thread.fault_catcher = NULL;
-
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
 	kunmap_atomic((void *)addr);
+#endif
 
 	return n;
 }
 
-static int buffer_op(unsigned long addr, int len, int is_write,
-		     int (*op)(unsigned long, int, void *), void *arg)
+static long buffer_op(unsigned long addr, int len, int is_write,
+		      int (*op)(unsigned long, int, void *), void *arg)
 {
-	int size, remain, n;
+	long size, remain, n;
 
 	size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);
 	remain = len;
@@ -139,18 +144,11 @@ static int copy_chunk_from_user(unsigned long from, int len, void *arg)
 	return 0;
 }
 
-int copy_from_user(void *to, const void __user *from, int n)
+unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (segment_eq(get_fs(), KERNEL_DS)) {
-		memcpy(to, (__force void*)from, n);
-		return 0;
-	}
-
-	return access_ok(VERIFY_READ, from, n) ?
-	       buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
-	       n;
+	return buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to);
 }
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(raw_copy_from_user);
 
 static int copy_chunk_to_user(unsigned long to, int len, void *arg)
 {
@@ -161,18 +159,11 @@ static int copy_chunk_to_user(unsigned long to, int len, void *arg)
 	return 0;
 }
 
-int copy_to_user(void __user *to, const void *from, int n)
+unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (segment_eq(get_fs(), KERNEL_DS)) {
-		memcpy((__force void *) to, from, n);
-		return 0;
-	}
-
-	return access_ok(VERIFY_WRITE, to, n) ?
-	       buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
-	       n;
+	return buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from);
 }
-EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(raw_copy_to_user);
 
 static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
 {
@@ -188,19 +179,13 @@ static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
 	return 0;
 }
 
-int strncpy_from_user(char *dst, const char __user *src, int count)
+long strncpy_from_user(char *dst, const char __user *src, long count)
 {
-	int n;
+	long n;
 	char *ptr = dst;
 
-	if (segment_eq(get_fs(), KERNEL_DS)) {
-		strncpy(dst, (__force void *) src, count);
-		return strnlen(dst, count);
-	}
-
-	if (!access_ok(VERIFY_READ, src, 1))
+	if (!access_ok(src, 1))
 		return -EFAULT;
-
 	n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user,
 		      &ptr);
 	if (n != 0)
@@ -215,22 +200,11 @@ static int clear_chunk(unsigned long addr, int len, void *unused)
 	return 0;
 }
 
-int __clear_user(void __user *mem, int len)
+unsigned long __clear_user(void __user *mem, unsigned long len)
 {
 	return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL);
 }
-
-int clear_user(void __user *mem, int len)
-{
-	if (segment_eq(get_fs(), KERNEL_DS)) {
-		memset((__force void*)mem, 0, len);
-		return 0;
-	}
-
-	return access_ok(VERIFY_WRITE, mem, len) ?
-	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len;
-}
-EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
 
 static int strnlen_chunk(unsigned long str, int len, void *arg)
 {
@@ -244,16 +218,151 @@ static int strnlen_chunk(unsigned long str, int len, void *arg)
 	return 0;
 }
 
-int strnlen_user(const void __user *str, int len)
+long strnlen_user(const char __user *str, long len)
 {
 	int count = 0, n;
 
-	if (segment_eq(get_fs(), KERNEL_DS))
-		return strnlen((__force char*)str, len) + 1;
-
+	if (!access_ok(str, 1))
+		return -EFAULT;
 	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
 	if (n == 0)
 		return count + 1;
-	return -EFAULT;
+	return 0;
 }
 EXPORT_SYMBOL(strnlen_user);
+
+/**
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ *			  argument and comparison of the previous
+ *			  futex value with another constant.
+ *
+ * @op:		operation to execute
+ * @oparg:	argument to operation
+ * @oval:	old value at uaddr
+ * @uaddr:	pointer to user space address
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
+ */
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+{
+	int oldval, ret;
+	struct page *page;
+	unsigned long addr = (unsigned long) uaddr;
+	pte_t *pte;
+
+	ret = -EFAULT;
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+	preempt_disable();
+	pte = maybe_map(addr, 1);
+	if (pte == NULL)
+		goto out_inuser;
+
+	page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	addr = (unsigned long) page_address(page) +
+			(((unsigned long) addr) & ~PAGE_MASK);
+#else
+	addr = (unsigned long) kmap_atomic(page) +
+		((unsigned long) addr & ~PAGE_MASK);
+#endif
+	uaddr = (u32 *) addr;
+	oldval = *uaddr;
+
+	ret = 0;
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		*uaddr = oparg;
+		break;
+	case FUTEX_OP_ADD:
+		*uaddr += oparg;
+		break;
+	case FUTEX_OP_OR:
+		*uaddr |= oparg;
+		break;
+	case FUTEX_OP_ANDN:
+		*uaddr &= ~oparg;
+		break;
+	case FUTEX_OP_XOR:
+		*uaddr ^= oparg;
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
+	kunmap_atomic((void *)addr);
+#endif
+
+out_inuser:
+	preempt_enable();
+
+	if (ret == 0)
+		*oval = oldval;
+
+	return ret;
+}
+EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ *				uaddr with newval if the current value is
+ *				oldval.
+ * @uval:	pointer to store content of @uaddr
+ * @uaddr:	pointer to user space address
+ * @oldval:	old value
+ * @newval:	new value to store to @uaddr
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ */
+
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	struct page *page;
+	pte_t *pte;
+	int ret = -EFAULT;
+
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+
+	preempt_disable();
+	pte = maybe_map((unsigned long) uaddr, 1);
+	if (pte == NULL)
+		goto out_inatomic;
+
+	page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
+#else
+	uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK);
+#endif
+
+	*uval = *uaddr;
+
+	ret = cmpxchg(uaddr, oldval, newval);
+
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
+	kunmap_atomic(uaddr);
+#endif
+	ret = 0;
+
+out_inatomic:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 5c8c3ea7db7b..f1e52b7348fb 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -1,238 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ *
+ * Based on the previous implementation in TT mode
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
-#include <linux/percpu.h>
-#include <asm/pgalloc.h>
-#include <asm/tlb.h>
-
-#ifdef CONFIG_SMP
-
 #include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
 #include <linux/module.h>
+#include <linux/processor.h>
 #include <linux/threads.h>
-#include <linux/interrupt.h>
-#include <linux/err.h>
+#include <linux/cpu.h>
 #include <linux/hardirq.h>
-#include <asm/smp.h>
-#include <asm/processor.h>
-#include <asm/spinlock.h>
+#include <linux/smp.h>
+#include <linux/smp-internal.h>
+#include <init.h>
 #include <kern.h>
-#include <irq_user.h>
 #include <os.h>
+#include <smp.h>
 
-/* Per CPU bogomips and other parameters
- * The only piece used here is the ipi pipe, which is set before SMP is
- * started and never changed.
- */
-struct cpuinfo_um cpu_data[NR_CPUS];
+enum {
+	UML_IPI_RES = 0,
+	UML_IPI_CALL_SINGLE,
+	UML_IPI_CALL,
+	UML_IPI_STOP,
+};
 
-/* A statistic, can be a little off */
-int num_reschedules_sent = 0;
+void arch_smp_send_reschedule(int cpu)
+{
+	os_send_ipi(cpu, UML_IPI_RES);
+}
 
-/* Not changed after boot */
-struct task_struct *idle_threads[NR_CPUS];
+void arch_send_call_function_single_ipi(int cpu)
+{
+	os_send_ipi(cpu, UML_IPI_CALL_SINGLE);
+}
 
-void smp_send_reschedule(int cpu)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
-	os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
-	num_reschedules_sent++;
+	int cpu;
+
+	for_each_cpu(cpu, mask)
+		os_send_ipi(cpu, UML_IPI_CALL);
 }
 
 void smp_send_stop(void)
 {
-	int i;
+	int cpu, me = smp_processor_id();
 
-	printk(KERN_INFO "Stopping all CPUs...");
-	for (i = 0; i < num_online_cpus(); i++) {
-		if (i == current_thread->cpu)
+	for_each_online_cpu(cpu) {
+		if (cpu == me)
 			continue;
-		os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
+		os_send_ipi(cpu, UML_IPI_STOP);
 	}
-	printk(KERN_CONT "done\n");
 }
 
-static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
-static cpumask_t cpu_callin_map = CPU_MASK_NONE;
+static void ipi_handler(int vector, struct uml_pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+	int cpu = raw_smp_processor_id();
+
+	irq_enter();
+
+	if (current->mm)
+		os_alarm_process(current->mm->context.id.pid);
+
+	switch (vector) {
+	case UML_IPI_RES:
+		inc_irq_stat(irq_resched_count);
+		scheduler_ipi();
+		break;
+
+	case UML_IPI_CALL_SINGLE:
+		inc_irq_stat(irq_call_count);
+		generic_smp_call_function_single_interrupt();
+		break;
+
+	case UML_IPI_CALL:
+		inc_irq_stat(irq_call_count);
+		generic_smp_call_function_interrupt();
+		break;
+
+	case UML_IPI_STOP:
+		set_cpu_online(cpu, false);
+		while (1)
+			pause();
+		break;
+
+	default:
+		pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector);
+		break;
+	}
+
+	irq_exit();
+	set_irq_regs(old_regs);
+}
 
-static int idle_proc(void *cpup)
+void uml_ipi_handler(int vector)
 {
-	int cpu = (int) cpup, err;
+	struct uml_pt_regs r = { .is_user = 0 };
 
-	err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1);
-	if (err < 0)
-		panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
+	preempt_disable();
+	ipi_handler(vector, &r);
+	preempt_enable();
+}
 
-	os_set_fd_async(cpu_data[cpu].ipi_pipe[0]);
+/* AP states used only during CPU startup */
+enum {
+	UML_CPU_PAUSED = 0,
+	UML_CPU_RUNNING,
+};
 
-	wmb();
-	if (cpu_test_and_set(cpu, cpu_callin_map)) {
-		printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
-		BUG();
-	}
+static int cpu_states[NR_CPUS];
 
-	while (!cpu_isset(cpu, smp_commenced_mask))
-		cpu_relax();
+static int start_secondary(void *unused)
+{
+	int err, cpu = raw_smp_processor_id();
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
-	default_idle();
-	return 0;
-}
 
-static struct task_struct *idle_thread(int cpu)
-{
-	struct task_struct *new_task;
-
-	current->thread.request.u.thread.proc = idle_proc;
-	current->thread.request.u.thread.arg = (void *) cpu;
-	new_task = fork_idle(cpu);
-	if (IS_ERR(new_task))
-		panic("copy_process failed in idle_thread, error = %ld",
-		      PTR_ERR(new_task));
-
-	cpu_tasks[cpu] = ((struct cpu_task)
-		          { .pid = 	new_task->thread.mode.tt.extern_pid,
-			    .task = 	new_task } );
-	idle_threads[cpu] = new_task;
-	panic("skas mode doesn't support SMP");
-	return new_task;
-}
+	err = um_setup_timer();
+	if (err)
+		panic("CPU#%d failed to setup timer, err = %d", cpu, err);
 
-void smp_prepare_cpus(unsigned int maxcpus)
-{
-	struct task_struct *idle;
-	unsigned long waittime;
-	int err, cpu, me = smp_processor_id();
-	int i;
+	local_irq_enable();
 
-	for (i = 0; i < ncpus; ++i)
-		set_cpu_possible(i, true);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
-	set_cpu_online(me, true);
-	cpu_set(me, cpu_callin_map);
+	return 0;
+}
 
-	err = os_pipe(cpu_data[me].ipi_pipe, 1, 1);
-	if (err < 0)
-		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
+void uml_start_secondary(void *opaque)
+{
+	int cpu = raw_smp_processor_id();
+	struct mm_struct *mm = &init_mm;
+	struct task_struct *idle;
 
-	os_set_fd_async(cpu_data[me].ipi_pipe[0]);
+	stack_protections((unsigned long) &cpu_irqstacks[cpu]);
+	set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE);
 
-	for (cpu = 1; cpu < ncpus; cpu++) {
-		printk(KERN_INFO "Booting processor %d...\n", cpu);
+	set_cpu_present(cpu, true);
+	os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED);
 
-		idle = idle_thread(cpu);
+	smp_rmb(); /* paired with smp_wmb() in __cpu_up() */
 
-		init_idle(idle, cpu);
+	idle = cpu_tasks[cpu];
+	idle->thread_info.cpu = cpu;
 
-		waittime = 200000000;
-		while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
-			cpu_relax();
+	mmgrab(mm);
+	idle->active_mm = mm;
 
-		printk(KERN_INFO "%s\n",
-		       cpu_isset(cpu, cpu_calling_map) ? "done" : "failed");
-	}
-}
+	idle->thread.request.thread.proc = start_secondary;
+	idle->thread.request.thread.arg = NULL;
 
-void smp_prepare_boot_cpu(void)
-{
-	set_cpu_online(smp_processor_id(), true);
+	new_thread(task_stack_page(idle), &idle->thread.switch_buf,
+		   new_thread_handler);
+	os_start_secondary(opaque, &idle->thread.switch_buf);
 }
 
-int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	cpu_set(cpu, smp_commenced_mask);
-	while (!cpu_online(cpu))
-		mb();
-	return 0;
-}
+	int err, cpu, me = smp_processor_id();
+	unsigned long deadline;
 
-int setup_profiling_timer(unsigned int multiplier)
-{
-	printk(KERN_INFO "setup_profiling_timer\n");
-	return 0;
-}
+	os_init_smp();
 
-void smp_call_function_slave(int cpu);
+	for_each_possible_cpu(cpu) {
+		if (cpu == me)
+			continue;
 
-void IPI_handler(int cpu)
-{
-	unsigned char c;
-	int fd;
-
-	fd = cpu_data[cpu].ipi_pipe[0];
-	while (os_read_file(fd, &c, 1) == 1) {
-		switch (c) {
-		case 'C':
-			smp_call_function_slave(cpu);
-			break;
-
-		case 'R':
-			scheduler_ipi();
-			break;
-
-		case 'S':
-			printk(KERN_INFO "CPU#%d stopping\n", cpu);
-			while (1)
-				pause();
-			break;
-
-		default:
-			printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
-			       cpu, c);
-			break;
+		pr_debug("Booting processor %d...\n", cpu);
+		err = os_start_cpu_thread(cpu);
+		if (err) {
+			pr_crit("CPU#%d failed to start cpu thread, err = %d",
+				cpu, err);
+			continue;
 		}
+
+		deadline = jiffies + msecs_to_jiffies(1000);
+		spin_until_cond(cpu_present(cpu) ||
+				time_is_before_jiffies(deadline));
+
+		if (!cpu_present(cpu))
+			pr_crit("CPU#%d failed to boot\n", cpu);
 	}
 }
 
-int hard_smp_processor_id(void)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	return pid_to_processor_id(os_getpid());
-}
+	cpu_tasks[cpu] = tidle;
+	smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */
+	cpu_states[cpu] = UML_CPU_RUNNING;
+	os_futex_wake(&cpu_states[cpu]);
+	spin_until_cond(cpu_online(cpu));
 
-static DEFINE_SPINLOCK(call_lock);
-static atomic_t scf_started;
-static atomic_t scf_finished;
-static void (*func)(void *info);
-static void *info;
-
-void smp_call_function_slave(int cpu)
-{
-	atomic_inc(&scf_started);
-	(*func)(info);
-	atomic_inc(&scf_finished);
+	return 0;
 }
 
-int smp_call_function(void (*_func)(void *info), void *_info, int wait)
+void __init smp_cpus_done(unsigned int max_cpus)
 {
-	int cpus = num_online_cpus() - 1;
-	int i;
+}
 
-	if (!cpus)
-		return 0;
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
 
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
+void __init prefill_possible_map(void)
+{
+	int cpu;
 
-	spin_lock_bh(&call_lock);
-	atomic_set(&scf_started, 0);
-	atomic_set(&scf_finished, 0);
-	func = _func;
-	info = _info;
+	for (cpu = 0; cpu < uml_ncpus; cpu++)
+		set_cpu_possible(cpu, true);
+	for (; cpu < NR_CPUS; cpu++)
+		set_cpu_possible(cpu, false);
+}
 
-	for_each_online_cpu(i)
-		os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+	*add = 0;
 
-	while (atomic_read(&scf_started) != cpus)
-		barrier();
+	if (kstrtoint(line, 10, &uml_ncpus)) {
+		os_warn("%s: Couldn't parse '%s'\n", __func__, line);
+		return -1;
+	}
 
-	if (wait)
-		while (atomic_read(&scf_finished) != cpus)
-			barrier();
+	uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS);
 
-	spin_unlock_bh(&call_lock);
 	return 0;
 }
 
-#endif
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+"    This tells UML how many virtual processors to start. The maximum\n"
+"    number of supported virtual processors can be obtained by querying\n"
+"    the CONFIG_NR_CPUS option using --showconfig.\n\n"
+);
+
+EXPORT_SYMBOL(uml_curr_cpu);
diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c
new file mode 100644
index 000000000000..fd3b61b3d4d2
--- /dev/null
+++ b/arch/um/kernel/stacktrace.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2013 Richard Weinberger <richard@nod.at>
+ * Copyright (C) 2014 Google Inc., Author: Daniel Walter <dwalter@google.com>
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <asm/stacktrace.h>
+
+void dump_trace(struct task_struct *tsk,
+		const struct stacktrace_ops *ops,
+		void *data)
+{
+	int reliable = 0;
+	unsigned long *sp, bp, addr;
+	struct pt_regs *segv_regs = tsk->thread.segv_regs;
+	struct stack_frame *frame;
+
+	bp = get_frame_pointer(tsk, segv_regs);
+	sp = get_stack_pointer(tsk, segv_regs);
+
+	frame = (struct stack_frame *)bp;
+	while (((long) sp & (THREAD_SIZE-1)) != 0) {
+		addr = READ_ONCE_NOCHECK(*sp);
+		if (__kernel_text_address(addr)) {
+			reliable = 0;
+			if ((unsigned long) sp == bp + sizeof(long)) {
+				frame = frame ? frame->next_frame : NULL;
+				bp = (unsigned long)frame;
+				reliable = 1;
+			}
+			ops->address(data, addr, reliable);
+		}
+		sp++;
+	}
+}
+
+static void save_addr(void *data, unsigned long address, int reliable)
+{
+	struct stack_trace *trace = data;
+
+	if (!reliable)
+		return;
+	if (trace->nr_entries >= trace->max_entries)
+		return;
+
+	trace->entries[trace->nr_entries++] = address;
+}
+
+static const struct stacktrace_ops dump_ops = {
+	.address = save_addr
+};
+
+static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace)
+{
+	dump_trace(tsk, &dump_ops, trace);
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	__save_stack_trace(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	__save_stack_trace(tsk, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
deleted file mode 100644
index c1d0ae069b53..000000000000
--- a/arch/um/kernel/syscall.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/utsname.h>
-#include <linux/syscalls.h>
-#include <asm/current.h>
-#include <asm/mman.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-
-long old_mmap(unsigned long addr, unsigned long len,
-	      unsigned long prot, unsigned long flags,
-	      unsigned long fd, unsigned long offset)
-{
-	long err = -EINVAL;
-	if (offset & ~PAGE_MASK)
-		goto out;
-
-	err = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
- out:
-	return err;
-}
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 0dc4d1c6f98a..13ee5666668d 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -1,66 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
+ * Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
  */
 
 #include <linux/kallsyms.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <asm/sysrq.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
 
-/* Catch non-i386 SUBARCH's. */
-#if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT)
-void show_trace(struct task_struct *task, unsigned long * stack)
-{
-	unsigned long addr;
+#include <asm/stacktrace.h>
+#include <os.h>
 
-	if (!stack) {
-		stack = (unsigned long*) &stack;
-		WARN_ON(1);
-	}
+static void _print_addr(void *data, unsigned long address, int reliable)
+{
+	const char *loglvl = data;
 
-	printk(KERN_INFO "Call Trace: \n");
-	while (((long) stack & (THREAD_SIZE-1)) != 0) {
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			printk(KERN_INFO "%08lx:  [<%08lx>]",
-			       (unsigned long) stack, addr);
-			print_symbol(KERN_CONT " %s", addr);
-			printk(KERN_CONT "\n");
-		}
-		stack++;
-	}
-	printk(KERN_INFO "\n");
+	printk("%s [<%08lx>] %s%pS\n", loglvl, address, reliable ? "" : "? ",
+		(void *)address);
 }
-#endif
 
-/*Stolen from arch/i386/kernel/traps.c */
-static const int kstack_depth_to_print = 24;
+static const struct stacktrace_ops stackops = {
+	.address = _print_addr
+};
 
-/* This recently started being used in arch-independent code too, as in
- * kernel/sched/core.c.*/
-void show_stack(struct task_struct *task, unsigned long *esp)
+void show_stack(struct task_struct *task, unsigned long *stack,
+		       const char *loglvl)
 {
-	unsigned long *stack;
+	struct pt_regs *segv_regs = current->thread.segv_regs;
 	int i;
 
-	if (esp == NULL) {
-		if (task != current && task != NULL) {
-			esp = (unsigned long *) KSTK_ESP(task);
-		} else {
-			esp = (unsigned long *) &esp;
-		}
-	}
+	if (!stack)
+		stack = get_stack_pointer(task, segv_regs);
 
-	stack = esp;
-	for (i = 0; i < kstack_depth_to_print; i++) {
+	printk("%sStack:\n", loglvl);
+	for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) {
 		if (kstack_end(stack))
 			break;
-		if (i && ((i % 8) == 0))
-			printk(KERN_INFO "       ");
-		printk(KERN_CONT "%08lx ", *stack++);
+		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
+			pr_cont("\n");
+		pr_cont(" %08lx", READ_ONCE_NOCHECK(*stack));
+		stack++;
 	}
 
-	show_trace(task, esp);
+	printk("%sCall Trace:\n", loglvl);
+	dump_trace(task ?: current, &stackops, (void *)loglvl);
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 117568d4f64a..b344a36b44eb 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -1,115 +1,1092 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
+ * Copyright (C) 2012-2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
+ * Copyright (C) 2019 Intel Corporation
  */
 
 #include <linux/clockchips.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <linux/threads.h>
 #include <asm/irq.h>
 #include <asm/param.h>
 #include <kern_util.h>
 #include <os.h>
+#include <linux/delay.h>
+#include <linux/time-internal.h>
+#include <linux/um_timetravel.h>
+#include <shared/init.h>
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#include <linux/sched/clock.h>
+
+enum time_travel_mode time_travel_mode;
+EXPORT_SYMBOL_GPL(time_travel_mode);
+
+static bool time_travel_start_set;
+static unsigned long long time_travel_start;
+static unsigned long long time_travel_time;
+static unsigned long long time_travel_shm_offset;
+static LIST_HEAD(time_travel_events);
+static LIST_HEAD(time_travel_irqs);
+static unsigned long long time_travel_timer_interval;
+static unsigned long long time_travel_next_event;
+static struct time_travel_event time_travel_timer_event;
+static int time_travel_ext_fd = -1;
+static unsigned int time_travel_ext_waiting;
+static bool time_travel_ext_prev_request_valid;
+static unsigned long long time_travel_ext_prev_request;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
+static u16 time_travel_shm_id;
+static struct um_timetravel_schedshm *time_travel_shm;
+static union um_timetravel_schedshm_client *time_travel_shm_client;
+
+unsigned long tt_extra_sched_jiffies;
+
+notrace unsigned long long sched_clock(void)
+{
+	return (unsigned long long)(jiffies - INITIAL_JIFFIES +
+				    tt_extra_sched_jiffies)
+					* (NSEC_PER_SEC / HZ);
+}
+
+static void time_travel_set_time(unsigned long long ns)
+{
+	if (unlikely(ns < time_travel_time))
+		panic("time-travel: time goes backwards %lld -> %lld\n",
+		      time_travel_time, ns);
+	else if (unlikely(ns >= S64_MAX))
+		panic("The system was going to sleep forever, aborting");
+
+	time_travel_time = ns;
+}
+
+enum time_travel_message_handling {
+	TTMH_IDLE,
+	TTMH_POLL,
+	TTMH_READ,
+	TTMH_READ_START_ACK,
+};
+
+static u64 bc_message;
+int time_travel_should_print_bc_msg;
+
+void _time_travel_print_bc_msg(void)
+{
+	time_travel_should_print_bc_msg = 0;
+	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
+}
+
+static void time_travel_setup_shm(int fd, u16 id)
+{
+	u32 len;
+
+	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
+
+	if (!time_travel_shm)
+		goto out;
+
+	len = time_travel_shm->len;
+
+	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
+	    len < struct_size(time_travel_shm, clients, id + 1)) {
+		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
+		time_travel_shm = NULL;
+		goto out;
+	}
+
+	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
+					      sizeof(*time_travel_shm),
+					      len);
+	if (!time_travel_shm)
+		goto out;
+
+	time_travel_shm_offset = time_travel_shm->current_time;
+	time_travel_shm_client = &time_travel_shm->clients[id];
+	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
+	time_travel_shm_id = id;
+	/* always look at that free_until from now on */
+	time_travel_ext_free_until = &time_travel_shm->free_until;
+out:
+	os_close_file(fd);
+}
+
+static void time_travel_handle_message(struct um_timetravel_msg *msg,
+				       enum time_travel_message_handling mode)
+{
+	struct um_timetravel_msg resp = {
+		.op = UM_TIMETRAVEL_ACK,
+	};
+	int ret;
+
+	/*
+	 * We can't unlock here, but interrupt signals with a timetravel_handler
+	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
+	 */
+	if (mode != TTMH_READ) {
+		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
+
+		while (os_poll(1, &time_travel_ext_fd) != 0) {
+			/* nothing */
+		}
+	}
+
+	if (unlikely(mode == TTMH_READ_START_ACK)) {
+		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+				    ARRAY_SIZE(fd), msg, sizeof(*msg));
+		if (ret == sizeof(*msg)) {
+			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
+			/* we don't use the logging for now */
+			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+		}
+	} else {
+		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+	}
+
+	if (ret == 0)
+		panic("time-travel external link is broken\n");
+	if (ret != sizeof(*msg))
+		panic("invalid time-travel message - %d bytes\n", ret);
+
+	switch (msg->op) {
+	default:
+		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
+			  (unsigned long long)msg->op);
+		break;
+	case UM_TIMETRAVEL_ACK:
+		return;
+	case UM_TIMETRAVEL_RUN:
+		time_travel_set_time(msg->time);
+		if (time_travel_shm) {
+			/* no request right now since we're running */
+			time_travel_shm_client->flags &=
+				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+			/* no ack for shared memory RUN */
+			return;
+		}
+		break;
+	case UM_TIMETRAVEL_FREE_UNTIL:
+		/* not supposed to get this with shm, but ignore it */
+		if (time_travel_shm)
+			break;
+		time_travel_ext_free_until = &_time_travel_ext_free_until;
+		_time_travel_ext_free_until = msg->time;
+		break;
+	case UM_TIMETRAVEL_BROADCAST:
+		bc_message = msg->time;
+		time_travel_should_print_bc_msg = 1;
+		break;
+	}
+
+	resp.seq = msg->seq;
+	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
+}
+
+static u64 time_travel_ext_req(u32 op, u64 time)
+{
+	static int seq;
+	int mseq = ++seq;
+	struct um_timetravel_msg msg = {
+		.op = op,
+		.time = time,
+		.seq = mseq,
+	};
+
+	/*
+	 * We need to block even the timetravel handlers of SIGIO here and
+	 * only restore their use when we got the ACK - otherwise we may
+	 * (will) get interrupted by that, try to queue the IRQ for future
+	 * processing and thus send another request while we're still waiting
+	 * for an ACK, but the peer doesn't know we got interrupted and will
+	 * send the ACKs in the same order as the message, but we'd need to
+	 * see them in the opposite order ...
+	 *
+	 * This wouldn't matter *too* much, but some ACKs carry the
+	 * current time (for UM_TIMETRAVEL_GET) and getting another
+	 * ACK without a time would confuse us a lot!
+	 *
+	 * The sequence number assignment that happens here lets us
+	 * debug such message handling issues more easily.
+	 */
+	block_signals_hard();
+	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
+
+	/* no ACK expected for WAIT in shared memory mode */
+	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
+		goto done;
+
+	while (msg.op != UM_TIMETRAVEL_ACK)
+		time_travel_handle_message(&msg,
+					   op == UM_TIMETRAVEL_START ?
+						TTMH_READ_START_ACK :
+						TTMH_READ);
+
+	if (msg.seq != mseq)
+		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
+		      msg.op, msg.seq, mseq, msg.time);
+
+	if (op == UM_TIMETRAVEL_GET)
+		time_travel_set_time(msg.time);
+done:
+	unblock_signals_hard();
+
+	return msg.time;
+}
+
+void __time_travel_wait_readable(int fd)
+{
+	int fds[2] = { fd, time_travel_ext_fd };
+	int ret;
+
+	if (time_travel_mode != TT_MODE_EXTERNAL)
+		return;
+
+	while ((ret = os_poll(2, fds))) {
+		struct um_timetravel_msg msg;
+
+		if (ret == 1)
+			time_travel_handle_message(&msg, TTMH_READ);
+	}
+}
+EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
+
+static void time_travel_ext_update_request(unsigned long long time)
+{
+	if (time_travel_mode != TT_MODE_EXTERNAL)
+		return;
+
+	/* asked for exactly this time previously */
+	if (time_travel_ext_prev_request_valid &&
+	    time == time_travel_ext_prev_request)
+		return;
+
+	/*
+	 * if we're running and are allowed to run past the request
+	 * then we don't need to update it either
+	 *
+	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+	 * to shared memory, and for non-shm the offset is 0.
+	 */
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
+		return;
+
+	time_travel_ext_prev_request = time;
+	time_travel_ext_prev_request_valid = true;
+
+	if (time_travel_shm) {
+		union um_timetravel_schedshm_client *running;
+
+		running = &time_travel_shm->clients[time_travel_shm->running_id];
+
+		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
+			time_travel_shm_client->flags |=
+				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+			time += time_travel_shm_offset;
+			time_travel_shm_client->req_time = time;
+			if (time < time_travel_shm->free_until)
+				time_travel_shm->free_until = time;
+			return;
+		}
+	}
+
+	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
+}
+
+void __time_travel_propagate_time(void)
+{
+	static unsigned long long last_propagated;
+
+	if (time_travel_shm) {
+		if (time_travel_shm->running_id != time_travel_shm_id)
+			panic("time-travel: setting time while not running\n");
+		time_travel_shm->current_time = time_travel_time +
+						time_travel_shm_offset;
+		return;
+	}
+
+	if (last_propagated == time_travel_time)
+		return;
+
+	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
+	last_propagated = time_travel_time;
+}
+EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
+
+/* returns true if we must do a wait to the simtime device */
+static bool time_travel_ext_request(unsigned long long time)
+{
+	/*
+	 * If we received an external sync point ("free until") then we
+	 * don't have to request/wait for anything until then, unless
+	 * we're already waiting.
+	 *
+	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+	 * to shared memory, and for non-shm the offset is 0.
+	 */
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
+		return false;
+
+	time_travel_ext_update_request(time);
+	return true;
+}
+
+static void time_travel_ext_wait(bool idle)
+{
+	struct um_timetravel_msg msg = {
+		.op = UM_TIMETRAVEL_ACK,
+	};
+
+	time_travel_ext_prev_request_valid = false;
+	if (!time_travel_shm)
+		time_travel_ext_free_until = NULL;
+	time_travel_ext_waiting++;
+
+	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
+
+	/*
+	 * Here we are deep in the idle loop, so we have to break out of the
+	 * kernel abstraction in a sense and implement this in terms of the
+	 * UML system waiting on the VQ interrupt while sleeping, when we get
+	 * the signal it'll call time_travel_ext_vq_notify_done() completing the
+	 * call.
+	 */
+	while (msg.op != UM_TIMETRAVEL_RUN)
+		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);
+
+	time_travel_ext_waiting--;
+
+	/* we might request more stuff while polling - reset when we run */
+	time_travel_ext_prev_request_valid = false;
+}
+
+static void time_travel_ext_get_time(void)
+{
+	if (time_travel_shm)
+		time_travel_set_time(time_travel_shm->current_time -
+				     time_travel_shm_offset);
+	else
+		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+}
+
+static void __time_travel_update_time(unsigned long long ns, bool idle)
+{
+	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
+		time_travel_ext_wait(idle);
+	else
+		time_travel_set_time(ns);
+}
+
+static struct time_travel_event *time_travel_first_event(void)
+{
+	return list_first_entry_or_null(&time_travel_events,
+					struct time_travel_event,
+					list);
+}
+
+static void __time_travel_add_event(struct time_travel_event *e,
+				    unsigned long long time)
+{
+	struct time_travel_event *tmp;
+	bool inserted = false;
+	unsigned long flags;
+
+	if (e->pending)
+		return;
+
+	e->pending = true;
+	e->time = time;
+
+	local_irq_save(flags);
+	list_for_each_entry(tmp, &time_travel_events, list) {
+		/*
+		 * Add the new entry before one with higher time,
+		 * or if they're equal and both on stack, because
+		 * in that case we need to unwind the stack in the
+		 * right order, and the later event (timer sleep
+		 * or such) must be dequeued first.
+		 */
+		if ((tmp->time > e->time) ||
+		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
+			list_add_tail(&e->list, &tmp->list);
+			inserted = true;
+			break;
+		}
+	}
+
+	if (!inserted)
+		list_add_tail(&e->list, &time_travel_events);
+
+	tmp = time_travel_first_event();
+	time_travel_ext_update_request(tmp->time);
+	time_travel_next_event = tmp->time;
+	local_irq_restore(flags);
+}
+
+static void time_travel_add_event(struct time_travel_event *e,
+				  unsigned long long time)
+{
+	if (WARN_ON(!e->fn))
+		return;
+
+	__time_travel_add_event(e, time);
+}
+
+void time_travel_add_event_rel(struct time_travel_event *e,
+			       unsigned long long delay_ns)
+{
+	time_travel_add_event(e, time_travel_time + delay_ns);
+}
+
+static void time_travel_periodic_timer(struct time_travel_event *e)
+{
+	time_travel_add_event(&time_travel_timer_event,
+			      time_travel_time + time_travel_timer_interval);
+
+	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
+	if (tt_extra_sched_jiffies > 0)
+		tt_extra_sched_jiffies -= 1;
+
+	deliver_alarm();
+}
+
+void deliver_time_travel_irqs(void)
+{
+	struct time_travel_event *e;
+	unsigned long flags;
+
+	/*
+	 * Don't do anything for most cases. Note that because here we have
+	 * to disable IRQs (and re-enable later) we'll actually recurse at
+	 * the end of the function, so this is strictly necessary.
+	 */
+	if (likely(list_empty(&time_travel_irqs)))
+		return;
+
+	local_irq_save(flags);
+	irq_enter();
+	while ((e = list_first_entry_or_null(&time_travel_irqs,
+					     struct time_travel_event,
+					     list))) {
+		list_del(&e->list);
+		e->pending = false;
+		e->fn(e);
+	}
+	irq_exit();
+	local_irq_restore(flags);
+}
+
+static void time_travel_deliver_event(struct time_travel_event *e)
+{
+	if (e == &time_travel_timer_event) {
+		/*
+		 * deliver_alarm() does the irq_enter/irq_exit
+		 * by itself, so must handle it specially here
+		 */
+		e->fn(e);
+	} else if (irqs_disabled()) {
+		list_add_tail(&e->list, &time_travel_irqs);
+		/*
+		 * set pending again, it was set to false when the
+		 * event was deleted from the original list, but
+		 * now it's still pending until we deliver the IRQ.
+		 */
+		e->pending = true;
+	} else {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		irq_enter();
+		e->fn(e);
+		irq_exit();
+		local_irq_restore(flags);
+	}
+}
+
+bool time_travel_del_event(struct time_travel_event *e)
+{
+	unsigned long flags;
+
+	if (!e->pending)
+		return false;
+	local_irq_save(flags);
+	list_del(&e->list);
+	e->pending = false;
+	local_irq_restore(flags);
+	return true;
+}
+
+static void time_travel_update_time(unsigned long long next, bool idle)
+{
+	struct time_travel_event ne = {
+		.onstack = true,
+	};
+	struct time_travel_event *e;
+	bool finished = idle;
+
+	/* add it without a handler - we deal with that specifically below */
+	__time_travel_add_event(&ne, next);
+
+	do {
+		e = time_travel_first_event();
+
+		BUG_ON(!e);
+		__time_travel_update_time(e->time, idle);
+
+		/* new events may have been inserted while we were waiting */
+		if (e == time_travel_first_event()) {
+			BUG_ON(!time_travel_del_event(e));
+			BUG_ON(time_travel_time != e->time);
+
+			if (e == &ne) {
+				finished = true;
+			} else {
+				if (e->onstack)
+					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
+					      time_travel_time, e->time, e);
+				time_travel_deliver_event(e);
+			}
+		}
+
+		e = time_travel_first_event();
+		if (e)
+			time_travel_ext_update_request(e->time);
+	} while (ne.pending && !finished);
+
+	time_travel_del_event(&ne);
+}
+
+static void time_travel_update_time_rel(unsigned long long offs)
+{
+	unsigned long flags;
+
+	/*
+	 * Disable interrupts before calculating the new time so
+	 * that a real timer interrupt (signal) can't happen at
+	 * a bad time e.g. after we read time_travel_time but
+	 * before we've completed updating the time.
+	 */
+	local_irq_save(flags);
+	time_travel_update_time(time_travel_time + offs, false);
+	local_irq_restore(flags);
+}
+
+void time_travel_ndelay(unsigned long nsec)
+{
+	/*
+	 * Not strictly needed to use _rel() version since this is
+	 * only used in INFCPU/EXT modes, but it doesn't hurt and
+	 * is more readable too.
+	 */
+	time_travel_update_time_rel(nsec);
+}
+EXPORT_SYMBOL(time_travel_ndelay);
+
+void time_travel_add_irq_event(struct time_travel_event *e)
+{
+	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);
+
+	time_travel_ext_get_time();
+	/*
+	 * We could model interrupt latency here, for now just
+	 * don't have any latency at all and request the exact
+	 * same time (again) to run the interrupt...
+	 */
+	time_travel_add_event(e, time_travel_time);
+}
+EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
+
+static void time_travel_oneshot_timer(struct time_travel_event *e)
+{
+	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
+	if (tt_extra_sched_jiffies > 0)
+		tt_extra_sched_jiffies -= 1;
+
+	deliver_alarm();
+}
+
+void time_travel_sleep(void)
+{
+	/*
+	 * Wait "forever" (using S64_MAX because there are some potential
+	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
+	 * controller application.
+	 */
+	unsigned long long next = S64_MAX;
+	int cpu = raw_smp_processor_id();
+
+	if (time_travel_mode == TT_MODE_BASIC)
+		os_timer_disable(cpu);
+
+	time_travel_update_time(next, true);
+
+	if (time_travel_mode == TT_MODE_BASIC &&
+	    time_travel_timer_event.pending) {
+		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
+			/*
+			 * This is somewhat wrong - we should get the first
+			 * one sooner like the os_timer_one_shot() below...
+			 */
+			os_timer_set_interval(cpu, time_travel_timer_interval);
+		} else {
+			os_timer_one_shot(cpu, time_travel_timer_event.time - next);
+		}
+	}
+}
+
+static void time_travel_handle_real_alarm(void)
+{
+	time_travel_set_time(time_travel_next_event);
+
+	time_travel_del_event(&time_travel_timer_event);
+
+	if (time_travel_timer_event.fn == time_travel_periodic_timer)
+		time_travel_add_event(&time_travel_timer_event,
+				      time_travel_time +
+				      time_travel_timer_interval);
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+	time_travel_timer_interval = interval;
+}
+
+static int time_travel_connect_external(const char *socket)
+{
+	const char *sep;
+	unsigned long long id = (unsigned long long)-1;
+	int rc;
+
+	if ((sep = strchr(socket, ':'))) {
+		char buf[25] = {};
+		if (sep - socket > sizeof(buf) - 1)
+			goto invalid_number;
+
+		memcpy(buf, socket, sep - socket);
+		if (kstrtoull(buf, 0, &id)) {
+invalid_number:
+			panic("time-travel: invalid external ID in string '%s'\n",
+			      socket);
+			return -EINVAL;
+		}
+
+		socket = sep + 1;
+	}
+
+	rc = os_connect_socket(socket);
+	if (rc < 0) {
+		panic("time-travel: failed to connect to external socket %s\n",
+		      socket);
+		return rc;
+	}
+
+	time_travel_ext_fd = rc;
+
+	time_travel_ext_req(UM_TIMETRAVEL_START, id);
+
+	return 1;
+}
+
+static void time_travel_set_start(void)
+{
+	if (time_travel_start_set)
+		return;
+
+	switch (time_travel_mode) {
+	case TT_MODE_EXTERNAL:
+		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
+		/* controller gave us the *current* time, so adjust by that */
+		time_travel_ext_get_time();
+		time_travel_start -= time_travel_time;
+		break;
+	case TT_MODE_INFCPU:
+	case TT_MODE_BASIC:
+		if (!time_travel_start_set)
+			time_travel_start = os_persistent_clock_emulation();
+		break;
+	case TT_MODE_OFF:
+		/* we just read the host clock with os_persistent_clock_emulation() */
+		break;
+	}
+
+	time_travel_start_set = true;
+}
+#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
+#define time_travel_start_set 0
+#define time_travel_start 0
+#define time_travel_time 0
+#define time_travel_ext_waiting 0
+
+static inline void time_travel_update_time(unsigned long long ns, bool idle)
+{
+}
+
+static inline void time_travel_update_time_rel(unsigned long long offs)
+{
+}
+
+static inline void time_travel_handle_real_alarm(void)
+{
+}
+
+static void time_travel_set_interval(unsigned long long interval)
+{
+}
+
+static inline void time_travel_set_start(void)
+{
+}
+
+/* fail link if this actually gets used */
+extern u64 time_travel_ext_req(u32 op, u64 time);
+
+/* these are empty macros so the struct/fn need not exist */
+#define time_travel_add_event(e, time) do { } while (0)
+/* externally not usable - redefine here so we can */
+#undef time_travel_del_event
+#define time_travel_del_event(e) do { } while (0)
+#endif
+
+static struct clock_event_device timer_clockevent[NR_CPUS];
 
 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	unsigned long flags;
 
+	/*
+	 * In basic time-travel mode we still get real interrupts
+	 * (signals) but since we don't read time from the OS, we
+	 * must update the simulated time here to the expiry when
+	 * we get a signal.
+	 * This is not the case in inf-cpu mode, since there we
+	 * never get any real signals from the OS.
+	 */
+	if (time_travel_mode == TT_MODE_BASIC)
+		time_travel_handle_real_alarm();
+
 	local_irq_save(flags);
 	do_IRQ(TIMER_IRQ, regs);
 	local_irq_restore(flags);
 }
 
-static void itimer_set_mode(enum clock_event_mode mode,
-			    struct clock_event_device *evt)
+static int itimer_shutdown(struct clock_event_device *evt)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		set_interval();
-		break;
+	int cpu = evt - &timer_clockevent[0];
 
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_ONESHOT:
-		disable_timer();
-		break;
+	if (time_travel_mode != TT_MODE_OFF)
+		time_travel_del_event(&time_travel_timer_event);
 
-	case CLOCK_EVT_MODE_RESUME:
-		break;
+	if (time_travel_mode != TT_MODE_INFCPU &&
+	    time_travel_mode != TT_MODE_EXTERNAL)
+		os_timer_disable(cpu);
+
+	return 0;
+}
+
+static int itimer_set_periodic(struct clock_event_device *evt)
+{
+	unsigned long long interval = NSEC_PER_SEC / HZ;
+	int cpu = evt - &timer_clockevent[0];
+
+	if (time_travel_mode != TT_MODE_OFF) {
+		time_travel_del_event(&time_travel_timer_event);
+		time_travel_set_event_fn(&time_travel_timer_event,
+					 time_travel_periodic_timer);
+		time_travel_set_interval(interval);
+		time_travel_add_event(&time_travel_timer_event,
+				      time_travel_time + interval);
 	}
+
+	if (time_travel_mode != TT_MODE_INFCPU &&
+	    time_travel_mode != TT_MODE_EXTERNAL)
+		os_timer_set_interval(cpu, interval);
+
+	return 0;
 }
 
 static int itimer_next_event(unsigned long delta,
 			     struct clock_event_device *evt)
 {
-	return timer_one_shot(delta + 1);
+	delta += 1;
+
+	if (time_travel_mode != TT_MODE_OFF) {
+		time_travel_del_event(&time_travel_timer_event);
+		time_travel_set_event_fn(&time_travel_timer_event,
+					 time_travel_oneshot_timer);
+		time_travel_add_event(&time_travel_timer_event,
+				      time_travel_time + delta);
+	}
+
+	if (time_travel_mode != TT_MODE_INFCPU &&
+	    time_travel_mode != TT_MODE_EXTERNAL)
+		return os_timer_one_shot(raw_smp_processor_id(), delta);
+
+	return 0;
 }
 
-static struct clock_event_device itimer_clockevent = {
-	.name		= "itimer",
-	.rating		= 250,
-	.cpumask	= cpu_all_mask,
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.set_mode	= itimer_set_mode,
-	.set_next_event = itimer_next_event,
-	.shift		= 32,
-	.irq		= 0,
+static int itimer_one_shot(struct clock_event_device *evt)
+{
+	return itimer_next_event(0, evt);
+}
+
+static struct clock_event_device _timer_clockevent = {
+	.name			= "posix-timer",
+	.rating			= 250,
+	.features		= CLOCK_EVT_FEAT_PERIODIC |
+				  CLOCK_EVT_FEAT_ONESHOT,
+	.set_state_shutdown	= itimer_shutdown,
+	.set_state_periodic	= itimer_set_periodic,
+	.set_state_oneshot	= itimer_one_shot,
+	.set_next_event		= itimer_next_event,
+	.shift			= 0,
+	.max_delta_ns		= 0xffffffff,
+	.max_delta_ticks	= 0xffffffff,
+	.min_delta_ns		= TIMER_MIN_DELTA,
+	.min_delta_ticks	= TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
+	.irq			= 0,
+	.mult			= 1,
 };
 
 static irqreturn_t um_timer(int irq, void *dev)
 {
-	(*itimer_clockevent.event_handler)(&itimer_clockevent);
+	int cpu = raw_smp_processor_id();
+	struct clock_event_device *evt = &timer_clockevent[cpu];
+
+	/*
+	 * Interrupt the (possibly) running userspace process, technically this
+	 * should only happen if userspace is currently executing.
+	 * With infinite CPU time-travel, we can only get here when userspace
+	 * is not executing. Do not notify there and avoid spurious scheduling.
+	 */
+	if (time_travel_mode != TT_MODE_INFCPU &&
+	    time_travel_mode != TT_MODE_EXTERNAL &&
+	    get_current()->mm)
+		os_alarm_process(get_current()->mm->context.id.pid);
+
+	evt->event_handler(evt);
 
 	return IRQ_HANDLED;
 }
 
-static cycle_t itimer_read(struct clocksource *cs)
+static u64 timer_read(struct clocksource *cs)
 {
-	return os_nsecs() / 1000;
+	if (time_travel_mode != TT_MODE_OFF) {
+		/*
+		 * We make reading the timer cost a bit so that we don't get
+		 * stuck in loops that expect time to move more than the
+		 * exact requested sleep amount, e.g. python's socket server,
+		 * see https://bugs.python.org/issue37026.
+		 *
+		 * However, don't do that when we're in interrupt or such as
+		 * then we might recurse into our own processing, and get to
+		 * even more waiting, and that's not good - it messes up the
+		 * "what do I do next" and onstack event we use to know when
+		 * to return from time_travel_update_time().
+		 */
+		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
+		    !time_travel_ext_waiting)
+			time_travel_update_time_rel(TIMER_MULTIPLIER);
+		return time_travel_time / TIMER_MULTIPLIER;
+	}
+
+	return os_nsecs() / TIMER_MULTIPLIER;
 }
 
-static struct clocksource itimer_clocksource = {
-	.name		= "itimer",
+static struct clocksource timer_clocksource = {
+	.name		= "timer",
 	.rating		= 300,
-	.read		= itimer_read,
+	.read		= timer_read,
 	.mask		= CLOCKSOURCE_MASK(64),
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init setup_itimer(void)
+int um_setup_timer(void)
 {
+	int cpu = raw_smp_processor_id();
+	struct clock_event_device *evt = &timer_clockevent[cpu];
 	int err;
 
-	err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL);
+	err = os_timer_create();
+	if (err)
+		return err;
+
+	memcpy(evt, &_timer_clockevent, sizeof(*evt));
+	evt->cpumask = cpumask_of(cpu);
+	clockevents_register_device(evt);
+
+	return 0;
+}
+
+static void __init um_timer_init(void)
+{
+	int err;
+
+	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
 	if (err != 0)
 		printk(KERN_ERR "register_timer : request_irq failed - "
 		       "errno = %d\n", -err);
 
-	itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32);
-	itimer_clockevent.max_delta_ns =
-		clockevent_delta2ns(60 * HZ, &itimer_clockevent);
-	itimer_clockevent.min_delta_ns =
-		clockevent_delta2ns(1, &itimer_clockevent);
-	err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC);
+	err = um_setup_timer();
+	if (err) {
+		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
+		return;
+	}
+
+	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
 	if (err) {
 		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
 		return;
 	}
-	clockevents_register_device(&itimer_clockevent);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
-	long long nsecs = os_nsecs();
+	long long nsecs;
+
+	time_travel_set_start();
 
-	set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
-				nsecs % NSEC_PER_SEC);
+	if (time_travel_mode != TT_MODE_OFF)
+		nsecs = time_travel_start + time_travel_time;
+	else
+		nsecs = os_persistent_clock_emulation();
+
+	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+				  nsecs % NSEC_PER_SEC);
 }
 
 void __init time_init(void)
 {
-	timer_init();
-	late_time_init = setup_itimer;
+	timer_set_signal_handler();
+	late_time_init = um_timer_init;
+}
+
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+unsigned long calibrate_delay_is_known(void)
+{
+	if (time_travel_mode == TT_MODE_INFCPU ||
+	    time_travel_mode == TT_MODE_EXTERNAL)
+		return 1;
+	return 0;
+}
+
+static int setup_time_travel(char *str)
+{
+	if (strcmp(str, "=inf-cpu") == 0) {
+		time_travel_mode = TT_MODE_INFCPU;
+		_timer_clockevent.name = "time-travel-timer-infcpu";
+		timer_clocksource.name = "time-travel-clock";
+		return 1;
+	}
+
+	if (strncmp(str, "=ext:", 5) == 0) {
+		time_travel_mode = TT_MODE_EXTERNAL;
+		_timer_clockevent.name = "time-travel-timer-external";
+		timer_clocksource.name = "time-travel-clock-external";
+		return time_travel_connect_external(str + 5);
+	}
+
+	if (!*str) {
+		time_travel_mode = TT_MODE_BASIC;
+		_timer_clockevent.name = "time-travel-timer";
+		timer_clocksource.name = "time-travel-clock";
+		return 1;
+	}
+
+	return -EINVAL;
+}
+
+__setup("time-travel", setup_time_travel);
+__uml_help(setup_time_travel,
+"time-travel\n"
+"    This option just enables basic time travel mode, in which the clock/timers\n"
+"    inside the UML instance skip forward when there's nothing to do, rather than\n"
+"    waiting for real time to elapse. However, instance CPU speed is limited by\n"
+"    the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
+"    clock (but quicker when there's nothing to do).\n"
+"\n"
+"time-travel=inf-cpu\n"
+"    This enables time travel mode with infinite processing power, in which there\n"
+"    are no wall clock timers, and any CPU processing happens - as seen from the\n"
+"    guest - instantly. This can be useful for accurate simulation regardless of\n"
+"    debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
+"    easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
+"\n"
+"time-travel=ext:[ID:]/path/to/socket\n"
+"    This enables time travel mode similar to =inf-cpu, except the system will\n"
+"    use the given socket to coordinate with a central scheduler, in order to\n"
+"    have more than one system simultaneously be on simulated time. The virtio\n"
+"    driver code in UML knows about this so you can also simulate networks and\n"
+"    devices using it, assuming the device has the right capabilities.\n"
+"    The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");
+
+static int setup_time_travel_start(char *str)
+{
+	int err;
+
+	err = kstrtoull(str, 0, &time_travel_start);
+	if (err)
+		return err;
+
+	time_travel_start_set = 1;
+	return 1;
+}
+
+__setup("time-travel-start=", setup_time_travel_start);
+__uml_help(setup_time_travel_start,
+"time-travel-start=<nanoseconds>\n"
+"    Configure the UML instance's wall clock to start at this value rather than\n"
+"    the host's wall clock at the time of UML boot.\n\n");
+
+static struct kobject *bc_time_kobject;
+
+static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%llx", bc_message);
+}
+
+static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	u64 user_bc_message;
+
+	ret = kstrtou64(buf, 0, &user_bc_message);
+	if (ret)
+		return ret;
+
+	bc_message = user_bc_message;
+
+	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
+	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
+	return count;
+}
+
+static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
+
+static int __init um_bc_start(void)
+{
+	if (time_travel_mode != TT_MODE_EXTERNAL)
+		return 0;
+
+	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
+	if (!bc_time_kobject)
+		return 0;
+
+	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
+		pr_debug("failed to create the bc file in /sys/kernel/um_time");
+
+	return 0;
 }
+late_initcall(um_bc_start);
+#endif
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 9472079471bb..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -1,212 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/sched.h>
-#include <asm/pgtable.h>
+#include <linux/sched/signal.h>
+
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 #include <as-layout.h>
 #include <mem_user.h>
 #include <os.h>
 #include <skas.h>
+#include <kern_util.h>
 
-struct host_vm_change {
-	struct host_vm_op {
-		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
-		union {
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-				int fd;
-				__u64 offset;
-			} mmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-			} munmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-			} mprotect;
-		} u;
-	} ops[1];
-	int index;
-	struct mm_id *id;
-	void *data;
-	int force;
-};
-
-#define INIT_HVC(mm, force) \
-	((struct host_vm_change) \
-	 { .ops		= { { .type = NONE } },	\
-	   .id		= &mm->context.id, \
-       	   .data	= NULL, \
-	   .index	= 0, \
-	   .force	= force })
-
-static int do_ops(struct host_vm_change *hvc, int end,
-		  int finished)
-{
-	struct host_vm_op *op;
-	int i, ret = 0;
-
-	for (i = 0; i < end && !ret; i++) {
-		op = &hvc->ops[i];
-		switch (op->type) {
-		case MMAP:
-			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
-				  op->u.mmap.prot, op->u.mmap.fd,
-				  op->u.mmap.offset, finished, &hvc->data);
-			break;
-		case MUNMAP:
-			ret = unmap(hvc->id, op->u.munmap.addr,
-				    op->u.munmap.len, finished, &hvc->data);
-			break;
-		case MPROTECT:
-			ret = protect(hvc->id, op->u.mprotect.addr,
-				      op->u.mprotect.len, op->u.mprotect.prot,
-				      finished, &hvc->data);
-			break;
-		default:
-			printk(KERN_ERR "Unknown op type %d in do_ops\n",
-			       op->type);
-			BUG();
-			break;
-		}
-	}
+struct vm_ops {
+	struct mm_id *mm_idp;
 
-	return ret;
-}
+	int (*mmap)(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset);
+	int (*unmap)(struct mm_id *mm_idp,
+		     unsigned long virt, unsigned long len);
+};
 
-static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
-		    unsigned int prot, struct host_vm_change *hvc)
+static int kern_map(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset)
 {
-	__u64 offset;
-	struct host_vm_op *last;
-	int fd, ret = 0;
-
-	fd = phys_mapping(phys, &offset);
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MMAP) &&
-		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
-		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
-		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
-			last->u.mmap.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MMAP,
-				    .u = { .mmap = { .addr	= virt,
-						     .len	= len,
-						     .prot	= prot,
-						     .fd	= fd,
-						     .offset	= offset }
-			   } });
-	return ret;
+	/* TODO: Why is executable needed to be always set in the kernel? */
+	return os_map_memory((void *)virt, phys_fd, offset, len,
+			     prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+			     1);
 }
 
-static int add_munmap(unsigned long addr, unsigned long len,
-		      struct host_vm_change *hvc)
+static int kern_unmap(struct mm_id *mm_idp,
+		      unsigned long virt, unsigned long len)
 {
-	struct host_vm_op *last;
-	int ret = 0;
-
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MUNMAP) &&
-		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
-			last->u.munmap.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MUNMAP,
-			     	    .u = { .munmap = { .addr	= addr,
-						       .len	= len } } });
-	return ret;
+	return os_unmap_memory((void *)virt, len);
 }
 
-static int add_mprotect(unsigned long addr, unsigned long len,
-			unsigned int prot, struct host_vm_change *hvc)
+void report_enomem(void)
 {
-	struct host_vm_op *last;
-	int ret = 0;
-
-	if (hvc->index != 0) {
-		last = &hvc->ops[hvc->index - 1];
-		if ((last->type == MPROTECT) &&
-		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
-		   (last->u.mprotect.prot == prot)) {
-			last->u.mprotect.len += len;
-			return 0;
-		}
-	}
-
-	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
-		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
-		hvc->index = 0;
-	}
-
-	hvc->ops[hvc->index++] = ((struct host_vm_op)
-				  { .type	= MPROTECT,
-			     	    .u = { .mprotect = { .addr	= addr,
-							 .len	= len,
-							 .prot	= prot } } });
-	return ret;
+	printk(KERN_ERR "UML ran out of memory on the host side! "
+			"This can happen due to a memory limitation or "
+			"vm.max_map_count has been reached.\n");
 }
 
-#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
-
 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pte_t *pte;
-	int r, w, x, prot, ret = 0;
+	int ret = 0;
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if ((addr >= STUB_START) && (addr < STUB_END))
+		if (!pte_needsync(*pte))
 			continue;
 
-		r = pte_read(*pte);
-		w = pte_write(*pte);
-		x = pte_exec(*pte);
-		if (!pte_young(*pte)) {
-			r = 0;
-			w = 0;
-		} else if (!pte_dirty(*pte))
-			w = 0;
-
-		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-			(x ? UM_PROT_EXEC : 0));
-		if (hvc->force || pte_newpage(*pte)) {
-			if (pte_present(*pte))
-				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
-					       PAGE_SIZE, prot, hvc);
-			else
-				ret = add_munmap(addr, PAGE_SIZE, hvc);
-		} else if (pte_newprot(*pte))
-			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
+		if (pte_present(*pte)) {
+			__u64 offset;
+			unsigned long phys = pte_val(*pte) & PAGE_MASK;
+			int fd = phys_mapping(phys, &offset);
+			int r, w, x, prot;
+
+			r = pte_read(*pte);
+			w = pte_write(*pte);
+			x = pte_exec(*pte);
+			if (!pte_young(*pte)) {
+				r = 0;
+				w = 0;
+			} else if (!pte_dirty(*pte))
+				w = 0;
+
+			prot = (r ? UM_PROT_READ : 0) |
+			       (w ? UM_PROT_WRITE : 0) |
+			       (x ? UM_PROT_EXEC : 0);
+
+			ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
+					prot, fd, offset);
+		} else
+			ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
+
 		*pte = pte_mkuptodate(*pte);
 	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
 	return ret;
@@ -214,7 +91,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
 
 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -224,314 +101,125 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 	do {
 		next = pmd_addr_end(addr, end);
 		if (!pmd_present(*pmd)) {
-			if (hvc->force || pmd_newpage(*pmd)) {
-				ret = add_munmap(addr, next - addr, hvc);
+			if (pmd_needsync(*pmd)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
 				pmd_mkuptodate(*pmd);
 			}
 		}
-		else ret = update_pte_range(pmd, addr, next, hvc);
+		else ret = update_pte_range(pmd, addr, next, ops);
 	} while (pmd++, addr = next, ((addr < end) && !ret));
 	return ret;
 }
 
-static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
 				   unsigned long end,
-				   struct host_vm_change *hvc)
+				   struct vm_ops *ops)
 {
 	pud_t *pud;
 	unsigned long next;
 	int ret = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (!pud_present(*pud)) {
-			if (hvc->force || pud_newpage(*pud)) {
-				ret = add_munmap(addr, next - addr, hvc);
+			if (pud_needsync(*pud)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
 				pud_mkuptodate(*pud);
 			}
 		}
-		else ret = update_pmd_range(pud, addr, next, hvc);
+		else ret = update_pmd_range(pud, addr, next, ops);
 	} while (pud++, addr = next, ((addr < end) && !ret));
 	return ret;
 }
 
-void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force)
+static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
+				   unsigned long end,
+				   struct vm_ops *ops)
 {
-	pgd_t *pgd;
-	struct host_vm_change hvc;
-	unsigned long addr = start_addr, next;
+	p4d_t *p4d;
+	unsigned long next;
 	int ret = 0;
 
-	hvc = INIT_HVC(mm, force);
-	pgd = pgd_offset(mm, addr);
+	p4d = p4d_offset(pgd, addr);
 	do {
-		next = pgd_addr_end(addr, end_addr);
-		if (!pgd_present(*pgd)) {
-			if (force || pgd_newpage(*pgd)) {
-				ret = add_munmap(addr, next - addr, &hvc);
-				pgd_mkuptodate(*pgd);
+		next = p4d_addr_end(addr, end);
+		if (!p4d_present(*p4d)) {
+			if (p4d_needsync(*p4d)) {
+				ret = ops->unmap(ops->mm_idp, addr,
+						 next - addr);
+				p4d_mkuptodate(*p4d);
 			}
-		}
-		else ret = update_pud_range(pgd, addr, next, &hvc);
-	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
-
-	if (!ret)
-		ret = do_ops(&hvc, hvc.index, 1);
-
-	/* This is not an else because ret is modified above */
-	if (ret) {
-		printk(KERN_ERR "fix_range_common: failed, killing current "
-		       "process\n");
-		force_sig(SIGKILL, current);
-	}
+		} else
+			ret = update_pud_range(p4d, addr, next, ops);
+	} while (p4d++, addr = next, ((addr < end) && !ret));
+	return ret;
 }
 
-static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
+int um_tlb_sync(struct mm_struct *mm)
 {
-	struct mm_struct *mm;
 	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr, last;
-	int updated = 0, err;
-
-	mm = &init_mm;
-	for (addr = start; addr < end;) {
-		pgd = pgd_offset(mm, addr);
-		if (!pgd_present(*pgd)) {
-			last = ADD_ROUND(addr, PGDIR_SIZE);
-			if (last > end)
-				last = end;
-			if (pgd_newpage(*pgd)) {
-				updated = 1;
-				err = os_unmap_memory((void *) addr,
-						      last - addr);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pud = pud_offset(pgd, addr);
-		if (!pud_present(*pud)) {
-			last = ADD_ROUND(addr, PUD_SIZE);
-			if (last > end)
-				last = end;
-			if (pud_newpage(*pud)) {
-				updated = 1;
-				err = os_unmap_memory((void *) addr,
-						      last - addr);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (!pmd_present(*pmd)) {
-			last = ADD_ROUND(addr, PMD_SIZE);
-			if (last > end)
-				last = end;
-			if (pmd_newpage(*pmd)) {
-				updated = 1;
-				err = os_unmap_memory((void *) addr,
-						      last - addr);
-				if (err < 0)
-					panic("munmap failed, errno = %d\n",
-					      -err);
-			}
-			addr = last;
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, addr);
-		if (!pte_present(*pte) || pte_newpage(*pte)) {
-			updated = 1;
-			err = os_unmap_memory((void *) addr,
-					      PAGE_SIZE);
-			if (err < 0)
-				panic("munmap failed, errno = %d\n",
-				      -err);
-			if (pte_present(*pte))
-				map_memory(addr,
-					   pte_val(*pte) & PAGE_MASK,
-					   PAGE_SIZE, 1, 1, 1);
-		}
-		else if (pte_newprot(*pte)) {
-			updated = 1;
-			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
-		}
-		addr += PAGE_SIZE;
-	}
-	return updated;
-}
+	struct vm_ops ops;
+	unsigned long addr, next;
+	int ret = 0;
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	struct mm_struct *mm = vma->vm_mm;
-	void *flush = NULL;
-	int r, w, x, prot, err = 0;
-	struct mm_id *mm_id;
-
-	address &= PAGE_MASK;
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto kill;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto kill;
-
-	pmd = pmd_offset(pud, address);
-	if (!pmd_present(*pmd))
-		goto kill;
-
-	pte = pte_offset_kernel(pmd, address);
-
-	r = pte_read(*pte);
-	w = pte_write(*pte);
-	x = pte_exec(*pte);
-	if (!pte_young(*pte)) {
-		r = 0;
-		w = 0;
-	} else if (!pte_dirty(*pte)) {
-		w = 0;
-	}
+	guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
 
-	mm_id = &mm->context.id;
-	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-		(x ? UM_PROT_EXEC : 0));
-	if (pte_newpage(*pte)) {
-		if (pte_present(*pte)) {
-			unsigned long long offset;
-			int fd;
+	if (mm->context.sync_tlb_range_to == 0)
+		return 0;
 
-			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
-				  1, &flush);
-		}
-		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
+	ops.mm_idp = &mm->context.id;
+	if (mm == &init_mm) {
+		ops.mmap = kern_map;
+		ops.unmap = kern_unmap;
+	} else {
+		ops.mmap = map;
+		ops.unmap = unmap;
 	}
-	else if (pte_newprot(*pte))
-		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
-
-	if (err)
-		goto kill;
 
-	*pte = pte_mkuptodate(*pte);
-
-	return;
-
-kill:
-	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
-	force_sig(SIGKILL, current);
-}
-
-pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
-{
-	return pgd_offset(mm, address);
-}
-
-pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
-{
-	return pud_offset(pgd, address);
-}
-
-pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
-{
-	return pmd_offset(pud, address);
-}
+	addr = mm->context.sync_tlb_range_from;
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
+		if (!pgd_present(*pgd)) {
+			if (pgd_needsync(*pgd)) {
+				ret = ops.unmap(ops.mm_idp, addr,
+						next - addr);
+				pgd_mkuptodate(*pgd);
+			}
+		} else
+			ret = update_p4d_range(pgd, addr, next, &ops);
+	} while (pgd++, addr = next,
+		 ((addr < mm->context.sync_tlb_range_to) && !ret));
 
-pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
-{
-	return pte_offset_kernel(pmd, address);
-}
+	if (ret == -ENOMEM)
+		report_enomem();
 
-pte_t *addr_pte(struct task_struct *task, unsigned long addr)
-{
-	pgd_t *pgd = pgd_offset(task->mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
-	pmd_t *pmd = pmd_offset(pud, addr);
+	mm->context.sync_tlb_range_from = 0;
+	mm->context.sync_tlb_range_to = 0;
 
-	return pte_offset_map(pmd, addr);
+	return ret;
 }
 
 void flush_tlb_all(void)
 {
-	flush_tlb_mm(current->mm);
-}
-
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	flush_tlb_kernel_range_common(start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
-	flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
-	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force)
-{
-	fix_range_common(mm, start_addr, end_addr, force);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
-{
-	if (vma->vm_mm == NULL)
-		flush_tlb_kernel_range_common(start, end);
-	else fix_range(vma->vm_mm, start, end, 0);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
-void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-			unsigned long end)
-{
 	/*
 	 * Don't bother flushing if this address space is about to be
 	 * destroyed.
 	 */
-	if (atomic_read(&mm->mm_users) == 0)
+	if (atomic_read(&current->mm->mm_users) == 0)
 		return;
 
-	fix_range(mm, start, end, 0);
+	flush_tlb_mm(current->mm);
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = mm->mmap;
-
-	while (vma != NULL) {
-		fix_range(mm, vma->vm_start, vma->vm_end, 0);
-		vma = vma->vm_next;
-	}
-}
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
-void force_flush_all(void)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = mm->mmap;
-
-	while (vma != NULL) {
-		fix_range(mm, vma->vm_start, vma->vm_end, 1);
-		vma = vma->vm_next;
-	}
+	for_each_vma(vmi, vma)
+		um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
 }
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 089f3987e273..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -1,14 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/hardirq.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/sched/debug.h>
 #include <asm/current.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <arch.h>
 #include <as-layout.h>
@@ -17,7 +18,123 @@
 #include <skas.h>
 
 /*
- * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	if (likely(mmap_read_trylock(mm)))
+		return true;
+
+	if (!is_user)
+		return false;
+
+	return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+	/*
+	 * We don't have this operation yet.
+	 *
+	 * It should be easy enough to do: it's basically a
+	 *    atomic_long_try_cmpxchg_acquire()
+	 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+	 * it also needs the proper lockdep magic etc.
+	 */
+	return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	mmap_read_unlock(mm);
+	if (!is_user)
+		return false;
+
+	return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalend to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+			unsigned long addr, bool is_user)
+{
+	struct vm_area_struct *vma;
+
+	if (!get_mmap_lock_carefully(mm, is_user))
+		return NULL;
+
+	vma = find_vma(mm, addr);
+	if (likely(vma && (vma->vm_start <= addr)))
+		return vma;
+
+	/*
+	 * Well, dang. We might still be successful, but only
+	 * if we can extend a vma to do so.
+	 */
+	if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+		mmap_read_unlock(mm);
+		return NULL;
+	}
+
+	/*
+	 * We can try to upgrade the mmap lock atomically,
+	 * in which case we can continue to use the vma
+	 * we already looked up.
+	 *
+	 * Otherwise we'll have to drop the mmap lock and
+	 * re-take it, and also look up the vma again,
+	 * re-checking it.
+	 */
+	if (!mmap_upgrade_trylock(mm)) {
+		if (!upgrade_mmap_lock_carefully(mm, is_user))
+			return NULL;
+
+		vma = find_vma(mm, addr);
+		if (!vma)
+			goto fail;
+		if (vma->vm_start <= addr)
+			goto success;
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto fail;
+	}
+
+	if (expand_stack_locked(vma, addr))
+		goto fail;
+
+success:
+	mmap_write_downgrade(mm);
+	return vma;
+
+fail:
+	mmap_write_unlock(mm);
+	return NULL;
+}
+
+/*
+ * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
  * segv().
  */
 int handle_page_fault(unsigned long address, unsigned long ip,
@@ -25,79 +142,68 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	pgd_t *pgd;
-	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	int err = -EFAULT;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 (is_write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_DEFAULT;
 
 	*code_out = SEGV_MAPERR;
 
 	/*
-	 * If the fault was during atomic operation, don't take the fault, just
+	 * If the fault was with pagefaults disabled, don't take the fault, just
 	 * fail.
 	 */
-	if (in_atomic())
+	if (faulthandler_disabled())
 		goto out_nosemaphore;
 
+	if (is_user)
+		flags |= FAULT_FLAG_USER;
 retry:
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, address);
+	vma = um_lock_mm_and_find_vma(mm, address, is_user);
 	if (!vma)
-		goto out;
-	else if (vma->vm_start <= address)
-		goto good_area;
-	else if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto out;
-	else if (is_user && !ARCH_IS_STACKGROW(address))
-		goto out;
-	else if (expand_stack(vma, address))
-		goto out;
+		goto out_nosemaphore;
 
-good_area:
 	*code_out = SEGV_ACCERR;
-	if (is_write && !(vma->vm_flags & VM_WRITE))
-		goto out;
-
-	/* Don't require VM_READ|VM_EXEC for write faults! */
-	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
-		goto out;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto out;
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* Don't require VM_READ|VM_EXEC for write faults! */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto out;
+	}
 
 	do {
-		int fault;
+		vm_fault_t fault;
 
-		fault = handle_mm_fault(mm, vma, address, flags);
+		fault = handle_mm_fault(vma, address, flags, NULL);
 
 		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 			goto out_nosemaphore;
 
+		/* The fault is fully completed (including releasing mmap lock) */
+		if (fault & VM_FAULT_COMPLETED)
+			return 0;
+
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
 				goto out_of_memory;
+			} else if (fault & VM_FAULT_SIGSEGV) {
+				goto out;
 			} else if (fault & VM_FAULT_SIGBUS) {
 				err = -EACCES;
 				goto out;
 			}
 			BUG();
 		}
-		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
-			if (fault & VM_FAULT_RETRY) {
-				flags &= ~FAULT_FLAG_ALLOW_RETRY;
-				flags |= FAULT_FLAG_TRIED;
-
-				goto retry;
-			}
+		if (fault & VM_FAULT_RETRY) {
+			flags |= FAULT_FLAG_TRIED;
+
+			goto retry;
 		}
 
-		pgd = pgd_offset(mm, address);
-		pud = pud_offset(pgd, address);
-		pmd = pmd_offset(pud, address);
+		pmd = pmd_off(mm, address);
 		pte = pte_offset_kernel(pmd, address);
 	} while (!pte_present(*pte));
 	err = 0;
@@ -112,9 +218,9 @@ good_area:
 #if 0
 	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
 #endif
-	flush_tlb_page(vma, address);
+
 out:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 out_nosemaphore:
 	return err;
 
@@ -123,11 +229,12 @@ out_of_memory:
 	 * We ran out of memory, call the OOM killer, and return the userspace
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
+	if (!is_user)
+		goto out_nosemaphore;
 	pagefault_out_of_memory();
 	return 0;
 }
-EXPORT_SYMBOL(handle_page_fault);
 
 static void show_segv_info(struct uml_pt_regs *regs)
 {
@@ -140,7 +247,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
 		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
@@ -152,19 +259,14 @@ static void show_segv_info(struct uml_pt_regs *regs)
 
 static void bad_segv(struct faultinfo fi, unsigned long ip)
 {
-	struct siginfo si;
-
-	si.si_signo = SIGSEGV;
-	si.si_code = SEGV_ACCERR;
-	si.si_addr = (void __user *) FAULT_ADDRESS(fi);
 	current->thread.arch.faultinfo = fi;
-	force_sig_info(SIGSEGV, &si, current);
+	force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi));
 }
 
 void fatal_sigsegv(void)
 {
-	force_sigsegv(SIGSEGV, current);
-	do_signal();
+	force_fatal_sig(SIGSEGV);
+	do_signal(&current->thread.regs);
 	/*
 	 * This is to tell gcc that we're not returning - do_signal
 	 * can, in general, return, but in this case, it's not, since
@@ -173,7 +275,19 @@ void fatal_sigsegv(void)
 	os_dump_core();
 }
 
-void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+/**
+ * segv_handler() - the SIGSEGV handler
+ * @sig:	the signal number
+ * @unused_si:	the signal info struct; unused in this handler
+ * @regs:	the ptrace register information
+ * @mc:		the mcontext of the signal
+ *
+ * The handler first extracts the faultinfo from the UML ptrace regs struct.
+ * If the userfault did not happen in an UML userspace process, bad_segv is called.
+ * Otherwise the signal did happen in a cloned userspace process, handle it.
+ */
+void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+		  void *mc)
 {
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
 
@@ -182,7 +296,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 		bad_segv(*fi, UPT_IP(regs));
 		return;
 	}
-	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc);
 }
 
 /*
@@ -192,26 +306,55 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
  * give us bad data!
  */
 unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
-		   struct uml_pt_regs *regs)
+		   struct uml_pt_regs *regs, void *mc)
 {
-	struct siginfo si;
-	jmp_buf *catcher;
+	int si_code;
 	int err;
 	int is_write = FAULT_WRITE(fi);
 	unsigned long address = FAULT_ADDRESS(fi);
 
-	if (!is_user && (address >= start_vm) && (address < end_vm)) {
-		flush_tlb_kernel_vm();
-		return 0;
+	if (!is_user && regs)
+		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
+
+	if (!is_user && address >= start_vm && address < end_vm) {
+		/*
+		 * Kernel has pending updates from set_ptes that were not
+		 * flushed yet. Syncing them should fix the pagefault (if not
+		 * we'll get here again and panic).
+		 */
+		err = um_tlb_sync(&init_mm);
+		if (err == -ENOMEM)
+			report_enomem();
+		if (err)
+			panic("Failed to sync kernel TLBs: %d", err);
+		goto out;
+	}
+	else if (current->pagefault_disabled) {
+		if (!mc) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault with pagefaults disabled but no mcontext");
+		}
+		if (!current->thread.segv_continue) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault without recovery target");
+		}
+		mc_set_rip(mc, current->thread.segv_continue);
+		current->thread.segv_continue = NULL;
+		goto out;
 	}
 	else if (current->mm == NULL) {
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Segfault with no mm");
 	}
+	else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) {
+		show_regs(container_of(regs, struct pt_regs, regs));
+		panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx",
+		       address, ip);
+	}
 
-	if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
+	if (SEGV_IS_FIXABLE(&fi))
 		err = handle_page_fault(address, ip, is_write, is_user,
-					&si.si_code);
+					&si_code);
 	else {
 		err = -EFAULT;
 		/*
@@ -222,17 +365,10 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		address = 0;
 	}
 
-	catcher = current->thread.fault_catcher;
 	if (!err)
-		return 0;
-	else if (catcher != NULL) {
-		current->thread.fault_addr = (void *) address;
-		UML_LONGJMP(catcher, 1);
-	}
-	else if (current->thread.fault_addr != NULL)
-		panic("fault_addr set but no fault catcher");
+		goto out;
 	else if (!is_user && arch_fixup(ip, regs))
-		return 0;
+		goto out;
 
 	if (!is_user) {
 		show_regs(container_of(regs, struct pt_regs, regs));
@@ -243,27 +379,25 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	show_segv_info(regs);
 
 	if (err == -EACCES) {
-		si.si_signo = SIGBUS;
-		si.si_errno = 0;
-		si.si_code = BUS_ADRERR;
-		si.si_addr = (void __user *)address;
 		current->thread.arch.faultinfo = fi;
-		force_sig_info(SIGBUS, &si, current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
 	} else {
 		BUG_ON(err != -EFAULT);
-		si.si_signo = SIGSEGV;
-		si.si_addr = (void __user *) address;
 		current->thread.arch.faultinfo = fi;
-		force_sig_info(SIGSEGV, &si, current);
+		force_sig_fault(SIGSEGV, si_code, (void __user *) address);
 	}
+
+out:
+	if (regs)
+		current->thread.segv_regs = NULL;
+
 	return 0;
 }
 
-void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
+void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
+		  void *mc)
 {
-	struct faultinfo *fi;
-	struct siginfo clean_si;
-
+	int code, err;
 	if (!UPT_IS_USER(regs)) {
 		if (sig == SIGBUS)
 			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
@@ -273,44 +407,24 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
 
 	arch_examine_signal(sig, regs);
 
-	memset(&clean_si, 0, sizeof(clean_si));
-	clean_si.si_signo = si->si_signo;
-	clean_si.si_errno = si->si_errno;
-	clean_si.si_code = si->si_code;
-	switch (sig) {
-	case SIGILL:
-	case SIGFPE:
-	case SIGSEGV:
-	case SIGBUS:
-	case SIGTRAP:
-		fi = UPT_FAULTINFO(regs);
-		clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
+	/* Is the signal layout for the signal known?
+	 * Signal data must be scrubbed to prevent information leaks.
+	 */
+	code = si->si_code;
+	err = si->si_errno;
+	if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
+		struct faultinfo *fi = UPT_FAULTINFO(regs);
 		current->thread.arch.faultinfo = *fi;
-#ifdef __ARCH_SI_TRAPNO
-		clean_si.si_trapno = si->si_trapno;
-#endif
-		break;
-	default:
-		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
-			sig, si->si_code);
+		force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi));
+	} else {
+		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
+		       sig, code, err);
+		force_sig(sig);
 	}
-
-	force_sig_info(sig, &clean_si, current);
-}
-
-void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
-{
-	if (current->thread.fault_catcher != NULL)
-		UML_LONGJMP(current->thread.fault_catcher, 1);
-	else
-		relay_signal(sig, si, regs);
 }
 
-void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
+	   void *mc)
 {
 	do_IRQ(WINCH_IRQ, regs);
 }
-
-void trap_init(void)
-{
-}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 87df5e3acc26..e2b24e1ecfa6 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -1,19 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
+#include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/ctype.h>
 #include <linux/module.h>
+#include <linux/panic_notifier.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
+#include <linux/string_choices.h>
 #include <linux/utsname.h>
 #include <linux/sched.h>
-#include <asm/pgtable.h>
+#include <linux/sched/task.h>
+#include <linux/kmsg_dump.h>
+#include <linux/suspend.h>
+#include <linux/random.h>
+#include <linux/smp-internal.h>
+
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/text-patching.h>
 #include <as-layout.h>
 #include <arch.h>
 #include <init.h>
@@ -22,7 +34,10 @@
 #include <mem_user.h>
 #include <os.h>
 
-#define DEFAULT_COMMAND_LINE "root=98:0"
+#include "um_arch.h"
+
+#define DEFAULT_COMMAND_LINE_ROOT "root=98:0"
+#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty0"
 
 /* Changed in add_arg and setup_arch, which run before SMP is started */
 static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 };
@@ -30,7 +45,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 };
 static void __init add_arg(char *arg)
 {
 	if (strlen(command_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) {
-		printf("add_arg: Too many command line arguments!\n");
+		os_warn("add_arg: Too many command line arguments!\n");
 		exit(1);
 	}
 	if (strlen(command_line) > 0)
@@ -40,43 +55,42 @@ static void __init add_arg(char *arg)
 
 /*
  * These fields are initialized at boot time and not changed.
- * XXX This structure is used only in the non-SMP case.  Maybe this
- * should be moved to smp.c.
  */
 struct cpuinfo_um boot_cpu_data = {
 	.loops_per_jiffy	= 0,
-	.ipi_pipe		= { -1, -1 }
+	.cache_alignment	= L1_CACHE_BYTES,
+	.x86_capability		= { 0 }
 };
 
-union thread_union cpu0_irqstack
-	__attribute__((__section__(".data..init_irqstack"))) =
-		{ INIT_THREAD_INFO(init_task) };
+EXPORT_SYMBOL(boot_cpu_data);
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return os_process_pc(userspace_pid[0]);
-}
 
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	int index = 0;
+	int i = 0;
 
-#ifdef CONFIG_SMP
-	index = (struct cpuinfo_um *) v - cpu_data;
-	if (!cpu_online(index))
+#if IS_ENABLED(CONFIG_SMP)
+	i = (uintptr_t) v - 1;
+	if (!cpu_online(i))
 		return 0;
 #endif
 
-	seq_printf(m, "processor\t: %d\n", index);
+	seq_printf(m, "processor\t: %d\n", i);
 	seq_printf(m, "vendor_id\t: User Mode Linux\n");
 	seq_printf(m, "model name\t: UML\n");
 	seq_printf(m, "mode\t\t: skas\n");
 	seq_printf(m, "host\t\t: %s\n", host_info);
-	seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+	seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU)));
+	seq_printf(m, "flags\t\t:");
+	for (i = 0; i < 32*NCAPINTS; i++)
+		if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
+			seq_printf(m, " %s", x86_cap_flags[i]);
+	seq_printf(m, "\n");
+	seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment);
+	seq_printf(m, "bogomips\t: %lu.%02lu\n",
 		   loops_per_jiffy/(500000/HZ),
 		   (loops_per_jiffy/(5000/HZ)) % 100);
 
@@ -85,7 +99,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+	if (*pos < nr_cpu_ids)
+		return (void *)(uintptr_t)(*pos + 1);
+	return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -113,14 +129,13 @@ unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
 unsigned long end_vm;
 
-/* Set in uml_ncpus_setup */
-int ncpus = 1;
-
 /* Set in early boot */
-static int have_root __initdata = 0;
+static int have_root __initdata;
+static int have_console __initdata;
 
 /* Set in uml_mem_setup and modified in linux_main */
-long long physmem_size = 32 * 1024 * 1024;
+unsigned long long physmem_size = 64 * 1024 * 1024;
+EXPORT_SYMBOL(physmem_size);
 
 static const char *usage_string =
 "User Mode Linux v%s\n"
@@ -128,6 +143,7 @@ static const char *usage_string =
 
 static int __init uml_version_setup(char *line, int *add)
 {
+	/* Explicitly use printf() to show version in stdout */
 	printf("%s\n", init_utsname()->release);
 	exit(0);
 
@@ -154,42 +170,24 @@ __uml_setup("root=", uml_root_setup,
 "        root=/dev/ubd5\n\n"
 );
 
-static int __init no_skas_debug_setup(char *line, int *add)
+static int __init uml_console_setup(char *line, int *add)
 {
-	printf("'debug' is not necessary to gdb UML in skas mode - run \n");
-	printf("'gdb linux'\n");
-
+	have_console = 1;
 	return 0;
 }
 
-__uml_setup("debug", no_skas_debug_setup,
-"debug\n"
-"    this flag is not needed to run gdb on UML in skas mode\n\n"
+__uml_setup("console=", uml_console_setup,
+"console=<preferred console>\n"
+"    Specify the preferred console output driver\n\n"
 );
 
-#ifdef CONFIG_SMP
-static int __init uml_ncpus_setup(char *line, int *add)
-{
-	if (!sscanf(line, "%d", &ncpus)) {
-		printf("Couldn't parse [%s]\n", line);
-		return -1;
-	}
-
-	return 0;
-}
-
-__uml_setup("ncpus=", uml_ncpus_setup,
-"ncpus=<# of desired CPUs>\n"
-"    This tells an SMP kernel how many virtual processors to start.\n\n"
-);
-#endif
-
 static int __init Usage(char *line, int *add)
 {
 	const char **p;
 
 	printf(usage_string, init_utsname()->release);
 	p = &__uml_help_start;
+	/* Explicitly use printf() to show help in stdout */
 	while (p < &__uml_help_end) {
 		printf("%s", *p);
 		p++;
@@ -233,42 +231,86 @@ static void __init uml_postsetup(void)
 static int panic_exit(struct notifier_block *self, unsigned long unused1,
 		      void *unused2)
 {
+	kmsg_dump(KMSG_DUMP_PANIC);
 	bust_spinlocks(1);
-	show_regs(&(current->thread.regs));
 	bust_spinlocks(0);
 	uml_exitcode = 1;
 	os_dump_core();
-	return 0;
+
+	return NOTIFY_DONE;
 }
 
 static struct notifier_block panic_exit_notifier = {
-	.notifier_call 		= panic_exit,
-	.next 			= NULL,
-	.priority 		= 0
+	.notifier_call	= panic_exit,
+	.priority	= INT_MAX - 1, /* run as 2nd notifier, won't return */
 };
 
+void uml_finishsetup(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &panic_exit_notifier);
+
+	uml_postsetup();
+
+	new_thread_handler();
+}
+
 /* Set during early boot */
+unsigned long stub_start;
 unsigned long task_size;
 EXPORT_SYMBOL(task_size);
 
-unsigned long host_task_size;
-
 unsigned long brk_start;
-unsigned long end_iomem;
-EXPORT_SYMBOL(end_iomem);
 
 #define MIN_VMALLOC (32 * 1024 * 1024)
 
-extern char __binary_start;
+static void __init parse_host_cpu_flags(char *line)
+{
+	int i;
+	for (i = 0; i < 32*NCAPINTS; i++) {
+		if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i]))
+			set_cpu_cap(&boot_cpu_data, i);
+	}
+}
+
+static void __init parse_cache_line(char *line)
+{
+	long res;
+	char *to_parse = strstr(line, ":");
+	if (to_parse) {
+		to_parse++;
+		while (*to_parse != 0 && isspace(*to_parse)) {
+			to_parse++;
+		}
+		if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res))
+			boot_cpu_data.cache_alignment = res;
+		else
+			boot_cpu_data.cache_alignment = L1_CACHE_BYTES;
+	}
+}
 
-int __init linux_main(int argc, char **argv)
+static unsigned long __init get_top_address(char **envp)
+{
+	unsigned long top_addr = (unsigned long) &top_addr;
+	int i;
+
+	/* The earliest variable should be after the program name in ELF */
+	for (i = 0; envp[i]; i++) {
+		if ((unsigned long) envp[i] > top_addr)
+			top_addr = (unsigned long) envp[i];
+	}
+
+	return PAGE_ALIGN(top_addr + 1);
+}
+
+int __init linux_main(int argc, char **argv, char **envp)
 {
 	unsigned long avail, diff;
 	unsigned long virtmem_size, max_physmem;
+	unsigned long host_task_size;
 	unsigned long stack;
 	unsigned int i;
 	int add;
-	char * mode;
 
 	for (i = 1; i < argc; i++) {
 		if ((i == 1) && (argv[i][0] == ' '))
@@ -279,26 +321,31 @@ int __init linux_main(int argc, char **argv)
 			add_arg(argv[i]);
 	}
 	if (have_root == 0)
-		add_arg(DEFAULT_COMMAND_LINE);
+		add_arg(DEFAULT_COMMAND_LINE_ROOT);
+
+	if (have_console == 0)
+		add_arg(DEFAULT_COMMAND_LINE_CONSOLE);
+
+	host_task_size = get_top_address(envp);
+	/* reserve a few pages for the stubs */
+	stub_start = host_task_size - STUB_SIZE;
+	host_task_size = stub_start;
+
+	/* Limit TASK_SIZE to what is addressable by the page table */
+	task_size = host_task_size;
+	if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE)
+		task_size = PTRS_PER_PGD * PGDIR_SIZE;
 
-	host_task_size = os_get_top_address();
 	/*
 	 * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
 	 * out
 	 */
-	task_size = host_task_size & PGDIR_MASK;
+	task_size = task_size & PGDIR_MASK;
 
 	/* OS sanity checks that need to happen before the kernel runs */
 	os_early_checks();
 
-	can_do_skas();
-
-	if (proc_mm && ptrace_faultinfo)
-		mode = "SKAS3";
-	else
-		mode = "SKAS0";
-
-	printf("UML running in %s mode\n", mode);
+	get_host_cpu_features(parse_host_cpu_flags, parse_cache_line);
 
 	brk_start = (unsigned long) sbrk(0);
 
@@ -307,54 +354,32 @@ int __init linux_main(int argc, char **argv)
 	 * so they actually get what they asked for. This should
 	 * add zero for non-exec shield users
 	 */
-
-	diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+	diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end);
 	if (diff > 1024 * 1024) {
-		printf("Adding %ld bytes to physical memory to account for "
-		       "exec-shield gap\n", diff);
-		physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+		os_info("Adding %ld bytes to physical memory to account for "
+			"exec-shield gap\n", diff);
+		physmem_size += diff;
 	}
 
-	uml_physmem = (unsigned long) &__binary_start & PAGE_MASK;
+	uml_physmem = (unsigned long) __binary_start & PAGE_MASK;
 
 	/* Reserve up to 4M after the current brk */
 	uml_reserved = ROUND_4M(brk_start) + (1 << 22);
 
 	setup_machinename(init_utsname()->machine);
 
-	highmem = 0;
-	iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
-	max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
-
-	/*
-	 * Zones have to begin on a 1 << MAX_ORDER page boundary,
-	 * so this makes sure that's true for highmem
-	 */
-	max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1);
-	if (physmem_size + iomem_size > max_physmem) {
-		highmem = physmem_size + iomem_size - max_physmem;
-		physmem_size -= highmem;
-#ifndef CONFIG_HIGHMEM
-		highmem = 0;
-		printf("CONFIG_HIGHMEM not enabled - physical memory shrunk "
-		       "to %Lu bytes\n", physmem_size);
-#endif
+	physmem_size = PAGE_ALIGN(physmem_size);
+	max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC;
+	if (physmem_size > max_physmem) {
+		physmem_size = max_physmem;
+		os_info("Physical memory size shrunk to %llu bytes\n",
+			physmem_size);
 	}
 
 	high_physmem = uml_physmem + physmem_size;
-	end_iomem = high_physmem + iomem_size;
-	high_memory = (void *) end_iomem;
 
 	start_vm = VMALLOC_START;
 
-	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
-	if (init_maps(physmem_size, iomem_size, highmem)) {
-		printf("Failed to allocate mem_map for %Lu bytes of physical "
-		       "memory and %Lu bytes of highmem\n", physmem_size,
-		       highmem);
-		exit(1);
-	}
-
 	virtmem_size = physmem_size;
 	stack = (unsigned long) argv;
 	stack &= ~(1024 * 1024 - 1);
@@ -364,39 +389,70 @@ int __init linux_main(int argc, char **argv)
 	end_vm = start_vm + virtmem_size;
 
 	if (virtmem_size < physmem_size)
-		printf("Kernel virtual memory size shrunk to %lu bytes\n",
-		       virtmem_size);
+		os_info("Kernel virtual memory size shrunk to %lu bytes\n",
+			virtmem_size);
 
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &panic_exit_notifier);
-
-	uml_postsetup();
+	arch_task_struct_size = sizeof(struct task_struct) + host_fp_size;
 
-	stack_protections((unsigned long) &init_thread_info);
 	os_flush_stdout();
 
 	return start_uml();
 }
 
+int __init __weak read_initrd(void)
+{
+	return 0;
+}
+
 void __init setup_arch(char **cmdline_p)
 {
+	u8 rng_seed[32];
+
+	stack_protections((unsigned long) init_task.stack);
+	setup_physmem(uml_physmem, uml_reserved, physmem_size);
+	uml_dtb_init();
+	read_initrd();
+
 	paging_init();
-	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
+	strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 	setup_hostinfo(host_info, sizeof host_info);
+	prefill_possible_map();
+
+	if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
+		add_bootloader_randomness(rng_seed, sizeof(rng_seed));
+		memzero_explicit(rng_seed, sizeof(rng_seed));
+	}
 }
 
-void __init check_bugs(void)
+void __init arch_cpu_finalize_init(void)
 {
 	arch_check_bugs();
 	os_check_bugs();
 }
 
+void apply_seal_endbr(s32 *start, s32 *end)
+{
+}
+
+void apply_retpolines(s32 *start, s32 *end)
+{
+}
+
+void apply_returns(s32 *start, s32 *end)
+{
+}
+
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+}
+
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
 
-#ifdef CONFIG_SMP
+#if IS_ENABLED(CONFIG_SMP)
 void alternatives_smp_module_add(struct module *mod, char *name,
 				 void *locks, void *locks_end,
 				 void *text,  void *text_end)
@@ -407,3 +463,90 @@ void alternatives_smp_module_del(struct module *mod)
 {
 }
 #endif
+
+void *text_poke(void *addr, const void *opcode, size_t len)
+{
+	/*
+	 * In UML, the only reference to this function is in
+	 * apply_relocate_add(), which shouldn't ever actually call this
+	 * because UML doesn't have live patching.
+	 */
+	WARN_ON(1);
+
+	return memcpy(addr, opcode, len);
+}
+
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+	return text_poke(addr, opcode, len);
+}
+
+void smp_text_poke_sync_each_cpu(void)
+{
+}
+
+void uml_pm_wake(void)
+{
+	pm_system_wakeup();
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int um_suspend_valid(suspend_state_t state)
+{
+	return state == PM_SUSPEND_MEM;
+}
+
+static int um_suspend_prepare(void)
+{
+	um_irqs_suspend();
+	return 0;
+}
+
+static int um_suspend_enter(suspend_state_t state)
+{
+	if (WARN_ON(state != PM_SUSPEND_MEM))
+		return -EINVAL;
+
+	/*
+	 * This is identical to the idle sleep, but we've just
+	 * (during suspend) turned off all interrupt sources
+	 * except for the ones we want, so now we can only wake
+	 * up on something we actually want to wake up on. All
+	 * timing has also been suspended.
+	 */
+	um_idle_sleep();
+	return 0;
+}
+
+static void um_suspend_finish(void)
+{
+	um_irqs_resume();
+}
+
+const struct platform_suspend_ops um_suspend_ops = {
+	.valid = um_suspend_valid,
+	.prepare = um_suspend_prepare,
+	.enter = um_suspend_enter,
+	.finish = um_suspend_finish,
+};
+
+static int init_pm_wake_signal(void)
+{
+	/*
+	 * In external time-travel mode we can't use signals to wake up
+	 * since that would mess with the scheduling. We'll have to do
+	 * some additional work to support wakeup on virtio devices or
+	 * similar, perhaps implementing a fake RTC controller that can
+	 * trigger wakeup (and request the appropriate scheduling from
+	 * the external scheduler when going to suspend.)
+	 */
+	if (time_travel_mode != TT_MODE_EXTERNAL)
+		register_pm_wake_signal();
+
+	suspend_set_ops(&um_suspend_ops);
+
+	return 0;
+}
+
+late_initcall(init_pm_wake_signal);
+#endif
diff --git a/arch/um/kernel/um_arch.h b/arch/um/kernel/um_arch.h
new file mode 100644
index 000000000000..46e731ab9dfc
--- /dev/null
+++ b/arch/um/kernel/um_arch.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __UML_ARCH_H__
+#define __UML_ARCH_H__
+
+extern void * __init uml_load_file(const char *filename, unsigned long long *size);
+
+#ifdef CONFIG_OF
+extern void __init uml_dtb_init(void);
+#else
+static inline void uml_dtb_init(void) { }
+#endif
+
+extern int __init read_initrd(void);
+
+#endif
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index f6cc3bd61781..72bc60ade347 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <asm/errno.h>
@@ -9,21 +9,21 @@
 #include <os.h>
 
 /* Changed by set_umid_arg */
-static int umid_inited = 0;
+static int umid_inited;
 
 static int __init set_umid_arg(char *name, int *add)
 {
 	int err;
 
 	if (umid_inited) {
-		printf("umid already set\n");
+		os_warn("umid already set\n");
 		return 0;
 	}
 
 	*add = 0;
 	err = set_umid(name);
 	if (err == -EEXIST)
-		printf("umid '%s' already in use\n", name);
+		os_warn("umid '%s' already in use\n", name);
 	else if (!err)
 		umid_inited = 1;
 
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 6899195602b7..a409d4b66114 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -1,4 +1,5 @@
-#include <asm-generic/vmlinux.lds.h>
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/vmlinux.lds.h>
 #include <asm/page.h>
 
 OUTPUT_FORMAT(ELF_FORMAT)
@@ -6,6 +7,12 @@ OUTPUT_ARCH(ELF_ARCH)
 ENTRY(_start)
 jiffies = jiffies_64;
 
+VERSION {
+  {
+    local: *;
+  };
+}
+
 SECTIONS
 {
   /* This must contain the right address - not quite the default ELF one.*/
@@ -18,10 +25,10 @@ SECTIONS
   __binary_start = START;
 
   . = START + SIZEOF_HEADERS;
+  . = ALIGN(PAGE_SIZE);
 
   _text = .;
   INIT_TEXT_SECTION(0)
-  . = ALIGN(PAGE_SIZE);
 
   .text      :
   {
@@ -29,6 +36,8 @@ SECTIONS
     TEXT_TEXT
     SCHED_TEXT
     LOCK_TEXT
+    IRQENTRY_TEXT
+    SOFTIRQENTRY_TEXT
     *(.fixup)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
@@ -68,8 +77,6 @@ SECTIONS
   .data    :
   {
     INIT_TASK_DATA(KERNEL_STACK_SIZE)
-    . = ALIGN(KERNEL_STACK_SIZE);
-    *(.data..init_irqstack)
     DATA_DATA
     *(.gnu.linkonce.d*)
     CONSTRUCTORS
@@ -85,6 +92,7 @@ SECTIONS
   }
 
   .got           : { *(.got.plt) *(.got) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
   .dynamic       : { *(.dynamic) }
   .tdata	  : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
   .tbss		  : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
@@ -104,8 +112,8 @@ SECTIONS
   PROVIDE (end = .);
 
   STABS_DEBUG
-
   DWARF_DEBUG
+  ELF_DETAILS
 
   DISCARDS
 }
diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S
index 16e49bfa2b42..53d719c04ba9 100644
--- a/arch/um/kernel/vmlinux.lds.S
+++ b/arch/um/kernel/vmlinux.lds.S
@@ -1,4 +1,4 @@
-
+#define RUNTIME_DISCARD_EXIT
 KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER);
 
 #ifdef CONFIG_LD_SCRIPT_STATIC
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 08ff5094fcdd..f8d672d570d9 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -1,20 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
 # 
 # Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
-# Licensed under the GPL
 #
 
-obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
+# Don't instrument UML-specific code
+KCOV_INSTRUMENT                := n
+
+obj-y = elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
 	registers.o sigio.o signal.o start_up.o time.o tty.o \
-	umid.o user_syms.o util.o drivers/ skas/
+	umid.o user_syms.o util.o skas/
 
-obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
+CFLAGS_signal.o += -Wframe-larger-than=4096
 
-USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
-	main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
-	tty.o umid.o util.o
+CFLAGS_main.o += -Wno-frame-larger-than
 
-HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \
-	echo -DHAVE_AIO_ABI )
-CFLAGS_aio.o += $(HAVE_AIO_ABI)
+obj-$(CONFIG_SMP) += smp.o
+
+USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
+	main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
+	tty.o umid.o util.o smp.o
 
-include arch/um/scripts/Makefile.rules
+include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
deleted file mode 100644
index 3a6bc2af0961..000000000000
--- a/arch/um/os-Linux/aio.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <unistd.h>
-#include <sched.h>
-#include <signal.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <asm/unistd.h>
-#include <aio.h>
-#include <init.h>
-#include <kern_util.h>
-#include <os.h>
-
-struct aio_thread_req {
-	enum aio_type type;
-	int io_fd;
-	unsigned long long offset;
-	char *buf;
-	int len;
-	struct aio_context *aio;
-};
-
-#if defined(HAVE_AIO_ABI)
-#include <linux/aio_abi.h>
-
-/*
- * If we have the headers, we are going to build with AIO enabled.
- * If we don't have aio in libc, we define the necessary stubs here.
- */
-
-#if !defined(HAVE_AIO_LIBC)
-
-static long io_setup(int n, aio_context_t *ctxp)
-{
-	return syscall(__NR_io_setup, n, ctxp);
-}
-
-static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
-{
-	return syscall(__NR_io_submit, ctx, nr, iocbpp);
-}
-
-static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
-			 struct io_event *events, struct timespec *timeout)
-{
-	return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
-}
-
-#endif
-
-/*
- * The AIO_MMAP cases force the mmapped page into memory here
- * rather than in whatever place first touches the data.  I used
- * to do this by touching the page, but that's delicate because
- * gcc is prone to optimizing that away.  So, what's done here
- * is we read from the descriptor from which the page was
- * mapped.  The caller is required to pass an offset which is
- * inside the page that was mapped.  Thus, when the read
- * returns, we know that the page is in the page cache, and
- * that it now backs the mmapped area.
- */
-
-static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
-		  int len, unsigned long long offset, struct aio_context *aio)
-{
-	struct iocb *iocbp = & ((struct iocb) {
-				    .aio_data       = (unsigned long) aio,
-				    .aio_fildes     = fd,
-				    .aio_buf        = (unsigned long) buf,
-				    .aio_nbytes     = len,
-				    .aio_offset     = offset
-			     });
-	char c;
-
-	switch (type) {
-	case AIO_READ:
-		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
-		break;
-	case AIO_WRITE:
-		iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
-		break;
-	case AIO_MMAP:
-		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
-		iocbp->aio_buf = (unsigned long) &c;
-		iocbp->aio_nbytes = sizeof(c);
-		break;
-	default:
-		printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
-		return -EINVAL;
-	}
-
-	return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
-}
-
-/* Initialized in an initcall and unchanged thereafter */
-static aio_context_t ctx = 0;
-
-static int aio_thread(void *arg)
-{
-	struct aio_thread_reply reply;
-	struct io_event event;
-	int err, n, reply_fd;
-
-	signal(SIGWINCH, SIG_IGN);
-
-	while (1) {
-		n = io_getevents(ctx, 1, 1, &event, NULL);
-		if (n < 0) {
-			if (errno == EINTR)
-				continue;
-			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
-			       "errno = %d\n", errno);
-		}
-		else {
-			reply = ((struct aio_thread_reply)
-				{ .data = (void *) (long) event.data,
-						.err	= event.res });
-			reply_fd = ((struct aio_context *) reply.data)->reply_fd;
-			err = write(reply_fd, &reply, sizeof(reply));
-			if (err != sizeof(reply))
-				printk(UM_KERN_ERR "aio_thread - write failed, "
-				       "fd = %d, err = %d\n", reply_fd, errno);
-		}
-	}
-	return 0;
-}
-
-#endif
-
-static int do_not_aio(struct aio_thread_req *req)
-{
-	char c;
-	unsigned long long actual;
-	int n;
-
-	actual = lseek64(req->io_fd, req->offset, SEEK_SET);
-	if (actual != req->offset)
-		return -errno;
-
-	switch (req->type) {
-	case AIO_READ:
-		n = read(req->io_fd, req->buf, req->len);
-		break;
-	case AIO_WRITE:
-		n = write(req->io_fd, req->buf, req->len);
-		break;
-	case AIO_MMAP:
-		n = read(req->io_fd, &c, sizeof(c));
-		break;
-	default:
-		printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
-		       req->type);
-		return -EINVAL;
-	}
-
-	if (n < 0)
-		return -errno;
-	return 0;
-}
-
-/* These are initialized in initcalls and not changed */
-static int aio_req_fd_r = -1;
-static int aio_req_fd_w = -1;
-static int aio_pid = -1;
-static unsigned long aio_stack;
-
-static int not_aio_thread(void *arg)
-{
-	struct aio_thread_req req;
-	struct aio_thread_reply reply;
-	int err;
-
-	signal(SIGWINCH, SIG_IGN);
-	while (1) {
-		err = read(aio_req_fd_r, &req, sizeof(req));
-		if (err != sizeof(req)) {
-			if (err < 0)
-				printk(UM_KERN_ERR "not_aio_thread - "
-				       "read failed, fd = %d, err = %d\n",
-				       aio_req_fd_r,
-				       errno);
-			else {
-				printk(UM_KERN_ERR "not_aio_thread - short "
-				       "read, fd = %d, length = %d\n",
-				       aio_req_fd_r, err);
-			}
-			continue;
-		}
-		err = do_not_aio(&req);
-		reply = ((struct aio_thread_reply) { .data 	= req.aio,
-						     .err	= err });
-		err = write(req.aio->reply_fd, &reply, sizeof(reply));
-		if (err != sizeof(reply))
-			printk(UM_KERN_ERR "not_aio_thread - write failed, "
-			       "fd = %d, err = %d\n", req.aio->reply_fd, errno);
-	}
-
-	return 0;
-}
-
-static int init_aio_24(void)
-{
-	int fds[2], err;
-
-	err = os_pipe(fds, 1, 1);
-	if (err)
-		goto out;
-
-	aio_req_fd_w = fds[0];
-	aio_req_fd_r = fds[1];
-
-	err = os_set_fd_block(aio_req_fd_w, 0);
-	if (err)
-		goto out_close_pipe;
-
-	err = run_helper_thread(not_aio_thread, NULL,
-				CLONE_FILES | CLONE_VM, &aio_stack);
-	if (err < 0)
-		goto out_close_pipe;
-
-	aio_pid = err;
-	goto out;
-
-out_close_pipe:
-	close(fds[0]);
-	close(fds[1]);
-	aio_req_fd_w = -1;
-	aio_req_fd_r = -1;
-out:
-#ifndef HAVE_AIO_ABI
-	printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
-	       "build\n");
-#endif
-	printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
-	       "I/O thread\n");
-	return 0;
-}
-
-#ifdef HAVE_AIO_ABI
-#define DEFAULT_24_AIO 0
-static int init_aio_26(void)
-{
-	int err;
-
-	if (io_setup(256, &ctx)) {
-		err = -errno;
-		printk(UM_KERN_ERR "aio_thread failed to initialize context, "
-		       "err = %d\n", errno);
-		return err;
-	}
-
-	err = run_helper_thread(aio_thread, NULL,
-				CLONE_FILES | CLONE_VM, &aio_stack);
-	if (err < 0)
-		return err;
-
-	aio_pid = err;
-
-	printk(UM_KERN_INFO "Using 2.6 host AIO\n");
-	return 0;
-}
-
-static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
-{
-	struct aio_thread_reply reply;
-	int err;
-
-	err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
-	if (err) {
-		reply = ((struct aio_thread_reply) { .data = aio,
-					 .err  = err });
-		err = write(aio->reply_fd, &reply, sizeof(reply));
-		if (err != sizeof(reply)) {
-			err = -errno;
-			printk(UM_KERN_ERR "submit_aio_26 - write failed, "
-			       "fd = %d, err = %d\n", aio->reply_fd, -err);
-		}
-		else err = 0;
-	}
-
-	return err;
-}
-
-#else
-#define DEFAULT_24_AIO 1
-static int init_aio_26(void)
-{
-	return -ENOSYS;
-}
-
-static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
-{
-	return -ENOSYS;
-}
-#endif
-
-/* Initialized in an initcall and unchanged thereafter */
-static int aio_24 = DEFAULT_24_AIO;
-
-static int __init set_aio_24(char *name, int *add)
-{
-	aio_24 = 1;
-	return 0;
-}
-
-__uml_setup("aio=2.4", set_aio_24,
-"aio=2.4\n"
-"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
-"    available.  2.4 AIO is a single thread that handles one request at a\n"
-"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
-"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
-"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
-"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
-"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
-"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
-);
-
-static int init_aio(void)
-{
-	int err;
-
-	if (!aio_24) {
-		err = init_aio_26();
-		if (err && (errno == ENOSYS)) {
-			printk(UM_KERN_INFO "2.6 AIO not supported on the "
-			       "host - reverting to 2.4 AIO\n");
-			aio_24 = 1;
-		}
-		else return err;
-	}
-
-	if (aio_24)
-		return init_aio_24();
-
-	return 0;
-}
-
-/*
- * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
- * needs to be called when the kernel is running because it calls run_helper,
- * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
- * kernel does not run __exitcalls on shutdown, and can't because many of them
- * break when called outside of module unloading.
- */
-__initcall(init_aio);
-
-static void exit_aio(void)
-{
-	if (aio_pid != -1) {
-		os_kill_process(aio_pid, 1);
-		free_stack(aio_stack, 0);
-	}
-}
-
-__uml_exitcall(exit_aio);
-
-static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
-{
-	struct aio_thread_req req = { .type 		= type,
-				      .io_fd		= io_fd,
-				      .offset		= offset,
-				      .buf		= buf,
-				      .len		= len,
-				      .aio		= aio,
-	};
-	int err;
-
-	err = write(aio_req_fd_w, &req, sizeof(req));
-	if (err == sizeof(req))
-		err = 0;
-	else err = -errno;
-
-	return err;
-}
-
-int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
-	       unsigned long long offset, int reply_fd,
-	       struct aio_context *aio)
-{
-	aio->reply_fd = reply_fd;
-	if (aio_24)
-		return submit_aio_24(type, io_fd, buf, len, offset, aio);
-	else
-		return submit_aio_26(type, io_fd, buf, len, offset, aio);
-}
diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile
deleted file mode 100644
index 6c546dc9222b..000000000000
--- a/arch/um/os-Linux/drivers/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# 
-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
-#
-
-ethertap-objs := ethertap_kern.o ethertap_user.o
-tuntap-objs := tuntap_kern.o tuntap_user.o
-
-obj-y = 
-obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o
-obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
deleted file mode 100644
index 54183a679fdd..000000000000
--- a/arch/um/os-Linux/drivers/etap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* 
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __DRIVERS_ETAP_H
-#define __DRIVERS_ETAP_H
-
-#include <net_user.h>
-
-struct ethertap_data {
-	char *dev_name;
-	char *gate_addr;
-	int data_fd;
-	int control_fd;
-	void *dev;
-};
-
-extern const struct net_user_info ethertap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
deleted file mode 100644
index f424600a583f..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "etap.h"
-#include <net_kern.h>
-
-struct ethertap_init {
-	char *dev_name;
-	char *gate_addr;
-};
-
-static void etap_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct ethertap_data *epri;
-	struct ethertap_init *init = data;
-
-	pri = netdev_priv(dev);
-	epri = (struct ethertap_data *) pri->user;
-	epri->dev_name = init->dev_name;
-	epri->gate_addr = init->gate_addr;
-	epri->data_fd = -1;
-	epri->control_fd = -1;
-	epri->dev = dev;
-
-	printk(KERN_INFO "ethertap backend - %s", epri->dev_name);
-	if (epri->gate_addr != NULL)
-		printk(KERN_CONT ", IP = %s", epri->gate_addr);
-	printk(KERN_CONT "\n");
-}
-
-static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	int len;
-
-	len = net_recvfrom(fd, skb_mac_header(skb),
-			   skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
-	if (len <= 0)
-		return(len);
-
-	skb_pull(skb, 2);
-	len -= 2;
-	return len;
-}
-
-static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	skb_push(skb, 2);
-	return net_send(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info ethertap_kern_info = {
-	.init			= etap_init,
-	.protocol		= eth_protocol,
-	.read			= etap_read,
-	.write 			= etap_write,
-};
-
-int ethertap_setup(char *str, char **mac_out, void *data)
-{
-	struct ethertap_init *init = data;
-
-	*init = ((struct ethertap_init)
-		{ .dev_name 	= NULL,
-		  .gate_addr 	= NULL });
-	if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
-			    &init->gate_addr))
-		return 0;
-	if (init->dev_name == NULL) {
-		printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
-		return 0;
-	}
-
-	return 1;
-}
-
-static struct transport ethertap_transport = {
-	.list 		= LIST_HEAD_INIT(ethertap_transport.list),
-	.name 		= "ethertap",
-	.setup  	= ethertap_setup,
-	.user 		= &ethertap_user_info,
-	.kern 		= &ethertap_kern_info,
-	.private_size 	= sizeof(struct ethertap_data),
-	.setup_size 	= sizeof(struct ethertap_init),
-};
-
-static int register_ethertap(void)
-{
-	register_transport(&ethertap_transport);
-	return 0;
-}
-
-late_initcall(register_ethertap);
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
deleted file mode 100644
index b39b6696ac58..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- * Licensed under the GPL.
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include "etap.h"
-#include <os.h>
-#include <net_user.h>
-#include <um_malloc.h>
-
-#define MAX_PACKET ETH_MAX_PACKET
-
-static int etap_user_init(void *data, void *dev)
-{
-	struct ethertap_data *pri = data;
-
-	pri->dev = dev;
-	return 0;
-}
-
-struct addr_change {
-	enum { ADD_ADDR, DEL_ADDR } what;
-	unsigned char addr[4];
-	unsigned char netmask[4];
-};
-
-static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
-			int fd)
-{
-	struct addr_change change;
-	char *output;
-	int n;
-
-	change.what = op;
-	memcpy(change.addr, addr, sizeof(change.addr));
-	memcpy(change.netmask, netmask, sizeof(change.netmask));
-	CATCH_EINTR(n = write(fd, &change, sizeof(change)));
-	if (n != sizeof(change)) {
-		printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
-		       errno);
-		return;
-	}
-
-	output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
-	if (output == NULL)
-		printk(UM_KERN_ERR "etap_change : Failed to allocate output "
-		       "buffer\n");
-	read_output(fd, output, UM_KERN_PAGE_SIZE);
-	if (output != NULL) {
-		printk("%s", output);
-		kfree(output);
-	}
-}
-
-static void etap_open_addr(unsigned char *addr, unsigned char *netmask,
-			   void *arg)
-{
-	etap_change(ADD_ADDR, addr, netmask, *((int *) arg));
-}
-
-static void etap_close_addr(unsigned char *addr, unsigned char *netmask,
-			    void *arg)
-{
-	etap_change(DEL_ADDR, addr, netmask, *((int *) arg));
-}
-
-struct etap_pre_exec_data {
-	int control_remote;
-	int control_me;
-	int data_me;
-};
-
-static void etap_pre_exec(void *arg)
-{
-	struct etap_pre_exec_data *data = arg;
-
-	dup2(data->control_remote, 1);
-	close(data->data_me);
-	close(data->control_me);
-}
-
-static int etap_tramp(char *dev, char *gate, int control_me,
-		      int control_remote, int data_me, int data_remote)
-{
-	struct etap_pre_exec_data pe_data;
-	int pid, err, n;
-	char version_buf[sizeof("nnnnn\0")];
-	char data_fd_buf[sizeof("nnnnnn\0")];
-	char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
-	char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
-			       data_fd_buf, gate_buf, NULL };
-	char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
-				 dev, data_fd_buf, NULL };
-	char **args, c;
-
-	sprintf(data_fd_buf, "%d", data_remote);
-	sprintf(version_buf, "%d", UML_NET_VERSION);
-	if (gate != NULL) {
-		strcpy(gate_buf, gate);
-		args = setup_args;
-	}
-	else args = nosetup_args;
-
-	err = 0;
-	pe_data.control_remote = control_remote;
-	pe_data.control_me = control_me;
-	pe_data.data_me = data_me;
-	pid = run_helper(etap_pre_exec, &pe_data, args);
-
-	if (pid < 0)
-		err = pid;
-	close(data_remote);
-	close(control_remote);
-	CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
-	if (n != sizeof(c)) {
-		err = -errno;
-		printk(UM_KERN_ERR "etap_tramp : read of status failed, "
-		       "err = %d\n", -err);
-		return err;
-	}
-	if (c != 1) {
-		printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
-		err = helper_wait(pid);
-	}
-	return err;
-}
-
-static int etap_open(void *data)
-{
-	struct ethertap_data *pri = data;
-	char *output;
-	int data_fds[2], control_fds[2], err, output_len;
-
-	err = tap_open_common(pri->dev, pri->gate_addr);
-	if (err)
-		return err;
-
-	err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "etap_open - data socketpair failed - "
-		       "err = %d\n", errno);
-		return err;
-	}
-
-	err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "etap_open - control socketpair failed - "
-		       "err = %d\n", errno);
-		goto out_close_data;
-	}
-
-	err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
-			 control_fds[1], data_fds[0], data_fds[1]);
-	output_len = UM_KERN_PAGE_SIZE;
-	output = uml_kmalloc(output_len, UM_GFP_KERNEL);
-	read_output(control_fds[0], output, output_len);
-
-	if (output == NULL)
-		printk(UM_KERN_ERR "etap_open : failed to allocate output "
-		       "buffer\n");
-	else {
-		printk("%s", output);
-		kfree(output);
-	}
-
-	if (err < 0) {
-		printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
-		goto out_close_control;
-	}
-
-	pri->data_fd = data_fds[0];
-	pri->control_fd = control_fds[0];
-	iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
-	return data_fds[0];
-
-out_close_control:
-	close(control_fds[0]);
-	close(control_fds[1]);
-out_close_data:
-	close(data_fds[0]);
-	close(data_fds[1]);
-	return err;
-}
-
-static void etap_close(int fd, void *data)
-{
-	struct ethertap_data *pri = data;
-
-	iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
-	close(fd);
-
-	if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
-		printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
-		       "errno = %d\n", errno);
-
-	if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
-		printk(UM_KERN_ERR "etap_close - shutdown control socket "
-		       "failed, errno = %d\n", errno);
-
-	close(pri->data_fd);
-	pri->data_fd = -1;
-	close(pri->control_fd);
-	pri->control_fd = -1;
-}
-
-static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
-			  void *data)
-{
-	struct ethertap_data *pri = data;
-
-	tap_check_ips(pri->gate_addr, addr);
-	if (pri->control_fd == -1)
-		return;
-	etap_open_addr(addr, netmask, &pri->control_fd);
-}
-
-static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
-			  void *data)
-{
-	struct ethertap_data *pri = data;
-
-	if (pri->control_fd == -1)
-		return;
-
-	etap_close_addr(addr, netmask, &pri->control_fd);
-}
-
-const struct net_user_info ethertap_user_info = {
-	.init		= etap_user_init,
-	.open		= etap_open,
-	.close	 	= etap_close,
-	.remove	 	= NULL,
-	.add_address	= etap_add_addr,
-	.delete_address = etap_del_addr,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
-};
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
deleted file mode 100644
index 7367354ac8df..000000000000
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* 
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_TUNTAP_H
-#define __UM_TUNTAP_H
-
-#include <net_user.h>
-
-struct tuntap_data {
-	char *dev_name;
-	int fixed_config;
-	char *gate_addr;
-	int fd;
-	void *dev;
-};
-
-extern const struct net_user_info tuntap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
deleted file mode 100644
index d9d56e5810fe..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <asm/errno.h>
-#include <net_kern.h>
-#include "tuntap.h"
-
-struct tuntap_init {
-	char *dev_name;
-	char *gate_addr;
-};
-
-static void tuntap_init(struct net_device *dev, void *data)
-{
-	struct uml_net_private *pri;
-	struct tuntap_data *tpri;
-	struct tuntap_init *init = data;
-
-	pri = netdev_priv(dev);
-	tpri = (struct tuntap_data *) pri->user;
-	tpri->dev_name = init->dev_name;
-	tpri->fixed_config = (init->dev_name != NULL);
-	tpri->gate_addr = init->gate_addr;
-	tpri->fd = -1;
-	tpri->dev = dev;
-
-	printk(KERN_INFO "TUN/TAP backend - ");
-	if (tpri->gate_addr != NULL)
-		printk(KERN_CONT "IP = %s", tpri->gate_addr);
-	printk(KERN_CONT "\n");
-}
-
-static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return net_read(fd, skb_mac_header(skb),
-			skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
-	return net_write(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info tuntap_kern_info = {
-	.init			= tuntap_init,
-	.protocol		= eth_protocol,
-	.read			= tuntap_read,
-	.write 			= tuntap_write,
-};
-
-int tuntap_setup(char *str, char **mac_out, void *data)
-{
-	struct tuntap_init *init = data;
-
-	*init = ((struct tuntap_init)
-		{ .dev_name 	= NULL,
-		  .gate_addr 	= NULL });
-	if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
-			    &init->gate_addr))
-		return 0;
-
-	return 1;
-}
-
-static struct transport tuntap_transport = {
-	.list 		= LIST_HEAD_INIT(tuntap_transport.list),
-	.name 		= "tuntap",
-	.setup  	= tuntap_setup,
-	.user 		= &tuntap_user_info,
-	.kern 		= &tuntap_kern_info,
-	.private_size 	= sizeof(struct tuntap_data),
-	.setup_size 	= sizeof(struct tuntap_init),
-};
-
-static int register_tuntap(void)
-{
-	register_transport(&tuntap_transport);
-	return 0;
-}
-
-late_initcall(register_tuntap);
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
deleted file mode 100644
index 14126d9176aa..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/* 
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/if_tun.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <kern_util.h>
-#include <os.h>
-#include "tuntap.h"
-
-static int tuntap_user_init(void *data, void *dev)
-{
-	struct tuntap_data *pri = data;
-
-	pri->dev = dev;
-	return 0;
-}
-
-static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
-			    void *data)
-{
-	struct tuntap_data *pri = data;
-
-	tap_check_ips(pri->gate_addr, addr);
-	if ((pri->fd == -1) || pri->fixed_config)
-		return;
-	open_addr(addr, netmask, pri->dev_name);
-}
-
-static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
-			    void *data)
-{
-	struct tuntap_data *pri = data;
-
-	if ((pri->fd == -1) || pri->fixed_config)
-		return;
-	close_addr(addr, netmask, pri->dev_name);
-}
-
-struct tuntap_pre_exec_data {
-	int stdout;
-	int close_me;
-};
-
-static void tuntap_pre_exec(void *arg)
-{
-	struct tuntap_pre_exec_data *data = arg;
-
-	dup2(data->stdout, 1);
-	close(data->close_me);
-}
-
-static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
-			     char *buffer, int buffer_len, int *used_out)
-{
-	struct tuntap_pre_exec_data data;
-	char version_buf[sizeof("nnnnn\0")];
-	char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate,
-			 NULL };
-	char buf[CMSG_SPACE(sizeof(*fd_out))];
-	struct msghdr msg;
-	struct cmsghdr *cmsg;
-	struct iovec iov;
-	int pid, n, err;
-
-	sprintf(version_buf, "%d", UML_NET_VERSION);
-
-	data.stdout = remote;
-	data.close_me = me;
-
-	pid = run_helper(tuntap_pre_exec, &data, argv);
-
-	if (pid < 0)
-		return -pid;
-
-	close(remote);
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	if (buffer != NULL) {
-		iov = ((struct iovec) { buffer, buffer_len });
-		msg.msg_iov = &iov;
-		msg.msg_iovlen = 1;
-	}
-	else {
-		msg.msg_iov = NULL;
-		msg.msg_iovlen = 0;
-	}
-	msg.msg_control = buf;
-	msg.msg_controllen = sizeof(buf);
-	msg.msg_flags = 0;
-	n = recvmsg(me, &msg, 0);
-	*used_out = n;
-	if (n < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
-		       "errno = %d\n", errno);
-		return err;
-	}
-	helper_wait(pid);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	if (cmsg == NULL) {
-		printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
-		       "message\n");
-		return -EINVAL;
-	}
-	if ((cmsg->cmsg_level != SOL_SOCKET) ||
-	   (cmsg->cmsg_type != SCM_RIGHTS)) {
-		printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
-		       "descriptor\n");
-		return -EINVAL;
-	}
-	*fd_out = ((int *) CMSG_DATA(cmsg))[0];
-	os_set_exec_close(*fd_out);
-	return 0;
-}
-
-static int tuntap_open(void *data)
-{
-	struct ifreq ifr;
-	struct tuntap_data *pri = data;
-	char *output, *buffer;
-	int err, fds[2], len, used;
-
-	err = tap_open_common(pri->dev, pri->gate_addr);
-	if (err < 0)
-		return err;
-
-	if (pri->fixed_config) {
-		pri->fd = os_open_file("/dev/net/tun",
-				       of_cloexec(of_rdwr(OPENFLAGS())), 0);
-		if (pri->fd < 0) {
-			printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
-			       "err = %d\n", -pri->fd);
-			return pri->fd;
-		}
-		memset(&ifr, 0, sizeof(ifr));
-		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-		strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
-		if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
-			err = -errno;
-			printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
-			       errno);
-			close(pri->fd);
-			return err;
-		}
-	}
-	else {
-		err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
-		if (err) {
-			err = -errno;
-			printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
-			       "errno = %d\n", errno);
-			return err;
-		}
-
-		buffer = get_output_buffer(&len);
-		if (buffer != NULL)
-			len--;
-		used = 0;
-
-		err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
-					fds[1], buffer, len, &used);
-
-		output = buffer;
-		if (err < 0) {
-			printk("%s", output);
-			free_output_buffer(buffer);
-			printk(UM_KERN_ERR "tuntap_open_tramp failed - "
-			       "err = %d\n", -err);
-			return err;
-		}
-
-		pri->dev_name = uml_strdup(buffer);
-		output += IFNAMSIZ;
-		printk("%s", output);
-		free_output_buffer(buffer);
-
-		close(fds[0]);
-		iter_addresses(pri->dev, open_addr, pri->dev_name);
-	}
-
-	return pri->fd;
-}
-
-static void tuntap_close(int fd, void *data)
-{
-	struct tuntap_data *pri = data;
-
-	if (!pri->fixed_config)
-		iter_addresses(pri->dev, close_addr, pri->dev_name);
-	close(fd);
-	pri->fd = -1;
-}
-
-const struct net_user_info tuntap_user_info = {
-	.init		= tuntap_user_init,
-	.open		= tuntap_open,
-	.close	 	= tuntap_close,
-	.remove	 	= NULL,
-	.add_address	= tuntap_add_addr,
-	.delete_address = tuntap_del_addr,
-	.mtu		= ETH_MAX_PACKET,
-	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index 1a365ddc4d02..72f416edf252 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  arch/um/kernel/elf_aux.c
  *
- *  Scan the Elf auxiliary vector provided by the host to extract
+ *  Scan the ELF auxiliary vector provided by the host to extract
  *  information about vsyscall-page, etc.
  *
  *  Copyright (C) 2004 Fujitsu Siemens Computers GmbH
@@ -12,37 +13,27 @@
 #include <init.h>
 #include <elf_user.h>
 #include <mem_user.h>
+#include "internal.h"
+#include <linux/swab.h>
 
+#if __BITS_PER_LONG == 64
+typedef Elf64_auxv_t elf_auxv_t;
+#else
 typedef Elf32_auxv_t elf_auxv_t;
+#endif
 
 /* These are initialized very early in boot and never changed */
 char * elf_aux_platform;
-extern long elf_aux_hwcap;
-unsigned long vsyscall_ehdr;
-unsigned long vsyscall_end;
-unsigned long __kernel_vsyscall;
+long elf_aux_hwcap;
 
 __init void scan_elf_aux( char **envp)
 {
-	long page_size = 0;
 	elf_auxv_t * auxv;
 
 	while ( *envp++ != NULL) ;
 
 	for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) {
 		switch ( auxv->a_type ) {
-			case AT_SYSINFO:
-				__kernel_vsyscall = auxv->a_un.a_val;
-				/* See if the page is under TASK_SIZE */
-				if (__kernel_vsyscall < (unsigned long) envp)
-					__kernel_vsyscall = 0;
-				break;
-			case AT_SYSINFO_EHDR:
-				vsyscall_ehdr = auxv->a_un.a_val;
-				/* See if the page is under TASK_SIZE */
-				if (vsyscall_ehdr < (unsigned long) envp)
-					vsyscall_ehdr = 0;
-				break;
 			case AT_HWCAP:
 				elf_aux_hwcap = auxv->a_un.a_val;
 				break;
@@ -54,20 +45,6 @@ __init void scan_elf_aux( char **envp)
 				elf_aux_platform =
 					(char *) (long) auxv->a_un.a_val;
 				break;
-			case AT_PAGESZ:
-				page_size = auxv->a_un.a_val;
-				break;
 		}
 	}
-	if ( ! __kernel_vsyscall || ! vsyscall_ehdr ||
-	     ! elf_aux_hwcap || ! elf_aux_platform ||
-	     ! page_size || (vsyscall_ehdr % page_size) ) {
-		__kernel_vsyscall = 0;
-		vsyscall_ehdr = 0;
-		elf_aux_hwcap = 0;
-		elf_aux_platform = "i586";
-	}
-	else {
-		vsyscall_end = vsyscall_ehdr + page_size;
-	}
 }
diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 8fb25ca07c46..c09a5fd5e225 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -93,6 +93,7 @@ int execvp_noalloc(char *buf, const char *file, char *const argv[])
 					   up finding no executable we can use, we want to diagnose
 					   that we did find one but were denied access.  */
 					got_eacces = 1;
+					break;
 				case ENOENT:
 				case ESTALE:
 				case ENOTDIR:
@@ -136,7 +137,7 @@ int main(int argc, char**argv)
 	int ret;
 	argc--;
 	if (!argc) {
-		fprintf(stderr, "Not enough arguments\n");
+		os_warn("Not enough arguments\n");
 		return 1;
 	}
 	argv++;
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c17bd6f7d674..21f0e50fb1df 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -1,18 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
 #include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
+#include <linux/falloc.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
 #include <sys/un.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <poll.h>
 #include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
@@ -98,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf)
 	return 0;
 }
 
-int os_set_slip(int fd)
-{
-	int disc, sencap;
-
-	disc = N_SLIP;
-	if (ioctl(fd, TIOCSETD, &disc) < 0)
-		return -errno;
-
-	sencap = 0;
-	if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
-		return -errno;
-
-	return 0;
-}
-
 int os_mode_fd(int fd, int mode)
 {
 	int err;
@@ -233,6 +226,16 @@ out:
 	return err;
 }
 
+int os_dup_file(int fd)
+{
+	int new_fd = dup(fd);
+
+	if (new_fd < 0)
+		return -errno;
+
+	return new_fd;
+}
+
 void os_close_file(int fd)
 {
 	close(fd);
@@ -257,6 +260,15 @@ int os_read_file(int fd, void *buf, int len)
 	return n;
 }
 
+int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
+{
+	int n = pread(fd, buf, len, offset);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
 int os_write_file(int fd, const void *buf, int len)
 {
 	int n = write(fd, (void *) buf, len);
@@ -266,6 +278,25 @@ int os_write_file(int fd, const void *buf, int len)
 	return n;
 }
 
+int os_sync_file(int fd)
+{
+	int n = fdatasync(fd);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
+int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
+{
+	int n = pwrite(fd, (void *) buf, len, offset);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
+
 int os_file_size(const char *file, unsigned long long *size_out)
 {
 	struct uml_stat buf;
@@ -304,7 +335,7 @@ int os_file_size(const char *file, unsigned long long *size_out)
 	return 0;
 }
 
-int os_file_modtime(const char *file, unsigned long *modtime)
+int os_file_modtime(const char *file, long long *modtime)
 {
 	struct uml_stat buf;
 	int err;
@@ -461,44 +492,51 @@ int os_shutdown_socket(int fd, int r, int w)
 	return 0;
 }
 
-int os_rcv_fd(int fd, int *helper_pid_out)
+/**
+ * os_rcv_fd_msg - receive message with (optional) FDs
+ * @fd: the FD to receive from
+ * @fds: the array for FDs to write to
+ * @n_fds: number of FDs to receive (@fds array size)
+ * @data: the message buffer
+ * @data_len: the size of the message to receive
+ *
+ * Receive a message with FDs.
+ *
+ * Returns: the size of the received message, or an error code
+ */
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+		      void *data, size_t data_len)
 {
-	int new, n;
-	char buf[CMSG_SPACE(sizeof(new))];
-	struct msghdr msg;
+#define MAX_RCV_FDS	2
+	char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)];
 	struct cmsghdr *cmsg;
-	struct iovec iov;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	iov = ((struct iovec) { .iov_base  = helper_pid_out,
-				.iov_len   = sizeof(*helper_pid_out) });
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = buf;
-	msg.msg_controllen = sizeof(buf);
-	msg.msg_flags = 0;
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = data_len,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = buf,
+		.msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds),
+	};
+	int n;
+
+	if (n_fds > MAX_RCV_FDS)
+		return -EINVAL;
 
 	n = recvmsg(fd, &msg, 0);
 	if (n < 0)
 		return -errno;
-	else if (n != iov.iov_len)
-		*helper_pid_out = -1;
 
 	cmsg = CMSG_FIRSTHDR(&msg);
-	if (cmsg == NULL) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive anything, "
-		       "error = %d\n", errno);
-		return -1;
-	}
-	if ((cmsg->cmsg_level != SOL_SOCKET) ||
-	    (cmsg->cmsg_type != SCM_RIGHTS)) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n");
-		return -1;
-	}
+	if (!cmsg ||
+	    cmsg->cmsg_level != SOL_SOCKET ||
+	    cmsg->cmsg_type != SCM_RIGHTS)
+		return n;
 
-	new = ((int *) CMSG_DATA(cmsg))[0];
-	return new;
+	memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0));
+	return n;
 }
 
 int os_create_unix_socket(const char *file, int len, int close_on_exec)
@@ -574,3 +612,115 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
 	return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
+int os_falloc_zeroes(int fd, unsigned long long offset, int len)
+{
+	int n = fallocate(fd, FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
+int os_eventfd(unsigned int initval, int flags)
+{
+	int fd = eventfd(initval, flags);
+
+	if (fd < 0)
+		return -errno;
+	return fd;
+}
+
+int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
+		   unsigned int fds_num)
+{
+	struct iovec iov = {
+		.iov_base = (void *) buf,
+		.iov_len = len,
+	};
+	union {
+		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
+		struct cmsghdr align;
+	} u;
+	unsigned int fds_size = sizeof(*fds) * fds_num;
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = u.control,
+		.msg_controllen = CMSG_SPACE(fds_size),
+	};
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	int err;
+
+	if (fds_num > OS_SENDMSG_MAX_FDS)
+		return -EINVAL;
+	memset(u.control, 0, sizeof(u.control));
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(fds_size);
+	memcpy(CMSG_DATA(cmsg), fds, fds_size);
+	err = sendmsg(fd, &msg, 0);
+
+	if (err < 0)
+		return -errno;
+	return err;
+}
+
+int os_poll(unsigned int n, const int *fds)
+{
+	/* currently need 2 FDs at most so avoid dynamic allocation */
+	struct pollfd pollfds[2] = {};
+	unsigned int i;
+	int ret;
+
+	if (n > ARRAY_SIZE(pollfds))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		pollfds[i].fd = fds[i];
+		pollfds[i].events = POLLIN;
+	}
+
+	ret = poll(pollfds, n, -1);
+	if (ret < 0)
+		return -errno;
+
+	/* Return the index of the available FD */
+	for (i = 0; i < n; i++) {
+		if (pollfds[i].revents)
+			return i;
+	}
+
+	return -EIO;
+}
+
+void *os_mmap_rw_shared(int fd, size_t size)
+{
+	void *res = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (res == MAP_FAILED)
+		return NULL;
+
+	return res;
+}
+
+void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size)
+{
+	void *res;
+
+	res = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL);
+
+	if (res == MAP_FAILED)
+		return NULL;
+
+	return res;
+}
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index e3ee4a51ef63..89c2ad2a4e3a 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -1,12 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <errno.h>
 #include <sched.h>
+#include <pthread.h>
 #include <linux/limits.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
@@ -45,7 +47,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 	unsigned long stack, sp;
 	int pid, fds[2], ret, n;
 
-	stack = alloc_stack(0, __cant_sleep());
+	stack = alloc_stack(0, __uml_cant_sleep());
 	if (stack == 0)
 		return -ENOMEM;
 
@@ -64,12 +66,12 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 		goto out_close;
 	}
 
-	sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+	sp = stack + UM_KERN_PAGE_SIZE;
 	data.pre_exec = pre_exec;
 	data.pre_data = pre_data;
 	data.argv = argv;
 	data.fd = fds[1];
-	data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
+	data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
 					uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
 	pid = clone(helper_child, (void *) sp, CLONE_VM, &data);
 	if (pid < 0) {
@@ -96,9 +98,13 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 			       "ret = %d\n", -n);
 			ret = n;
 		}
-		CATCH_EINTR(waitpid(pid, NULL, __WCLONE));
+		CATCH_EINTR(waitpid(pid, NULL, __WALL));
 	}
 
+	if (ret < 0)
+		printk(UM_KERN_ERR "run_helper : failed to exec %s on host: %s\n",
+		       argv[0], strerror(-ret));
+
 out_free2:
 	kfree(data.buf);
 out_close:
@@ -116,11 +122,15 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
 	unsigned long stack, sp;
 	int pid, status, err;
 
-	stack = alloc_stack(0, __cant_sleep());
+	/* To share memory space, use os_run_helper_thread() instead. */
+	if (flags & CLONE_VM)
+		return -EINVAL;
+
+	stack = alloc_stack(0, __uml_cant_sleep());
 	if (stack == 0)
 		return -ENOMEM;
 
-	sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+	sp = stack + UM_KERN_PAGE_SIZE;
 	pid = clone(proc, (void *) sp, flags, arg);
 	if (pid < 0) {
 		err = -errno;
@@ -129,7 +139,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
 		return err;
 	}
 	if (stack_out == NULL) {
-		CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE));
+		CATCH_EINTR(pid = waitpid(pid, &status, __WALL));
 		if (pid < 0) {
 			err = -errno;
 			printk(UM_KERN_ERR "run_helper_thread - wait failed, "
@@ -148,7 +158,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
 int helper_wait(int pid)
 {
 	int ret, status;
-	int wflags = __WCLONE;
+	int wflags = __WALL;
 
 	CATCH_EINTR(ret = waitpid(pid, &status, wflags));
 	if (ret < 0) {
@@ -162,3 +172,65 @@ int helper_wait(int pid)
 	} else
 		return 0;
 }
+
+struct os_helper_thread {
+	pthread_t handle;
+};
+
+int os_run_helper_thread(struct os_helper_thread **td_out,
+			 void *(*routine)(void *), void *arg)
+{
+	struct os_helper_thread *td;
+	sigset_t sigset, oset;
+	int err, flags;
+
+	flags = __uml_cant_sleep() ? UM_GFP_ATOMIC : UM_GFP_KERNEL;
+	td = uml_kmalloc(sizeof(*td), flags);
+	if (!td)
+		return -ENOMEM;
+
+	sigfillset(&sigset);
+	if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) {
+		err = -errno;
+		kfree(td);
+		return err;
+	}
+
+	err = pthread_create(&td->handle, NULL, routine, arg);
+
+	if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0)
+		panic("Failed to restore the signal mask: %d", errno);
+
+	if (err != 0)
+		kfree(td);
+	else
+		*td_out = td;
+
+	return -err;
+}
+
+void os_kill_helper_thread(struct os_helper_thread *td)
+{
+	pthread_cancel(td->handle);
+	pthread_join(td->handle, NULL);
+	kfree(td);
+}
+
+void os_fix_helper_thread_signals(void)
+{
+	sigset_t sigset;
+
+	sigemptyset(&sigset);
+
+	sigaddset(&sigset, SIGWINCH);
+	sigaddset(&sigset, SIGPIPE);
+	sigaddset(&sigset, SIGPROF);
+	sigaddset(&sigset, SIGINT);
+	sigaddset(&sigset, SIGTERM);
+	sigaddset(&sigset, SIGCHLD);
+	sigaddset(&sigset, SIGALRM);
+	sigaddset(&sigset, SIGIO);
+	sigaddset(&sigset, SIGUSR1);
+
+	pthread_sigmask(SIG_SETMASK, &sigset, NULL);
+}
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
index 0dc2c9f135f6..bac9fcc8c14c 100644
--- a/arch/um/os-Linux/internal.h
+++ b/arch/um/os-Linux/internal.h
@@ -1 +1,36 @@
-void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc);
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_OS_LINUX_INTERNAL_H
+#define __UM_OS_LINUX_INTERNAL_H
+
+#include <mm_id.h>
+#include <stub-data.h>
+#include <signal.h>
+
+/*
+ * elf_aux.c
+ */
+void scan_elf_aux(char **envp);
+
+/*
+ * mem.c
+ */
+void check_tmpexec(void);
+
+/*
+ * signal.c
+ */
+extern __thread int signals_enabled;
+int timer_alarm_pending(void);
+
+/*
+ * skas/process.c
+ */
+void wait_stub_done(int pid);
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
+
+/*
+ * smp.c
+ */
+#define IPI_SIGNAL	SIGRTMIN
+
+#endif /* __UM_OS_LINUX_INTERNAL_H */
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74b79ad..cf7e49c08b21 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,135 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <errno.h>
-#include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
 #include <os.h>
 #include <um_malloc.h>
 
+/* Epoll support */
+
+static int epollfd = -1;
+
+#define MAX_EPOLL_EVENTS 64
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/* Helper to return an Epoll data pointer from an epoll event structure.
+ * We need to keep this one on the userspace side to keep includes separate
+ */
+
+void *os_epoll_get_data_pointer(int index)
+{
+	return epoll_events[index].data.ptr;
+}
+
+/* Helper to compare events versus the events in the epoll structure.
+ * Same as above - needs to be on the userspace side
+ */
+
+
+int os_epoll_triggered(int index, int events)
+{
+	return epoll_events[index].events & events;
+}
+/* Helper to set the event mask.
+ * The event mask is opaque to the kernel side, because it does not have
+ * access to the right includes/defines for EPOLL constants.
+ */
+
+int os_event_mask(enum um_irq_type irq_type)
+{
+	if (irq_type == IRQ_READ)
+		return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP;
+	if (irq_type == IRQ_WRITE)
+		return EPOLLOUT;
+	return 0;
+}
+
 /*
- * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
- * and os_free_irq_by_cb, which are called under irq_lock.
+ * Initial Epoll Setup
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
+int os_setup_epoll(void)
+{
+	epollfd = epoll_create(MAX_EPOLL_EVENTS);
+	return epollfd;
+}
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/*
+ * Helper to run the actual epoll_wait
+ */
+int os_waiting_for_events_epoll(void)
 {
-	struct irq_fd *irq_fd;
-	int i, n, err;
+	int n, err;
 
-	n = poll(pollfds, pollfds_num, 0);
+	n = epoll_wait(epollfd,
+		(struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk(UM_KERN_ERR "os_waiting_for_events:"
-			       " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events:"
+				" epoll returned %d, error = %s\n", n,
+				strerror(errno)
+			);
 		return err;
 	}
-
-	if (n == 0)
-		return 0;
-
-	irq_fd = active_fds;
-
-	for (i = 0; i < pollfds_num; i++) {
-		if (pollfds[i].revents != 0) {
-			irq_fd->current_events = pollfds[i].revents;
-			pollfds[i].fd = -1;
-		}
-		irq_fd = irq_fd->next;
-	}
 	return n;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-	if (pollfds_num == pollfds_size) {
-		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-			/* return min size needed for new pollfds area */
-			return (pollfds_size + 1) * sizeof(pollfds[0]);
-		}
-
-		if (pollfds != NULL) {
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			/* remove old pollfds */
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	} else
-		kfree(tmp_pfd);	/* remove not used tmp_pfd */
-
-	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
-						  .events	= events,
-						  .revents	= 0 });
-	pollfds_num++;
-
-	return 0;
-}
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+/*
+ * Helper to add a fd to epoll
+ */
+int os_add_epoll_fd(int events, int fd, void *data)
 {
-	struct irq_fd **prev;
-	int i = 0;
-
-	prev = &active_fds;
-	while (*prev != NULL) {
-		if ((*test)(*prev, arg)) {
-			struct irq_fd *old_fd = *prev;
-			if ((pollfds[i].fd != -1) &&
-			    (pollfds[i].fd != (*prev)->fd)) {
-				printk(UM_KERN_ERR "os_free_irq_by_cb - "
-				       "mismatch between active_fds and "
-				       "pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/*
-			 * This moves the *whole* array after pollfds[i]
-			 * (though it doesn't spot as such)!
-			 */
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if (*last_irq_ptr2 == &old_fd->next)
-				*last_irq_ptr2 = prev;
-
-			*prev = (*prev)->next;
-			if (old_fd->type == IRQ_WRITE)
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
-	}
- out:
-	return;
+	struct epoll_event event;
+	int result;
+
+	event.data.ptr = data;
+	event.events = events | EPOLLET;
+	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if ((result) && (errno == EEXIST))
+		result = os_mod_epoll_fd(events, fd, data);
+	if (result)
+		printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-int os_get_pollfd(int i)
+/*
+ * Helper to mod the fd event mask and/or data backreference
+ */
+int os_mod_epoll_fd(int events, int fd, void *data)
 {
-	return pollfds[i].fd;
+	struct epoll_event event;
+	int result;
+
+	event.data.ptr = data;
+	event.events = events;
+	result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+	if (result)
+		printk(UM_KERN_ERR
+			"epollctl mod err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-void os_set_pollfd(int i, int fd)
+/*
+ * Helper to delete the epoll fd
+ */
+int os_del_epoll_fd(int fd)
 {
-	pollfds[i].fd = fd;
+	struct epoll_event event;
+	/* This is quiet as we use this as IO ON/OFF - so it is often
+	 * invoked on a non-existent fd
+	 */
+	return epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
 }
 
 void os_set_ioignore(void)
 {
 	signal(SIGIO, SIG_IGN);
 }
+
+void os_close_epoll_fd(void)
+{
+	/* Needed so we do not leak an fd when rebooting */
+	os_close_file(epollfd);
+}
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 749c96da7b99..7e114862a723 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -10,19 +11,17 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/resource.h>
+#include <sys/personality.h>
 #include <as-layout.h>
 #include <init.h>
 #include <kern_util.h>
 #include <os.h>
 #include <um_malloc.h>
+#include "internal.h"
 
-#define PGD_BOUND (4 * 1024 * 1024)
 #define STACKSIZE (8 * 1024 * 1024)
-#define THREAD_NAME_LEN (256)
 
-long elf_aux_hwcap;
-
-static void set_stklim(void)
+static void __init set_stklim(void)
 {
 	struct rlimit lim;
 
@@ -39,24 +38,13 @@ static void set_stklim(void)
 	}
 }
 
-static __init void do_uml_initcalls(void)
-{
-	initcall_t *call;
-
-	call = &__uml_initcall_start;
-	while (call < &__uml_initcall_end) {
-		(*call)();
-		call++;
-	}
-}
-
 static void last_ditch_exit(int sig)
 {
 	uml_cleanup();
 	exit(1);
 }
 
-static void install_fatal_handler(int sig)
+static void __init install_fatal_handler(int sig)
 {
 	struct sigaction action;
 
@@ -73,15 +61,15 @@ static void install_fatal_handler(int sig)
 	action.sa_restorer = NULL;
 	action.sa_handler = last_ditch_exit;
 	if (sigaction(sig, &action, NULL) < 0) {
-		printf("failed to install handler for signal %d - errno = %d\n",
-		       sig, errno);
+		os_warn("failed to install handler for signal %d "
+			"- errno = %d\n", sig, errno);
 		exit(1);
 	}
 }
 
 #define UML_LIB_PATH	":" OS_LIB_PATH "/uml"
 
-static void setup_env_path(void)
+static void __init setup_env_path(void)
 {
 	char *new_path = NULL;
 	char *old_path = NULL;
@@ -112,17 +100,32 @@ static void setup_env_path(void)
 	}
 }
 
-extern void scan_elf_aux( char **envp);
-
 int __init main(int argc, char **argv, char **envp)
 {
 	char **new_argv;
 	int ret, i, err;
 
+	/* Disable randomization and re-exec if it was changed successfully */
+	ret = personality(PER_LINUX | ADDR_NO_RANDOMIZE);
+	if (ret >= 0 && (ret & (PER_LINUX | ADDR_NO_RANDOMIZE)) !=
+			 (PER_LINUX | ADDR_NO_RANDOMIZE)) {
+		char buf[4096] = {};
+		ssize_t ret;
+
+		ret = readlink("/proc/self/exe", buf, sizeof(buf));
+		if (ret < 0 || ret >= sizeof(buf)) {
+			perror("readlink failure");
+			exit(1);
+		}
+		execve(buf, argv, envp);
+	}
+
 	set_stklim();
 
 	setup_env_path();
 
+	setsid();
+
 	new_argv = malloc((argc + 1) * sizeof(char *));
 	if (new_argv == NULL) {
 		perror("Mallocing argv");
@@ -144,12 +147,10 @@ int __init main(int argc, char **argv, char **envp)
 	install_fatal_handler(SIGINT);
 	install_fatal_handler(SIGTERM);
 
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
 	scan_elf_aux(envp);
-#endif
 
-	do_uml_initcalls();
-	ret = linux_main(argc, argv);
+	change_sig(SIGPIPE, 0);
+	ret = linux_main(argc, argv, envp);
 
 	/*
 	 * Disable SIGPROF - I have no idea why libc doesn't do this or turn
@@ -160,18 +161,18 @@ int __init main(int argc, char **argv, char **envp)
 
 	/*
 	 * This signal stuff used to be in the reboot case.  However,
-	 * sometimes a SIGVTALRM can come in when we're halting (reproducably
+	 * sometimes a timer signal can come in when we're halting (reproducably
 	 * when writing out gcov information, presumably because that takes
 	 * some time) and cause a segfault.
 	 */
 
-	/* stop timers and set SIGVTALRM to be ignored */
-	disable_timer();
+	/* stop timers and set timer signal to be ignored */
+	os_timer_disable(0);
 
 	/* disable SIGIO for the fds and set SIGIO to be ignored */
 	err = deactivate_all_fds();
 	if (err)
-		printf("deactivate_all_fds failed, errno = %d\n", -err);
+		os_warn("deactivate_all_fds failed, errno = %d\n", -err);
 
 	/*
 	 * Let any pending signals fire now.  This ensures
@@ -180,18 +181,23 @@ int __init main(int argc, char **argv, char **envp)
 	 */
 	unblock_signals();
 
+	os_info("\n");
 	/* Reboot */
 	if (ret) {
-		printf("\n");
 		execvp(new_argv[0], new_argv);
 		perror("Failed to exec kernel");
 		ret = 1;
 	}
-	printf("\n");
 	return uml_exitcode;
 }
 
 extern void *__real_malloc(int);
+extern void __real_free(void *);
+
+/* workaround for -Wmissing-prototypes warnings */
+void *__wrap_malloc(int size);
+void *__wrap_calloc(int n, int size);
+void __wrap_free(void *ptr);
 
 void *__wrap_malloc(int size)
 {
@@ -224,10 +230,6 @@ void *__wrap_calloc(int n, int size)
 	return ptr;
 }
 
-extern void __real_free(void *);
-
-extern unsigned long high_physmem;
-
 void __wrap_free(void *ptr)
 {
 	unsigned long addr = (unsigned long) ptr;
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index ba4398056fe9..72f302f4d197 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -12,189 +12,164 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
-#include <sys/param.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
 #include <init.h>
+#include <kern_util.h>
 #include <os.h>
-
-/* Modified by which_tmpdir, which is called during early boot */
-static char *default_tmpdir = "/tmp";
+#include "internal.h"
 
 /*
- *  Modified when creating the physical memory file and when checking
- * the tmp filesystem for usability, both happening during early boot.
+ * kasan_map_memory - maps memory from @start with a size of @len.
+ * The allocated memory is filled with zeroes upon success.
+ * @start: the start address of the memory to be mapped
+ * @len: the length of the memory to be mapped
+ *
+ * This function is used to map shadow memory for KASAN in uml
  */
-static char *tempdir = NULL;
-
-static void __init find_tempdir(void)
+void kasan_map_memory(void *start, size_t len)
 {
-	const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL };
-	int i;
-	char *dir = NULL;
+	if (mmap(start,
+		 len,
+		 PROT_READ|PROT_WRITE,
+		 MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE,
+		 -1,
+		 0) == MAP_FAILED) {
+		os_info("Couldn't allocate shadow memory: %s\n.",
+			strerror(errno));
+		exit(1);
+	}
 
-	if (tempdir != NULL)
-		/* We've already been called */
-		return;
-	for (i = 0; dirs[i]; i++) {
-		dir = getenv(dirs[i]);
-		if ((dir != NULL) && (*dir != '\0'))
-			break;
+	if (madvise(start, len, MADV_DONTDUMP)) {
+		os_info("Couldn't set MAD_DONTDUMP on shadow memory: %s\n.",
+			strerror(errno));
+		exit(1);
 	}
-	if ((dir == NULL) || (*dir == '\0'))
-		dir = default_tmpdir;
 
-	tempdir = malloc(strlen(dir) + 2);
-	if (tempdir == NULL) {
-		fprintf(stderr, "Failed to malloc tempdir, "
-			"errno = %d\n", errno);
-		return;
+	if (madvise(start, len, MADV_DONTFORK)) {
+		os_info("Couldn't set MADV_DONTFORK on shadow memory: %s\n.",
+			strerror(errno));
+		exit(1);
 	}
-	strcpy(tempdir, dir);
-	strcat(tempdir, "/");
 }
 
-/*
- * This will return 1, with the first character in buf being the
- * character following the next instance of c in the file.  This will
- * read the file as needed.  If there's an error, -errno is returned;
- * if the end of the file is reached, 0 is returned.
- */
-static int next(int fd, char *buf, size_t size, char c)
-{
-	ssize_t n;
-	size_t len;
-	char *ptr;
+/* Set by make_tempfile() during early boot. */
+char *tempdir = NULL;
 
-	while ((ptr = strchr(buf, c)) == NULL) {
-		n = read(fd, buf, size - 1);
-		if (n == 0)
-			return 0;
-		else if (n < 0)
-			return -errno;
-
-		buf[n] = '\0';
+/* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */
+static int __init check_tmpfs(const char *dir)
+{
+	struct statfs st;
+
+	os_info("Checking if %s is on tmpfs...", dir);
+	if (statfs(dir, &st) < 0) {
+		os_info("%s\n", strerror(errno));
+	} else if (st.f_type != TMPFS_MAGIC) {
+		os_info("no\n");
+	} else {
+		os_info("OK\n");
+		return 0;
 	}
-
-	ptr++;
-	len = strlen(ptr);
-	memmove(buf, ptr, len + 1);
-
-	/*
-	 * Refill the buffer so that if there's a partial string that we care
-	 * about, it will be completed, and we can recognize it.
-	 */
-	n = read(fd, &buf[len], size - len - 1);
-	if (n < 0)
-		return -errno;
-
-	buf[len + n] = '\0';
-	return 1;
+	return -1;
 }
 
-/* which_tmpdir is called only during early boot */
-static int checked_tmpdir = 0;
-
 /*
- * Look for a tmpfs mounted at /dev/shm.  I couldn't find a cleaner
- * way to do this than to parse /proc/mounts.  statfs will return the
- * same filesystem magic number and fs id for both /dev and /dev/shm
- * when they are both tmpfs, so you can't tell if they are different
- * filesystems.  Also, there seems to be no other way of finding the
- * mount point of a filesystem from within it.
- *
- * If a /dev/shm tmpfs entry is found, then we switch to using it.
- * Otherwise, we stay with the default /tmp.
+ * Choose the tempdir to use. We want something on tmpfs so that our memory is
+ * not subject to the host's vm.dirty_ratio. If a tempdir is specified in the
+ * environment, we use that even if it's not on tmpfs, but we warn the user.
+ * Otherwise, we try common tmpfs locations, and if no tmpfs directory is found
+ * then we fall back to /tmp.
  */
-static void which_tmpdir(void)
+static char * __init choose_tempdir(void)
 {
-	int fd, found;
-	char buf[128] = { '\0' };
-
-	if (checked_tmpdir)
-		return;
-
-	checked_tmpdir = 1;
-
-	printf("Checking for tmpfs mount on /dev/shm...");
-
-	fd = open("/proc/mounts", O_RDONLY);
-	if (fd < 0) {
-		printf("failed to open /proc/mounts, errno = %d\n", errno);
-		return;
+	static const char * const vars[] = {
+		"TMPDIR",
+		"TMP",
+		"TEMP",
+		NULL
+	};
+	static const char fallback_dir[] = "/tmp";
+	static const char * const tmpfs_dirs[] = {
+		"/dev/shm",
+		fallback_dir,
+		NULL
+	};
+	int i;
+	const char *dir;
+
+	os_info("Checking environment variables for a tempdir...");
+	for (i = 0; vars[i]; i++) {
+		dir = getenv(vars[i]);
+		if ((dir != NULL) && (*dir != '\0')) {
+			os_info("%s\n", dir);
+			if (check_tmpfs(dir) >= 0)
+				goto done;
+			else
+				goto warn;
+		}
 	}
+	os_info("none found\n");
 
-	while (1) {
-		found = next(fd, buf, ARRAY_SIZE(buf), ' ');
-		if (found != 1)
-			break;
-
-		if (!strncmp(buf, "/dev/shm", strlen("/dev/shm")))
-			goto found;
-
-		found = next(fd, buf, ARRAY_SIZE(buf), '\n');
-		if (found != 1)
-			break;
+	for (i = 0; tmpfs_dirs[i]; i++) {
+		dir = tmpfs_dirs[i];
+		if (check_tmpfs(dir) >= 0)
+			goto done;
 	}
 
-err:
-	if (found == 0)
-		printf("nothing mounted on /dev/shm\n");
-	else if (found < 0)
-		printf("read returned errno %d\n", -found);
-
-out:
-	close(fd);
-
-	return;
-
-found:
-	found = next(fd, buf, ARRAY_SIZE(buf), ' ');
-	if (found != 1)
-		goto err;
-
-	if (strncmp(buf, "tmpfs", strlen("tmpfs"))) {
-		printf("not tmpfs\n");
-		goto out;
-	}
-
-	printf("OK\n");
-	default_tmpdir = "/dev/shm";
-	goto out;
+	dir = fallback_dir;
+warn:
+	os_warn("Warning: tempdir %s is not on tmpfs\n", dir);
+done:
+	/* Make a copy since getenv results may not remain valid forever. */
+	return strdup(dir);
 }
 
-static int __init make_tempfile(const char *template, char **out_tempname,
-				int do_unlink)
+/*
+ * Create an unlinked tempfile in a suitable tempdir. template must be the
+ * basename part of the template with a leading '/'.
+ */
+static int __init make_tempfile(const char *template)
 {
 	char *tempname;
 	int fd;
 
-	which_tmpdir();
-	tempname = malloc(MAXPATHLEN);
+	if (tempdir == NULL) {
+		tempdir = choose_tempdir();
+		if (tempdir == NULL) {
+			os_warn("Failed to choose tempdir: %s\n",
+				strerror(errno));
+			return -1;
+		}
+	}
+
+#ifdef O_TMPFILE
+	fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700);
+	/*
+	 * If the running system does not support O_TMPFILE flag then retry
+	 * without it.
+	 */
+	if (fd != -1 || (errno != EINVAL && errno != EISDIR &&
+			errno != EOPNOTSUPP))
+		return fd;
+#endif
+
+	tempname = malloc(strlen(tempdir) + strlen(template) + 1);
 	if (tempname == NULL)
 		return -1;
 
-	find_tempdir();
-	if ((tempdir == NULL) || (strlen(tempdir) >= MAXPATHLEN))
-		goto out;
-
-	if (template[0] != '/')
-		strcpy(tempname, tempdir);
-	else
-		tempname[0] = '\0';
-	strncat(tempname, template, MAXPATHLEN-1-strlen(tempname));
+	strcpy(tempname, tempdir);
+	strcat(tempname, template);
 	fd = mkstemp(tempname);
 	if (fd < 0) {
-		fprintf(stderr, "open - cannot create %s: %s\n", tempname,
+		os_warn("open - cannot create %s: %s\n", tempname,
 			strerror(errno));
 		goto out;
 	}
-	if (do_unlink && (unlink(tempname) < 0)) {
+	if (unlink(tempname) < 0) {
 		perror("unlink");
 		goto close;
 	}
-	if (out_tempname) {
-		*out_tempname = tempname;
-	} else
-		free(tempname);
+	free(tempname);
 	return fd;
 close:
 	close(fd);
@@ -203,23 +178,17 @@ out:
 	return -1;
 }
 
-#define TEMPNAME_TEMPLATE "vm_file-XXXXXX"
+#define TEMPNAME_TEMPLATE "/vm_file-XXXXXX"
 
 static int __init create_tmp_file(unsigned long long len)
 {
 	int fd, err;
 	char zero;
 
-	fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1);
+	fd = make_tempfile(TEMPNAME_TEMPLATE);
 	if (fd < 0)
 		exit(1);
 
-	err = fchmod(fd, 0777);
-	if (err < 0) {
-		perror("fchmod");
-		exit(1);
-	}
-
 	/*
 	 * Seek to len - 1 because writing a character there will
 	 * increase the file size by one byte, to the desired length.
@@ -254,7 +223,6 @@ int __init create_mem_file(unsigned long long len)
 	return fd;
 }
 
-
 void __init check_tmpexec(void)
 {
 	void *addr;
@@ -262,17 +230,16 @@ void __init check_tmpexec(void)
 
 	addr = mmap(NULL, UM_KERN_PAGE_SIZE,
 		    PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
-	printf("Checking PROT_EXEC mmap in %s...",tempdir);
-	fflush(stdout);
+	os_info("Checking PROT_EXEC mmap in %s...", tempdir);
 	if (addr == MAP_FAILED) {
 		err = errno;
-		perror("failed");
+		os_warn("%s\n", strerror(err));
 		close(fd);
 		if (err == EPERM)
-			printf("%s must be not mounted noexec\n",tempdir);
+			os_warn("%s must be not mounted noexec\n", tempdir);
 		exit(1);
 	}
-	printf("OK\n");
+	os_info("OK\n");
 	munmap(addr, UM_KERN_PAGE_SIZE);
 
 	close(fd);
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index b8f34c9e53ae..3a2a84ab9325 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -1,119 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <fcntl.h>
+#include <limits.h>
+#include <linux/futex.h>
 #include <sys/mman.h>
 #include <sys/ptrace.h>
+#include <sys/prctl.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
 #include <init.h>
 #include <longjmp.h>
 #include <os.h>
-#include <skas_ptrace.h>
+#include <skas/skas.h>
 
-#define ARBITRARY_ADDR -1
-#define FAILURE_PID    -1
-
-#define STAT_PATH_LEN sizeof("/proc/#######/stat\0")
-#define COMM_SCANF "%*[^)])"
-
-unsigned long os_process_pc(int pid)
+void os_alarm_process(int pid)
 {
-	char proc_stat[STAT_PATH_LEN], buf[256];
-	unsigned long pc = ARBITRARY_ADDR;
-	int fd, err;
+	if (pid <= 0)
+		return;
 
-	sprintf(proc_stat, "/proc/%d/stat", pid);
-	fd = open(proc_stat, O_RDONLY, 0);
-	if (fd < 0) {
-		printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', "
-		       "errno = %d\n", proc_stat, errno);
-		goto out;
-	}
-	CATCH_EINTR(err = read(fd, buf, sizeof(buf)));
-	if (err < 0) {
-		printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', "
-		       "err = %d\n", proc_stat, errno);
-		goto out_close;
-	}
-	os_close_file(fd);
-	pc = ARBITRARY_ADDR;
-	if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d "
-		   "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d "
-		   "%*d %*d %*d %*d %*d %lu", &pc) != 1)
-		printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n",
-		       buf);
- out_close:
-	close(fd);
- out:
-	return pc;
+	kill(pid, SIGALRM);
 }
 
-int os_process_parent(int pid)
+void os_kill_process(int pid, int reap_child)
 {
-	char stat[STAT_PATH_LEN];
-	char data[256];
-	int parent = FAILURE_PID, n, fd;
-
-	if (pid == -1)
-		return parent;
-
-	snprintf(stat, sizeof(stat), "/proc/%d/stat", pid);
-	fd = open(stat, O_RDONLY, 0);
-	if (fd < 0) {
-		printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat,
-		       errno);
-		return parent;
-	}
+	if (pid <= 0)
+		return;
 
-	CATCH_EINTR(n = read(fd, data, sizeof(data)));
-	close(fd);
-
-	if (n < 0) {
-		printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat,
-		       errno);
-		return parent;
-	}
-
-	parent = FAILURE_PID;
-	n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent);
-	if (n != 1)
-		printk(UM_KERN_ERR "Failed to scan '%s'\n", data);
+	/* Block signals until child is reaped */
+	block_signals();
 
-	return parent;
-}
-
-void os_stop_process(int pid)
-{
-	kill(pid, SIGSTOP);
-}
-
-void os_kill_process(int pid, int reap_child)
-{
 	kill(pid, SIGKILL);
 	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
-}
-
-/* This is here uniquely to have access to the userspace errno, i.e. the one
- * used by ptrace in case of error.
- */
-
-long os_ptrace_ldt(long pid, long addr, long data)
-{
-	int ret;
-
-	ret = ptrace(PTRACE_LDT, pid, addr, data);
 
-	if (ret < 0)
-		return -errno;
-	return ret;
+	unblock_signals();
 }
 
 /* Kill off a ptraced child by all means available.  kill it normally first,
@@ -123,11 +52,27 @@ long os_ptrace_ldt(long pid, long addr, long data)
 
 void os_kill_ptraced_process(int pid, int reap_child)
 {
+	if (pid <= 0)
+		return;
+
+	/* Block signals until child is reaped */
+	block_signals();
+
 	kill(pid, SIGKILL);
 	ptrace(PTRACE_KILL, pid);
 	ptrace(PTRACE_CONT, pid);
 	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+	unblock_signals();
+}
+
+pid_t os_reap_child(void)
+{
+	int status;
+
+	/* Try to reap a child */
+	return waitpid(-1, &status, WNOHANG);
 }
 
 /* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -139,11 +84,6 @@ int os_getpid(void)
 	return syscall(__NR_getpid);
 }
 
-int os_getpgrp(void)
-{
-	return getpgrp();
-}
-
 int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
 		  int r, int w, int x)
 {
@@ -241,6 +181,31 @@ void init_new_thread_signals(void)
 	set_handler(SIGBUS);
 	signal(SIGHUP, SIG_IGN);
 	set_handler(SIGIO);
+	/* We (currently) only use the child reaper IRQ in seccomp mode */
+	if (using_seccomp)
+		set_handler(SIGCHLD);
 	signal(SIGWINCH, SIG_IGN);
-	signal(SIGTERM, SIG_DFL);
+}
+
+void os_set_pdeathsig(void)
+{
+	prctl(PR_SET_PDEATHSIG, SIGKILL);
+}
+
+int os_futex_wait(void *uaddr, unsigned int val)
+{
+	int r;
+
+	CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val,
+				NULL, NULL, 0));
+	return r < 0 ? -errno : r;
+}
+
+int os_futex_wake(void *uaddr)
+{
+	int r;
+
+	CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX,
+				NULL, NULL, 0));
+	return r < 0 ? -errno : r;
 }
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index 2ff8d4fe83c4..bfba2cbc9478 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -1,7 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2004 PathScale, Inc
  * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <errno.h>
@@ -10,33 +10,14 @@
 #include <sysdep/ptrace.h>
 #include <sysdep/ptrace_user.h>
 #include <registers.h>
-
-int save_registers(int pid, struct uml_pt_regs *regs)
-{
-	int err;
-
-	err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
-	if (err < 0)
-		return -errno;
-	return 0;
-}
-
-int restore_registers(int pid, struct uml_pt_regs *regs)
-{
-	int err;
-
-	err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
-	if (err < 0)
-		return -errno;
-	return 0;
-}
+#include <stdlib.h>
 
 /* This is set once at boot time and not changed thereafter */
 
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long exec_fp_regs[FP_SIZE];
+unsigned long exec_regs[MAX_REG_NR];
+unsigned long *exec_fp_regs;
 
-int init_registers(int pid)
+int init_pid_registers(int pid)
 {
 	int err;
 
@@ -44,7 +25,11 @@ int init_registers(int pid)
 	if (err < 0)
 		return -errno;
 
-	arch_init_registers(pid);
+	err = arch_init_registers(pid);
+	if (err < 0)
+		return err;
+
+	exec_fp_regs = malloc(host_fp_size);
 	get_fp_registers(pid, exec_fp_regs);
 	return 0;
 }
@@ -54,5 +39,5 @@ void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
 	memcpy(regs, exec_regs, sizeof(exec_regs));
 
 	if (fp_regs)
-		memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs));
+		memcpy(fp_regs, exec_fp_regs, host_fp_size);
 }
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 8b61cc0e82c8..6de145f8fe3d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <unistd.h>
@@ -11,6 +11,8 @@
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
+#include <sys/epoll.h>
+#include <asm/unistd.h>
 #include <kern_util.h>
 #include <init.h>
 #include <os.h>
@@ -21,366 +23,136 @@
  * Protected by sigio_lock(), also used by sigio_cleanup, which is an
  * exitcall.
  */
-static int write_sigio_pid = -1;
-static unsigned long write_sigio_stack;
+static struct os_helper_thread *write_sigio_td;
 
-/*
- * These arrays are initialized before the sigio thread is started, and
- * the descriptors closed after it is killed.  So, it can't see them change.
- * On the UML side, they are changed under the sigio_lock.
- */
-#define SIGIO_FDS_INIT {-1, -1}
-
-static int write_sigio_fds[2] = SIGIO_FDS_INIT;
-static int sigio_private[2] = SIGIO_FDS_INIT;
+static int epollfd = -1;
 
-struct pollfds {
-	struct pollfd *poll;
-	int size;
-	int used;
-};
+#define MAX_EPOLL_EVENTS 64
 
-/*
- * Protected by sigio_lock().  Used by the sigio thread, but the UML thread
- * synchronizes with it.
- */
-static struct pollfds current_poll;
-static struct pollfds next_poll;
-static struct pollfds all_sigio_fds;
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
 
-static int write_sigio_thread(void *unused)
+static void *write_sigio_thread(void *unused)
 {
-	struct pollfds *fds, tmp;
-	struct pollfd *p;
-	int i, n, respond_fd;
-	char c;
+	int pid = getpid();
+	int r;
+
+	os_fix_helper_thread_signals();
 
-	signal(SIGWINCH, SIG_IGN);
-	fds = &current_poll;
 	while (1) {
-		n = poll(fds->poll, fds->used, -1);
-		if (n < 0) {
+		r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1);
+		if (r < 0) {
 			if (errno == EINTR)
 				continue;
-			printk(UM_KERN_ERR "write_sigio_thread : poll returned "
-			       "%d, errno = %d\n", n, errno);
-		}
-		for (i = 0; i < fds->used; i++) {
-			p = &fds->poll[i];
-			if (p->revents == 0)
-				continue;
-			if (p->fd == sigio_private[1]) {
-				CATCH_EINTR(n = read(sigio_private[1], &c,
-						     sizeof(c)));
-				if (n != sizeof(c))
-					printk(UM_KERN_ERR
-					       "write_sigio_thread : "
-					       "read on socket failed, "
-					       "err = %d\n", errno);
-				tmp = current_poll;
-				current_poll = next_poll;
-				next_poll = tmp;
-				respond_fd = sigio_private[1];
-			}
-			else {
-				respond_fd = write_sigio_fds[1];
-				fds->used--;
-				memmove(&fds->poll[i], &fds->poll[i + 1],
-					(fds->used - i) * sizeof(*fds->poll));
-			}
-
-			CATCH_EINTR(n = write(respond_fd, &c, sizeof(c)));
-			if (n != sizeof(c))
-				printk(UM_KERN_ERR "write_sigio_thread : "
-				       "write on socket failed, err = %d\n",
-				       errno);
+			printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n",
+			       __func__, errno);
 		}
-	}
-
-	return 0;
-}
-
-static int need_poll(struct pollfds *polls, int n)
-{
-	struct pollfd *new;
 
-	if (n <= polls->size)
-		return 0;
-
-	new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC);
-	if (new == NULL) {
-		printk(UM_KERN_ERR "need_poll : failed to allocate new "
-		       "pollfds\n");
-		return -ENOMEM;
+		CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO));
+		if (r < 0)
+			printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
+			       __func__, errno);
 	}
 
-	memcpy(new, polls->poll, polls->used * sizeof(struct pollfd));
-	kfree(polls->poll);
-
-	polls->poll = new;
-	polls->size = n;
-	return 0;
+	return NULL;
 }
 
-/*
- * Must be called with sigio_lock held, because it's needed by the marked
- * critical section.
- */
-static void update_thread(void)
+int __add_sigio_fd(int fd)
 {
-	unsigned long flags;
-	int n;
-	char c;
-
-	flags = set_signals(0);
-	CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
-	if (n != sizeof(c)) {
-		printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
-		       errno);
-		goto fail;
-	}
-
-	CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c)));
-	if (n != sizeof(c)) {
-		printk(UM_KERN_ERR "update_thread : read failed, err = %d\n",
-		       errno);
-		goto fail;
-	}
-
-	set_signals(flags);
-	return;
- fail:
-	/* Critical section start */
-	if (write_sigio_pid != -1) {
-		os_kill_process(write_sigio_pid, 1);
-		free_stack(write_sigio_stack, 0);
-	}
-	write_sigio_pid = -1;
-	close(sigio_private[0]);
-	close(sigio_private[1]);
-	close(write_sigio_fds[0]);
-	close(write_sigio_fds[1]);
-	/* Critical section end */
-	set_signals(flags);
+	struct epoll_event event = {
+		.data.fd = fd,
+		.events = EPOLLIN | EPOLLET,
+	};
+	int r;
+
+	CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event));
+	return r < 0 ? -errno : 0;
 }
 
 int add_sigio_fd(int fd)
 {
-	struct pollfd *p;
-	int err = 0, i, n;
+	int err;
 
 	sigio_lock();
-	for (i = 0; i < all_sigio_fds.used; i++) {
-		if (all_sigio_fds.poll[i].fd == fd)
-			break;
-	}
-	if (i == all_sigio_fds.used)
-		goto out;
-
-	p = &all_sigio_fds.poll[i];
+	err = __add_sigio_fd(fd);
+	sigio_unlock();
 
-	for (i = 0; i < current_poll.used; i++) {
-		if (current_poll.poll[i].fd == fd)
-			goto out;
-	}
+	return err;
+}
 
-	n = current_poll.used;
-	err = need_poll(&next_poll, n + 1);
-	if (err)
-		goto out;
+int __ignore_sigio_fd(int fd)
+{
+	struct epoll_event event;
+	int r;
 
-	memcpy(next_poll.poll, current_poll.poll,
-	       current_poll.used * sizeof(struct pollfd));
-	next_poll.poll[n] = *p;
-	next_poll.used = n + 1;
-	update_thread();
- out:
-	sigio_unlock();
-	return err;
+	CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event));
+	return r < 0 ? -errno : 0;
 }
 
 int ignore_sigio_fd(int fd)
 {
-	struct pollfd *p;
-	int err = 0, i, n = 0;
-
-	/*
-	 * This is called from exitcalls elsewhere in UML - if
-	 * sigio_cleanup has already run, then update_thread will hang
-	 * or fail because the thread is no longer running.
-	 */
-	if (write_sigio_pid == -1)
-		return -EIO;
+	int err;
 
 	sigio_lock();
-	for (i = 0; i < current_poll.used; i++) {
-		if (current_poll.poll[i].fd == fd)
-			break;
-	}
-	if (i == current_poll.used)
-		goto out;
-
-	err = need_poll(&next_poll, current_poll.used - 1);
-	if (err)
-		goto out;
-
-	for (i = 0; i < current_poll.used; i++) {
-		p = &current_poll.poll[i];
-		if (p->fd != fd)
-			next_poll.poll[n++] = *p;
-	}
-	next_poll.used = current_poll.used - 1;
-
-	update_thread();
- out:
+	err = __ignore_sigio_fd(fd);
 	sigio_unlock();
-	return err;
-}
-
-static struct pollfd *setup_initial_poll(int fd)
-{
-	struct pollfd *p;
 
-	p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL);
-	if (p == NULL) {
-		printk(UM_KERN_ERR "setup_initial_poll : failed to allocate "
-		       "poll\n");
-		return NULL;
-	}
-	*p = ((struct pollfd) { .fd		= fd,
-				.events 	= POLLIN,
-				.revents 	= 0 });
-	return p;
+	return err;
 }
 
 static void write_sigio_workaround(void)
 {
-	struct pollfd *p;
 	int err;
-	int l_write_sigio_fds[2];
-	int l_sigio_private[2];
-	int l_write_sigio_pid;
 
-	/* We call this *tons* of times - and most ones we must just fail. */
 	sigio_lock();
-	l_write_sigio_pid = write_sigio_pid;
-	sigio_unlock();
-
-	if (l_write_sigio_pid != -1)
-		return;
+	if (write_sigio_td)
+		goto out;
 
-	err = os_pipe(l_write_sigio_fds, 1, 1);
-	if (err < 0) {
-		printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, "
-		       "err = %d\n", -err);
-		return;
+	epollfd = epoll_create(MAX_EPOLL_EVENTS);
+	if (epollfd < 0) {
+		printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n",
+		       __func__, errno);
+		goto out;
 	}
-	err = os_pipe(l_sigio_private, 1, 1);
+
+	err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL);
 	if (err < 0) {
-		printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, "
-		       "err = %d\n", -err);
-		goto out_close1;
+		printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n",
+		       __func__, -err);
+		close(epollfd);
+		epollfd = -1;
+		goto out;
 	}
 
-	p = setup_initial_poll(l_sigio_private[1]);
-	if (!p)
-		goto out_close2;
-
-	sigio_lock();
-
-	/*
-	 * Did we race? Don't try to optimize this, please, it's not so likely
-	 * to happen, and no more than once at the boot.
-	 */
-	if (write_sigio_pid != -1)
-		goto out_free;
-
-	current_poll = ((struct pollfds) { .poll 	= p,
-					   .used 	= 1,
-					   .size 	= 1 });
-
-	if (write_sigio_irq(l_write_sigio_fds[0]))
-		goto out_clear_poll;
-
-	memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
-	memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
-
-	write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
-					    CLONE_FILES | CLONE_VM,
-					    &write_sigio_stack);
-
-	if (write_sigio_pid < 0)
-		goto out_clear;
-
-	sigio_unlock();
-	return;
-
-out_clear:
-	write_sigio_pid = -1;
-	write_sigio_fds[0] = -1;
-	write_sigio_fds[1] = -1;
-	sigio_private[0] = -1;
-	sigio_private[1] = -1;
-out_clear_poll:
-	current_poll = ((struct pollfds) { .poll	= NULL,
-					   .size	= 0,
-					   .used	= 0 });
-out_free:
+out:
 	sigio_unlock();
-	kfree(p);
-out_close2:
-	close(l_sigio_private[0]);
-	close(l_sigio_private[1]);
-out_close1:
-	close(l_write_sigio_fds[0]);
-	close(l_write_sigio_fds[1]);
 }
 
-void sigio_broken(int fd, int read)
+void sigio_broken(void)
 {
-	int err;
-
 	write_sigio_workaround();
-
-	sigio_lock();
-	err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
-	if (err) {
-		printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd "
-		       "for descriptor %d\n", fd);
-		goto out;
-	}
-
-	all_sigio_fds.poll[all_sigio_fds.used++] =
-		((struct pollfd) { .fd  	= fd,
-				   .events 	= read ? POLLIN : POLLOUT,
-				   .revents 	= 0 });
-out:
-	sigio_unlock();
 }
 
 /* Changed during early boot */
 static int pty_output_sigio;
-static int pty_close_sigio;
 
-void maybe_sigio_broken(int fd, int read)
+void maybe_sigio_broken(int fd)
 {
 	if (!isatty(fd))
 		return;
 
-	if ((read || pty_output_sigio) && (!read || pty_close_sigio))
+	if (pty_output_sigio)
 		return;
 
-	sigio_broken(fd, read);
+	sigio_broken();
 }
 
 static void sigio_cleanup(void)
 {
-	if (write_sigio_pid == -1)
+	if (!write_sigio_td)
 		return;
 
-	os_kill_process(write_sigio_pid, 1);
-	free_stack(write_sigio_stack, 0);
-	write_sigio_pid = -1;
+	os_kill_helper_thread(write_sigio_td);
+	write_sigio_td = NULL;
 }
 
 __uml_exitcall(sigio_cleanup);
@@ -514,19 +286,6 @@ static void tty_output(int master, int slave)
 		printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n);
 }
 
-static void tty_close(int master, int slave)
-{
-	printk(UM_KERN_INFO "Checking that host ptys support SIGIO on "
-	       "close...");
-
-	close(slave);
-	if (got_sigio) {
-		printk(UM_KERN_CONT "Yes\n");
-		pty_close_sigio = 1;
-	} else
-		printk(UM_KERN_CONT "No, enabling workaround\n");
-}
-
 static void __init check_sigio(void)
 {
 	if ((access("/dev/ptmx", R_OK) < 0) &&
@@ -536,7 +295,6 @@ static void __init check_sigio(void)
 		return;
 	}
 	check_one_sigio(tty_output);
-	check_one_sigio(tty_close);
 }
 
 /* Here because it only does the SIGIO testing for now */
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 9d9f1b4bf826..327fb3c52fc7 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -1,31 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2004 PathScale, Inc
  * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <errno.h>
 #include <signal.h>
+#include <string.h>
 #include <strings.h>
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
 #include <sysdep/mcontext.h>
+#include <um_malloc.h>
+#include <sys/ucontext.h>
+#include <timetravel.h>
 #include "internal.h"
 
-void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
+void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = {
 	[SIGTRAP]	= relay_signal,
 	[SIGFPE]	= relay_signal,
 	[SIGILL]	= relay_signal,
 	[SIGWINCH]	= winch,
-	[SIGBUS]	= bus_handler,
+	[SIGBUS]	= relay_signal,
 	[SIGSEGV]	= segv_handler,
 	[SIGIO]		= sigio_handler,
-	[SIGVTALRM]	= timer_handler };
+	[SIGCHLD]	= sigchld_handler,
+};
 
-static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
+static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	struct uml_pt_regs r;
 	int save_errno = errno;
@@ -38,10 +46,10 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
 	}
 
 	/* enable signals if sig isn't IRQ signal */
-	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM))
-		unblock_signals();
+	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
+		unblock_signals_trace();
 
-	(*sig_info[sig])(sig, si, &r);
+	(*sig_info[sig])(sig, si, &r, mc);
 
 	errno = save_errno;
 }
@@ -55,72 +63,131 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
 #define SIGIO_BIT 0
 #define SIGIO_MASK (1 << SIGIO_BIT)
 
-#define SIGVTALRM_BIT 1
-#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT)
+#define SIGALRM_BIT 1
+#define SIGALRM_MASK (1 << SIGALRM_BIT)
 
-static int signals_enabled;
-static unsigned int signals_pending;
+#define SIGCHLD_BIT 2
+#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
 
-void sig_handler(int sig, siginfo_t *si, mcontext_t *mc)
+__thread int signals_enabled;
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+static int signals_blocked, signals_blocked_pending;
+#endif
+static __thread unsigned int signals_pending;
+static __thread unsigned int signals_active;
+
+static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
-	int enabled;
+	int enabled = signals_enabled;
+
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+	if ((signals_blocked ||
+	     __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
+	    (sig == SIGIO)) {
+		/* increment so unblock will do another round */
+		__atomic_add_fetch(&signals_blocked_pending, 1,
+				   __ATOMIC_SEQ_CST);
+		return;
+	}
+#endif
 
-	enabled = signals_enabled;
 	if (!enabled && (sig == SIGIO)) {
-		signals_pending |= SIGIO_MASK;
+		/*
+		 * In TT_MODE_EXTERNAL, need to still call time-travel
+		 * handlers. This will mark signals_pending by itself
+		 * (only if necessary.)
+		 * Note we won't get here if signals are hard-blocked
+		 * (which is handled above), in that case the hard-
+		 * unblock will handle things.
+		 */
+		if (time_travel_mode == TT_MODE_EXTERNAL)
+			sigio_run_timetravel_handlers();
+		else
+			signals_pending |= SIGIO_MASK;
 		return;
 	}
 
-	block_signals();
+	if (!enabled && (sig == SIGCHLD)) {
+		signals_pending |= SIGCHLD_MASK;
+		return;
+	}
+
+	block_signals_trace();
 
 	sig_handler_common(sig, si, mc);
 
-	set_signals(enabled);
+	um_set_signals_trace(enabled);
 }
 
-static void real_alarm_handler(mcontext_t *mc)
+static void timer_real_alarm_handler(mcontext_t *mc)
 {
 	struct uml_pt_regs regs;
 
 	if (mc != NULL)
 		get_regs_from_mc(&regs, mc);
-	regs.is_user = 0;
-	unblock_signals();
-	timer_handler(SIGVTALRM, NULL, &regs);
+	else
+		memset(&regs, 0, sizeof(regs));
+	timer_handler(SIGALRM, NULL, &regs);
 }
 
-void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+static void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
 {
 	int enabled;
 
 	enabled = signals_enabled;
 	if (!signals_enabled) {
-		signals_pending |= SIGVTALRM_MASK;
+		signals_pending |= SIGALRM_MASK;
 		return;
 	}
 
-	block_signals();
+	block_signals_trace();
+
+	signals_active |= SIGALRM_MASK;
+
+	timer_real_alarm_handler(mc);
 
-	real_alarm_handler(mc);
-	set_signals(enabled);
+	signals_active &= ~SIGALRM_MASK;
+
+	um_set_signals_trace(enabled);
+}
+
+void deliver_alarm(void) {
+    timer_alarm_handler(SIGALRM, NULL, NULL);
 }
 
-void timer_init(void)
+void timer_set_signal_handler(void)
 {
-	set_handler(SIGVTALRM);
+	set_handler(SIGALRM);
+}
+
+int timer_alarm_pending(void)
+{
+	return !!(signals_pending & SIGALRM_MASK);
 }
 
 void set_sigstack(void *sig_stack, int size)
 {
-	stack_t stack = ((stack_t) { .ss_flags	= 0,
-				     .ss_sp	= (__ptr_t) sig_stack,
-				     .ss_size 	= size - sizeof(void *) });
+	stack_t stack = {
+		.ss_flags = 0,
+		.ss_sp = sig_stack,
+		.ss_size = size
+	};
 
 	if (sigaltstack(&stack, NULL) != 0)
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = {
+static void sigusr1_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+{
+	uml_pm_wake();
+}
+
+void register_pm_wake_signal(void)
+{
+	set_handler(SIGUSR1);
+}
+
+static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 	[SIGSEGV] = sig_handler,
 	[SIGBUS] = sig_handler,
 	[SIGILL] = sig_handler,
@@ -129,51 +196,19 @@ static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = {
 
 	[SIGIO] = sig_handler,
 	[SIGWINCH] = sig_handler,
-	[SIGVTALRM] = alarm_handler
-};
+	/* SIGCHLD is only actually registered in seccomp mode. */
+	[SIGCHLD] = sig_handler,
+	[SIGALRM] = timer_alarm_handler,
 
+	[SIGUSR1] = sigusr1_handler,
+};
 
 static void hard_handler(int sig, siginfo_t *si, void *p)
 {
-	struct ucontext *uc = p;
+	ucontext_t *uc = p;
 	mcontext_t *mc = &uc->uc_mcontext;
-	unsigned long pending = 1UL << sig;
 
-	do {
-		int nested, bail;
-
-		/*
-		 * pending comes back with one bit set for each
-		 * interrupt that arrived while setting up the stack,
-		 * plus a bit for this interrupt, plus the zero bit is
-		 * set if this is a nested interrupt.
-		 * If bail is true, then we interrupted another
-		 * handler setting up the stack.  In this case, we
-		 * have to return, and the upper handler will deal
-		 * with this interrupt.
-		 */
-		bail = to_irq_stack(&pending);
-		if (bail)
-			return;
-
-		nested = pending & 1;
-		pending &= ~1;
-
-		while ((sig = ffs(pending)) != 0){
-			sig--;
-			pending &= ~(1 << sig);
-			(*handlers[sig])(sig, si, mc);
-		}
-
-		/*
-		 * Again, pending comes back with a mask of signals
-		 * that arrived while tearing down the stack.  If this
-		 * is non-zero, we just go back, set up the stack
-		 * again, and handle the new interrupts.
-		 */
-		if (!nested)
-			pending = from_irq_stack(nested);
-	} while (pending);
+	(*handlers[sig])(sig, (struct siginfo *)si, mc);
 }
 
 void set_handler(int sig)
@@ -186,9 +221,9 @@ void set_handler(int sig)
 
 	/* block irq ones */
 	sigemptyset(&action.sa_mask);
-	sigaddset(&action.sa_mask, SIGVTALRM);
 	sigaddset(&action.sa_mask, SIGIO);
 	sigaddset(&action.sa_mask, SIGWINCH);
+	sigaddset(&action.sa_mask, SIGALRM);
 
 	if (sig == SIGSEGV)
 		flags |= SA_NODEFER;
@@ -207,6 +242,11 @@ void set_handler(int sig)
 		panic("sigprocmask failed - errno = %d\n", errno);
 }
 
+void send_sigio_to_self(void)
+{
+	kill(os_getpid(), SIGIO);
+}
+
 int change_sig(int signal, int on)
 {
 	sigset_t sigset;
@@ -219,9 +259,29 @@ int change_sig(int signal, int on)
 	return 0;
 }
 
-void block_signals(void)
+static inline void __block_signals(void)
 {
+	if (!signals_enabled)
+		return;
+
+	os_local_ipi_disable();
+	barrier();
 	signals_enabled = 0;
+}
+
+static inline void __unblock_signals(void)
+{
+	if (signals_enabled)
+		return;
+
+	signals_enabled = 1;
+	barrier();
+	os_local_ipi_enable();
+}
+
+void block_signals(void)
+{
+	__block_signals();
 	/*
 	 * This must return with signals disabled, so this barrier
 	 * ensures that writes are flushed out before the return.
@@ -238,6 +298,12 @@ void unblock_signals(void)
 	if (signals_enabled == 1)
 		return;
 
+	__unblock_signals();
+
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+	deliver_time_travel_irqs();
+#endif
+
 	/*
 	 * We loop because the IRQ handler returns with interrupts off.  So,
 	 * interrupts may have arrived and we need to re-enable them and
@@ -247,12 +313,9 @@ void unblock_signals(void)
 		/*
 		 * Save and reset save_pending after enabling signals.  This
 		 * way, signals_pending won't be changed while we're reading it.
-		 */
-		signals_enabled = 1;
-
-		/*
+		 *
 		 * Setting signals_enabled and reading signals_pending must
-		 * happen in this order.
+		 * happen in this order, so have the barrier here.
 		 */
 		barrier();
 
@@ -265,10 +328,13 @@ void unblock_signals(void)
 		/*
 		 * We have pending interrupts, so disable signals, as the
 		 * handlers expect them off when they are called.  They will
-		 * be enabled again above.
+		 * be enabled again above. We need to trace this, as we're
+		 * expected to be enabling interrupts already, but any more
+		 * tracing that happens inside the handlers we call for the
+		 * pending signals will mess up the tracing state.
 		 */
-
-		signals_enabled = 0;
+		__block_signals();
+		um_trace_signals_off();
 
 		/*
 		 * Deal with SIGIO first because the alarm handler might
@@ -281,17 +347,34 @@ void unblock_signals(void)
 		if (save_pending & SIGIO_MASK)
 			sig_handler_common(SIGIO, NULL, NULL);
 
-		if (save_pending & SIGVTALRM_MASK)
-			real_alarm_handler(NULL);
+		if (save_pending & SIGCHLD_MASK) {
+			struct uml_pt_regs regs = {};
+
+			sigchld_handler(SIGCHLD, NULL, &regs, NULL);
+		}
+
+		/* Do not reenter the handler */
+
+		if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
+			timer_real_alarm_handler(NULL);
+
+		/* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */
+
+		if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK))
+			return;
+
+		/* Re-enable signals and trace that we're doing so. */
+		um_trace_signals_on();
+		__unblock_signals();
 	}
 }
 
-int get_signals(void)
+int um_get_signals(void)
 {
 	return signals_enabled;
 }
 
-int set_signals(int enable)
+int um_set_signals(int enable)
 {
 	int ret;
 	if (signals_enabled == enable)
@@ -304,3 +387,117 @@ int set_signals(int enable)
 
 	return ret;
 }
+
+int um_set_signals_trace(int enable)
+{
+	int ret;
+	if (signals_enabled == enable)
+		return enable;
+
+	ret = signals_enabled;
+	if (enable)
+		unblock_signals_trace();
+	else
+		block_signals_trace();
+
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+void mark_sigio_pending(void)
+{
+	/*
+	 * It would seem that this should be atomic so
+	 * it isn't a read-modify-write with a signal
+	 * that could happen in the middle, losing the
+	 * value set by the signal.
+	 *
+	 * However, this function is only called when in
+	 * time-travel=ext simulation mode, in which case
+	 * the only signal ever pending is SIGIO, which
+	 * is blocked while this can be called, and the
+	 * timer signal (SIGALRM) cannot happen.
+	 */
+	signals_pending |= SIGIO_MASK;
+}
+
+void block_signals_hard(void)
+{
+	signals_blocked++;
+	barrier();
+}
+
+void unblock_signals_hard(void)
+{
+	static bool unblocking;
+
+	if (!signals_blocked)
+		panic("unblocking signals while not blocked");
+
+	if (--signals_blocked)
+		return;
+	/*
+	 * Must be set to 0 before we check pending so the
+	 * SIGIO handler will run as normal unless we're still
+	 * going to process signals_blocked_pending.
+	 */
+	barrier();
+
+	/*
+	 * Note that block_signals_hard()/unblock_signals_hard() can be called
+	 * within the unblock_signals()/sigio_run_timetravel_handlers() below.
+	 * This would still be prone to race conditions since it's actually a
+	 * call _within_ e.g. vu_req_read_message(), where we observed this
+	 * issue, which loops. Thus, if the inner call handles the recorded
+	 * pending signals, we can get out of the inner call with the real
+	 * signal hander no longer blocked, and still have a race. Thus don't
+	 * handle unblocking in the inner call, if it happens, but only in
+	 * the outermost call - 'unblocking' serves as an ownership for the
+	 * signals_blocked_pending decrement.
+	 */
+	if (unblocking)
+		return;
+	unblocking = true;
+
+	while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
+		if (signals_enabled) {
+			/* signals are enabled so we can touch this */
+			signals_pending |= SIGIO_MASK;
+			/*
+			 * this is a bit inefficient, but that's
+			 * not really important
+			 */
+			block_signals();
+			unblock_signals();
+		} else {
+			/*
+			 * we need to run time-travel handlers even
+			 * if not enabled
+			 */
+			sigio_run_timetravel_handlers();
+		}
+
+		/*
+		 * The decrement of signals_blocked_pending must be atomic so
+		 * that the signal handler will either happen before or after
+		 * the decrement, not during a read-modify-write:
+		 *  - If it happens before, it can increment it and we'll
+		 *    decrement it and do another round in the loop.
+		 *  - If it happens after it'll see 0 for both signals_blocked
+		 *    and signals_blocked_pending and thus run the handler as
+		 *    usual (subject to signals_enabled, but that's unrelated.)
+		 *
+		 * Note that a call to unblock_signals_hard() within the calls
+		 * to unblock_signals() or sigio_run_timetravel_handlers() above
+		 * will do nothing due to the 'unblocking' state, so this cannot
+		 * underflow as the only one decrementing will be the outermost
+		 * one.
+		 */
+		if (__atomic_sub_fetch(&signals_blocked_pending, 1,
+				       __ATOMIC_SEQ_CST) < 0)
+			panic("signals_blocked_pending underflow");
+	}
+
+	unblocking = false;
+}
+#endif
diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile
index d2ea3409e072..75f11989d2e9 100644
--- a/arch/um/os-Linux/skas/Makefile
+++ b/arch/um/os-Linux/skas/Makefile
@@ -1,10 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
-# Licensed under the GPL
 #
 
 obj-y := mem.o process.o
 
 USER_OBJS := $(obj-y)
 
-include arch/um/scripts/Makefile.rules
+include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 689b18db798f..8b9921ac3ef8 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
@@ -12,16 +13,47 @@
 #include <as-layout.h>
 #include <mm_id.h>
 #include <os.h>
-#include <proc_mm.h>
 #include <ptrace_user.h>
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/ptrace.h>
 #include <sysdep/stub.h>
+#include "../internal.h"
 
-extern unsigned long batch_syscall_stub, __syscall_stub_start;
+extern char __syscall_stub_start[];
 
-extern void wait_stub_done(int pid);
+void syscall_stub_dump_error(struct mm_id *mm_idp)
+{
+	struct stub_data *proc_data = (void *)mm_idp->stack;
+	struct stub_syscall *sc;
+
+	if (proc_data->syscall_data_len < 0 ||
+	    proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data))
+		panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
+			proc_data->syscall_data_len,
+			mm_idp->syscall_data_len);
+
+	sc = &proc_data->syscall_data[proc_data->syscall_data_len];
+
+	printk(UM_KERN_ERR "%s : length = %d, last offset = %d",
+		__func__, mm_idp->syscall_data_len,
+		proc_data->syscall_data_len);
+	printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n",
+		__func__, sc->syscall, proc_data->err);
+
+	print_hex_dump(UM_KERN_ERR, "    syscall data: ", 0,
+		       16, 4, sc, sizeof(*sc), 0);
+
+	if (using_seccomp) {
+		printk(UM_KERN_ERR "%s: FD map num: %d", __func__,
+		       mm_idp->syscall_fd_num);
+		print_hex_dump(UM_KERN_ERR,
+				"    FD map: ", 0, 16,
+				sizeof(mm_idp->syscall_fd_map[0]),
+				mm_idp->syscall_fd_map,
+				sizeof(mm_idp->syscall_fd_map), 0);
+	}
+}
 
 static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
 					      unsigned long *stack)
@@ -38,246 +70,215 @@ static unsigned long syscall_regs[MAX_REG_NR];
 static int __init init_syscall_regs(void)
 {
 	get_safe_registers(syscall_regs, NULL);
+
 	syscall_regs[REGS_IP_INDEX] = STUB_CODE +
-		((unsigned long) &batch_syscall_stub -
-		 (unsigned long) &__syscall_stub_start);
+		((unsigned long) stub_syscall_handler -
+		 (unsigned long) __syscall_stub_start);
+	syscall_regs[REGS_SP_INDEX] = STUB_DATA +
+		offsetof(struct stub_data, sigstack) +
+		sizeof(((struct stub_data *) 0)->sigstack) -
+		sizeof(void *);
+
 	return 0;
 }
 
 __initcall(init_syscall_regs);
 
-extern int proc_mm;
-
-static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
+static inline long do_syscall_stub(struct mm_id *mm_idp)
 {
+	struct stub_data *proc_data = (void *)mm_idp->stack;
 	int n, i;
-	long ret, offset;
-	unsigned long * data;
-	unsigned long * syscall;
-	int err, pid = mm_idp->u.pid;
-
-	if (proc_mm)
-		/* FIXME: Need to look up userspace_pid by cpu */
-		pid = userspace_pid[0];
-
-	n = ptrace_setregs(pid, syscall_regs);
-	if (n < 0) {
-		printk(UM_KERN_ERR "Registers - \n");
-		for (i = 0; i < MAX_REG_NR; i++)
-			printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-		panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
-		      -n);
-	}
+	int err, pid = mm_idp->pid;
+
+	/* Inform process how much we have filled in. */
+	proc_data->syscall_data_len = mm_idp->syscall_data_len;
+
+	if (using_seccomp) {
+		proc_data->restart_wait = 1;
+		wait_stub_done_seccomp(mm_idp, 0, 1);
+	} else {
+		n = ptrace_setregs(pid, syscall_regs);
+		if (n < 0) {
+			printk(UM_KERN_ERR "Registers -\n");
+			for (i = 0; i < MAX_REG_NR; i++)
+				printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+			panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+			      __func__, -n);
+		}
 
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err)
-		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
-		      errno);
+		err = ptrace(PTRACE_CONT, pid, 0, 0);
+		if (err)
+			panic("Failed to continue stub, pid = %d, errno = %d\n",
+			      pid, errno);
 
-	wait_stub_done(pid);
+		wait_stub_done(pid);
+	}
 
 	/*
-	 * When the stub stops, we find the following values on the
-	 * beginning of the stack:
-	 * (long )return_value
-	 * (long )offset to failed sycall-data (0, if no error)
+	 * proc_data->err will be negative if there was an (unexpected) error.
+	 * In that case, syscall_data_len points to the last executed syscall,
+	 * otherwise it will be zero (but we do not need to rely on that).
 	 */
-	ret = *((unsigned long *) mm_idp->stack);
-	offset = *((unsigned long *) mm_idp->stack + 1);
-	if (offset) {
-		data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
-		printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, "
-		       "data = %p\n", ret, offset, data);
-		syscall = (unsigned long *)((unsigned long)data + data[0]);
-		printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, "
-		       "return value = 0x%lx, expected return value = 0x%lx\n",
-		       syscall[0], ret, syscall[7]);
-		printk(UM_KERN_ERR "    syscall parameters: "
-		       "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
-		       syscall[1], syscall[2], syscall[3],
-		       syscall[4], syscall[5], syscall[6]);
-		for (n = 1; n < data[0]/sizeof(long); n++) {
-			if (n == 1)
-				printk(UM_KERN_ERR "    additional syscall "
-				       "data:");
-			if (n % 4 == 1)
-				printk("\n" UM_KERN_ERR "      ");
-			printk("  0x%lx", data[n]);
-		}
-		if (n > 1)
-			printk("\n");
+	if (proc_data->err < 0) {
+		syscall_stub_dump_error(mm_idp);
+
+		/* Store error code in case someone tries to add more syscalls */
+		mm_idp->syscall_data_len = proc_data->err;
+	} else {
+		mm_idp->syscall_data_len = 0;
 	}
-	else ret = 0;
 
-	*addr = check_init_stack(mm_idp, NULL);
+	if (using_seccomp)
+		mm_idp->syscall_fd_num = 0;
 
-	return ret;
+	return mm_idp->syscall_data_len;
 }
 
-long run_syscall_stub(struct mm_id * mm_idp, int syscall,
-		      unsigned long *args, long expected, void **addr,
-		      int done)
+int syscall_stub_flush(struct mm_id *mm_idp)
 {
-	unsigned long *stack = check_init_stack(mm_idp, *addr);
-
-	*stack += sizeof(long);
-	stack += *stack / sizeof(long);
-
-	*stack++ = syscall;
-	*stack++ = args[0];
-	*stack++ = args[1];
-	*stack++ = args[2];
-	*stack++ = args[3];
-	*stack++ = args[4];
-	*stack++ = args[5];
-	*stack++ = expected;
-	*stack = 0;
-
-	if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
-		     UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
-		*addr = stack;
+	int res;
+
+	if (mm_idp->syscall_data_len == 0)
 		return 0;
+
+	/* If an error happened already, report it and reset the state. */
+	if (mm_idp->syscall_data_len < 0) {
+		res = mm_idp->syscall_data_len;
+		mm_idp->syscall_data_len = 0;
+		return res;
 	}
 
-	return do_syscall_stub(mm_idp, addr);
+	res = do_syscall_stub(mm_idp);
+	mm_idp->syscall_data_len = 0;
+
+	return res;
 }
 
-long syscall_stub_data(struct mm_id * mm_idp,
-		       unsigned long *data, int data_count,
-		       void **addr, void **stub_addr)
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp)
 {
-	unsigned long *stack;
-	int ret = 0;
-
-	/*
-	 * If *addr still is uninitialized, it *must* contain NULL.
-	 * Thus in this case do_syscall_stub correctly won't be called.
-	 */
-	if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
-	   UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
-		ret = do_syscall_stub(mm_idp, addr);
-		/* in case of error, don't overwrite data on stack */
-		if (ret)
-			return ret;
+	struct stub_syscall *sc;
+	struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
+
+	if (mm_idp->syscall_data_len > 0 &&
+	    mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data))
+		do_syscall_stub(mm_idp);
+
+	if (mm_idp->syscall_data_len < 0) {
+		/* Return dummy to retain error state. */
+		sc = &proc_data->syscall_data[0];
+	} else {
+		sc = &proc_data->syscall_data[mm_idp->syscall_data_len];
+		mm_idp->syscall_data_len += 1;
 	}
+	memset(sc, 0, sizeof(*sc));
 
-	stack = check_init_stack(mm_idp, *addr);
-	*addr = stack;
+	return sc;
+}
 
-	*stack = data_count * sizeof(long);
+static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
+						      int syscall_type,
+						      unsigned long virt)
+{
+	if (mm_idp->syscall_data_len > 0) {
+		struct stub_data *proc_data = (void *) mm_idp->stack;
+		struct stub_syscall *sc;
 
-	memcpy(stack + 1, data, data_count * sizeof(long));
+		sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1];
 
-	*stub_addr = (void *)(((unsigned long)(stack + 1) &
-			       ~UM_KERN_PAGE_MASK) + STUB_DATA);
+		if (sc->syscall == syscall_type &&
+		    sc->mem.addr + sc->mem.length == virt)
+			return sc;
+	}
 
-	return 0;
+	return NULL;
 }
 
-int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
-	int phys_fd, unsigned long long offset, int done, void **data)
+static int get_stub_fd(struct mm_id *mm_idp, int fd)
 {
-	int ret;
-
-	if (proc_mm) {
-		struct proc_mm_op map;
-		int fd = mm_idp->u.mm_fd;
-
-		map = ((struct proc_mm_op) { .op	= MM_MMAP,
-				       .u		=
-				       { .mmap	=
-					 { .addr	= virt,
-					   .len	= len,
-					   .prot	= prot,
-					   .flags	= MAP_SHARED |
-					   MAP_FIXED,
-					   .fd	= phys_fd,
-					   .offset= offset
-					 } } } );
-		CATCH_EINTR(ret = write(fd, &map, sizeof(map)));
-		if (ret != sizeof(map)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "map : /proc/mm map failed, "
-			       "err = %d\n", -ret);
+	int i;
+
+	/* Find an FD slot (or flush and use first) */
+	if (!using_seccomp)
+		return fd;
+
+	/* Already crashed, value does not matter */
+	if (mm_idp->syscall_data_len < 0)
+		return 0;
+
+	/* Find existing FD in map if we can allocate another syscall */
+	if (mm_idp->syscall_data_len <
+	    ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) {
+		for (i = 0; i < mm_idp->syscall_fd_num; i++) {
+			if (mm_idp->syscall_fd_map[i] == fd)
+				return i;
 		}
-		else ret = 0;
-	}
-	else {
-		unsigned long args[] = { virt, len, prot,
-					 MAP_SHARED | MAP_FIXED, phys_fd,
-					 MMAP_OFFSET(offset) };
 
-		ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
-				       data, done);
+		if (mm_idp->syscall_fd_num < STUB_MAX_FDS) {
+			i = mm_idp->syscall_fd_num;
+			mm_idp->syscall_fd_map[i] = fd;
+
+			mm_idp->syscall_fd_num++;
+
+			return i;
+		}
 	}
 
-	return ret;
+	/* FD map full or no syscall space available, continue after flush */
+	do_syscall_stub(mm_idp);
+	mm_idp->syscall_fd_map[0] = fd;
+	mm_idp->syscall_fd_num = 1;
+
+	return 0;
 }
 
-int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	  int done, void **data)
+int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
+	int phys_fd, unsigned long long offset)
 {
-	int ret;
-
-	if (proc_mm) {
-		struct proc_mm_op unmap;
-		int fd = mm_idp->u.mm_fd;
-
-		unmap = ((struct proc_mm_op) { .op	= MM_MUNMAP,
-					 .u	=
-					 { .munmap	=
-					   { .addr	=
-					     (unsigned long) addr,
-					     .len		= len } } } );
-		CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap)));
-		if (ret != sizeof(unmap)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "unmap - proc_mm write returned "
-			       "%d\n", ret);
+	struct stub_syscall *sc;
+
+	/* Compress with previous syscall if that is possible */
+	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
+	if (sc && sc->mem.prot == prot &&
+	    sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
+		int prev_fd = sc->mem.fd;
+
+		if (using_seccomp)
+			prev_fd = mm_idp->syscall_fd_map[sc->mem.fd];
+
+		if (phys_fd == prev_fd) {
+			sc->mem.length += len;
+			return 0;
 		}
-		else ret = 0;
 	}
-	else {
-		unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
-					 0 };
 
-		ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
-				       data, done);
-	}
+	phys_fd = get_stub_fd(mm_idp, phys_fd);
 
-	return ret;
+	sc = syscall_stub_alloc(mm_idp);
+	sc->syscall = STUB_SYSCALL_MMAP;
+	sc->mem.addr = virt;
+	sc->mem.length = len;
+	sc->mem.prot = prot;
+	sc->mem.fd = phys_fd;
+	sc->mem.offset = MMAP_OFFSET(offset);
+
+	return 0;
 }
 
-int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	    unsigned int prot, int done, void **data)
+int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
 {
-	struct proc_mm_op protect;
-	int ret;
-
-	if (proc_mm) {
-		int fd = mm_idp->u.mm_fd;
-
-		protect = ((struct proc_mm_op) { .op	= MM_MPROTECT,
-					   .u	=
-					   { .mprotect	=
-					     { .addr	=
-					       (unsigned long) addr,
-					       .len	= len,
-					       .prot	= prot } } } );
-
-		CATCH_EINTR(ret = write(fd, &protect, sizeof(protect)));
-		if (ret != sizeof(protect)) {
-			ret = -errno;
-			printk(UM_KERN_ERR "protect failed, err = %d", -ret);
-		}
-		else ret = 0;
-	}
-	else {
-		unsigned long args[] = { addr, len, prot, 0, 0, 0 };
+	struct stub_syscall *sc;
 
-		ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
-				       data, done);
+	/* Compress with previous syscall if that is possible */
+	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr);
+	if (sc) {
+		sc->mem.length += len;
+		return 0;
 	}
 
-	return ret;
+	sc = syscall_stub_alloc(mm_idp);
+	sc->syscall = STUB_SYSCALL_MUNMAP;
+	sc->mem.addr = addr;
+	sc->mem.length = len;
+
+	return 0;
 }
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 4625949bf1e4..d6c22f8aa06d 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,33 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
+#include <stdbool.h>
 #include <unistd.h>
 #include <sched.h>
 #include <errno.h>
 #include <string.h>
+#include <fcntl.h>
+#include <mem_user.h>
 #include <sys/mman.h>
 #include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
 #include <asm/unistd.h>
 #include <as-layout.h>
 #include <init.h>
 #include <kern_util.h>
 #include <mem.h>
 #include <os.h>
-#include <proc_mm.h>
 #include <ptrace_user.h>
 #include <registers.h>
 #include <skas.h>
-#include <skas_ptrace.h>
 #include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
+#include <linux/futex.h>
+#include <linux/threads.h>
+#include <timetravel.h>
+#include <asm-generic/rwonce.h>
+#include "../internal.h"
 
 int is_skas_winch(int pid, int fd, void *data)
 {
 	return pid == getpgrp();
 }
 
+static const char *ptrace_reg_name(int idx)
+{
+#define R(n) case HOST_##n: return #n
+
+	switch (idx) {
+#ifdef __x86_64__
+	R(BX);
+	R(CX);
+	R(DI);
+	R(SI);
+	R(DX);
+	R(BP);
+	R(AX);
+	R(R8);
+	R(R9);
+	R(R10);
+	R(R11);
+	R(R12);
+	R(R13);
+	R(R14);
+	R(R15);
+	R(ORIG_AX);
+	R(CS);
+	R(SS);
+	R(EFLAGS);
+#elif defined(__i386__)
+	R(IP);
+	R(SP);
+	R(EFLAGS);
+	R(AX);
+	R(BX);
+	R(CX);
+	R(DX);
+	R(SI);
+	R(DI);
+	R(BP);
+	R(CS);
+	R(SS);
+	R(DS);
+	R(FS);
+	R(ES);
+	R(GS);
+	R(ORIG_AX);
+#endif
+	}
+	return "";
+}
+
 static int ptrace_dump_regs(int pid)
 {
 	unsigned long regs[MAX_REG_NR];
@@ -37,8 +96,11 @@ static int ptrace_dump_regs(int pid)
 		return -errno;
 
 	printk(UM_KERN_ERR "Stub registers -\n");
-	for (i = 0; i < ARRAY_SIZE(regs); i++)
-		printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]);
+	for (i = 0; i < ARRAY_SIZE(regs); i++) {
+		const char *regname = ptrace_reg_name(i);
+
+		printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]);
+	}
 
 	return 0;
 }
@@ -47,7 +109,7 @@ static int ptrace_dump_regs(int pid)
  * Signals that are OK to receive in the stub - we'll just continue it.
  * SIGWINCH will happen when UML is inside a detached screen.
  */
-#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH))
+#define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH))
 
 /* Signals that the stub will finish with - anything else is an error */
 #define STUB_DONE_MASK (1 << SIGTRAP)
@@ -66,8 +128,8 @@ void wait_stub_done(int pid)
 
 		err = ptrace(PTRACE_CONT, pid, 0, 0);
 		if (err) {
-			printk(UM_KERN_ERR "wait_stub_done : continue failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s : continue failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 	}
@@ -78,384 +140,647 @@ void wait_stub_done(int pid)
 bad_wait:
 	err = ptrace_dump_regs(pid);
 	if (err)
-		printk(UM_KERN_ERR "Failed to get registers from stub, "
-		       "errno = %d\n", -err);
-	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
-	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
-	       status);
+		printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n",
+		       -err);
+	printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n",
+	       __func__, pid, n, errno, status);
 	fatal_sigsegv();
 }
 
-extern unsigned long current_stub_stack(void);
-
-static void get_skas_faultinfo(int pid, struct faultinfo *fi)
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
 {
-	int err;
+	struct stub_data *data = (void *)mm_idp->stack;
+	int ret;
 
-	if (ptrace_faultinfo) {
-		err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
-		if (err) {
-			printk(UM_KERN_ERR "get_skas_faultinfo - "
-			       "PTRACE_FAULTINFO failed, errno = %d\n", errno);
-			fatal_sigsegv();
+	do {
+		const char byte = 0;
+		struct iovec iov = {
+			.iov_base = (void *)&byte,
+			.iov_len = sizeof(byte),
+		};
+		union {
+			char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))];
+			struct cmsghdr align;
+		} ctrl;
+		struct msghdr msgh = {
+			.msg_iov = &iov,
+			.msg_iovlen = 1,
+		};
+
+		if (!running) {
+			if (mm_idp->syscall_fd_num) {
+				unsigned int fds_size =
+					sizeof(int) * mm_idp->syscall_fd_num;
+				struct cmsghdr *cmsg;
+
+				msgh.msg_control = ctrl.data;
+				msgh.msg_controllen = CMSG_SPACE(fds_size);
+				cmsg = CMSG_FIRSTHDR(&msgh);
+				cmsg->cmsg_level = SOL_SOCKET;
+				cmsg->cmsg_type = SCM_RIGHTS;
+				cmsg->cmsg_len = CMSG_LEN(fds_size);
+				memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map,
+				       fds_size);
+
+				CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock,
+						&msgh, 0));
+			}
+
+			data->signal = 0;
+			data->futex = FUTEX_IN_CHILD;
+			CATCH_EINTR(syscall(__NR_futex, &data->futex,
+					    FUTEX_WAKE, 1, NULL, NULL, 0));
 		}
 
-		/* Special handling for i386, which has different structs */
-		if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo))
-			memset((char *)fi + sizeof(struct ptrace_faultinfo), 0,
-			       sizeof(struct faultinfo) -
-			       sizeof(struct ptrace_faultinfo));
-	}
-	else {
-		unsigned long fpregs[FP_SIZE];
+		do {
+			/*
+			 * We need to check whether the child is still alive
+			 * before and after the FUTEX_WAIT call. Before, in
+			 * case it just died but we still updated data->futex
+			 * to FUTEX_IN_CHILD. And after, in case it died while
+			 * we were waiting (and SIGCHLD woke us up, see the
+			 * IRQ handler in mmu.c).
+			 *
+			 * Either way, if PID is negative, then we have no
+			 * choice but to kill the task.
+			 */
+			if (__READ_ONCE(mm_idp->pid) < 0)
+				goto out_kill;
+
+			ret = syscall(__NR_futex, &data->futex,
+				      FUTEX_WAIT, FUTEX_IN_CHILD,
+				      NULL, NULL, 0);
+			if (ret < 0 && errno != EINTR && errno != EAGAIN) {
+				printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
+				       __func__, errno);
+				goto out_kill;
+			}
+		} while (data->futex == FUTEX_IN_CHILD);
 
-		err = get_fp_registers(pid, fpregs);
-		if (err < 0) {
-			printk(UM_KERN_ERR "save_fp_registers returned %d\n",
-			       err);
-			fatal_sigsegv();
-		}
-		err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
-		if (err) {
-			printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
-			       "errno = %d\n", pid, errno);
-			fatal_sigsegv();
-		}
-		wait_stub_done(pid);
+		if (__READ_ONCE(mm_idp->pid) < 0)
+			goto out_kill;
 
-		/*
-		 * faultinfo is prepared by the stub-segv-handler at start of
-		 * the stub stack page. We just have to copy it.
-		 */
-		memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
+		running = 0;
 
-		err = put_fp_registers(pid, fpregs);
-		if (err < 0) {
-			printk(UM_KERN_ERR "put_fp_registers returned %d\n",
-			       err);
-			fatal_sigsegv();
-		}
+		/* We may receive a SIGALRM before SIGSYS, iterate again. */
+	} while (wait_sigsys && data->signal == SIGALRM);
+
+	if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+		printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
+		goto out_kill;
 	}
+
+	if (wait_sigsys && data->signal != SIGSYS) {
+		printk(UM_KERN_ERR "%s : expected SIGSYS but got %d",
+		       __func__, data->signal);
+		goto out_kill;
+	}
+
+	return;
+
+out_kill:
+	printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
+	       __func__, mm_idp->pid, errno);
+	/* This is not true inside start_userspace */
+	if (current_mm_id() == mm_idp)
+		fatal_sigsegv();
 }
 
-static void handle_segv(int pid, struct uml_pt_regs * regs)
+extern unsigned long current_stub_stack(void);
+
+static void get_skas_faultinfo(int pid, struct faultinfo *fi)
 {
-	get_skas_faultinfo(pid, &regs->faultinfo);
-	segv(regs->faultinfo, 0, 1, NULL);
+	int err;
+
+	err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
+	if (err) {
+		printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
+		       "errno = %d\n", pid, errno);
+		fatal_sigsegv();
+	}
+	wait_stub_done(pid);
+
+	/*
+	 * faultinfo is prepared by the stub_segv_handler at start of
+	 * the stub stack page. We just have to copy it.
+	 */
+	memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
 }
 
-/*
- * To use the same value of using_sysemu as the caller, ask it that value
- * (in local_using_sysemu
- */
-static void handle_trap(int pid, struct uml_pt_regs *regs,
-			int local_using_sysemu)
+static void handle_trap(struct uml_pt_regs *regs)
 {
-	int err, status;
-
 	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
 		fatal_sigsegv();
 
-	/* Mark this as a syscall */
-	UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp);
+	handle_syscall(regs);
+}
 
-	if (!local_using_sysemu)
-	{
-		err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
-			     __NR_getpid);
-		if (err < 0) {
-			printk(UM_KERN_ERR "handle_trap - nullifying syscall "
-			       "failed, errno = %d\n", errno);
-			fatal_sigsegv();
-		}
+extern char __syscall_stub_start[];
 
-		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
-		if (err < 0) {
-			printk(UM_KERN_ERR "handle_trap - continuing to end of "
-			       "syscall failed, errno = %d\n", errno);
-			fatal_sigsegv();
-		}
+static int stub_exe_fd;
 
-		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if ((err < 0) || !WIFSTOPPED(status) ||
-		    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
-			err = ptrace_dump_regs(pid);
-			if (err)
-				printk(UM_KERN_ERR "Failed to get registers "
-				       "from process, errno = %d\n", -err);
-			printk(UM_KERN_ERR "handle_trap - failed to wait at "
-			       "end of syscall, errno = %d, status = %d\n",
-			       errno, status);
-			fatal_sigsegv();
-		}
+struct tramp_data {
+	struct stub_data *stub_data;
+	/* 0 is inherited, 1 is the kernel side */
+	int sockpair[2];
+};
+
+#ifndef CLOSE_RANGE_CLOEXEC
+#define CLOSE_RANGE_CLOEXEC	(1U << 2)
+#endif
+
+static int userspace_tramp(void *data)
+{
+	struct tramp_data *tramp_data = data;
+	char *const argv[] = { "uml-userspace", NULL };
+	unsigned long long offset;
+	struct stub_init_data init_data = {
+		.seccomp = using_seccomp,
+		.stub_start = STUB_START,
+	};
+	int ret;
+
+	if (using_seccomp) {
+		init_data.signal_handler = STUB_CODE +
+					   (unsigned long) stub_signal_interrupt -
+					   (unsigned long) __syscall_stub_start;
+		init_data.signal_restorer = STUB_CODE +
+					   (unsigned long) stub_signal_restorer -
+					   (unsigned long) __syscall_stub_start;
+	} else {
+		init_data.signal_handler = STUB_CODE +
+					   (unsigned long) stub_segv_handler -
+					   (unsigned long) __syscall_stub_start;
+		init_data.signal_restorer = 0;
 	}
 
-	handle_syscall(regs);
+	init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
+					      &offset);
+	init_data.stub_code_offset = MMAP_OFFSET(offset);
+
+	init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data),
+					      &offset);
+	init_data.stub_data_offset = MMAP_OFFSET(offset);
+
+	/*
+	 * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs
+	 * and then unsetting it on all memory related FDs.
+	 * This is not strictly necessary from a safety perspective.
+	 */
+	syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);
+
+	fcntl(init_data.stub_data_fd, F_SETFD, 0);
+
+	/* dup2 signaling FD/socket to STDIN */
+	if (dup2(tramp_data->sockpair[0], 0) < 0)
+		exit(3);
+	close(tramp_data->sockpair[0]);
+
+	/* Write init_data and close write side */
+	ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data));
+	close(tramp_data->sockpair[1]);
+
+	if (ret != sizeof(init_data))
+		exit(4);
+
+	/* Raw execveat for compatibility with older libc versions */
+	syscall(__NR_execveat, stub_exe_fd, (unsigned long)"",
+		(unsigned long)argv, NULL, AT_EMPTY_PATH);
+
+	exit(5);
 }
 
-extern int __syscall_stub_start;
+extern char stub_exe_start[];
+extern char stub_exe_end[];
+
+extern char *tempdir;
 
-static int userspace_tramp(void *stack)
+#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX"
+
+#ifndef MFD_EXEC
+#define MFD_EXEC 0x0010U
+#endif
+
+static int __init init_stub_exe_fd(void)
 {
-	void *addr;
-	int err;
+	size_t written = 0;
+	char *tmpfile = NULL;
 
-	ptrace(PTRACE_TRACEME, 0, 0, 0);
+	stub_exe_fd = memfd_create("uml-userspace",
+				   MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING);
 
-	signal(SIGTERM, SIG_DFL);
-	signal(SIGWINCH, SIG_IGN);
-	err = set_interval();
-	if (err) {
-		printk(UM_KERN_ERR "userspace_tramp - setting timer failed, "
-		       "errno = %d\n", err);
-		exit(1);
+	if (stub_exe_fd < 0) {
+		printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!");
+
+		tmpfile = malloc(strlen(tempdir) +
+				  strlen(STUB_EXE_NAME_TEMPLATE) + 1);
+		if (tmpfile == NULL)
+			panic("Failed to allocate memory for stub binary name");
+
+		strcpy(tmpfile, tempdir);
+		strcat(tmpfile, STUB_EXE_NAME_TEMPLATE);
+
+		stub_exe_fd = mkstemp(tmpfile);
+		if (stub_exe_fd < 0)
+			panic("Could not create temporary file for stub binary: %d",
+			      -errno);
 	}
 
-	if (!proc_mm) {
-		/*
-		 * This has a pte, but it can't be mapped in with the usual
-		 * tlb_flush mechanism because this is part of that mechanism
-		 */
-		int fd;
-		unsigned long long offset;
-		fd = phys_mapping(to_phys(&__syscall_stub_start), &offset);
-		addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
-			      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
-		if (addr == MAP_FAILED) {
-			printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
-			       "errno = %d\n", STUB_CODE, errno);
-			exit(1);
-		}
+	while (written < stub_exe_end - stub_exe_start) {
+		ssize_t res = write(stub_exe_fd, stub_exe_start + written,
+				    stub_exe_end - stub_exe_start - written);
+		if (res < 0) {
+			if (errno == EINTR)
+				continue;
 
-		if (stack != NULL) {
-			fd = phys_mapping(to_phys(stack), &offset);
-			addr = mmap((void *) STUB_DATA,
-				    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
-				    MAP_FIXED | MAP_SHARED, fd, offset);
-			if (addr == MAP_FAILED) {
-				printk(UM_KERN_ERR "mapping segfault stack "
-				       "at 0x%lx failed, errno = %d\n",
-				       STUB_DATA, errno);
-				exit(1);
-			}
+			if (tmpfile)
+				unlink(tmpfile);
+			panic("Failed write stub binary: %d", -errno);
 		}
+
+		written += res;
 	}
-	if (!ptrace_faultinfo && (stack != NULL)) {
-		struct sigaction sa;
-
-		unsigned long v = STUB_CODE +
-				  (unsigned long) stub_segv_handler -
-				  (unsigned long) &__syscall_stub_start;
-
-		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
-		sigemptyset(&sa.sa_mask);
-		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
-		sa.sa_sigaction = (void *) v;
-		sa.sa_restorer = NULL;
-		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-			printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
-			       "handler failed - errno = %d\n", errno);
-			exit(1);
+
+	if (!tmpfile) {
+		fcntl(stub_exe_fd, F_ADD_SEALS,
+		      F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
+	} else {
+		if (fchmod(stub_exe_fd, 00500) < 0) {
+			unlink(tmpfile);
+			panic("Could not make stub binary executable: %d",
+			      -errno);
+		}
+
+		close(stub_exe_fd);
+		stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
+		if (stub_exe_fd < 0) {
+			unlink(tmpfile);
+			panic("Could not reopen stub binary: %d", -errno);
 		}
+
+		unlink(tmpfile);
+		free(tmpfile);
 	}
 
-	kill(os_getpid(), SIGSTOP);
 	return 0;
 }
-
-/* Each element set once, and only accessed by a single processor anyway */
-#undef NR_CPUS
-#define NR_CPUS 1
-int userspace_pid[NR_CPUS];
-
-int start_userspace(unsigned long stub_stack)
+__initcall(init_stub_exe_fd);
+
+int using_seccomp;
+
+/**
+ * start_userspace() - prepare a new userspace process
+ * @mm_id: The corresponding struct mm_id
+ *
+ * Setups a new temporary stack page that is used while userspace_tramp() runs
+ * Clones the kernel process into a new userspace process, with FDs only.
+ *
+ * Return: When positive: the process id of the new userspace process,
+ *         when negative: an error number.
+ * FIXME: can PIDs become negative?!
+ */
+int start_userspace(struct mm_id *mm_id)
 {
+	struct stub_data *proc_data = (void *)mm_id->stack;
+	struct tramp_data tramp_data = {
+		.stub_data = proc_data,
+	};
 	void *stack;
 	unsigned long sp;
-	int pid, status, n, flags, err;
+	int status, n, err;
 
+	/* setup a temporary stack page */
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
 		     PROT_READ | PROT_WRITE | PROT_EXEC,
 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	if (stack == MAP_FAILED) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : mmap failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n",
+		       __func__, errno);
 		return err;
 	}
 
-	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+	/* set stack pointer to the end of the stack page, so it can grow downwards */
+	sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
 
-	flags = CLONE_FILES;
-	if (proc_mm)
-		flags |= CLONE_VM;
-	else
-		flags |= SIGCHLD;
-
-	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
-	if (pid < 0) {
+	/* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : clone failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n",
+		       __func__, errno);
 		return err;
 	}
 
-	do {
-		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (n < 0) {
-			err = -errno;
-			printk(UM_KERN_ERR "start_userspace : wait failed, "
-			       "errno = %d\n", errno);
-			goto out_kill;
-		}
-	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM));
+	if (using_seccomp)
+		proc_data->futex = FUTEX_IN_CHILD;
 
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
-		err = -EINVAL;
-		printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got "
-		       "status = %d\n", status);
-		goto out_kill;
+	mm_id->pid = clone(userspace_tramp, (void *) sp,
+		    CLONE_VFORK | CLONE_VM | SIGCHLD,
+		    (void *)&tramp_data);
+	if (mm_id->pid < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
+		       __func__, errno);
+		goto out_close;
 	}
 
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
-		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS "
-		       "failed, errno = %d\n", errno);
-		goto out_kill;
+	if (using_seccomp) {
+		wait_stub_done_seccomp(mm_id, 1, 1);
+	} else {
+		do {
+			CATCH_EINTR(n = waitpid(mm_id->pid, &status,
+						WUNTRACED | __WALL));
+			if (n < 0) {
+				err = -errno;
+				printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+				       __func__, errno);
+				goto out_kill;
+			}
+		} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+		if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+			err = -EINVAL;
+			printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+			       __func__, status);
+			goto out_kill;
+		}
+
+		if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL,
+			   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
+			       __func__, errno);
+			goto out_kill;
+		}
 	}
 
 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : munmap failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n",
+		       __func__, errno);
 		goto out_kill;
 	}
 
-	return pid;
+	close(tramp_data.sockpair[0]);
+	if (using_seccomp)
+		mm_id->sock = tramp_data.sockpair[1];
+	else
+		close(tramp_data.sockpair[1]);
+
+	return 0;
+
+out_kill:
+	os_kill_ptraced_process(mm_id->pid, 1);
+out_close:
+	close(tramp_data.sockpair[0]);
+	close(tramp_data.sockpair[1]);
+
+	mm_id->pid = -1;
 
- out_kill:
-	os_kill_ptraced_process(pid, 1);
 	return err;
 }
 
+static int unscheduled_userspace_iterations;
+extern unsigned long tt_extra_sched_jiffies;
+
 void userspace(struct uml_pt_regs *regs)
 {
-	struct itimerval timer;
-	unsigned long long nsecs, now;
-	int err, status, op, pid = userspace_pid[0];
-	/* To prevent races if using_sysemu changes under us.*/
-	int local_using_sysemu;
-	siginfo_t si;
+	int err, status, op;
+	siginfo_t si_local;
+	siginfo_t *si;
+	int sig;
 
 	/* Handle any immediate reschedules or signals */
 	interrupt_end();
 
-	if (getitimer(ITIMER_VIRTUAL, &timer))
-		printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno);
-	nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC +
-		timer.it_value.tv_usec * UM_NSEC_PER_USEC;
-	nsecs += os_nsecs();
-
 	while (1) {
+		struct mm_id *mm_id = current_mm_id();
+
 		/*
-		 * This can legitimately fail if the process loads a
-		 * bogus value into a segment register.  It will
-		 * segfault and PTRACE_GETREGS will read that value
-		 * out of the process.  However, PTRACE_SETREGS will
-		 * fail.  In this case, there is nothing to do but
-		 * just kill the process.
+		 * At any given time, only one CPU thread can enter the
+		 * turnstile to operate on the same stub process, including
+		 * executing stub system calls (mmap and munmap).
 		 */
-		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp))
-			fatal_sigsegv();
+		enter_turnstile(mm_id);
 
-		if (put_fp_registers(pid, regs->fp))
-			fatal_sigsegv();
+		/*
+		 * When we are in time-travel mode, userspace can theoretically
+		 * do a *lot* of work without being scheduled. The problem with
+		 * this is that it will prevent kernel bookkeeping (primarily
+		 * the RCU) from running and this can for example cause OOM
+		 * situations.
+		 *
+		 * This code accounts a jiffie against the scheduling clock
+		 * after the defined userspace iterations in the same thread.
+		 * By doing so the situation is effectively prevented.
+		 */
+		if (time_travel_mode == TT_MODE_INFCPU ||
+		    time_travel_mode == TT_MODE_EXTERNAL) {
+#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS
+			if (CONFIG_UML_MAX_USERSPACE_ITERATIONS &&
+			    unscheduled_userspace_iterations++ >
+			    CONFIG_UML_MAX_USERSPACE_ITERATIONS) {
+				tt_extra_sched_jiffies += 1;
+				unscheduled_userspace_iterations = 0;
+			}
+#endif
+		}
 
-		/* Now we set local_using_sysemu to be used for one loop */
-		local_using_sysemu = get_using_sysemu();
+		time_travel_print_bc_msg();
 
-		op = SELECT_PTRACE_OPERATION(local_using_sysemu,
-					     singlestepping(NULL));
+		current_mm_sync();
 
-		if (ptrace(op, pid, 0, 0)) {
-			printk(UM_KERN_ERR "userspace - ptrace continue "
-			       "failed, op = %d, errno = %d\n", op, errno);
-			fatal_sigsegv();
-		}
+		if (using_seccomp) {
+			struct stub_data *proc_data = (void *) mm_id->stack;
 
-		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (err < 0) {
-			printk(UM_KERN_ERR "userspace - wait failed, "
-			       "errno = %d\n", errno);
-			fatal_sigsegv();
-		}
+			err = set_stub_state(regs, proc_data, singlestepping());
+			if (err) {
+				printk(UM_KERN_ERR "%s - failed to set regs: %d",
+				       __func__, err);
+				fatal_sigsegv();
+			}
 
-		regs->is_user = 1;
-		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, "
-			       "errno = %d\n", errno);
-			fatal_sigsegv();
-		}
+			/* Must have been reset by the syscall caller */
+			if (proc_data->restart_wait != 0)
+				panic("Programming error: Flag to only run syscalls in child was not cleared!");
 
-		if (get_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "userspace -  get_fp_registers failed, "
-			       "errno = %d\n", errno);
-			fatal_sigsegv();
-		}
+			/* Mark pending syscalls for flushing */
+			proc_data->syscall_data_len = mm_id->syscall_data_len;
 
-		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+			wait_stub_done_seccomp(mm_id, 0, 0);
 
-		if (WIFSTOPPED(status)) {
-			int sig = WSTOPSIG(status);
+			sig = proc_data->signal;
 
-			ptrace(PTRACE_GETSIGINFO, pid, 0, &si);
+			if (sig == SIGTRAP && proc_data->err != 0) {
+				printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
+				       __func__);
+				syscall_stub_dump_error(mm_id);
+				mm_id->syscall_data_len = proc_data->err;
+				fatal_sigsegv();
+			}
 
-			switch (sig) {
-			case SIGSEGV:
-				if (PTRACE_FULL_FAULTINFO ||
-				    !ptrace_faultinfo) {
+			mm_id->syscall_data_len = 0;
+			mm_id->syscall_fd_num = 0;
+
+			err = get_stub_state(regs, proc_data, NULL);
+			if (err) {
+				printk(UM_KERN_ERR "%s - failed to get regs: %d",
+				       __func__, err);
+				fatal_sigsegv();
+			}
+
+			if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+				panic("%s - Invalid siginfo offset from child", __func__);
+
+			si = &si_local;
+			memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si));
+
+			regs->is_user = 1;
+
+			/* Fill in ORIG_RAX and extract fault information */
+			PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+			if (sig == SIGSEGV) {
+				mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
+
+				GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+			}
+		} else {
+			int pid = mm_id->pid;
+
+			/* Flush out any pending syscalls */
+			err = syscall_stub_flush(mm_id);
+			if (err) {
+				if (err == -ENOMEM)
+					report_enomem();
+
+				printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+					__func__, -err);
+				fatal_sigsegv();
+			}
+
+			/*
+			 * This can legitimately fail if the process loads a
+			 * bogus value into a segment register.  It will
+			 * segfault and PTRACE_GETREGS will read that value
+			 * out of the process.  However, PTRACE_SETREGS will
+			 * fail.  In this case, there is nothing to do but
+			 * just kill the process.
+			 */
+			if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (put_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (singlestepping())
+				op = PTRACE_SYSEMU_SINGLESTEP;
+			else
+				op = PTRACE_SYSEMU;
+
+			if (ptrace(op, pid, 0, 0)) {
+				printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+				       __func__, op, errno);
+				fatal_sigsegv();
+			}
+
+			CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+			if (err < 0) {
+				printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			regs->is_user = 1;
+			if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (get_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (WIFSTOPPED(status)) {
+				sig = WSTOPSIG(status);
+
+				/*
+				 * These signal handlers need the si argument
+				 * and SIGSEGV needs the faultinfo.
+				 * The SIGIO and SIGALARM handlers which constitute
+				 * the majority of invocations, do not use it.
+				 */
+				switch (sig) {
+				case SIGSEGV:
 					get_skas_faultinfo(pid,
 							   &regs->faultinfo);
-					(*sig_info[SIGSEGV])(SIGSEGV, &si,
-							     regs);
+					fallthrough;
+				case SIGTRAP:
+				case SIGILL:
+				case SIGBUS:
+				case SIGFPE:
+				case SIGWINCH:
+					ptrace(PTRACE_GETSIGINFO, pid, 0,
+					       (struct siginfo *)&si_local);
+					si = &si_local;
+					break;
+				default:
+					si = NULL;
+					break;
 				}
-				else handle_segv(pid, regs);
+			} else {
+				sig = 0;
+			}
+		}
+
+		exit_turnstile(mm_id);
+
+		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+		if (sig) {
+			switch (sig) {
+			case SIGSEGV:
+				if (using_seccomp || PTRACE_FULL_FAULTINFO)
+					(*sig_info[SIGSEGV])(SIGSEGV,
+							     (struct siginfo *)si,
+							     regs, NULL);
+				else
+					segv(regs->faultinfo, 0, 1, NULL, NULL);
+
+				break;
+			case SIGSYS:
+				handle_syscall(regs);
 				break;
 			case SIGTRAP + 0x80:
-			        handle_trap(pid, regs, local_using_sysemu);
+				handle_trap(regs);
 				break;
 			case SIGTRAP:
-				relay_signal(SIGTRAP, &si, regs);
+				relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
 				break;
-			case SIGVTALRM:
-				now = os_nsecs();
-				if (now < nsecs)
-					break;
-				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
-				unblock_signals();
-				nsecs = timer.it_value.tv_sec *
-					UM_NSEC_PER_SEC +
-					timer.it_value.tv_usec *
-					UM_NSEC_PER_USEC;
-				nsecs += os_nsecs();
+			case SIGALRM:
 				break;
 			case SIGIO:
 			case SIGILL:
 			case SIGBUS:
 			case SIGFPE:
 			case SIGWINCH:
-				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
-				unblock_signals();
+				block_signals_trace();
+				(*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
+				unblock_signals_trace();
 				break;
 			default:
-				printk(UM_KERN_ERR "userspace - child stopped "
-				       "with signal %d\n", sig);
+				printk(UM_KERN_ERR "%s - child stopped with signal %d\n",
+				       __func__, sig);
 				fatal_sigsegv();
 			}
-			pid = userspace_pid[0];
 			interrupt_end();
 
 			/* Avoid -ERESTARTSYS handling in host */
@@ -465,173 +790,6 @@ void userspace(struct uml_pt_regs *regs)
 	}
 }
 
-static unsigned long thread_regs[MAX_REG_NR];
-static unsigned long thread_fp_regs[FP_SIZE];
-
-static int __init init_thread_regs(void)
-{
-	get_safe_registers(thread_regs, thread_fp_regs);
-	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs[REGS_IP_INDEX] = STUB_CODE +
-				(unsigned long) stub_clone_handler -
-				(unsigned long) &__syscall_stub_start;
-	thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
-		sizeof(void *);
-#ifdef __SIGNAL_FRAMESIZE
-	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
-#endif
-	return 0;
-}
-
-__initcall(init_thread_regs);
-
-int copy_context_skas0(unsigned long new_stack, int pid)
-{
-	struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ };
-	int err;
-	unsigned long current_stack = current_stub_stack();
-	struct stub_data *data = (struct stub_data *) current_stack;
-	struct stub_data *child_data = (struct stub_data *) new_stack;
-	unsigned long long new_offset;
-	int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
-
-	/*
-	 * prepare offset and fd of child's stack as argument for parent's
-	 * and child's mmap2 calls
-	 */
-	*data = ((struct stub_data) { .offset	= MMAP_OFFSET(new_offset),
-				      .fd	= new_fd,
-				      .timer    = ((struct itimerval)
-					           { .it_value = tv,
-						     .it_interval = tv }) });
-
-	err = ptrace_setregs(pid, thread_regs);
-	if (err < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS "
-		       "failed, pid = %d, errno = %d\n", pid, -err);
-		return err;
-	}
-
-	err = put_fp_registers(pid, thread_fp_regs);
-	if (err < 0) {
-		printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers "
-		       "failed, pid = %d, err = %d\n", pid, err);
-		return err;
-	}
-
-	/* set a well known return code for detection of child write failure */
-	child_data->err = 12345678;
-
-	/*
-	 * Wait, until parent has finished its work: read child's pid from
-	 * parent's stack, and check, if bad result.
-	 */
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, "
-		       "errno = %d\n", pid, errno);
-		return err;
-	}
-
-	wait_stub_done(pid);
-
-	pid = data->err;
-	if (pid < 0) {
-		printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
-		       "error %d\n", -pid);
-		return pid;
-	}
-
-	/*
-	 * Wait, until child has finished too: read child's result from
-	 * child's stack and check it.
-	 */
-	wait_stub_done(pid);
-	if (child_data->err != STUB_DATA) {
-		printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports "
-		       "error %ld\n", child_data->err);
-		err = child_data->err;
-		goto out_kill;
-	}
-
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
-		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS "
-		       "failed, errno = %d\n", errno);
-		goto out_kill;
-	}
-
-	return pid;
-
- out_kill:
-	os_kill_ptraced_process(pid, 1);
-	return err;
-}
-
-/*
- * This is used only, if stub pages are needed, while proc_mm is
- * available. Opening /proc/mm creates a new mm_context, which lacks
- * the stub-pages. Thus, we map them using /proc/mm-fd
- */
-int map_stub_pages(int fd, unsigned long code, unsigned long data,
-		   unsigned long stack)
-{
-	struct proc_mm_op mmop;
-	int n;
-	unsigned long long code_offset;
-	int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start),
-				   &code_offset);
-
-	mmop = ((struct proc_mm_op) { .op        = MM_MMAP,
-				      .u         =
-				      { .mmap    =
-					{ .addr    = code,
-					  .len     = UM_KERN_PAGE_SIZE,
-					  .prot    = PROT_EXEC,
-					  .flags   = MAP_FIXED | MAP_PRIVATE,
-					  .fd      = code_fd,
-					  .offset  = code_offset
-	} } });
-	CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-	if (n != sizeof(mmop)) {
-		n = errno;
-		printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, "
-		       "offset = %llx\n", code, code_fd,
-		       (unsigned long long) code_offset);
-		printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code "
-		       "failed, err = %d\n", n);
-		return -n;
-	}
-
-	if (stack) {
-		unsigned long long map_offset;
-		int map_fd = phys_mapping(to_phys((void *)stack), &map_offset);
-		mmop = ((struct proc_mm_op)
-				{ .op        = MM_MMAP,
-				  .u         =
-				  { .mmap    =
-				    { .addr    = data,
-				      .len     = UM_KERN_PAGE_SIZE,
-				      .prot    = PROT_READ | PROT_WRITE,
-				      .flags   = MAP_FIXED | MAP_SHARED,
-				      .fd      = map_fd,
-				      .offset  = map_offset
-		} } });
-		CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-		if (n != sizeof(mmop)) {
-			n = errno;
-			printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for "
-			       "data failed, err = %d\n", n);
-			return -n;
-		}
-	}
-
-	return 0;
-}
-
 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 {
 	(*buf)[0].JB_IP = (unsigned long) handler;
@@ -646,16 +804,17 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 
 void switch_threads(jmp_buf *me, jmp_buf *you)
 {
+	unscheduled_userspace_iterations = 0;
+
 	if (UML_SETJMP(me) == 0)
 		UML_LONGJMP(you, 1);
 }
 
 static jmp_buf initial_jmpbuf;
 
-/* XXX Make these percpu */
-static void (*cb_proc)(void *arg);
-static void *cb_arg;
-static jmp_buf *cb_back;
+static __thread void (*cb_proc)(void *arg);
+static __thread void *cb_arg;
+static __thread jmp_buf *cb_back;
 
 int start_idle_thread(void *stack, jmp_buf *switch_buf)
 {
@@ -674,7 +833,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 	n = setjmp(initial_jmpbuf);
 	switch (n) {
 	case INIT_JMP_NEW_THREAD:
-		(*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler;
+		(*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup;
 		(*switch_buf)[0].JB_SP = (unsigned long) stack +
 			UM_THREAD_SIZE - sizeof(void *);
 		break;
@@ -689,11 +848,16 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 		kmalloc_ok = 0;
 		return 1;
 	default:
-		printk(UM_KERN_ERR "Bad sigsetjmp return in "
-		       "start_idle_thread - %d\n", n);
+		printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n",
+		       __func__, n);
 		fatal_sigsegv();
 	}
 	longjmp(*switch_buf, 1);
+
+	/* unreachable */
+	printk(UM_KERN_ERR "impossible long jump!");
+	fatal_sigsegv();
+	return 0;
 }
 
 void initial_thread_cb_skas(void (*proc)(void *), void *arg)
@@ -704,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 	cb_arg = arg;
 	cb_back = &here;
 
-	block_signals();
+	initial_jmpbuf_lock();
 	if (UML_SETJMP(&here) == 0)
 		UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
-	unblock_signals();
+	initial_jmpbuf_unlock();
 
 	cb_proc = NULL;
 	cb_arg = NULL;
@@ -716,29 +880,29 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 
 void halt_skas(void)
 {
-	block_signals();
+	initial_jmpbuf_lock();
 	UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
+	/* unreachable */
 }
 
-void reboot_skas(void)
+static bool noreboot;
+
+static int __init noreboot_cmd_param(char *str, int *add)
 {
-	block_signals();
-	UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT);
+	*add = 0;
+	noreboot = true;
+	return 0;
 }
 
-void __switch_mm(struct mm_id *mm_idp)
-{
-	int err;
+__uml_setup("noreboot", noreboot_cmd_param,
+"noreboot\n"
+"    Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n"
+"    This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n"
+"    crashes in CI\n\n");
 
-	/* FIXME: need cpu pid in __switch_mm */
-	if (proc_mm) {
-		err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
-			     mm_idp->u.mm_fd);
-		if (err) {
-			printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM "
-			       "failed, errno = %d\n", errno);
-			fatal_sigsegv();
-		}
-	}
-	else userspace_pid[0] = mm_idp->u.pid;
+void reboot_skas(void)
+{
+	initial_jmpbuf_lock();
+	UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT);
+	/* unreachable */
 }
diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c
new file mode 100644
index 000000000000..18d3858a7cd2
--- /dev/null
+++ b/arch/um/os-Linux/smp.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <kern_util.h>
+#include <um_malloc.h>
+#include <init.h>
+#include <os.h>
+#include <smp.h>
+#include "internal.h"
+
+struct cpu_thread_data {
+	int cpu;
+	sigset_t sigset;
+};
+
+static __thread int __curr_cpu;
+
+int uml_curr_cpu(void)
+{
+	return __curr_cpu;
+}
+
+static pthread_t cpu_threads[CONFIG_NR_CPUS];
+
+static void *cpu_thread(void *arg)
+{
+	struct cpu_thread_data *data = arg;
+
+	__curr_cpu = data->cpu;
+
+	uml_start_secondary(data);
+
+	return NULL;
+}
+
+int os_start_cpu_thread(int cpu)
+{
+	struct cpu_thread_data *data;
+	sigset_t sigset, oset;
+	int err;
+
+	data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC);
+	if (!data)
+		return -ENOMEM;
+
+	sigfillset(&sigset);
+	if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) {
+		err = errno;
+		goto err;
+	}
+
+	data->cpu = cpu;
+	data->sigset = oset;
+
+	err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data);
+	if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0)
+		panic("Failed to restore the signal mask, errno = %d", errno);
+	if (err != 0)
+		goto err;
+
+	return 0;
+
+err:
+	kfree(data);
+	return -err;
+}
+
+void os_start_secondary(void *arg, jmp_buf *switch_buf)
+{
+	struct cpu_thread_data *data = arg;
+
+	sigaddset(&data->sigset, IPI_SIGNAL);
+	sigaddset(&data->sigset, SIGIO);
+
+	if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0)
+		panic("Failed to restore the signal mask, errno = %d", errno);
+
+	kfree(data);
+	longjmp(*switch_buf, 1);
+
+	/* unreachable */
+	printk(UM_KERN_ERR "impossible long jump!");
+	fatal_sigsegv();
+}
+
+int os_send_ipi(int cpu, int vector)
+{
+	union sigval value = { .sival_int = vector };
+
+	return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value);
+}
+
+static void __local_ipi_set(int enable)
+{
+	sigset_t sigset;
+
+	sigemptyset(&sigset);
+	sigaddset(&sigset, IPI_SIGNAL);
+
+	if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0)
+		panic("%s: sigprocmask failed, errno = %d", __func__, errno);
+}
+
+void os_local_ipi_enable(void)
+{
+	__local_ipi_set(1);
+}
+
+void os_local_ipi_disable(void)
+{
+	__local_ipi_set(0);
+}
+
+static void ipi_sig_handler(int sig, siginfo_t *si, void *uc)
+{
+	int save_errno = errno;
+
+	signals_enabled = 0;
+	um_trace_signals_off();
+
+	uml_ipi_handler(si->si_value.sival_int);
+
+	um_trace_signals_on();
+	signals_enabled = 1;
+
+	errno = save_errno;
+}
+
+void __init os_init_smp(void)
+{
+	struct sigaction action = {
+		.sa_sigaction = ipi_sig_handler,
+		.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART,
+	};
+
+	sigfillset(&action.sa_mask);
+
+	if (sigaction(IPI_SIGNAL, &action, NULL) < 0)
+		panic("%s: sigaction failed, errno = %d", __func__, errno);
+
+	cpu_threads[0] = pthread_self();
+}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 337518c5042a..054ac03bbf5e 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -17,14 +18,24 @@
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include <asm/ldt.h>
 #include <asm/unistd.h>
 #include <init.h>
 #include <os.h>
+#include <smp.h>
+#include <kern_util.h>
 #include <mem_user.h>
 #include <ptrace_user.h>
+#include <stdbool.h>
+#include <stub-data.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <sysdep/mcontext.h>
+#include <sysdep/stub.h>
 #include <registers.h>
 #include <skas.h>
-#include <skas_ptrace.h>
+#include "internal.h"
 
 static void ptrace_child(void)
 {
@@ -95,6 +106,8 @@ static int start_ptraced_child(void)
 {
 	int pid, n, status;
 
+	fflush(stdout);
+
 	pid = fork();
 	if (pid == 0)
 		ptrace_child();
@@ -111,140 +124,32 @@ static int start_ptraced_child(void)
 	return pid;
 }
 
-/* When testing for SYSEMU support, if it is one of the broken versions, we
- * must just avoid using sysemu, not panic, but only if SYSEMU features are
- * broken.
- * So only for SYSEMU features we test mustpanic, while normal host features
- * must work anyway!
- */
-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
+static void stop_ptraced_child(int pid, int exitcode)
 {
-	int status, n, ret = 0;
+	int status, n;
+
+	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+		fatal_perror("stop_ptraced_child : ptrace failed");
 
-	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
-		perror("stop_ptraced_child : ptrace failed");
-		return -1;
-	}
 	CATCH_EINTR(n = waitpid(pid, &status, 0));
 	if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
 		int exit_with = WEXITSTATUS(status);
-		if (exit_with == 2)
-			non_fatal("check_ptrace : child exited with status 2. "
-				  "\nDisabling SYSEMU support.\n");
-		non_fatal("check_ptrace : child exited with exitcode %d, while "
-			  "expecting %d; status 0x%x\n", exit_with,
-			  exitcode, status);
-		if (mustexit)
-			exit(1);
-		ret = -1;
+		fatal("stop_ptraced_child : child exited with exitcode %d, "
+		      "while expecting %d; status 0x%x\n", exit_with,
+		      exitcode, status);
 	}
-
-	return ret;
-}
-
-/* Changed only during early boot */
-int ptrace_faultinfo;
-static int disable_ptrace_faultinfo;
-
-int ptrace_ldt;
-static int disable_ptrace_ldt;
-
-int proc_mm;
-static int disable_proc_mm;
-
-int have_switch_mm;
-static int disable_switch_mm;
-
-int skas_needs_stub;
-
-static int __init skas0_cmd_param(char *str, int* add)
-{
-	disable_ptrace_faultinfo = 1;
-	disable_ptrace_ldt = 1;
-	disable_proc_mm = 1;
-	disable_switch_mm = 1;
-
-	return 0;
 }
 
-/* The two __uml_setup would conflict, without this stupid alias. */
-
-static int __init mode_skas0_cmd_param(char *str, int* add)
-	__attribute__((alias("skas0_cmd_param")));
-
-__uml_setup("skas0", skas0_cmd_param,
-"skas0\n"
-"    Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n");
-
-__uml_setup("mode=skas0", mode_skas0_cmd_param,
-"mode=skas0\n"
-"    Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n");
-
-/* Changed only during early boot */
-static int force_sysemu_disabled = 0;
-
-static int __init nosysemu_cmd_param(char *str, int* add)
-{
-	force_sysemu_disabled = 1;
-	return 0;
-}
-
-__uml_setup("nosysemu", nosysemu_cmd_param,
-"nosysemu\n"
-"    Turns off syscall emulation patch for ptrace (SYSEMU) on.\n"
-"    SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
-"    behaviour of ptrace() and helps reducing host context switch rate.\n"
-"    To make it working, you need a kernel patch for your host, too.\n"
-"    See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
-"    information.\n\n");
-
 static void __init check_sysemu(void)
 {
-	unsigned long regs[MAX_REG_NR];
 	int pid, n, status, count=0;
 
-	non_fatal("Checking syscall emulation patch for ptrace...");
-	sysemu_supported = 0;
+	os_info("Checking syscall emulation for ptrace...");
 	pid = start_ptraced_child();
 
-	if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
-		goto fail;
-
-	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-	if (n < 0)
-		fatal_perror("check_sysemu : wait failed");
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
-		fatal("check_sysemu : expected SIGTRAP, got status = %d\n",
-		      status);
-
-	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
-		fatal_perror("check_sysemu : PTRACE_GETREGS failed");
-	if (PT_SYSCALL_NR(regs) != __NR_getpid) {
-		non_fatal("check_sysemu got system call number %d, "
-			  "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
-		goto fail;
-	}
-
-	n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
-	if (n < 0) {
-		non_fatal("check_sysemu : failed to modify system call "
-			  "return");
-		goto fail;
-	}
-
-	if (stop_ptraced_child(pid, 0, 0) < 0)
-		goto fail_stopped;
-
-	sysemu_supported = 1;
-	non_fatal("OK\n");
-	set_using_sysemu(!force_sysemu_disabled);
-
-	non_fatal("Checking advanced syscall emulation patch for ptrace...");
-	pid = start_ptraced_child();
-
-	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
-		fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed");
+		fatal_perror("check_sysemu: PTRACE_SETOPTIONS failed");
 
 	while (1) {
 		count++;
@@ -277,32 +182,27 @@ static void __init check_sysemu(void)
 			goto fail;
 		}
 	}
-	if (stop_ptraced_child(pid, 0, 0) < 0)
-		goto fail_stopped;
+	stop_ptraced_child(pid, 0);
 
-	sysemu_supported = 2;
-	non_fatal("OK\n");
+	os_info("OK\n");
 
-	if (!force_sysemu_disabled)
-		set_using_sysemu(sysemu_supported);
 	return;
 
 fail:
-	stop_ptraced_child(pid, 1, 0);
-fail_stopped:
-	non_fatal("missing\n");
+	stop_ptraced_child(pid, 1);
+	fatal("missing\n");
 }
 
 static void __init check_ptrace(void)
 {
 	int pid, syscall, n, status;
 
-	non_fatal("Checking that ptrace can change system call numbers...");
+	os_info("Checking that ptrace can change system call numbers...");
 	pid = start_ptraced_child();
 
-	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
-		fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
+		fatal_perror("check_ptrace: PTRACE_SETOPTIONS failed");
 
 	while (1) {
 		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
@@ -328,215 +228,268 @@ static void __init check_ptrace(void)
 			break;
 		}
 	}
-	stop_ptraced_child(pid, 0, 1);
-	non_fatal("OK\n");
+	stop_ptraced_child(pid, 0);
+	os_info("OK\n");
 	check_sysemu();
 }
 
-extern void check_tmpexec(void);
+extern unsigned long host_fp_size;
+extern unsigned long exec_regs[MAX_REG_NR];
+extern unsigned long *exec_fp_regs;
 
-static void __init check_coredump_limit(void)
+__initdata static struct stub_data *seccomp_test_stub_data;
+
+static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
 {
-	struct rlimit lim;
-	int err = getrlimit(RLIMIT_CORE, &lim);
+	ucontext_t *uc = p;
 
-	if (err) {
-		perror("Getting core dump limit");
-		return;
-	}
+	/* Stow away the location of the mcontext in the stack */
+	seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
+					      (unsigned long)&seccomp_test_stub_data->sigstack[0];
 
-	printf("Core dump limits :\n\tsoft - ");
-	if (lim.rlim_cur == RLIM_INFINITY)
-		printf("NONE\n");
-	else printf("%lu\n", lim.rlim_cur);
+	/* Prevent libc from clearing memory (mctx_offset in particular) */
+	syscall(__NR_exit, 0);
+}
 
-	printf("\thard - ");
-	if (lim.rlim_max == RLIM_INFINITY)
-		printf("NONE\n");
-	else printf("%lu\n", lim.rlim_max);
+static int __init seccomp_helper(void *data)
+{
+	static struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+			 offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
+		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+	};
+	static struct sock_fprog prog = {
+		.len = ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	struct sigaction sa;
+
+	/* close_range is needed for the stub */
+	if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
+		exit(1);
+
+	set_sigstack(seccomp_test_stub_data->sigstack,
+			sizeof(seccomp_test_stub_data->sigstack));
+
+	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+	sa.sa_sigaction = (void *) sigsys_handler;
+	sa.sa_restorer = NULL;
+	if (sigaction(SIGSYS, &sa, NULL) < 0)
+		exit(2);
+
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+			SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+		exit(3);
+
+	sleep(0);
+
+	/* Never reached. */
+	_exit(4);
 }
 
-void __init os_early_checks(void)
+static bool __init init_seccomp(void)
 {
 	int pid;
+	int status;
+	int n;
+	unsigned long sp;
 
-	/* Print out the core dump limits early */
-	check_coredump_limit();
+	/*
+	 * We check that we can install a seccomp filter and then exit(0)
+	 * from a trapped syscall.
+	 *
+	 * Note that we cannot verify that no seccomp filter already exists
+	 * for a syscall that results in the process/thread to be killed.
+	 */
 
-	check_ptrace();
+	os_info("Checking that seccomp filters can be installed...");
 
-	/* Need to check this early because mmapping happens before the
-	 * kernel is running.
-	 */
-	check_tmpexec();
+	seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
+				      PROT_READ | PROT_WRITE,
+				      MAP_SHARED | MAP_ANON, 0, 0);
 
-	pid = start_ptraced_child();
-	if (init_registers(pid))
-		fatal("Failed to initialize default registers");
-	stop_ptraced_child(pid, 1, 1);
-}
+	/* Use the syscall data area as stack, we just need something */
+	sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
+	     sizeof(seccomp_test_stub_data->syscall_data) -
+	     sizeof(void *);
+	pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);
 
-static int __init noprocmm_cmd_param(char *str, int* add)
-{
-	disable_proc_mm = 1;
-	return 0;
-}
+	if (pid < 0)
+		fatal_perror("check_seccomp : clone failed");
 
-__uml_setup("noprocmm", noprocmm_cmd_param,
-"noprocmm\n"
-"    Turns off usage of /proc/mm, even if host supports it.\n"
-"    To support /proc/mm, the host needs to be patched using\n"
-"    the current skas3 patch.\n\n");
+	CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
+	if (n < 0)
+		fatal_perror("check_seccomp : waitpid failed");
 
-static int __init noptracefaultinfo_cmd_param(char *str, int* add)
-{
-	disable_ptrace_faultinfo = 1;
-	return 0;
-}
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+		struct uml_pt_regs *regs;
+		unsigned long fp_size;
+		int r;
 
-__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param,
-"noptracefaultinfo\n"
-"    Turns off usage of PTRACE_FAULTINFO, even if host supports\n"
-"    it. To support PTRACE_FAULTINFO, the host needs to be patched\n"
-"    using the current skas3 patch.\n\n");
+		/* Fill in the host_fp_size from the mcontext. */
+		regs = calloc(1, sizeof(struct uml_pt_regs));
+		get_stub_state(regs, seccomp_test_stub_data, &fp_size);
+		host_fp_size = fp_size;
+		free(regs);
 
-static int __init noptraceldt_cmd_param(char *str, int* add)
-{
-	disable_ptrace_ldt = 1;
-	return 0;
-}
+		/* Repeat with the correct size */
+		regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
+		r = get_stub_state(regs, seccomp_test_stub_data, NULL);
 
-__uml_setup("noptraceldt", noptraceldt_cmd_param,
-"noptraceldt\n"
-"    Turns off usage of PTRACE_LDT, even if host supports it.\n"
-"    To support PTRACE_LDT, the host needs to be patched using\n"
-"    the current skas3 patch.\n\n");
+		/* Store as the default startup registers */
+		exec_fp_regs = malloc(host_fp_size);
+		memcpy(exec_regs, regs->gp, sizeof(exec_regs));
+		memcpy(exec_fp_regs, regs->fp, host_fp_size);
 
-static inline void check_skas3_ptrace_faultinfo(void)
-{
-	struct ptrace_faultinfo fi;
-	int pid, n;
+		munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
 
-	non_fatal("  - PTRACE_FAULTINFO...");
-	pid = start_ptraced_child();
+		free(regs);
+
+		if (r) {
+			os_info("failed to fetch registers: %d\n", r);
+			return false;
+		}
 
-	n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
-	if (n < 0) {
-		if (errno == EIO)
-			non_fatal("not found\n");
-		else
-			perror("not found");
-	} else if (disable_ptrace_faultinfo)
-		non_fatal("found but disabled on command line\n");
-	else {
-		ptrace_faultinfo = 1;
-		non_fatal("found\n");
+		os_info("OK\n");
+		return true;
 	}
 
-	stop_ptraced_child(pid, 1, 1);
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
+		os_info("missing\n");
+	else
+		os_info("error\n");
+
+	munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+	return false;
 }
 
-static inline void check_skas3_ptrace_ldt(void)
+
+static void __init check_coredump_limit(void)
 {
-#ifdef PTRACE_LDT
-	int pid, n;
-	unsigned char ldtbuf[40];
-	struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
-		.func = 2, /* read default ldt */
-		.ptr = ldtbuf,
-		.bytecount = sizeof(ldtbuf)};
-
-	non_fatal("  - PTRACE_LDT...");
-	pid = start_ptraced_child();
+	struct rlimit lim;
+	int err = getrlimit(RLIMIT_CORE, &lim);
 
-	n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op);
-	if (n < 0) {
-		if (errno == EIO)
-			non_fatal("not found\n");
-		else
-			perror("not found");
-	} else if (disable_ptrace_ldt)
-		non_fatal("found, but use is disabled\n");
-	else {
-		ptrace_ldt = 1;
-		non_fatal("found\n");
+	if (err) {
+		perror("Getting core dump limit");
+		return;
 	}
 
-	stop_ptraced_child(pid, 1, 1);
-#endif
+	os_info("Core dump limits :\n\tsoft - ");
+	if (lim.rlim_cur == RLIM_INFINITY)
+		os_info("NONE\n");
+	else
+		os_info("%llu\n", (unsigned long long)lim.rlim_cur);
+
+	os_info("\thard - ");
+	if (lim.rlim_max == RLIM_INFINITY)
+		os_info("NONE\n");
+	else
+		os_info("%llu\n", (unsigned long long)lim.rlim_max);
 }
 
-static inline void check_skas3_proc_mm(void)
+void  __init get_host_cpu_features(
+		void (*flags_helper_func)(char *line),
+		void (*cache_helper_func)(char *line))
 {
-	non_fatal("  - /proc/mm...");
-	if (access("/proc/mm", W_OK) < 0)
-		perror("not found");
-	else if (disable_proc_mm)
-		non_fatal("found but disabled on command line\n");
-	else {
-		proc_mm = 1;
-		non_fatal("found\n");
+	FILE *cpuinfo;
+	char *line = NULL;
+	size_t len = 0;
+	int done_parsing = 0;
+
+	cpuinfo = fopen("/proc/cpuinfo", "r");
+	if (cpuinfo == NULL) {
+		os_info("Failed to get host CPU features\n");
+	} else {
+		while ((getline(&line, &len, cpuinfo)) != -1) {
+			if (strstr(line, "flags")) {
+				flags_helper_func(line);
+				done_parsing++;
+			}
+			if (strstr(line, "cache_alignment")) {
+				cache_helper_func(line);
+				done_parsing++;
+			}
+			free(line);
+			line = NULL;
+			if (done_parsing > 1)
+				break;
+		}
+		fclose(cpuinfo);
 	}
 }
 
-void can_do_skas(void)
-{
-	non_fatal("Checking for the skas3 patch in the host:\n");
+static int seccomp_config __initdata;
 
-	check_skas3_proc_mm();
-	check_skas3_ptrace_faultinfo();
-	check_skas3_ptrace_ldt();
+static int __init uml_seccomp_config(char *line, int *add)
+{
+	*add = 0;
+
+	if (strcmp(line, "off") == 0)
+		seccomp_config = 0;
+	else if (strcmp(line, "auto") == 0)
+		seccomp_config = 1;
+	else if (strcmp(line, "on") == 0)
+		seccomp_config = 2;
+	else
+		fatal("Invalid seccomp option '%s', expected on/auto/off\n",
+		      line);
 
-	if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt)
-		skas_needs_stub = 1;
+	return 0;
 }
 
-int __init parse_iomem(char *str, int *add)
+__uml_setup("seccomp=", uml_seccomp_config,
+"seccomp=<on/auto/off>\n"
+"    Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
+"    processes work collaboratively with the kernel instead of being\n"
+"    traced using ptrace. All syscalls from the application are caught and\n"
+"    redirected using a signal. This signal handler in turn is permitted to\n"
+"    do the selected set of syscalls to communicate with the UML kernel and\n"
+"    do the required memory management.\n"
+"\n"
+"    This method is overall faster than the ptrace based userspace, primarily\n"
+"    because it reduces the number of context switches for (minor) page faults.\n"
+"\n"
+"    However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
+"    userspace from reading and writing all physical memory. Userspace\n"
+"    processes could also trick the stub into disabling SIGALRM which\n"
+"    prevents it from being interrupted for scheduling purposes.\n"
+"\n"
+"    This is insecure and should only be used with a trusted userspace\n\n"
+);
+
+void __init os_early_checks(void)
 {
-	struct iomem_region *new;
-	struct stat64 buf;
-	char *file, *driver;
-	int fd, size;
-
-	driver = str;
-	file = strchr(str,',');
-	if (file == NULL) {
-		fprintf(stderr, "parse_iomem : failed to parse iomem\n");
-		goto out;
-	}
-	*file = '\0';
-	file++;
-	fd = open(file, O_RDWR, 0);
-	if (fd < 0) {
-		perror("parse_iomem - Couldn't open io file");
-		goto out;
-	}
+	int pid;
 
-	if (fstat64(fd, &buf) < 0) {
-		perror("parse_iomem - cannot stat_fd file");
-		goto out_close;
-	}
+	/* Print out the core dump limits early */
+	check_coredump_limit();
+
+	/* Need to check this early because mmapping happens before the
+	 * kernel is running.
+	 */
+	check_tmpexec();
+
+	if (seccomp_config) {
+		if (init_seccomp()) {
+			using_seccomp = 1;
+			return;
+		}
 
-	new = malloc(sizeof(*new));
-	if (new == NULL) {
-		perror("Couldn't allocate iomem_region struct");
-		goto out_close;
+		if (seccomp_config == 2)
+			fatal("SECCOMP userspace requested but not functional!\n");
 	}
 
-	size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1);
+	if (uml_ncpus > 1)
+		fatal("SMP is not supported with PTRACE userspace.\n");
 
-	*new = ((struct iomem_region) { .next		= iomem_regions,
-					.driver		= driver,
-					.fd		= fd,
-					.size		= size,
-					.phys		= 0,
-					.virt		= 0 });
-	iomem_regions = new;
-	iomem_size += new->size + UM_KERN_PAGE_SIZE;
+	using_seccomp = 0;
+	check_ptrace();
 
-	return 0;
- out_close:
-	close(fd);
- out:
-	return 1;
+	pid = start_ptraced_child();
+	if (init_pid_registers(pid))
+		fatal("Failed to initialize default registers");
+	stop_ptraced_child(pid, 1);
 }
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index e9824d5dd7d5..13ebc86918d4 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -1,186 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
+ * Copyright (C) 2012-2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stddef.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <time.h>
+#include <sys/signalfd.h>
 #include <sys/time.h>
 #include <kern_util.h>
 #include <os.h>
+#include <smp.h>
+#include <string.h>
 #include "internal.h"
 
-int set_interval(void)
-{
-	int usec = UM_USEC_PER_SEC / UM_HZ;
-	struct itimerval interval = ((struct itimerval) { { 0, usec },
-							  { 0, usec } });
-
-	if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
-		return -errno;
+static timer_t event_high_res_timer[CONFIG_NR_CPUS] = { 0 };
 
-	return 0;
+static inline long long timespec_to_ns(const struct timespec *ts)
+{
+	return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + ts->tv_nsec;
 }
 
-int timer_one_shot(int ticks)
+long long os_persistent_clock_emulation(void)
 {
-	unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ;
-	unsigned long sec = usec / UM_USEC_PER_SEC;
-	struct itimerval interval;
-
-	usec %= UM_USEC_PER_SEC;
-	interval = ((struct itimerval) { { 0, 0 }, { sec, usec } });
+	struct timespec realtime_tp;
 
-	if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
-		return -errno;
-
-	return 0;
+	clock_gettime(CLOCK_REALTIME, &realtime_tp);
+	return timespec_to_ns(&realtime_tp);
 }
 
+#ifndef sigev_notify_thread_id
+#define sigev_notify_thread_id _sigev_un._tid
+#endif
+
 /**
- * timeval_to_ns - Convert timeval to nanoseconds
- * @ts:		pointer to the timeval variable to be converted
- *
- * Returns the scalar nanosecond representation of the timeval
- * parameter.
- *
- * Ripped from linux/time.h because it's a kernel header, and thus
- * unusable from here.
+ * os_timer_create() - create an new posix (interval) timer
  */
-static inline long long timeval_to_ns(const struct timeval *tv)
+int os_timer_create(void)
 {
-	return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) +
-		tv->tv_usec * UM_NSEC_PER_USEC;
+	int cpu = uml_curr_cpu();
+	timer_t *t = &event_high_res_timer[cpu];
+	struct sigevent sev = {
+		.sigev_notify = SIGEV_THREAD_ID,
+		.sigev_signo = SIGALRM,
+		.sigev_value.sival_ptr = t,
+		.sigev_notify_thread_id = gettid(),
+	};
+
+	if (timer_create(CLOCK_MONOTONIC, &sev, t) == -1)
+		return -1;
+
+	return 0;
 }
 
-long long disable_timer(void)
+int os_timer_set_interval(int cpu, unsigned long long nsecs)
 {
-	struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } });
-	long long remain, max = UM_NSEC_PER_SEC / UM_HZ;
+	struct itimerspec its;
+
+	its.it_value.tv_sec = nsecs / UM_NSEC_PER_SEC;
+	its.it_value.tv_nsec = nsecs % UM_NSEC_PER_SEC;
 
-	if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0)
-		printk(UM_KERN_ERR "disable_timer - setitimer failed, "
-		       "errno = %d\n", errno);
+	its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC;
+	its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC;
 
-	remain = timeval_to_ns(&time.it_value);
-	if (remain > max)
-		remain = max;
+	if (timer_settime(event_high_res_timer[cpu], 0, &its, NULL) == -1)
+		return -errno;
 
-	return remain;
+	return 0;
 }
 
-long long os_nsecs(void)
+int os_timer_one_shot(int cpu, unsigned long long nsecs)
 {
-	struct timeval tv;
+	struct itimerspec its = {
+		.it_value.tv_sec = nsecs / UM_NSEC_PER_SEC,
+		.it_value.tv_nsec = nsecs % UM_NSEC_PER_SEC,
 
-	gettimeofday(&tv, NULL);
-	return timeval_to_ns(&tv);
-}
+		.it_interval.tv_sec = 0,
+		.it_interval.tv_nsec = 0, // we cheat here
+	};
 
-#ifdef UML_CONFIG_NO_HZ_COMMON
-static int after_sleep_interval(struct timespec *ts)
-{
+	timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
 	return 0;
 }
 
-static void deliver_alarm(void)
+/**
+ * os_timer_disable() - disable the posix (interval) timer
+ * @cpu: the CPU for which the timer is to be disabled
+ */
+void os_timer_disable(int cpu)
 {
-	alarm_handler(SIGVTALRM, NULL, NULL);
-}
+	struct itimerspec its;
 
-static unsigned long long sleep_time(unsigned long long nsecs)
-{
-	return nsecs;
+	memset(&its, 0, sizeof(struct itimerspec));
+	timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
 }
 
-#else
-unsigned long long last_tick;
-unsigned long long skew;
-
-static void deliver_alarm(void)
+long long os_nsecs(void)
 {
-	unsigned long long this_tick = os_nsecs();
-	int one_tick = UM_NSEC_PER_SEC / UM_HZ;
-
-	/* Protection against the host's time going backwards */
-	if ((last_tick != 0) && (this_tick < last_tick))
-		this_tick = last_tick;
-
-	if (last_tick == 0)
-		last_tick = this_tick - one_tick;
-
-	skew += this_tick - last_tick;
-
-	while (skew >= one_tick) {
-		alarm_handler(SIGVTALRM, NULL, NULL);
-		skew -= one_tick;
-	}
+	struct timespec ts;
 
-	last_tick = this_tick;
+	clock_gettime(CLOCK_MONOTONIC,&ts);
+	return timespec_to_ns(&ts);
 }
 
-static unsigned long long sleep_time(unsigned long long nsecs)
-{
-	return nsecs > skew ? nsecs - skew : 0;
-}
+static __thread int wake_signals;
 
-static inline long long timespec_to_us(const struct timespec *ts)
+void os_idle_prepare(void)
 {
-	return ((long long) ts->tv_sec * UM_USEC_PER_SEC) +
-		ts->tv_nsec / UM_NSEC_PER_USEC;
-}
+	sigset_t set;
 
-static int after_sleep_interval(struct timespec *ts)
-{
-	int usec = UM_USEC_PER_SEC / UM_HZ;
-	long long start_usecs = timespec_to_us(ts);
-	struct timeval tv;
-	struct itimerval interval;
+	sigemptyset(&set);
+	sigaddset(&set, SIGALRM);
+	sigaddset(&set, IPI_SIGNAL);
 
 	/*
-	 * It seems that rounding can increase the value returned from
-	 * setitimer to larger than the one passed in.  Over time,
-	 * this will cause the remaining time to be greater than the
-	 * tick interval.  If this happens, then just reduce the first
-	 * tick to the interval value.
+	 * We need to use signalfd rather than sigsuspend in idle sleep
+	 * because the IPI signal is a real-time signal that carries data,
+	 * and unlike handling SIGALRM, we cannot simply flag it in
+	 * signals_pending.
 	 */
-	if (start_usecs > usec)
-		start_usecs = usec;
-
-	start_usecs -= skew / UM_NSEC_PER_USEC;
-	if (start_usecs < 0)
-		start_usecs = 0;
-
-	tv = ((struct timeval) { .tv_sec  = start_usecs / UM_USEC_PER_SEC,
-				 .tv_usec = start_usecs % UM_USEC_PER_SEC });
-	interval = ((struct itimerval) { { 0, usec }, tv });
-
-	if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
-		return -errno;
-
-	return 0;
+	wake_signals = signalfd(-1, &set, SFD_CLOEXEC);
+	if (wake_signals < 0)
+		panic("Failed to create signal FD, errno = %d", errno);
 }
-#endif
 
-void idle_sleep(unsigned long long nsecs)
+/**
+ * os_idle_sleep() - sleep until interrupted
+ */
+void os_idle_sleep(void)
 {
-	struct timespec ts;
+	sigset_t set;
 
 	/*
-	 * nsecs can come in as zero, in which case, this starts a
-	 * busy loop.  To prevent this, reset nsecs to the tick
-	 * interval if it is zero.
+	 * Block SIGALRM while performing the need_resched check.
+	 * Note that, because IRQs are disabled, the IPI signal is
+	 * already blocked.
 	 */
-	if (nsecs == 0)
-		nsecs = UM_NSEC_PER_SEC / UM_HZ;
+	sigemptyset(&set);
+	sigaddset(&set, SIGALRM);
+	sigprocmask(SIG_BLOCK, &set, NULL);
 
-	nsecs = sleep_time(nsecs);
-	ts = ((struct timespec) { .tv_sec	= nsecs / UM_NSEC_PER_SEC,
-				  .tv_nsec	= nsecs % UM_NSEC_PER_SEC });
+	/*
+	 * Because disabling IRQs does not block SIGALRM, it is also
+	 * necessary to check for any pending timer alarms.
+	 */
+	if (!uml_need_resched() && !timer_alarm_pending())
+		os_poll(1, &wake_signals);
 
-	if (nanosleep(&ts, &ts) == 0)
-		deliver_alarm();
-	after_sleep_interval(&ts);
+	/* Restore the signal mask. */
+	sigprocmask(SIG_UNBLOCK, &set, NULL);
 }
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index 721d8afa329b..f784db83e026 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdlib.h>
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index c1dc89261f67..eb523ab1e218 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -1,6 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
 #include <stdio.h>
@@ -35,11 +35,12 @@ static int __init make_uml_dir(void)
 
 		err = -ENOENT;
 		if (home == NULL) {
-			printk(UM_KERN_ERR "make_uml_dir : no value in "
-			       "environment for $HOME\n");
+			printk(UM_KERN_ERR
+				"%s: no value in environment for $HOME\n",
+				__func__);
 			goto err;
 		}
-		strlcpy(dir, home, sizeof(dir));
+		strscpy(dir, home);
 		uml_dir++;
 	}
 	strlcat(dir, uml_dir, sizeof(dir));
@@ -50,13 +51,15 @@ static int __init make_uml_dir(void)
 	err = -ENOMEM;
 	uml_dir = malloc(strlen(dir) + 1);
 	if (uml_dir == NULL) {
-		printf("make_uml_dir : malloc failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : malloc failed, errno = %d\n",
+			__func__, errno);
 		goto err;
 	}
 	strcpy(uml_dir, dir);
 
 	if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) {
-	        printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno));
+		printk(UM_KERN_ERR "Failed to mkdir '%s': %s\n",
+			uml_dir, strerror(errno));
 		err = -errno;
 		goto err_free;
 	}
@@ -94,7 +97,7 @@ static int remove_files_and_dir(char *dir)
 	while ((ent = readdir(directory)) != NULL) {
 		if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
 			continue;
-		len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1;
+		len = strlen(dir) + strlen("/") + strlen(ent->d_name) + 1;
 		if (len > sizeof(file)) {
 			ret = -E2BIG;
 			goto out;
@@ -132,18 +135,16 @@ out:
  */
 static inline int is_umdir_used(char *dir)
 {
-	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
-	char pid[sizeof("nnnnn\0")], *end;
-	int dead, fd, p, n, err;
-
-	n = snprintf(file, sizeof(file), "%s/pid", dir);
-	if (n >= sizeof(file)) {
-		printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
-		err = -E2BIG;
-		goto out;
-	}
+	char pid[sizeof("nnnnnnnnn")], *end, *file;
+	int fd, p, n, err;
+	size_t filelen = strlen(dir) + sizeof("/pid") + 1;
+
+	file = malloc(filelen);
+	if (!file)
+		return -ENOMEM;
+
+	snprintf(file, filelen, "%s/pid", dir);
 
-	dead = 0;
 	fd = open(file, O_RDONLY);
 	if (fd < 0) {
 		fd = -errno;
@@ -182,6 +183,7 @@ static inline int is_umdir_used(char *dir)
 out_close:
 	close(fd);
 out:
+	free(file);
 	return 0;
 }
 
@@ -207,18 +209,22 @@ static int umdir_take_if_dead(char *dir)
 
 static void __init create_pid_file(void)
 {
-	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
-	char pid[sizeof("nnnnn\0")];
+	char pid[sizeof("nnnnnnnnn")], *file;
 	int fd, n;
 
-	if (umid_file_name("pid", file, sizeof(file)))
+	n = strlen(uml_dir) + UMID_LEN + sizeof("/pid");
+	file = malloc(n);
+	if (!file)
 		return;
 
+	if (umid_file_name("pid", file, n))
+		goto out;
+
 	fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644);
 	if (fd < 0) {
 		printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: "
 		       "%s\n", file, strerror(errno));
-		return;
+		goto out;
 	}
 
 	snprintf(pid, sizeof(pid), "%d\n", getpid());
@@ -228,6 +234,8 @@ static void __init create_pid_file(void)
 		       errno);
 
 	close(fd);
+out:
+	free(file);
 }
 
 int __init set_umid(char *name)
@@ -235,7 +243,7 @@ int __init set_umid(char *name)
 	if (strlen(name) > UMID_LEN - 1)
 		return -E2BIG;
 
-	strlcpy(umid, name, sizeof(umid));
+	strscpy(umid, name);
 
 	return 0;
 }
@@ -254,7 +262,7 @@ static int __init make_umid(void)
 	make_uml_dir();
 
 	if (*umid == '\0') {
-		strlcpy(tmp, uml_dir, sizeof(tmp));
+		strscpy(tmp, uml_dir);
 		strlcat(tmp, "XXXXXX", sizeof(tmp));
 		fd = mkstemp(tmp);
 		if (fd < 0) {
@@ -350,8 +358,10 @@ char *get_umid(void)
 
 static int __init set_uml_dir(char *name, int *add)
 {
+	*add = 0;
+
 	if (*name == '\0') {
-		printf("uml_dir can't be an empty string\n");
+		os_warn("uml_dir can't be an empty string\n");
 		return 0;
 	}
 
@@ -362,7 +372,7 @@ static int __init set_uml_dir(char *name, int *add)
 
 	uml_dir = malloc(strlen(name) + 2);
 	if (uml_dir == NULL) {
-		printf("Failed to malloc uml_dir - error = %d\n", errno);
+		os_warn("Failed to malloc uml_dir - error = %d\n", errno);
 
 		/*
 		 * Return 0 here because do_initcalls doesn't look at
@@ -382,13 +392,19 @@ __uml_setup("uml_dir=", set_uml_dir,
 
 static void remove_umid_dir(void)
 {
-	char dir[strlen(uml_dir) + UMID_LEN + 1], err;
+	char *dir, err;
+
+	dir = malloc(strlen(uml_dir) + UMID_LEN + 1);
+	if (!dir)
+		return;
 
 	sprintf(dir, "%s%s", uml_dir, umid);
 	err = remove_files_and_dir(dir);
 	if (err)
-		printf("remove_umid_dir - remove_files_and_dir failed with "
-		       "err = %d\n", err);
+		os_warn("%s - remove_files_and_dir failed with err = %d\n",
+			__func__, err);
+
+	free(dir);
 }
 
 __uml_exitcall(remove_umid_dir);
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index db4a034aeee1..67f6112318b6 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -1,120 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __NO_FORTIFY
 #include <linux/types.h>
 #include <linux/module.h>
 
-/* Some of this are builtin function (some are not but could in the future),
- * so I *must* declare good prototypes for them and then EXPORT them.
- * The kernel code uses the macro defined by include/linux/string.h,
- * so I undef macros; the userspace code does not include that and I
- * add an EXPORT for the glibc one.
+/*
+ * This file exports some critical string functions and compiler
+ * built-in functions (where calls are emitted by the compiler
+ * itself that we cannot avoid even in kernel code) to modules.
+ *
+ * "_user.c" code that previously used exports here such as hostfs
+ * really should be considered part of the 'hypervisor' and define
+ * its own API boundary like hostfs does now; don't add exports to
+ * this file for such cases.
  */
 
-#undef strlen
-#undef strstr
-#undef memcpy
-#undef memset
-
-extern size_t strlen(const char *);
-extern void *memmove(void *, const void *, size_t);
-extern void *memset(void *, int, size_t);
-extern int printf(const char *, ...);
-
 /* If it's not defined, the export is included in lib/string.c.*/
 #ifdef __HAVE_ARCH_STRSTR
+#undef strstr
 EXPORT_SYMBOL(strstr);
 #endif
 
 #ifndef __x86_64__
+#undef memcpy
 extern void *memcpy(void *, const void *, size_t);
 EXPORT_SYMBOL(memcpy);
-#endif
-
+extern void *memmove(void *, const void *, size_t);
 EXPORT_SYMBOL(memmove);
+#undef memset
+extern void *memset(void *, int, size_t);
 EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(printf);
-
-/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
- * However, the modules will use the CRC defined *here*, no matter if it is
- * good; so the versions of these symbols will always match
- */
-#define EXPORT_SYMBOL_PROTO(sym)       \
-	int sym(void);                  \
-	EXPORT_SYMBOL(sym);
-
-extern void readdir64(void) __attribute__((weak));
-EXPORT_SYMBOL(readdir64);
-extern void truncate64(void) __attribute__((weak));
-EXPORT_SYMBOL(truncate64);
-
-#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
-EXPORT_SYMBOL(vsyscall_ehdr);
-EXPORT_SYMBOL(vsyscall_end);
 #endif
 
-EXPORT_SYMBOL_PROTO(__errno_location);
-
-EXPORT_SYMBOL_PROTO(access);
-EXPORT_SYMBOL_PROTO(open);
-EXPORT_SYMBOL_PROTO(open64);
-EXPORT_SYMBOL_PROTO(close);
-EXPORT_SYMBOL_PROTO(read);
-EXPORT_SYMBOL_PROTO(write);
-EXPORT_SYMBOL_PROTO(dup2);
-EXPORT_SYMBOL_PROTO(__xstat);
-EXPORT_SYMBOL_PROTO(__lxstat);
-EXPORT_SYMBOL_PROTO(__lxstat64);
-EXPORT_SYMBOL_PROTO(__fxstat64);
-EXPORT_SYMBOL_PROTO(lseek);
-EXPORT_SYMBOL_PROTO(lseek64);
-EXPORT_SYMBOL_PROTO(chown);
-EXPORT_SYMBOL_PROTO(fchown);
-EXPORT_SYMBOL_PROTO(truncate);
-EXPORT_SYMBOL_PROTO(ftruncate64);
-EXPORT_SYMBOL_PROTO(utime);
-EXPORT_SYMBOL_PROTO(utimes);
-EXPORT_SYMBOL_PROTO(futimes);
-EXPORT_SYMBOL_PROTO(chmod);
-EXPORT_SYMBOL_PROTO(fchmod);
-EXPORT_SYMBOL_PROTO(rename);
-EXPORT_SYMBOL_PROTO(__xmknod);
-
-EXPORT_SYMBOL_PROTO(symlink);
-EXPORT_SYMBOL_PROTO(link);
-EXPORT_SYMBOL_PROTO(unlink);
-EXPORT_SYMBOL_PROTO(readlink);
-
-EXPORT_SYMBOL_PROTO(mkdir);
-EXPORT_SYMBOL_PROTO(rmdir);
-EXPORT_SYMBOL_PROTO(opendir);
-EXPORT_SYMBOL_PROTO(readdir);
-EXPORT_SYMBOL_PROTO(closedir);
-EXPORT_SYMBOL_PROTO(seekdir);
-EXPORT_SYMBOL_PROTO(telldir);
-
-EXPORT_SYMBOL_PROTO(ioctl);
-
-EXPORT_SYMBOL_PROTO(pread64);
-EXPORT_SYMBOL_PROTO(pwrite64);
-
-EXPORT_SYMBOL_PROTO(statfs);
-EXPORT_SYMBOL_PROTO(statfs64);
-
-EXPORT_SYMBOL_PROTO(getuid);
-
-EXPORT_SYMBOL_PROTO(fsync);
-EXPORT_SYMBOL_PROTO(fdatasync);
-
-EXPORT_SYMBOL_PROTO(lstat64);
-EXPORT_SYMBOL_PROTO(fstat64);
-EXPORT_SYMBOL_PROTO(mknod);
-
-/* Export symbols used by GCC for the stack protector. */
-extern void __stack_smash_handler(void *) __attribute__((weak));
-EXPORT_SYMBOL(__stack_smash_handler);
-
-extern long __guard __attribute__((weak));
-EXPORT_SYMBOL(__guard);
-
 #ifdef _FORTIFY_SOURCE
-extern int __sprintf_chk(char *str, int flag, size_t strlen, const char *format);
+extern int __sprintf_chk(char *str, int flag, size_t len, const char *format);
 EXPORT_SYMBOL(__sprintf_chk);
 #endif
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 492ef5e6e166..e3ad71a0d13c 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -1,8 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
  */
 
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -10,15 +11,16 @@
 #include <signal.h>
 #include <string.h>
 #include <termios.h>
-#include <wait.h>
+#include <sys/wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
+#include <sys/random.h>
+#include <init.h>
 #include <os.h>
 
 void stack_protections(unsigned long address)
 {
-	if (mprotect((void *) address, UM_THREAD_SIZE,
-		    PROT_READ | PROT_WRITE | PROT_EXEC) < 0)
+	if (mprotect((void *) address, UM_THREAD_SIZE, PROT_READ | PROT_WRITE) < 0)
 		panic("protecting stack failed, errno = %d", errno);
 }
 
@@ -49,8 +51,8 @@ void setup_machinename(char *machine_out)
 	struct utsname host;
 
 	uname(&host);
-#ifdef UML_CONFIG_UML_X86
-# ifndef UML_CONFIG_64BIT
+#if IS_ENABLED(CONFIG_UML_X86)
+# if !IS_ENABLED(CONFIG_64BIT)
 	if (!strcmp(host.machine, "x86_64")) {
 		strcpy(machine_out, "i686");
 		return;
@@ -94,6 +96,21 @@ static inline void __attribute__ ((noreturn)) uml_abort(void)
 			exit(127);
 }
 
+ssize_t os_getrandom(void *buf, size_t len, unsigned int flags)
+{
+	return getrandom(buf, len, flags);
+}
+
+/*
+ * UML helper threads must not handle SIGWINCH/INT/TERM
+ */
+void os_fix_helper_signals(void)
+{
+	signal(SIGWINCH, SIG_IGN);
+	signal(SIGINT, SIG_DFL);
+	signal(SIGTERM, SIG_DFL);
+}
+
 void os_dump_core(void)
 {
 	int pid;
@@ -142,3 +159,51 @@ void um_early_printk(const char *s, unsigned int n)
 {
 	printf("%.*s", n, s);
 }
+
+static int quiet_info;
+
+static int __init quiet_cmd_param(char *str, int *add)
+{
+	quiet_info = 1;
+	return 0;
+}
+
+__uml_setup("quiet", quiet_cmd_param,
+"quiet\n"
+"    Turns off information messages during boot.\n\n");
+
+/*
+ * The os_info/os_warn functions will be called by helper threads. These
+ * have a very limited stack size and using the libc formatting functions
+ * may overflow the stack.
+ * So pull in the kernel vscnprintf and use that instead with a fixed
+ * on-stack buffer.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+void os_info(const char *fmt, ...)
+{
+	char buf[256];
+	va_list list;
+	int len;
+
+	if (quiet_info)
+		return;
+
+	va_start(list, fmt);
+	len = vscnprintf(buf, sizeof(buf), fmt, list);
+	fwrite(buf, len, 1, stderr);
+	va_end(list);
+}
+
+void os_warn(const char *fmt, ...)
+{
+	char buf[256];
+	va_list list;
+	int len;
+
+	va_start(list, fmt);
+	len = vscnprintf(buf, sizeof(buf), fmt, list);
+	fwrite(buf, len, 1, stderr);
+	va_end(list);
+}
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index 15889df9b466..a8b7d9dab0a6 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 # ===========================================================================
 # arch/um: Generic definitions
 # ===========================================================================
 
 USER_SINGLE_OBJS := \
-	$(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs))
-USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m)  $(USER_SINGLE_OBJS))
+	$(foreach f,$(patsubst %.o,%,$(obj-y)),$($(f)-objs))
+USER_OBJS += $(filter %_user.o,$(obj-y) $(USER_SINGLE_OBJS))
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
 $(USER_OBJS:.o=.%): \
diff --git a/arch/um/sys-ia64/Makefile b/arch/um/sys-ia64/Makefile
deleted file mode 100644
index d02f4c265232..000000000000
--- a/arch/um/sys-ia64/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-OBJ = built-in.o
-
-OBJS =
-
-all: $(OBJ)
-
-$(OBJ): $(OBJS)
-	rm -f $@
-	$(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
-
-clean-files := $(OBJS) link.ld
diff --git a/arch/um/sys-ia64/sysdep/ptrace.h b/arch/um/sys-ia64/sysdep/ptrace.h
deleted file mode 100644
index 0f0f4e6fd334..000000000000
--- a/arch/um/sys-ia64/sysdep/ptrace.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_PTRACE_H
-#define __SYSDEP_IA64_PTRACE_H
-
-struct sys_pt_regs {
-  int foo;
-};
-
-#define EMPTY_REGS { 0 }
-
-#endif
-
diff --git a/arch/um/sys-ia64/sysdep/sigcontext.h b/arch/um/sys-ia64/sysdep/sigcontext.h
deleted file mode 100644
index 76b43161e779..000000000000
--- a/arch/um/sys-ia64/sysdep/sigcontext.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SIGCONTEXT_H
-#define __SYSDEP_IA64_SIGCONTEXT_H
-
-#endif
-
diff --git a/arch/um/sys-ia64/sysdep/skas_ptrace.h b/arch/um/sys-ia64/sysdep/skas_ptrace.h
deleted file mode 100644
index 25a38e715702..000000000000
--- a/arch/um/sys-ia64/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SKAS_PTRACE_H
-#define __SYSDEP_IA64_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-ia64/sysdep/syscalls.h b/arch/um/sys-ia64/sysdep/syscalls.h
deleted file mode 100644
index 5f6700c41558..000000000000
--- a/arch/um/sys-ia64/sysdep/syscalls.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_IA64_SYSCALLS_H
-#define __SYSDEP_IA64_SYSCALLS_H
-
-#endif
-
diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
deleted file mode 100644
index 20d363bd7004..000000000000
--- a/arch/um/sys-ppc/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-OBJ = built-in.o
-
-.S.o:
-	$(CC) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-
-OBJS = ptrace.o sigcontext.o checksum.o miscthings.o misc.o \
-	ptrace_user.o sysrq.o
-
-asflags-y := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel
-
-all: $(OBJ)
-
-$(OBJ): $(OBJS)
-	rm -f $@
-	$(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
-
-ptrace_user.o: ptrace_user.c
-	$(CC) -D__KERNEL__ $(USER_CFLAGS) $(ccflags-y) -c -o $@ $<
-
-sigcontext.o: sigcontext.c
-	$(CC) $(USER_CFLAGS) $(ccflags-y) -c -o $@ $<
-
-checksum.S:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/lib/$@ $@
-
-mk_defs.c:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
-ppc_defs.head:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
-ppc_defs.h: mk_defs.c ppc_defs.head \
-		$(srctree)/include/asm-ppc/mmu.h \
-		$(srctree)/include/asm-ppc/processor.h \
-		$(srctree)/include/asm-ppc/pgtable.h \
-		$(srctree)/include/asm-ppc/ptrace.h
-#	$(CC) $(CFLAGS) -S mk_defs.c
-	cp ppc_defs.head ppc_defs.h
-# for bk, this way we can write to the file even if it's not checked out
-	echo '#define THREAD 608' >> ppc_defs.h
-	echo '#define PT_REGS 8' >> ppc_defs.h
-	echo '#define CLONE_VM 256' >> ppc_defs.h
-#	chmod u+w ppc_defs.h
-#	grep '^#define' mk_defs.s >> ppc_defs.h
-#	rm mk_defs.s
-
-# the asm link is horrible, and breaks the other targets.  This is also
-# not going to work with parallel makes.
-
-checksum.o: checksum.S
-	rm -f asm
-	ln -s $(srctree)/include/asm-ppc asm
-	$(CC) $(asflags-y) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-	rm -f asm
-
-misc.o: misc.S ppc_defs.h
-	rm -f asm
-	ln -s $(srctree)/include/asm-ppc asm
-	$(CC) $(asflags-y) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-	rm -f asm
-
-clean-files := $(OBJS) ppc_defs.h checksum.S mk_defs.c
diff --git a/arch/um/sys-ppc/asm/archparam.h b/arch/um/sys-ppc/asm/archparam.h
deleted file mode 100644
index 4269d8a37b4f..000000000000
--- a/arch/um/sys-ppc/asm/archparam.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __UM_ARCHPARAM_PPC_H
-#define __UM_ARCHPARAM_PPC_H
-
-/********* Bits for asm-um/string.h **********/
-
-#define __HAVE_ARCH_STRRCHR
-
-#endif
diff --git a/arch/um/sys-ppc/asm/elf.h b/arch/um/sys-ppc/asm/elf.h
deleted file mode 100644
index 8aacaf56508d..000000000000
--- a/arch/um/sys-ppc/asm/elf.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef __UM_ELF_PPC_H
-#define __UM_ELF_PPC_H
-
-
-extern long elf_aux_hwcap;
-#define ELF_HWCAP (elf_aux_hwcap)
-
-#define SET_PERSONALITY(ex) do ; while(0)
-
-#define ELF_EXEC_PAGESIZE 4096
-
-#define elf_check_arch(x) (1)
-
-#ifdef CONFIG_64BIT
-#define ELF_CLASS ELFCLASS64
-#else
-#define ELF_CLASS ELFCLASS32
-#endif
-
-#define R_386_NONE	0
-#define R_386_32	1
-#define R_386_PC32	2
-#define R_386_GOT32	3
-#define R_386_PLT32	4
-#define R_386_COPY	5
-#define R_386_GLOB_DAT	6
-#define R_386_JMP_SLOT	7
-#define R_386_RELATIVE	8
-#define R_386_GOTOFF	9
-#define R_386_GOTPC	10
-#define R_386_NUM	11
-
-#define ELF_PLATFORM (0)
-
-#define ELF_ET_DYN_BASE (0x08000000)
-
-/* the following stolen from asm-ppc/elf.h */
-#define ELF_NGREG	48	/* includes nip, msr, lr, etc. */
-#define ELF_NFPREG	33	/* includes fpscr */
-/* General registers */
-typedef unsigned long elf_greg_t;
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-/* Floating point registers */
-typedef double elf_fpreg_t;
-typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
-
-#define ELF_DATA        ELFDATA2MSB
-#define ELF_ARCH	EM_PPC
-
-#endif
diff --git a/arch/um/sys-ppc/asm/processor.h b/arch/um/sys-ppc/asm/processor.h
deleted file mode 100644
index 959323151229..000000000000
--- a/arch/um/sys-ppc/asm/processor.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __UM_PROCESSOR_PPC_H
-#define __UM_PROCESSOR_PPC_H
-
-#if defined(__ASSEMBLY__)
-
-#define CONFIG_PPC_MULTIPLATFORM
-#include "arch/processor.h"
-
-#else
-
-#include "asm/processor-generic.h"
-
-#endif
-
-#endif
diff --git a/arch/um/sys-ppc/misc.S b/arch/um/sys-ppc/misc.S
deleted file mode 100644
index 1364b7da578c..000000000000
--- a/arch/um/sys-ppc/misc.S
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * This file contains miscellaneous low-level functions.
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
- * and Paul Mackerras.
- *
- * A couple of functions stolen from arch/ppc/kernel/misc.S for UML
- * by Chris Emerson.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/processor.h>
-#include "ppc_asm.h"
-
-#if defined(CONFIG_4xx) || defined(CONFIG_8xx)
-#define CACHE_LINE_SIZE		16
-#define LG_CACHE_LINE_SIZE	4
-#define MAX_COPY_PREFETCH	1
-#else
-#define CACHE_LINE_SIZE		32
-#define LG_CACHE_LINE_SIZE	5
-#define MAX_COPY_PREFETCH	4
-#endif /* CONFIG_4xx || CONFIG_8xx */
-
-	.text
-
-/*
- * Clear a page using the dcbz instruction, which doesn't cause any
- * memory traffic (except to write out any cache lines which get
- * displaced).  This only works on cacheable memory.
- */
-_GLOBAL(clear_page)
-	li	r0,4096/CACHE_LINE_SIZE
-	mtctr	r0
-#ifdef CONFIG_8xx
-	li	r4, 0
-1:	stw	r4, 0(r3)
-	stw	r4, 4(r3)
-	stw	r4, 8(r3)
-	stw	r4, 12(r3)
-#else
-1:	dcbz	0,r3
-#endif
-	addi	r3,r3,CACHE_LINE_SIZE
-	bdnz	1b
-	blr
-
-/*
- * Copy a whole page.  We use the dcbz instruction on the destination
- * to reduce memory traffic (it eliminates the unnecessary reads of
- * the destination into cache).  This requires that the destination
- * is cacheable.
- */
-#define COPY_16_BYTES		\
-	lwz	r6,4(r4);	\
-	lwz	r7,8(r4);	\
-	lwz	r8,12(r4);	\
-	lwzu	r9,16(r4);	\
-	stw	r6,4(r3);	\
-	stw	r7,8(r3);	\
-	stw	r8,12(r3);	\
-	stwu	r9,16(r3)
-
-_GLOBAL(copy_page)
-	addi	r3,r3,-4
-	addi	r4,r4,-4
-	li	r5,4
-
-#ifndef CONFIG_8xx
-#if MAX_COPY_PREFETCH > 1
-	li	r0,MAX_COPY_PREFETCH
-	li	r11,4
-	mtctr	r0
-11:	dcbt	r11,r4
-	addi	r11,r11,CACHE_LINE_SIZE
-	bdnz	11b
-#else /* MAX_COPY_PREFETCH == 1 */
-	dcbt	r5,r4
-	li	r11,CACHE_LINE_SIZE+4
-#endif /* MAX_COPY_PREFETCH */
-#endif /* CONFIG_8xx */
-
-	li	r0,4096/CACHE_LINE_SIZE
-	mtctr	r0
-1:
-#ifndef CONFIG_8xx
-	dcbt	r11,r4
-	dcbz	r5,r3
-#endif
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES
-	COPY_16_BYTES
-#if CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-#endif
-#endif
-#endif
-	bdnz	1b
-	blr
diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c
deleted file mode 100644
index 25908d26ce07..000000000000
--- a/arch/um/sys-ppc/miscthings.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/threads.h>
-#include <linux/stddef.h>  // for NULL
-#include <linux/elf.h>  // for AT_NULL
-
-/* The following function nicked from arch/ppc/kernel/process.c and
- * adapted slightly */
-/*
- * XXX ld.so expects the auxiliary table to start on
- * a 16-byte boundary, so we have to find it and
- * move it up. :-(
- */
-void shove_aux_table(unsigned long sp)
-{
-	int argc;
-	char *p;
-	unsigned long e;
-	unsigned long aux_start, offset;
-
-	argc = *(int *)sp;
-	sp += sizeof(int) + (argc + 1) * sizeof(char *);
-	/* skip over the environment pointers */
-	do {
-		p = *(char **)sp;
-		sp += sizeof(char *);
-	} while (p != NULL);
-	aux_start = sp;
-	/* skip to the end of the auxiliary table */
-	do {
-		e = *(unsigned long *)sp;
-		sp += 2 * sizeof(unsigned long);
-	} while (e != AT_NULL);
-	offset = ((aux_start + 15) & ~15) - aux_start;
-	if (offset != 0) {
-		do {
-			sp -= sizeof(unsigned long);
-			e = *(unsigned long *)sp;
-			*(unsigned long *)(sp + offset) = e;
-		} while (sp > aux_start);
-	}
-}
-/* END stuff taken from arch/ppc/kernel/process.c */
-
diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c
deleted file mode 100644
index 8245df41b201..000000000000
--- a/arch/um/sys-ppc/ptrace.c
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <linux/sched.h>
-#include "asm/ptrace.h"
-
-int putreg(struct task_struct *child, unsigned long regno, 
-		  unsigned long value)
-{
-	child->thread.process_regs.regs[regno >> 2] = value;
-	return 0;
-}
-
-int poke_user(struct task_struct *child, long addr, long data)
-{
-	if ((addr & 3) || addr < 0)
-		return -EIO;
-
-	if (addr < MAX_REG_OFFSET)
-		return putreg(child, addr, data);
-
-	else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
-		  addr -= offsetof(struct user, u_debugreg[0]);
-		  addr = addr >> 2;
-		  if((addr == 4) || (addr == 5)) return -EIO;
-		  child->thread.arch.debugregs[addr] = data;
-		  return 0;
-	}
-	return -EIO;
-}
-
-unsigned long getreg(struct task_struct *child, unsigned long regno)
-{
-	unsigned long retval = ~0UL;
-
-	retval &= child->thread.process_regs.regs[regno >> 2];
-	return retval;
-}
-
-int peek_user(struct task_struct *child, long addr, long data)
-{
-	/* read the word at location addr in the USER area. */
-	unsigned long tmp;
-
-	if ((addr & 3) || addr < 0)
-		return -EIO;
-
-	tmp = 0;  /* Default return condition */
-	if(addr < MAX_REG_OFFSET){
-		tmp = getreg(child, addr);
-	}
-	else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
-		addr -= offsetof(struct user, u_debugreg[0]);
-		addr = addr >> 2;
-		tmp = child->thread.arch.debugregs[addr];
-	}
-	return put_user(tmp, (unsigned long *) data);
-}
-
diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c
deleted file mode 100644
index 4601b9296aa7..000000000000
--- a/arch/um/sys-ppc/ptrace_user.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <errno.h>
-#include <asm/ptrace.h>
-#include <sysdep/ptrace.h>
-
-int ptrace_getregs(long pid, unsigned long *regs_out)
-{
-    int i;
-    for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) {
-	errno = 0;
-	regs_out->regs[i] = ptrace(PTRACE_PEEKUSR, pid, i*4, 0);
-	if (errno) {
-	    return -errno;
-	}
-    }
-    return 0;
-}
-
-int ptrace_setregs(long pid, unsigned long *regs_in)
-{
-    int i;
-    for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) {
-	if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) {
-	    if (ptrace(PTRACE_POKEUSR, pid, i*4, regs_in->regs[i]) < 0) {
-		return -errno;
-	    }
-	}
-    }
-    return 0;
-}
diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h
deleted file mode 100644
index efe0c1a3ea9c..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/ptrace.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* 
- * Licensed under the GPL
- */
-
-#ifndef __SYS_PTRACE_PPC_H
-#define __SYS_PTRACE_PPC_H
-
-#include <linux/types.h>
-
-/* the following taken from <asm-ppc/ptrace.h> */
-
-#ifdef CONFIG_PPC64
-#define PPC_REG unsigned long /*long*/
-#else
-#define PPC_REG unsigned long
-#endif
-struct sys_pt_regs_s {
-	PPC_REG gpr[32];
-	PPC_REG nip;
-	PPC_REG msr;
-	PPC_REG orig_gpr3;	/* Used for restarting system calls */
-	PPC_REG ctr;
-	PPC_REG link;
-	PPC_REG xer;
-	PPC_REG ccr;
-	PPC_REG mq;		/* 601 only (not used at present) */
-				/* Used on APUS to hold IPL value. */
-	PPC_REG trap;		/* Reason for being here */
-	PPC_REG dar;		/* Fault registers */
-	PPC_REG dsisr;
-	PPC_REG result; 	/* Result of a system call */
-};
-
-#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG))
-
-struct sys_pt_regs {
-    PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)];
-};
-
-#define UM_MAX_REG (PT_FPR0)
-#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG))
-
-#define EMPTY_REGS { { [ 0 ... NUM_REGS - 1] = 0 } }
-
-#define UM_REG(r, n) ((r)->regs[n])
-
-#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3)
-#define UM_SP(r) UM_REG(r, PT_R1)
-#define UM_IP(r) UM_REG(r, PT_NIP)
-#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR)
-#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0)
-#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3)
-#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4)
-#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5)
-#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6)
-#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7)
-#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8)
-
-#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG))
-#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG))
-#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG))
-#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG))
-#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG))
-#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG))
-
-#define UM_SET_SYSCALL_RETURN(_regs, result)	        \
-do {                                                    \
-        if (result < 0) {				\
-		(_regs)->regs[PT_CCR] |= 0x10000000;	\
-		UM_SYSCALL_RET((_regs)) = -result;	\
-        } else {					\
-		UM_SYSCALL_RET((_regs)) = result;	\
-        }                                               \
-} while(0)
-
-extern void shove_aux_table(unsigned long sp);
-#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp);
-
-/* These aren't actually defined.  The undefs are just to make sure
- * everyone's clear on the concept.
- */
-#undef UML_HAVE_GETREGS
-#undef UML_HAVE_GETFPREGS
-#undef UML_HAVE_SETREGS
-#undef UML_HAVE_SETFPREGS
-
-#endif
-
diff --git a/arch/um/sys-ppc/shared/sysdep/sigcontext.h b/arch/um/sys-ppc/shared/sysdep/sigcontext.h
deleted file mode 100644
index b7286f0a1e00..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/sigcontext.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYS_SIGCONTEXT_PPC_H
-#define __SYS_SIGCONTEXT_PPC_H
-
-#define DSISR_WRITE 0x02000000
-
-#define SC_FAULT_ADDR(sc) ({ \
-		struct sigcontext *_sc = (sc); \
-		long retval = -1; \
-		switch (_sc->regs->trap) { \
-		case 0x300: \
-			/* data exception */ \
-			retval = _sc->regs->dar; \
-			break; \
-		case 0x400: \
-			/* instruction exception */ \
-			retval = _sc->regs->nip; \
-			break; \
-		default: \
-			panic("SC_FAULT_ADDR: unhandled trap type\n"); \
-		} \
-		retval; \
-	})
-
-#define SC_FAULT_WRITE(sc) ({ \
-		struct sigcontext *_sc = (sc); \
-		long retval = -1; \
-		switch (_sc->regs->trap) { \
-		case 0x300: \
-			/* data exception */ \
-			retval = !!(_sc->regs->dsisr & DSISR_WRITE); \
-			break; \
-		case 0x400: \
-			/* instruction exception: not a write */ \
-			retval = 0; \
-			break; \
-		default: \
-			panic("SC_FAULT_ADDR: unhandled trap type\n"); \
-		} \
-		retval; \
-	})
-
-#define SC_IP(sc) ((sc)->regs->nip)
-#define SC_SP(sc) ((sc)->regs->gpr[1])
-#define SEGV_IS_FIXABLE(sc) (1)
-
-#endif
-
diff --git a/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h b/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h
deleted file mode 100644
index d9fbbac10de0..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSDEP_PPC_SKAS_PTRACE_H
-#define __SYSDEP_PPC_SKAS_PTRACE_H
-
-struct ptrace_faultinfo {
-        int is_write;
-        unsigned long addr;
-};
-
-struct ptrace_ldt {
-        int func;
-        void *ptr;
-        unsigned long bytecount;
-};
-
-#define PTRACE_LDT 54
-
-#endif
diff --git a/arch/um/sys-ppc/shared/sysdep/syscalls.h b/arch/um/sys-ppc/shared/sysdep/syscalls.h
deleted file mode 100644
index 1ff81552251c..000000000000
--- a/arch/um/sys-ppc/shared/sysdep/syscalls.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2,
-			       unsigned long arg3, unsigned long arg4,
-			       unsigned long arg5, unsigned long arg6);
-
-#define EXECUTE_SYSCALL(syscall, regs) \
-        (*sys_call_table[syscall])(UM_SYSCALL_ARG1(&regs), \
-			           UM_SYSCALL_ARG2(&regs), \
-				   UM_SYSCALL_ARG3(&regs), \
-				   UM_SYSCALL_ARG4(&regs), \
-				   UM_SYSCALL_ARG5(&regs), \
-				   UM_SYSCALL_ARG6(&regs))
-
-extern syscall_handler_t sys_mincore;
-extern syscall_handler_t sys_madvise;
-
-/* old_mmap needs the correct prototype since syscall_kern.c includes
- * this file.
- */
-int old_mmap(unsigned long addr, unsigned long len,
-	     unsigned long prot, unsigned long flags,
-	     unsigned long fd, unsigned long offset);
-
-#define ARCH_SYSCALLS \
-	[ __NR_modify_ldt ] = sys_ni_syscall, \
-	[ __NR_pciconfig_read ] = sys_ni_syscall, \
-	[ __NR_pciconfig_write ] = sys_ni_syscall, \
-	[ __NR_pciconfig_iobase ] = sys_ni_syscall, \
-	[ __NR_pivot_root ] = sys_ni_syscall, \
-	[ __NR_multiplexer ] = sys_ni_syscall, \
-	[ __NR_mmap ] = old_mmap, \
-	[ __NR_madvise ] = sys_madvise, \
-	[ __NR_mincore ] = sys_mincore, \
-	[ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \
-	[ __NR_utimes ] = (syscall_handler_t *) sys_utimes, \
-	[ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64,
-
-#define LAST_ARCH_SYSCALL __NR_fadvise64
-
diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c
deleted file mode 100644
index aac6c83fe44e..000000000000
--- a/arch/um/sys-ppc/sigcontext.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "asm/ptrace.h"
-#include "asm/sigcontext.h"
-#include <sysdep/ptrace.h>
-
diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c
deleted file mode 100644
index 1ff1ad7f27da..000000000000
--- a/arch/um/sys-ppc/sysrq.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* 
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Licensed under the GPL
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include "asm/ptrace.h"
-#include "sysrq.h"
-
-void show_regs(struct pt_regs_subarch *regs)
-{
-	printk("\n");
-	show_regs_print_info(KERN_DEFAULT);
-
-	printk("show_regs(): insert regs here.\n");
-#if 0
-        printk("\n");
-        printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip,
-	       smp_processor_id());
-        if (regs->xcs & 3)
-                printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp);
-        printk(" EFLAGS: %08lx\n", regs->eflags);
-        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-                regs->eax, regs->ebx, regs->ecx, regs->edx);
-        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-                regs->esi, regs->edi, regs->ebp);
-        printk(" DS: %04x ES: %04x\n",
-                0xffff & regs->xds, 0xffff & regs->xes);
-#endif
-
-        show_trace(current, &regs->gpr[1]);
-}