diff options
Diffstat (limited to 'arch/um/drivers')
76 files changed, 6074 insertions, 4744 deletions
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 2b1aaf7755aa..6a0354ca032f 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -11,58 +11,65 @@ config STDERR_CONSOLE config SSL bool "Virtual serial line" help - The User-Mode Linux environment allows you to create virtual serial - lines on the UML that are usually made to show up on the host as - ttys or ptys. + The User-Mode Linux environment allows you to create virtual serial + lines on the UML that are usually made to show up on the host as + ttys or ptys. - See <http://user-mode-linux.sourceforge.net/old/input.html> for more - information and command line examples of how to use this facility. + See <http://user-mode-linux.sourceforge.net/old/input.html> for more + information and command line examples of how to use this facility. - Unless you have a specific reason for disabling this, say Y. + Unless you have a specific reason for disabling this, say Y. config NULL_CHAN bool "null channel support" help - This option enables support for attaching UML consoles and serial - lines to a device similar to /dev/null. Data written to it disappears - and there is never any data to be read. + This option enables support for attaching UML consoles and serial + lines to a device similar to /dev/null. Data written to it disappears + and there is never any data to be read. config PORT_CHAN bool "port channel support" help - This option enables support for attaching UML consoles and serial - lines to host portals. They may be accessed with 'telnet <host> - <port number>'. Any number of consoles and serial lines may be - attached to a single portal, although what UML device you get when - you telnet to that portal will be unpredictable. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet <host> + <port number>'. 
Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. config PTY_CHAN bool "pty channel support" help - This option enables support for attaching UML consoles and serial - lines to host pseudo-terminals. Access to both traditional - pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled - with this option. The assignment of UML devices to host devices - will be announced in the kernel message log. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. config TTY_CHAN bool "tty channel support" help - This option enables support for attaching UML consoles and serial - lines to host terminals. Access to both virtual consoles - (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and - /dev/pts/*) are controlled by this option. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host terminals. Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. config XTERM_CHAN bool "xterm channel support" help - This option enables support for attaching UML consoles and serial - lines to xterms. Each UML device so assigned will be brought up in - its own xterm. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + It is safe to say 'Y' here. 
+ +config XTERM_CHAN_DEFAULT_EMULATOR + string "xterm channel default terminal emulator" + depends on XTERM_CHAN + default "xterm" + help + This option allows changing the default terminal emulator. config NOCONFIG_CHAN bool @@ -72,266 +79,112 @@ config CON_ZERO_CHAN string "Default main console channel initialization" default "fd:0,fd:1" help - This is the string describing the channel to which the main console - will be attached by default. This value can be overridden from the - command line. The default value is "fd:0,fd:1", which attaches the - main console to stdin and stdout. - It is safe to leave this unchanged. + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. config CON_CHAN string "Default console channel initialization" default "xterm" help - This is the string describing the channel to which all consoles - except the main console will be attached by default. This value can - be overridden from the command line. The default value is "xterm", - which brings them up in xterms. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have X or xterm available. + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. 
config SSL_CHAN string "Default serial line channel initialization" default "pty" help - This is the string describing the channel to which the serial lines - will be attached by default. This value can be overridden from the - command line. The default value is "pty", which attaches them to - traditional pseudo-terminals. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have a set of /dev/pty* devices. + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. config UML_SOUND tristate "Sound support" + depends on SOUND + select SOUND_OSS_CORE help - This option enables UML sound support. If enabled, it will pull in - soundcore and the UML hostaudio relay, which acts as a intermediary - between the host's dsp and mixer devices and the UML sound system. - It is safe to say 'Y' here. - -config SOUND - tristate - default UML_SOUND - -config SOUND_OSS_CORE - bool - default UML_SOUND - -config HOSTAUDIO - tristate - default UML_SOUND + This option enables UML sound support. If enabled, it will pull in + the UML hostaudio relay, which acts as an intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. 
endmenu menu "UML Network Devices" depends on NET -# UML virtual driver -config UML_NET - bool "Virtual network device" - help - While the User-Mode port cannot directly talk to any physical - hardware devices, this choice and the following transport options - provide one or more virtual network devices through which the UML - kernels can talk to each other, the host, and with the host's help, - machines on the outside world. - - For more information, including explanations of the networking and - sample configurations, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. - - If you'd like to be able to enable networking in the User-Mode - linux environment, say Y; otherwise say N. Note that you must - enable at least one of the following transport options to actually - make use of UML networking. - -config UML_NET_ETHERTAP - bool "Ethertap transport" - depends on UML_NET - help - The Ethertap User-Mode Linux network transport allows a single - running UML to exchange packets with its host over one of the - host's Ethertap devices, such as /dev/tap0. Additional running - UMLs can use additional Ethertap devices, one per running UML. - While the UML believes it's on a (multi-device, broadcast) virtual - Ethernet network, it's in fact communicating over a point-to-point - link with the host. - - To use this, your host kernel must have support for Ethertap - devices. Also, if your host kernel is 2.4.x, it must have - CONFIG_NETLINK_DEV configured as Y or M. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Ethertap - networking. - - If you'd like to set up an IP network with the host and/or the - outside world, say Y to this, the Daemon Transport and/or the - Slip Transport. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. 
- -config UML_NET_TUNTAP - bool "TUN/TAP transport" - depends on UML_NET - help - The UML TUN/TAP network transport allows a UML instance to exchange - packets with the host over a TUN/TAP device. This option will only - work with a 2.4 host, unless you've applied the TUN/TAP patch to - your 2.2 host kernel. - - To use this transport, your host kernel must have support for TUN/TAP - devices, either built-in or as a module. - -config UML_NET_SLIP - bool "SLIP transport" - depends on UML_NET - help - The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), - the slip transport can only carry IP packets. - - To use this, your host must support slip devices. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. - has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - - The Ethertap Transport is preferred over slip because of its - limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. - -config UML_NET_DAEMON - bool "Daemon transport" - depends on UML_NET - help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other, but not to - the host. - - To use this form of networking, you'll need to run the UML - networking daemon on the host. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Daemon - networking. 
- - If you'd like to set up a network with other UMLs on a single host, - say Y. If you need a network between UMLs on multiple physical - hosts, choose the Multicast Transport. To set up a network with - the host and/or other IP machines, say Y to the Ethertap or Slip - transports. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. - config UML_NET_VECTOR bool "Vector I/O high performance network devices" - depends on UML_NET - help - This User-Mode Linux network driver uses multi-message send - and receive functions. The host running the UML guest must have - a linux kernel version above 3.0 and a libc version > 2.13. - This driver provides tap, raw, gre and l2tpv3 network transports - with up to 4 times higher network throughput than the UML network - drivers. - -config UML_NET_VDE - bool "VDE transport" - depends on UML_NET - help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other and also - with the rest of the world using Virtual Distributed Ethernet, - an improved fork of uml_switch. - - You must have libvdeplug installed in order to build the vde - transport into UML. - - To use this form of networking, you will need to run vde_switch - on the host. - - For more information, see <http://wiki.virtualsquare.org/> - That site has a good overview of what VDE is and also examples - of the UML command line to use to enable VDE networking. - - If you need UML networking with VDE, - say Y. - -config UML_NET_MCAST - bool "Multicast transport" - depends on UML_NET + select MAY_HAVE_RUNTIME_DEPS help - This Multicast User-Mode Linux network transport allows multiple - UMLs (even ones running on different host machines!) to talk to - each other over a virtual ethernet network. 
However, it requires - at least one UML with one of the other transports to act as a - bridge if any of them need to be able to talk to their hosts or any - other IP machines. + This User-Mode Linux network driver uses multi-message send + and receive functions. The host running the UML guest must have + a linux kernel version above 3.0 and a libc version > 2.13. + This driver provides tap, raw, gre and l2tpv3 network transports. - To use this, your host kernel(s) must support IP Multicasting. + For more information, including explanations of the networking + and sample configurations, see + <file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>. - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Multicast - networking, and notes about the security of this approach. - - If you need UMLs on multiple physical hosts to communicate as if - they shared an Ethernet network, say Y. If you need to communicate - with other IP machines, make sure you select one of the other - transports (possibly in addition to Multicast; they're not - exclusive). If you don't need to network UMLs say N to each of - the transports. +endmenu -config UML_NET_PCAP - bool "pcap transport" - depends on UML_NET +config VIRTIO_UML + bool "UML driver for virtio devices" + select VIRTIO help - The pcap transport makes a pcap packet stream on the host look - like an ethernet device inside UML. This is useful for making - UML act as a network monitor for the host. You must have libcap - installed in order to build the pcap transport into UML. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable this option. + This driver provides support for virtio based paravirtual device + drivers over vhost-user sockets. - If you intend to use UML as a network monitor for the host, say - Y here. 
Otherwise, say N. - -config UML_NET_SLIRP - bool "SLiRP transport" - depends on UML_NET +config UML_RTC + bool "UML RTC driver" + depends on RTC_CLASS + # there's no use in this if PM_SLEEP isn't enabled ... + depends on PM_SLEEP help - The SLiRP User-Mode Linux network transport allows a running UML - to network by invoking a program that can handle SLIP encapsulated - packets. This is commonly (but not limited to) the application - known as SLiRP, a program that can re-socket IP packets back onto - the host on which it is run. Only IP packets are supported, - unlike other network transports that can handle all Ethernet - frames. In general, slirp allows the UML the same IP connectivity - to the outside world that the host user is permitted, and unlike - other transports, SLiRP works without the need of root level - privleges, setuid binaries, or SLIP devices on the host. This - also means not every type of connection is possible, but most - situations can be accommodated with carefully crafted slirp - commands that can be passed along as part of the network device's - setup string. The effect of this transport on the UML is similar - that of a host behind a firewall that masquerades all network - connections passing through it (but is less secure). - - To use this you should first have slirp compiled somewhere - accessible on the host, and have read its documentation. If you - don't need UML networking, say N. - - Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + When PM_SLEEP is configured, it may be desirable to wake up using + rtcwake, especially in time-travel mode. This driver enables that + by providing a fake RTC clock that causes a wakeup at the right + time. 
-endmenu +config UML_PCI + bool + select FORCE_PCI + select IRQ_MSI_LIB + select UML_IOMEM_EMULATION + select UML_DMA_EMULATION + select PCI_MSI + select PCI_LOCKLESS_CONFIG + +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select UML_PCI + +config UML_PCI_OVER_VIRTIO_DEVICE_ID + int "set the virtio device ID for PCI emulation" + default -1 + depends on UML_PCI_OVER_VIRTIO + help + There's no official device ID assigned (yet), set the one you + wish to use for experimentation here. The default of -1 is + not valid and will cause the driver to fail at probe. + +config UML_PCI_OVER_VFIO + bool "Enable VFIO-based PCI passthrough" + select UML_PCI + help + This driver provides support for VFIO-based PCI passthrough. + Currently, only MSI-X capable devices are supported, and it + is assumed that drivers will use MSI-X. diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 693319839f69..36dc57840084 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -1,34 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 # # Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) -# Licensed under the GPL # # pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked # in to pcap.o -slip-objs := slip_kern.o slip_user.o -slirp-objs := slirp_kern.o slirp_user.o -daemon-objs := daemon_kern.o daemon_user.o vector-objs := vector_kern.o vector_user.o vector_transports.o -umcast-objs := umcast_kern.o umcast_user.o -net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o -harddog-objs := harddog_kern.o harddog_user.o - -LDFLAGS_pcap.o := -r $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) - -LDFLAGS_vde.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) - -targets := 
pcap_kern.o pcap_user.o vde_kern.o vde_user.o - -$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o - $(LD) -r -dp -o $@ $^ $(ld_flags) - -$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o - $(LD) -r -dp -o $@ $^ $(ld_flags) +harddog-objs := harddog_kern.o +harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o +rtc-objs := rtc_kern.o rtc_user.o +vfio_uml-objs := vfio_kern.o vfio_user.o #XXX: The call below does not work because the flags are added before the # object name, so nothing from the library gets linked. @@ -41,29 +27,29 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o obj-$(CONFIG_SSL) += ssl.o obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o -obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o -obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o -obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o -obj-$(CONFIG_UML_NET_VDE) += vde.o -obj-$(CONFIG_UML_NET_MCAST) += umcast.o -obj-$(CONFIG_UML_NET_PCAP) += pcap.o -obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_MCONSOLE) += mconsole.o -obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_BLK_DEV_UBD) += ubd.o -obj-$(CONFIG_HOSTAUDIO) += hostaudio.o +obj-$(CONFIG_UML_SOUND) += hostaudio.o obj-$(CONFIG_NULL_CHAN) += null.o obj-$(CONFIG_PORT_CHAN) += port.o obj-$(CONFIG_PTY_CHAN) += pty.o obj-$(CONFIG_TTY_CHAN) += tty.o obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-y += $(harddog-builtin-y) $(harddog-builtin-m) obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o +obj-$(CONFIG_UML_RTC) += rtc.o +obj-$(CONFIG_UML_PCI) += virt-pci.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o +obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o # pcap_user.o must be added explicitly. 
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) -include arch/um/scripts/Makefile.rules +CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' + +include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h index c512b0306dd4..5a61db512ffb 100644 --- a/arch/um/drivers/chan.h +++ b/arch/um/drivers/chan.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __CHAN_KERN_H__ @@ -22,7 +22,8 @@ struct chan { unsigned int output:1; unsigned int opened:1; unsigned int enabled:1; - int fd; + int fd_in; + int fd_out; /* only different to fd_in if blocking output is needed */ const struct chan_ops *ops; void *data; }; @@ -30,13 +31,12 @@ struct chan { extern void chan_interrupt(struct line *line, int irq); extern int parse_chan_pair(char *str, struct line *line, int device, const struct chan_opts *opts, char **error_out); -extern int write_chan(struct chan *chan, const char *buf, int len, +extern int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq); extern int console_write_chan(struct chan *chan, const char *buf, int len); extern int console_open_chan(struct line *line, struct console *co); extern void deactivate_chan(struct chan *chan, int irq); -extern void reactivate_chan(struct chan *chan, int irq); extern void chan_enable_winch(struct chan *chan, struct tty_port *port); extern int enable_chan(struct line *line); extern void close_chan(struct line *line); diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index a4e64edb8f38..26442db7d608 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000 - 
2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <linux/slab.h> @@ -33,14 +33,14 @@ static void not_configged_close(int fd, void *data) "UML\n"); } -static int not_configged_read(int fd, char *c_out, void *data) +static int not_configged_read(int fd, u8 *c_out, void *data) { printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return -EIO; } -static int not_configged_write(int fd, const char *buf, int len, void *data) +static int not_configged_write(int fd, const u8 *buf, size_t len, void *data) { printk(KERN_ERR "Using a channel type which is configured out of " "UML\n"); @@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = { }; #endif /* CONFIG_NOCONFIG_CHAN */ +static inline bool need_output_blocking(void) +{ + return time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL; +} + static int open_one_chan(struct chan *chan) { int fd, err; @@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan) return fd; err = os_set_fd_block(fd, 0); - if (err) { - (*chan->ops->close)(fd, chan->data); - return err; - } + if (err) + goto out_close; + + chan->fd_in = fd; + chan->fd_out = fd; + + /* + * In time-travel modes infinite-CPU and external we need to guarantee + * that any writes to the output succeed immediately from the point of + * the VM. The best way to do this is to put the FD in blocking mode + * and simply wait/retry until everything is written. + * As every write is guaranteed to complete, we also do not need to + * request an IRQ for the output. + * + * Note that input cannot happen in a time synchronized way. We permit + * it, but time passes very quickly if anything waits for a read. 
+ */ + if (chan->output && need_output_blocking()) { + err = os_dup_file(chan->fd_out); + if (err < 0) + goto out_close; - chan->fd = fd; + chan->fd_out = err; + + err = os_set_fd_block(chan->fd_out, 1); + if (err) { + os_close_file(chan->fd_out); + goto out_close; + } + } chan->opened = 1; return 0; + +out_close: + (*chan->ops->close)(fd, chan->data); + return err; } static int open_chan(struct list_head *chans) @@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans) void chan_enable_winch(struct chan *chan, struct tty_port *port) { if (chan && chan->primary && chan->ops->winch) - register_winch(chan->fd, port); + register_winch(chan->fd_in, port); } static void line_timer_cb(struct work_struct *work) @@ -133,7 +167,7 @@ static void line_timer_cb(struct work_struct *work) struct line *line = container_of(work, struct line, task.work); if (!line->throttled) - chan_interrupt(line, line->driver->read_irq); + chan_interrupt(line, line->read_irq); } int enable_chan(struct line *line) @@ -156,8 +190,9 @@ int enable_chan(struct line *line) if (chan->enabled) continue; - err = line_setup_irq(chan->fd, chan->input, chan->output, line, - chan); + err = line_setup_irq(chan->fd_in, chan->input, + chan->output && !need_output_blocking(), + line, chan); if (err) goto out_close; @@ -171,24 +206,65 @@ int enable_chan(struct line *line) return err; } +/* Items are added in IRQ context, when free_irq can't be called, and + * removed in process context, when it can. + * This handles interrupt sources which disappear, and which need to + * be permanently disabled. This is discovered in IRQ context, but + * the freeing of the IRQ must be done later. 
+ */ +static DEFINE_RAW_SPINLOCK(irqs_to_free_lock); +static LIST_HEAD(irqs_to_free); + +void free_irqs(void) +{ + struct chan *chan; + LIST_HEAD(list); + struct list_head *ele; + unsigned long flags; + + raw_spin_lock_irqsave(&irqs_to_free_lock, flags); + list_splice_init(&irqs_to_free, &list); + raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags); + + list_for_each(ele, &list) { + chan = list_entry(ele, struct chan, free_list); + + if (chan->input && chan->enabled) + um_free_irq(chan->line->read_irq, chan); + if (chan->output && chan->enabled && + !need_output_blocking()) + um_free_irq(chan->line->write_irq, chan); + chan->enabled = 0; + } +} + static void close_one_chan(struct chan *chan, int delay_free_irq) { + unsigned long flags; + if (!chan->opened) return; - /* we can safely call free now - it will be marked - * as free and freed once the IRQ stopped processing - */ - if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); - if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); - chan->enabled = 0; + if (delay_free_irq) { + raw_spin_lock_irqsave(&irqs_to_free_lock, flags); + list_add(&chan->free_list, &irqs_to_free); + raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags); + } else { + if (chan->input && chan->enabled) + um_free_irq(chan->line->read_irq, chan); + if (chan->output && chan->enabled && + !need_output_blocking()) + um_free_irq(chan->line->write_irq, chan); + chan->enabled = 0; + } + if (chan->fd_out != chan->fd_in) + os_close_file(chan->fd_out); if (chan->ops->close != NULL) - (*chan->ops->close)(chan->fd, chan->data); + (*chan->ops->close)(chan->fd_in, chan->data); chan->opened = 0; - chan->fd = -1; + chan->fd_in = -1; + chan->fd_out = -1; } void close_chan(struct line *line) @@ -208,18 +284,17 @@ void close_chan(struct line *line) void deactivate_chan(struct chan *chan, int irq) { if (chan && chan->enabled) - deactivate_fd(chan->fd, irq); + deactivate_fd(chan->fd_in, irq); } -int 
write_chan(struct chan *chan, const char *buf, int len, - int write_irq) +int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) { int n, ret = 0; if (len == 0 || !chan || !chan->ops->write) return 0; - n = chan->ops->write(chan->fd, buf, len, chan->data); + n = chan->ops->write(chan->fd_out, buf, len, chan->data); if (chan->primary) { ret = n; } @@ -233,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len) if (!chan || !chan->ops->console_write) return 0; - n = chan->ops->console_write(chan->fd, buf, len); + n = chan->ops->console_write(chan->fd_out, buf, len); if (chan->primary) ret = n; return ret; @@ -261,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out, if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } chan = line->chan_out; if (chan && chan->primary) { if (chan->ops->window_size == NULL) return 0; - return chan->ops->window_size(chan->fd, chan->data, + return chan->ops->window_size(chan->fd_in, chan->data, rows_out, cols_out); } return 0; @@ -284,7 +359,7 @@ static void free_one_chan(struct chan *chan) (*chan->ops->free)(chan->data); if (chan->primary && chan->output) - ignore_sigio_fd(chan->fd); + ignore_sigio_fd(chan->fd_in); kfree(chan); } @@ -443,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device, .output = 0, .opened = 0, .enabled = 0, - .fd = -1, + .fd_in = -1, + .fd_out = -1, .ops = ops, .data = data }); return chan; @@ -504,7 +580,7 @@ void chan_interrupt(struct line *line, int irq) struct tty_port *port = &line->port; struct chan *chan = line->chan_in; int err; - char c; + u8 c; if (!chan || !chan->ops->read) goto out; @@ -514,7 +590,7 @@ void chan_interrupt(struct line *line, int irq) schedule_delayed_work(&line->task, 1); goto out; } - err = chan->ops->read(chan->fd, &c, chan->data); 
+ err = chan->ops->read(chan->fd_in, &c, chan->data); if (err > 0) tty_insert_flip_char(port, c, TTY_NORMAL); } while (err > 0); diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 3fd7c3efdb18..35f9beeb19b3 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <stdlib.h> @@ -19,29 +19,41 @@ void generic_close(int fd, void *unused) close(fd); } -int generic_read(int fd, char *c_out, void *unused) +int generic_read(int fd, __u8 *c_out, void *unused) { int n; - n = read(fd, c_out, sizeof(*c_out)); + CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out))); if (n > 0) return n; - else if (errno == EAGAIN) - return 0; else if (n == 0) return -EIO; + else if (errno == EAGAIN) + return 0; return -errno; } /* XXX Trivial wrapper around write */ -int generic_write(int fd, const char *buf, int n, void *unused) +int generic_write(int fd, const __u8 *buf, size_t n, void *unused) { + int written = 0; int err; - err = write(fd, buf, n); - if (err > 0) - return err; + /* The FD may be in blocking mode, as such, need to retry short writes, + * they may have been interrupted by a signal. 
+ */ + do { + errno = 0; + err = write(fd, buf + written, n - written); + if (err > 0) { + written += err; + continue; + } + } while (err < 0 && errno == EINTR); + + if (written > 0) + return written; else if (errno == EAGAIN) return 0; else if (err == 0) @@ -141,7 +153,7 @@ struct winch_data { int pipe_fd; }; -static int winch_thread(void *arg) +static __noreturn int winch_thread(void *arg) { struct winch_data *data = arg; sigset_t sigs; @@ -149,12 +161,14 @@ static int winch_thread(void *arg) int count; char c = 1; + os_set_pdeathsig(); + pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; count = write(pipe_fd, &c, sizeof(c)); if (count != sizeof(c)) - printk(UM_KERN_ERR "winch_thread : failed to write " - "synchronization byte, err = %d\n", -count); + os_info("winch_thread : failed to write synchronization byte, err = %d\n", + -count); /* * We are not using SIG_IGN on purpose, so don't fix it as I thought to @@ -166,29 +180,29 @@ static int winch_thread(void *arg) sigfillset(&sigs); /* Block all signals possible. */ if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) { - printk(UM_KERN_ERR "winch_thread : sigprocmask failed, " - "errno = %d\n", errno); - exit(1); + os_info("winch_thread : sigprocmask failed, errno = %d\n", + errno); + goto wait_kill; } /* In sigsuspend(), block anything else than SIGWINCH. 
*/ sigdelset(&sigs, SIGWINCH); if (setsid() < 0) { - printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n", + os_info("winch_thread : setsid failed, errno = %d\n", errno); - exit(1); + goto wait_kill; } if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) { - printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on " - "fd %d err = %d\n", pty_fd, errno); - exit(1); + os_info("winch_thread : TIOCSCTTY failed on " + "fd %d err = %d\n", pty_fd, errno); + goto wait_kill; } if (tcsetpgrp(pty_fd, os_getpid()) < 0) { - printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on " - "fd %d err = %d\n", pty_fd, errno); - exit(1); + os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n", + pty_fd, errno); + goto wait_kill; } /* @@ -199,8 +213,8 @@ static int winch_thread(void *arg) */ count = read(pipe_fd, &c, sizeof(c)); if (count != sizeof(c)) - printk(UM_KERN_ERR "winch_thread : failed to read " - "synchronization byte, err = %d\n", errno); + os_info("winch_thread : failed to read synchronization byte, err = %d\n", + errno); while(1) { /* @@ -211,16 +225,22 @@ static int winch_thread(void *arg) count = write(pipe_fd, &c, sizeof(c)); if (count != sizeof(c)) - printk(UM_KERN_ERR "winch_thread : write failed, " - "err = %d\n", errno); + os_info("winch_thread : write failed, err = %d\n", + errno); } + +wait_kill: + c = 2; + count = write(pipe_fd, &c, sizeof(c)); + while (1) + pause(); } static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; - int fds[2], n, err; + int fds[2], n, err, pid; char c; err = os_pipe(fds, 1, 1); @@ -238,8 +258,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, * problem with /dev/net/tun, which if held open by this * thread, prevents the TUN/TAP device from being reused. 
*/ - err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); - if (err < 0) { + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", -err); goto out_close; @@ -256,13 +277,14 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - if (os_set_fd_block(*fd_out, 0)) { + err = os_set_fd_block(*fd_out, 0); + if (err) { printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd " "non-blocking.\n"); goto out_close; } - return err; + return pid; out_close: close(fds[1]); diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h index 03f1b565c5f9..e158e16fb3cc 100644 --- a/arch/um/drivers/chan_user.h +++ b/arch/um/drivers/chan_user.h @@ -1,17 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __CHAN_USER_H__ #define __CHAN_USER_H__ #include <init.h> +#include <linux/types.h> struct chan_opts { void (*const announce)(char *dev_name, int dev); char *xterm_title; - const int raw; + int raw; }; struct chan_ops { @@ -19,8 +20,8 @@ struct chan_ops { void *(*init)(char *, int, const struct chan_opts *); int (*open)(int, int, int, void *, char **); void (*close)(int, void *); - int (*read)(int, char *, void *); - int (*write)(int, const char *, int, void *); + int (*read)(int, __u8 *, void *); + int (*write)(int, const __u8 *, size_t, void *); int (*console_write)(int, const char *, int); int (*window_size)(int, void *, unsigned short *, unsigned short *); void (*free)(void *); @@ -31,8 +32,8 @@ extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, tty_ops, xterm_ops; extern void generic_close(int fd, void *unused); -extern int generic_read(int fd, char *c_out, void *unused); -extern int generic_write(int fd, const char *buf, int n, void *unused); +extern int generic_read(int fd, __u8 *c_out, void 
*unused); +extern int generic_write(int fd, const __u8 *buf, size_t n, void *unused); extern int generic_console_write(int fd, const char *buf, int n); extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, unsigned short *cols_out); diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h index 760c507dd5b6..9a67c017000f 100644 --- a/arch/um/drivers/cow.h +++ b/arch/um/drivers/cow.h @@ -11,7 +11,7 @@ extern int init_cow_file(int fd, char *cow_file, char *backing_file, extern int file_reader(__u64 offset, char *buf, int len, void *arg); extern int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, __u32 *version_out, - char **backing_file_out, time_t *mtime_out, + char **backing_file_out, long long *mtime_out, unsigned long long *size_out, int *sectorsize_out, __u32 *align_out, int *bitmap_offset_out); @@ -24,10 +24,3 @@ extern void cow_sizes(int version, __u64 size, int sectorsize, int align, int *data_offset_out); #endif - -/* - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 0ee9cc6cc4c7..29b46581ddd1 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ /* @@ -17,6 +17,7 @@ #define PATH_LEN_V1 256 +/* unsigned time_t works until year 2106 */ typedef __u32 time32_t; struct cow_header_v1 { @@ -197,7 +198,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, int sectorsize, int alignment, unsigned long long *size) { struct cow_header_v3 *header; - unsigned long modtime; + long long modtime; int err; err = cow_seek_file(fd, 0); @@ -276,7 +277,7 @@ int file_reader(__u64 offset, char *buf, int len, void *arg) int read_cow_header(int (*reader)(__u64, char *, int, void *), void 
*arg, __u32 *version_out, char **backing_file_out, - time_t *mtime_out, unsigned long long *size_out, + long long *mtime_out, unsigned long long *size_out, int *sectorsize_out, __u32 *align_out, int *bitmap_offset_out) { @@ -363,7 +364,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, /* * this was used until Dec2005 - 64bits are needed to represent - * 2038+. I.e. we can safely do this truncating cast. + * 2106+. I.e. we can safely do this truncating cast. * * Additionally, we must use be32toh() instead of be64toh(), since * the program used to use the former (tested - I got mtime diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h deleted file mode 100644 index c2dd1951559f..000000000000 --- a/arch/um/drivers/daemon.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#ifndef __DAEMON_H__ -#define __DAEMON_H__ - -#include <net_user.h> - -#define SWITCH_VERSION 3 - -struct daemon_data { - char *sock_type; - char *ctl_sock; - void *ctl_addr; - void *data_addr; - void *local_addr; - int fd; - int control; - void *dev; -}; - -extern const struct net_user_info daemon_user_info; - -extern int daemon_user_write(int fd, void *buf, int len, - struct daemon_data *pri); - -#endif diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c deleted file mode 100644 index 7568cc2f3cd6..000000000000 --- a/arch/um/drivers/daemon_kern.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by various other people who didn't put their name here. - * Licensed under the GPL. 
- */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "daemon.h" - -struct daemon_init { - char *sock_type; - char *ctl_sock; -}; - -static void daemon_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct daemon_data *dpri; - struct daemon_init *init = data; - - pri = netdev_priv(dev); - dpri = (struct daemon_data *) pri->user; - dpri->sock_type = init->sock_type; - dpri->ctl_sock = init->ctl_sock; - dpri->fd = -1; - dpri->control = -1; - dpri->dev = dev; - /* We will free this pointer. If it contains crap we're burned. */ - dpri->ctl_addr = NULL; - dpri->data_addr = NULL; - dpri->local_addr = NULL; - - printk("daemon backend (uml_switch version %d) - %s:%s", - SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); - printk("\n"); -} - -static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return daemon_user_write(fd, skb->data, skb->len, - (struct daemon_data *) &lp->user); -} - -static const struct net_kern_info daemon_kern_info = { - .init = daemon_init, - .protocol = eth_protocol, - .read = daemon_read, - .write = daemon_write, -}; - -static int daemon_setup(char *str, char **mac_out, void *data) -{ - struct daemon_init *init = data; - char *remain; - - *init = ((struct daemon_init) - { .sock_type = "unix", - .ctl_sock = "/tmp/uml.ctl" }); - - remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, - NULL); - if (remain != NULL) - printk(KERN_WARNING "daemon_setup : Ignoring data socket " - "specification\n"); - - return 1; -} - -static struct transport daemon_transport = { - .list = LIST_HEAD_INIT(daemon_transport.list), - .name = "daemon", - .setup = daemon_setup, - .user = &daemon_user_info, - .kern = &daemon_kern_info, - .private_size = sizeof(struct daemon_data), - 
.setup_size = sizeof(struct daemon_init), -}; - -static int register_daemon(void) -{ - register_transport(&daemon_transport); - return 0; -} - -late_initcall(register_daemon); diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c deleted file mode 100644 index 8813c10d0177..000000000000 --- a/arch/um/drivers/daemon_user.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * Licensed under the GPL. - */ - -#include <stdint.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/time.h> -#include <sys/un.h> -#include "daemon.h" -#include <net_user.h> -#include <os.h> -#include <um_malloc.h> - -enum request_type { REQ_NEW_CONTROL }; - -#define SWITCH_MAGIC 0xfeedface - -struct request_v3 { - uint32_t magic; - uint32_t version; - enum request_type type; - struct sockaddr_un sock; -}; - -static struct sockaddr_un *new_addr(void *name, int len) -{ - struct sockaddr_un *sun; - - sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); - if (sun == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " - "failed\n"); - return NULL; - } - sun->sun_family = AF_UNIX; - memcpy(sun->sun_path, name, len); - return sun; -} - -static int connect_to_switch(struct daemon_data *pri) -{ - struct sockaddr_un *ctl_addr = pri->ctl_addr; - struct sockaddr_un *local_addr = pri->local_addr; - struct sockaddr_un *sun; - struct request_v3 req; - int fd, n, err; - - pri->control = socket(AF_UNIX, SOCK_STREAM, 0); - if (pri->control < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : control socket failed, " - "errno = %d\n", -err); - return err; - } - - if (connect(pri->control, (struct sockaddr *) ctl_addr, - sizeof(*ctl_addr)) < 0) { - err = -errno; - 
printk(UM_KERN_ERR "daemon_open : control connect failed, " - "errno = %d\n", -err); - goto out; - } - - fd = socket(AF_UNIX, SOCK_DGRAM, 0); - if (fd < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : data socket failed, " - "errno = %d\n", -err); - goto out; - } - if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : data bind failed, " - "errno = %d\n", -err); - goto out_close; - } - - sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); - if (sun == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " - "failed\n"); - err = -ENOMEM; - goto out_close; - } - - req.magic = SWITCH_MAGIC; - req.version = SWITCH_VERSION; - req.type = REQ_NEW_CONTROL; - req.sock = *local_addr; - n = write(pri->control, &req, sizeof(req)); - if (n != sizeof(req)) { - printk(UM_KERN_ERR "daemon_open : control setup request " - "failed, err = %d\n", -errno); - err = -ENOTCONN; - goto out_free; - } - - n = read(pri->control, sun, sizeof(*sun)); - if (n != sizeof(*sun)) { - printk(UM_KERN_ERR "daemon_open : read of data socket failed, " - "err = %d\n", -errno); - err = -ENOTCONN; - goto out_free; - } - - pri->data_addr = sun; - return fd; - - out_free: - kfree(sun); - out_close: - close(fd); - out: - close(pri->control); - return err; -} - -static int daemon_user_init(void *data, void *dev) -{ - struct daemon_data *pri = data; - struct timeval tv; - struct { - char zero; - int pid; - int usecs; - } name; - - if (!strcmp(pri->sock_type, "unix")) - pri->ctl_addr = new_addr(pri->ctl_sock, - strlen(pri->ctl_sock) + 1); - name.zero = 0; - name.pid = os_getpid(); - gettimeofday(&tv, NULL); - name.usecs = tv.tv_usec; - pri->local_addr = new_addr(&name, sizeof(name)); - pri->dev = dev; - pri->fd = connect_to_switch(pri); - if (pri->fd < 0) { - kfree(pri->local_addr); - pri->local_addr = NULL; - return pri->fd; - } - - return 0; -} - -static int daemon_open(void *data) -{ - struct daemon_data *pri 
= data; - return pri->fd; -} - -static void daemon_remove(void *data) -{ - struct daemon_data *pri = data; - - close(pri->fd); - pri->fd = -1; - close(pri->control); - pri->control = -1; - - kfree(pri->data_addr); - pri->data_addr = NULL; - kfree(pri->ctl_addr); - pri->ctl_addr = NULL; - kfree(pri->local_addr); - pri->local_addr = NULL; -} - -int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -{ - struct sockaddr_un *data_addr = pri->data_addr; - - return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); -} - -const struct net_user_info daemon_user_info = { - .init = daemon_user_init, - .open = daemon_open, - .close = NULL, - .remove = daemon_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c index a13a427b996b..082d739dc052 100644 --- a/arch/um/drivers/fd.c +++ b/arch/um/drivers/fd.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <stdio.h> diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h new file mode 100644 index 000000000000..6d9ea60e7133 --- /dev/null +++ b/arch/um/drivers/harddog.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UM_WATCHDOG_H +#define UM_WATCHDOG_H + +int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); +void stop_watchdog(int in_fd, int out_fd); +int ping_watchdog(int fd); + +#endif /* UM_WATCHDOG_H */ diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 6d381279b362..819aabb4ecdc 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -47,7 +47,9 @@ #include <linux/spinlock.h> #include <linux/uaccess.h> #include "mconsole.h" +#include "harddog.h" +MODULE_DESCRIPTION("UML hardware watchdog"); MODULE_LICENSE("GPL"); static DEFINE_MUTEX(harddog_mutex); 
@@ -60,8 +62,6 @@ static int harddog_out_fd = -1; * Allow only one person to hold it open */ -extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); - static int harddog_open(struct inode *inode, struct file *file) { int err = -EBUSY; @@ -85,15 +85,13 @@ static int harddog_open(struct inode *inode, struct file *file) timer_alive = 1; spin_unlock(&lock); mutex_unlock(&harddog_mutex); - return nonseekable_open(inode, file); + return stream_open(inode, file); err: spin_unlock(&lock); mutex_unlock(&harddog_mutex); return err; } -extern void stop_watchdog(int in_fd, int out_fd); - static int harddog_release(struct inode *inode, struct file *file) { /* @@ -112,8 +110,6 @@ static int harddog_release(struct inode *inode, struct file *file) return 0; } -extern int ping_watchdog(int fd); - static ssize_t harddog_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { @@ -165,9 +161,9 @@ static const struct file_operations harddog_fops = { .owner = THIS_MODULE, .write = harddog_write, .unlocked_ioctl = harddog_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = harddog_open, .release = harddog_release, - .llseek = no_llseek, }; static struct miscdevice harddog_miscdev = { diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c index 3aa8b0d52a48..9ed89304975e 100644 --- a/arch/um/drivers/harddog_user.c +++ b/arch/um/drivers/harddog_user.c @@ -1,12 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <stdio.h> #include <unistd.h> #include <errno.h> #include <os.h> +#include "harddog.h" struct dog_data { int stdin_fd; diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c new file mode 100644 index 000000000000..c74d4b815d14 --- /dev/null +++ b/arch/um/drivers/harddog_user_exp.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include "harddog.h" + +#if 
IS_MODULE(CONFIG_UML_WATCHDOG) +EXPORT_SYMBOL(start_watchdog); +EXPORT_SYMBOL(stop_watchdog); +EXPORT_SYMBOL(ping_watchdog); +#endif diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 7f9dbdbc4eb7..0ac149de1ac0 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2002 Steve Schmidtke - * Licensed under the GPL */ #include <linux/fs.h> @@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); #ifndef MODULE static int set_dsp(char *name, int *add) { + *add = 0; dsp = name; return 0; } @@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); static int set_mixer(char *name, int *add) { + *add = 0; mixer = name; return 0; } @@ -122,13 +124,11 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer, static __poll_t hostaudio_poll(struct file *file, struct poll_table_struct *wait) { - __poll_t mask = 0; - #ifdef DEBUG printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n"); #endif - return mask; + return 0; } static long hostaudio_ioctl(struct file *file, @@ -293,11 +293,11 @@ static int hostmixer_release(struct inode *inode, struct file *file) static const struct file_operations hostaudio_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read = hostaudio_read, .write = hostaudio_write, .poll = hostaudio_poll, .unlocked_ioctl = hostaudio_ioctl, + .compat_ioctl = compat_ptr_ioctl, .mmap = NULL, .open = hostaudio_open, .release = hostaudio_release, @@ -305,13 +305,12 @@ static const struct file_operations hostaudio_fops = { static const struct file_operations hostmixer_fops = { .owner = THIS_MODULE, - .llseek = no_llseek, .unlocked_ioctl = hostmixer_ioctl_mixdev, .open = hostmixer_open_mixdev, .release = hostmixer_release, }; -struct { +static struct { int dev_audio; int dev_mixer; } module_data; diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 
e0e63931fb2b..43d8959cc746 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <linux/irqreturn.h> @@ -32,7 +32,7 @@ static irqreturn_t line_interrupt(int irq, void *data) * * Should be called while holding line->lock (this does not modify data). */ -static int write_room(struct line *line) +static unsigned int write_room(struct line *line) { int n; @@ -47,11 +47,11 @@ static int write_room(struct line *line) return n - 1; } -int line_write_room(struct tty_struct *tty) +unsigned int line_write_room(struct tty_struct *tty) { struct line *line = tty->driver_data; unsigned long flags; - int room; + unsigned int room; spin_lock_irqsave(&line->lock, flags); room = write_room(line); @@ -60,11 +60,11 @@ int line_write_room(struct tty_struct *tty) return room; } -int line_chars_in_buffer(struct tty_struct *tty) +unsigned int line_chars_in_buffer(struct tty_struct *tty) { struct line *line = tty->driver_data; unsigned long flags; - int ret; + unsigned int ret; spin_lock_irqsave(&line->lock, flags); /* write_room subtracts 1 for the needed NULL, so we readd it.*/ @@ -83,7 +83,7 @@ int line_chars_in_buffer(struct tty_struct *tty) * * Must be called while holding line->lock! 
*/ -static int buffer_data(struct line *line, const char *buf, int len) +static int buffer_data(struct line *line, const u8 *buf, size_t len) { int end, room; @@ -139,7 +139,7 @@ static int flush_buffer(struct line *line) count = line->buffer + LINE_BUFSIZE - line->head; n = write_chan(line->chan_out, line->head, count, - line->driver->write_irq); + line->write_irq); if (n < 0) return n; if (n == count) { @@ -156,7 +156,7 @@ static int flush_buffer(struct line *line) count = line->tail - line->head; n = write_chan(line->chan_out, line->head, count, - line->driver->write_irq); + line->write_irq); if (n < 0) return n; @@ -184,12 +184,7 @@ void line_flush_chars(struct tty_struct *tty) line_flush_buffer(tty); } -int line_put_char(struct tty_struct *tty, unsigned char ch) -{ - return line_write(tty, &ch, sizeof(ch)); -} - -int line_write(struct tty_struct *tty, const unsigned char *buf, int len) +ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len) { struct line *line = tty->driver_data; unsigned long flags; @@ -200,7 +195,7 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len) ret = buffer_data(line, buf, len); else { n = write_chan(line->chan_out, buf, len, - line->driver->write_irq); + line->write_irq); if (n < 0) { ret = n; goto out_up; @@ -216,16 +211,11 @@ out_up: return ret; } -void line_set_termios(struct tty_struct *tty, struct ktermios * old) -{ - /* nothing */ -} - void line_throttle(struct tty_struct *tty) { struct line *line = tty->driver_data; - deactivate_chan(line->chan_in, line->driver->read_irq); + deactivate_chan(line->chan_in, line->read_irq); line->throttled = 1; } @@ -234,7 +224,7 @@ void line_unthrottle(struct tty_struct *tty) struct line *line = tty->driver_data; line->throttled = 0; - chan_interrupt(line, line->driver->read_irq); + chan_interrupt(line, line->read_irq); } static irqreturn_t line_write_interrupt(int irq, void *data) @@ -267,19 +257,29 @@ static irqreturn_t line_write_interrupt(int irq, void 
*data) int line_setup_irq(int fd, int input, int output, struct line *line, void *data) { const struct line_driver *driver = line->driver; - int err = 0; + int err; - if (input) - err = um_request_irq(driver->read_irq, fd, IRQ_READ, - line_interrupt, IRQF_SHARED, + if (input) { + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, + line_interrupt, 0, driver->read_irq_name, data); - if (err) - return err; - if (output) - err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, - line_write_interrupt, IRQF_SHARED, + if (err < 0) + return err; + + line->read_irq = err; + } + + if (output) { + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE, + line_write_interrupt, 0, driver->write_irq_name, data); - return err; + if (err < 0) + return err; + + line->write_irq = err; + } + + return 0; } static int line_activate(struct tty_port *port, struct tty_struct *tty) @@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init, parse_chan_pair(NULL, line, n, opts, error_out); err = 0; } + *error_out = "configured as 'none'"; } else { char *new = kstrdup(init, GFP_KERNEL); if (!new) { @@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init, } } if (err) { + *error_out = "failed to parse channel pair"; line->init_str = NULL; line->valid = 0; kfree(new); @@ -542,12 +544,14 @@ int register_lines(struct line_driver *line_driver, const struct tty_operations *ops, struct line *lines, int nlines) { - struct tty_driver *driver = alloc_tty_driver(nlines); + struct tty_driver *driver; int err; int i; - if (!driver) - return -ENOMEM; + driver = tty_alloc_driver(nlines, TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV); + if (IS_ERR(driver)) + return PTR_ERR(driver); driver->driver_name = line_driver->name; driver->name = line_driver->device_name; @@ -555,9 +559,8 @@ int register_lines(struct line_driver *line_driver, driver->minor_start = line_driver->minor_start; driver->type = line_driver->type; driver->subtype = line_driver->subtype; - driver->flags = 
TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; driver->init_termios = tty_std_termios; - + for (i = 0; i < nlines; i++) { tty_port_init(&lines[i].port); lines[i].port.ops = &line_port_ops; @@ -571,7 +574,7 @@ int register_lines(struct line_driver *line_driver, if (err) { printk(KERN_ERR "register_lines : can't register %s driver\n", line_driver->name); - put_tty_driver(driver); + tty_driver_kref_put(driver); for (i = 0; i < nlines; i++) tty_port_destroy(&lines[i].port); return err; @@ -613,7 +616,6 @@ static void free_winch(struct winch *winch) winch->fd = -1; if (fd != -1) os_close_file(fd); - list_del(&winch->list); __free_winch(&winch->work); } @@ -629,15 +631,18 @@ static irqreturn_t winch_interrupt(int irq, void *data) if (fd != -1) { err = generic_read(fd, &c, NULL); - if (err < 0) { + /* A read of 2 means the winch thread failed and has warned */ + if (err < 0 || (err == 1 && c == 2)) { if (err != -EAGAIN) { winch->fd = -1; list_del(&winch->list); os_close_file(fd); - printk(KERN_ERR "winch_interrupt : " - "read failed, errno = %d\n", -err); - printk(KERN_ERR "fd %d is losing SIGWINCH " - "support\n", winch->tty_fd); + if (err < 0) { + printk(KERN_ERR "winch_interrupt : read failed, errno = %d\n", + -err); + printk(KERN_ERR "fd %d is losing SIGWINCH support\n", + winch->tty_fd); + } INIT_WORK(&winch->work, __free_winch); schedule_work(&winch->work); return IRQ_HANDLED; @@ -673,24 +678,26 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_port *port, goto cleanup; } - *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), - .fd = fd, + *winch = ((struct winch) { .fd = fd, .tty_fd = tty_fd, .pid = pid, .port = port, .stack = stack }); + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, IRQF_SHARED, "winch", winch) < 0) { printk(KERN_ERR "register_winch_irq - failed to register " "IRQ\n"); + 
spin_lock(&winch_handler_lock); + list_del(&winch->list); + spin_unlock(&winch_handler_lock); goto out_free; } - spin_lock(&winch_handler_lock); - list_add(&winch->list, &winch_handlers); - spin_unlock(&winch_handler_lock); - return; out_free: @@ -714,6 +721,8 @@ static void unregister_winch(struct tty_struct *tty) winch = list_entry(ele, struct winch, list); wtty = tty_port_tty_get(winch->port); if (wtty == tty) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); free_winch(winch); break; } @@ -724,14 +733,17 @@ static void unregister_winch(struct tty_struct *tty) static void winch_cleanup(void) { - struct list_head *ele, *next; struct winch *winch; spin_lock(&winch_handler_lock); + while ((winch = list_first_entry_or_null(&winch_handlers, + struct winch, list))) { + list_del(&winch->list); + spin_unlock(&winch_handler_lock); - list_for_each_safe(ele, next, &winch_handlers) { - winch = list_entry(ele, struct winch, list); free_winch(winch); + + spin_lock(&winch_handler_lock); } spin_unlock(&winch_handler_lock); diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h index 138a14526d9c..e8bd6f3dfb50 100644 --- a/arch/um/drivers/line.h +++ b/arch/um/drivers/line.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __LINE_H__ @@ -23,9 +23,7 @@ struct line_driver { const short minor_start; const short type; const short subtype; - const int read_irq; const char *read_irq_name; - const int write_irq; const char *write_irq_name; struct mc_device mc; struct tty_driver *driver; @@ -35,6 +33,8 @@ struct line { struct tty_port port; int valid; + int read_irq, write_irq; + char *init_str; struct list_head chan_list; struct chan *chan_in, *chan_out; @@ -47,9 +47,9 @@ struct line { * * buffer points to a buffer allocated on demand, of length * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/ - char *buffer; - char *head; - char *tail; + u8 
*buffer; + u8 *head; + u8 *tail; int sigio; struct delayed_work task; @@ -64,14 +64,11 @@ extern void line_cleanup(struct tty_struct *tty); extern void line_hangup(struct tty_struct *tty); extern int line_setup(char **conf, unsigned nlines, char **def, char *init, char *name); -extern int line_write(struct tty_struct *tty, const unsigned char *buf, - int len); -extern int line_put_char(struct tty_struct *tty, unsigned char ch); -extern void line_set_termios(struct tty_struct *tty, struct ktermios * old); -extern int line_chars_in_buffer(struct tty_struct *tty); +extern ssize_t line_write(struct tty_struct *tty, const u8 *buf, size_t len); +extern unsigned int line_chars_in_buffer(struct tty_struct *tty); extern void line_flush_buffer(struct tty_struct *tty); extern void line_flush_chars(struct tty_struct *tty); -extern int line_write_room(struct tty_struct *tty); +extern unsigned int line_write_room(struct tty_struct *tty); extern void line_throttle(struct tty_struct *tty); extern void line_unthrottle(struct tty_struct *tty); diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h index 44af7379ea19..6356378304fd 100644 --- a/arch/um/drivers/mconsole.h +++ b/arch/um/drivers/mconsole.h @@ -1,7 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #ifndef __MCONSOLE_H__ diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index ff3ab72fd90f..ff4bda95b9c7 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <linux/console.h> @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/notifier.h> +#include 
<linux/panic_notifier.h> #include <linux/reboot.h> #include <linux/sched/debug.h> #include <linux/proc_fs.h> @@ -36,6 +37,8 @@ #include "mconsole_kern.h" #include <os.h> +static struct vfsmount *proc_mnt = NULL; + static int do_unlink_socket(struct notifier_block *notifier, unsigned long what, void *data) { @@ -123,7 +126,7 @@ void mconsole_log(struct mc_request *req) void mconsole_proc(struct mc_request *req) { - struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt; + struct vfsmount *mnt = proc_mnt; char *buf; int len; struct file *file; @@ -134,7 +137,11 @@ void mconsole_proc(struct mc_request *req) ptr += strlen("proc"); ptr = skip_spaces(ptr); - file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0); + if (!mnt) { + mconsole_reply(req, "Proc not available", 1, 0); + goto out; + } + file = file_open_root_mnt(mnt, ptr, O_RDONLY, 0); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); @@ -217,7 +224,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -240,6 +247,7 @@ void mconsole_stop(struct mc_request *req) } os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); @@ -275,7 +283,7 @@ struct unplugged_pages { }; static DEFINE_MUTEX(plug_mem_mutex); -static unsigned long long unplugged_pages_count = 0; +static unsigned long long unplugged_pages_count; static LIST_HEAD(unplugged_pages); static int unplug_index = UNPLUGGED_PER_PAGE; @@ -546,7 +554,7 @@ struct mconsole_output { static DEFINE_SPINLOCK(client_lock); static LIST_HEAD(clients); -static char console_buf[MCONSOLE_MAX_DATA]; +static char console_buf[MCONSOLE_MAX_DATA] __nonstring; static void console_write(struct console *console, const 
char *string, unsigned int len) @@ -559,7 +567,7 @@ static void console_write(struct console *console, const char *string, while (len > 0) { n = min((size_t) len, ARRAY_SIZE(console_buf)); - strncpy(console_buf, string, n); + memcpy(console_buf, string, n); string += n; len -= n; @@ -642,7 +650,7 @@ static void stack_proc(void *arg) { struct task_struct *task = arg; - show_stack(task, NULL); + show_stack(task, NULL, KERN_INFO); } /* @@ -683,6 +691,24 @@ void mconsole_stack(struct mc_request *req) with_console(req, stack_proc, to); } +static int __init mount_proc(void) +{ + struct file_system_type *proc_fs_type; + struct vfsmount *mnt; + + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) + return -ENODEV; + + mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + proc_mnt = mnt; + return 0; +} + /* * Changed by mconsole_setup, which is __setup, and called before SMP is * active. @@ -696,6 +722,8 @@ static int __init mconsole_init(void) int err; char file[UNIX_PATH_MAX]; + mount_proc(); + if (umid_file_name("mconsole", file, sizeof(file))) return -1; snprintf(mconsole_socket_name, sizeof(file), "%s", file); @@ -712,7 +740,7 @@ static int __init mconsole_init(void) err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, IRQF_SHARED, "mconsole", (void *)sock); - if (err) { + if (err < 0) { printk(KERN_ERR "Failed to get IRQ for management console\n"); goto out; } @@ -752,10 +780,9 @@ static ssize_t mconsole_proc_write(struct file *file, return count; } -static const struct file_operations mconsole_proc_fops = { - .owner = THIS_MODULE, - .write = mconsole_proc_write, - .llseek = noop_llseek, +static const struct proc_ops mconsole_proc_ops = { + .proc_write = mconsole_proc_write, + .proc_lseek = noop_llseek, }; static int create_proc_mconsole(void) @@ -765,7 +792,7 @@ static int create_proc_mconsole(void) if (notify_socket == NULL) return 0; - ent = proc_create("mconsole", 0200, NULL, 
&mconsole_proc_fops); + ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops); if (ent == NULL) { printk(KERN_INFO "create_proc_mconsole : proc_create failed\n"); return 0; @@ -819,13 +846,12 @@ static int notify_panic(struct notifier_block *self, unsigned long unused1, mconsole_notify(notify_socket, MCONSOLE_PANIC, message, strlen(message) + 1); - return 0; + return NOTIFY_DONE; } static struct notifier_block panic_exit_notifier = { - .notifier_call = notify_panic, - .next = NULL, - .priority = 1 + .notifier_call = notify_panic, + .priority = INT_MAX, /* run as soon as possible */ }; static int add_notifier(void) diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h index 7a0c6a1ad1d4..56d8d6a3ff76 100644 --- a/arch/um/drivers/mconsole_kern.h +++ b/arch/um/drivers/mconsole_kern.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __MCONSOLE_KERN_H__ diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 99209826adb1..a04cd13c6315 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <errno.h> @@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) return NULL; } +#ifndef MIN #define MIN(a,b) ((a)<(b) ? (a):(b)) +#endif #define STRINGX(x) #x #define STRING(x) STRINGX(x) diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c deleted file mode 100644 index 3645fcb2a787..000000000000 --- a/arch/um/drivers/mmapper_kern.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * arch/um/drivers/mmapper_kern.c - * - * BRIEF MODULE DESCRIPTION - * - * Copyright (C) 2000 RidgeRun, Inc. - * Author: RidgeRun, Inc. 
- * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com - * - */ - -#include <linux/stddef.h> -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/miscdevice.h> -#include <linux/module.h> -#include <linux/mm.h> - -#include <linux/uaccess.h> -#include <mem_user.h> - -/* These are set in mmapper_init, which is called at boot time */ -static unsigned long mmapper_size; -static unsigned long p_buf; -static char *v_buf; - -static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); -} - -static ssize_t mmapper_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - if (*ppos > mmapper_size) - return -EINVAL; - - return simple_write_to_buffer(v_buf, mmapper_size, ppos, buf, count); -} - -static long mmapper_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return -ENOIOCTLCMD; -} - -static int mmapper_mmap(struct file *file, struct vm_area_struct *vma) -{ - int ret = -EINVAL; - int size; - - if (vma->vm_pgoff != 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size > mmapper_size) - return -EFAULT; - - /* - * XXX A comment above remap_pfn_range says it should only be - * called when the mm semaphore is held - */ - if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size, - vma->vm_page_prot)) - goto out; - ret = 0; -out: - return ret; -} - -static int mmapper_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static int mmapper_release(struct inode *inode, struct file *file) -{ - return 0; -} - -static const struct file_operations mmapper_fops = { - .owner = THIS_MODULE, - .read = mmapper_read, - .write = mmapper_write, - .unlocked_ioctl = mmapper_ioctl, - .mmap = mmapper_mmap, - .open = mmapper_open, - .release = mmapper_release, - .llseek = default_llseek, -}; - -/* - * No locking needed - only used (and modified) by below initcall and 
exitcall. - */ -static struct miscdevice mmapper_dev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "mmapper", - .fops = &mmapper_fops -}; - -static int __init mmapper_init(void) -{ - int err; - - printk(KERN_INFO "Mapper v0.1\n"); - - v_buf = (char *) find_iomem("mmapper", &mmapper_size); - if (mmapper_size == 0) { - printk(KERN_ERR "mmapper_init - find_iomem failed\n"); - return -ENODEV; - } - p_buf = __pa(v_buf); - - err = misc_register(&mmapper_dev); - if (err) { - printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", - err); - return err; - } - return 0; -} - -static void mmapper_exit(void) -{ - misc_deregister(&mmapper_dev); -} - -module_init(mmapper_init); -module_exit(mmapper_exit); - -MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>"); -MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c deleted file mode 100644 index d80cfb1d9430..000000000000 --- a/arch/um/drivers/net_kern.c +++ /dev/null @@ -1,901 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * Licensed under the GPL. - */ - -#include <linux/memblock.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/inetdevice.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/platform_device.h> -#include <linux/rtnetlink.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <init.h> -#include <irq_kern.h> -#include <irq_user.h> -#include "mconsole_kern.h" -#include <net_kern.h> -#include <net_user.h> - -#define DRIVER_NAME "uml-netdev" - -static DEFINE_SPINLOCK(opened_lock); -static LIST_HEAD(opened); - -/* - * The drop_skb is used when we can't allocate an skb. 
The - * packet is read into drop_skb in order to get the data off the - * connection to the host. - * It is reallocated whenever a maximum packet size is seen which is - * larger than any seen before. update_drop_skb is called from - * eth_configure when a new interface is added. - */ -static DEFINE_SPINLOCK(drop_lock); -static struct sk_buff *drop_skb; -static int drop_max; - -static int update_drop_skb(int max) -{ - struct sk_buff *new; - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&drop_lock, flags); - - if (max <= drop_max) - goto out; - - err = -ENOMEM; - new = dev_alloc_skb(max); - if (new == NULL) - goto out; - - skb_put(new, max); - - kfree_skb(drop_skb); - drop_skb = new; - drop_max = max; - err = 0; -out: - spin_unlock_irqrestore(&drop_lock, flags); - - return err; -} - -static int uml_net_rx(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - int pkt_len; - struct sk_buff *skb; - - /* If we can't allocate memory, try again next round. */ - skb = dev_alloc_skb(lp->max_packet); - if (skb == NULL) { - drop_skb->dev = dev; - /* Read a packet into drop_skb and don't do anything with it. 
*/ - (*lp->read)(lp->fd, drop_skb, lp); - dev->stats.rx_dropped++; - return 0; - } - - skb->dev = dev; - skb_put(skb, lp->max_packet); - skb_reset_mac_header(skb); - pkt_len = (*lp->read)(lp->fd, skb, lp); - - if (pkt_len > 0) { - skb_trim(skb, pkt_len); - skb->protocol = (*lp->protocol)(skb); - - dev->stats.rx_bytes += skb->len; - dev->stats.rx_packets++; - netif_rx(skb); - return pkt_len; - } - - kfree_skb(skb); - return pkt_len; -} - -static void uml_dev_close(struct work_struct *work) -{ - struct uml_net_private *lp = - container_of(work, struct uml_net_private, work); - dev_close(lp->dev); -} - -static irqreturn_t uml_net_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct uml_net_private *lp = netdev_priv(dev); - int err; - - if (!netif_running(dev)) - return IRQ_NONE; - - spin_lock(&lp->lock); - while ((err = uml_net_rx(dev)) > 0) ; - if (err < 0) { - printk(KERN_ERR - "Device '%s' read returned %d, shutting it down\n", - dev->name, err); - /* dev_close can't be called in interrupt context, and takes - * again lp->lock. - * And dev_close() can be safely called multiple times on the - * same device, since it tests for (dev->flags & IFF_UP). So - * there's no harm in delaying the device shutdown. - * Furthermore, the workqueue will not re-enqueue an already - * enqueued work item. 
*/ - schedule_work(&lp->work); - goto out; - } -out: - spin_unlock(&lp->lock); - return IRQ_HANDLED; -} - -static int uml_net_open(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - int err; - - if (lp->fd >= 0) { - err = -ENXIO; - goto out; - } - - lp->fd = (*lp->open)(&lp->user); - if (lp->fd < 0) { - err = lp->fd; - goto out; - } - - err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, - IRQF_SHARED, dev->name, dev); - if (err != 0) { - printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); - err = -ENETUNREACH; - goto out_close; - } - - netif_start_queue(dev); - - /* clear buffer - it can happen that the host side of the interface - * is full when we get here. In this case, new data is never queued, - * SIGIOs never arrive, and the net never works. - */ - while ((err = uml_net_rx(dev)) > 0) ; - - spin_lock(&opened_lock); - list_add(&lp->list, &opened); - spin_unlock(&opened_lock); - - return 0; -out_close: - if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); - lp->fd = -1; -out: - return err; -} - -static int uml_net_close(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - - netif_stop_queue(dev); - - um_free_irq(dev->irq, dev); - if (lp->close != NULL) - (*lp->close)(lp->fd, &lp->user); - lp->fd = -1; - - spin_lock(&opened_lock); - list_del(&lp->list); - spin_unlock(&opened_lock); - - return 0; -} - -static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - unsigned long flags; - int len; - - netif_stop_queue(dev); - - spin_lock_irqsave(&lp->lock, flags); - - len = (*lp->write)(lp->fd, skb, lp); - skb_tx_timestamp(skb); - - if (len == skb->len) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - netif_trans_update(dev); - netif_start_queue(dev); - - /* this is normally done in the interrupt when tx finishes */ - netif_wake_queue(dev); - } - else if (len == 0) { - netif_start_queue(dev); - 
dev->stats.tx_dropped++; - } - else { - netif_start_queue(dev); - printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); - } - - spin_unlock_irqrestore(&lp->lock, flags); - - dev_consume_skb_any(skb); - - return NETDEV_TX_OK; -} - -static void uml_net_set_multicast_list(struct net_device *dev) -{ - return; -} - -static void uml_net_tx_timeout(struct net_device *dev) -{ - netif_trans_update(dev); - netif_wake_queue(dev); -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -static void uml_net_poll_controller(struct net_device *dev) -{ - disable_irq(dev->irq); - uml_net_interrupt(dev->irq, dev); - enable_irq(dev->irq); -} -#endif - -static void uml_net_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, "42", sizeof(info->version)); -} - -static const struct ethtool_ops uml_net_ethtool_ops = { - .get_drvinfo = uml_net_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ts_info = ethtool_op_get_ts_info, -}; - -static void uml_net_user_timer_expire(struct timer_list *t) -{ -#ifdef undef - struct uml_net_private *lp = from_timer(lp, t, tl); - struct connection *conn = &lp->user; - - dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); - do_connect(conn); -#endif -} - -void uml_net_setup_etheraddr(struct net_device *dev, char *str) -{ - unsigned char *addr = dev->dev_addr; - char *end; - int i; - - if (str == NULL) - goto random; - - for (i = 0; i < 6; i++) { - addr[i] = simple_strtoul(str, &end, 16); - if ((end == str) || - ((*end != ':') && (*end != ',') && (*end != '\0'))) { - printk(KERN_ERR - "setup_etheraddr: failed to parse '%s' " - "as an ethernet address\n", str); - goto random; - } - str = end + 1; - } - if (is_multicast_ether_addr(addr)) { - printk(KERN_ERR - "Attempt to assign a multicast ethernet address to a " - "device disallowed\n"); - goto random; - } - if (!is_valid_ether_addr(addr)) { - printk(KERN_ERR - "Attempt to assign an invalid ethernet 
address to a " - "device disallowed\n"); - goto random; - } - if (!is_local_ether_addr(addr)) { - printk(KERN_WARNING - "Warning: Assigning a globally valid ethernet " - "address to a device\n"); - printk(KERN_WARNING "You should set the 2nd rightmost bit in " - "the first byte of the MAC,\n"); - printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", - addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], - addr[5]); - } - return; - -random: - printk(KERN_INFO - "Choosing a random ethernet address for device %s\n", dev->name); - eth_hw_addr_random(dev); -} - -static DEFINE_SPINLOCK(devices_lock); -static LIST_HEAD(devices); - -static struct platform_driver uml_net_driver = { - .driver = { - .name = DRIVER_NAME, - }, -}; - -static void net_device_release(struct device *dev) -{ - struct uml_net *device = dev_get_drvdata(dev); - struct net_device *netdev = device->dev; - struct uml_net_private *lp = netdev_priv(netdev); - - if (lp->remove != NULL) - (*lp->remove)(&lp->user); - list_del(&device->list); - kfree(device); - free_netdev(netdev); -} - -static const struct net_device_ops uml_netdev_ops = { - .ndo_open = uml_net_open, - .ndo_stop = uml_net_close, - .ndo_start_xmit = uml_net_start_xmit, - .ndo_set_rx_mode = uml_net_set_multicast_list, - .ndo_tx_timeout = uml_net_tx_timeout, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = uml_net_poll_controller, -#endif -}; - -/* - * Ensures that platform_driver_register is called only once by - * eth_configure. Will be set in an initcall. 
- */ -static int driver_registered; - -static void eth_configure(int n, void *init, char *mac, - struct transport *transport, gfp_t gfp_mask) -{ - struct uml_net *device; - struct net_device *dev; - struct uml_net_private *lp; - int err, size; - - size = transport->private_size + sizeof(struct uml_net_private); - - device = kzalloc(sizeof(*device), gfp_mask); - if (device == NULL) { - printk(KERN_ERR "eth_configure failed to allocate struct " - "uml_net\n"); - return; - } - - dev = alloc_etherdev(size); - if (dev == NULL) { - printk(KERN_ERR "eth_configure: failed to allocate struct " - "net_device for eth%d\n", n); - goto out_free_device; - } - - INIT_LIST_HEAD(&device->list); - device->index = n; - - /* If this name ends up conflicting with an existing registered - * netdevice, that is OK, register_netdev{,ice}() will notice this - * and fail. - */ - snprintf(dev->name, sizeof(dev->name), "eth%d", n); - - uml_net_setup_etheraddr(dev, mac); - - printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr); - - lp = netdev_priv(dev); - /* This points to the transport private data. It's still clear, but we - * must memset it to 0 *now*. Let's help the drivers. */ - memset(lp, 0, size); - INIT_WORK(&lp->work, uml_dev_close); - - /* sysfs register */ - if (!driver_registered) { - platform_driver_register(¨_net_driver); - driver_registered = 1; - } - device->pdev.id = n; - device->pdev.name = DRIVER_NAME; - device->pdev.dev.release = net_device_release; - dev_set_drvdata(&device->pdev.dev, device); - if (platform_device_register(&device->pdev)) - goto out_free_netdev; - SET_NETDEV_DEV(dev,&device->pdev.dev); - - device->dev = dev; - - /* - * These just fill in a data structure, so there's no failure - * to be worried about. 
- */ - (*transport->kern->init)(dev, init); - - *lp = ((struct uml_net_private) - { .list = LIST_HEAD_INIT(lp->list), - .dev = dev, - .fd = -1, - .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, - .max_packet = transport->user->max_packet, - .protocol = transport->kern->protocol, - .open = transport->user->open, - .close = transport->user->close, - .remove = transport->user->remove, - .read = transport->kern->read, - .write = transport->kern->write, - .add_address = transport->user->add_address, - .delete_address = transport->user->delete_address }); - - timer_setup(&lp->tl, uml_net_user_timer_expire, 0); - spin_lock_init(&lp->lock); - memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); - - if ((transport->user->init != NULL) && - ((*transport->user->init)(&lp->user, dev) != 0)) - goto out_unregister; - - dev->mtu = transport->user->mtu; - dev->netdev_ops = ¨_netdev_ops; - dev->ethtool_ops = ¨_net_ethtool_ops; - dev->watchdog_timeo = (HZ >> 1); - dev->irq = UM_ETH_IRQ; - - err = update_drop_skb(lp->max_packet); - if (err) - goto out_undo_user_init; - - rtnl_lock(); - err = register_netdevice(dev); - rtnl_unlock(); - if (err) - goto out_undo_user_init; - - spin_lock(&devices_lock); - list_add(&device->list, &devices); - spin_unlock(&devices_lock); - - return; - -out_undo_user_init: - if (transport->user->remove != NULL) - (*transport->user->remove)(&lp->user); -out_unregister: - platform_device_unregister(&device->pdev); - return; /* platform_device_unregister frees dev and device */ -out_free_netdev: - free_netdev(dev); -out_free_device: - kfree(device); -} - -static struct uml_net *find_device(int n) -{ - struct uml_net *device; - struct list_head *ele; - - spin_lock(&devices_lock); - list_for_each(ele, &devices) { - device = list_entry(ele, struct uml_net, list); - if (device->index == n) - goto out; - } - device = NULL; - out: - spin_unlock(&devices_lock); - return device; -} - -static int eth_parse(char *str, int *index_out, char **str_out, - char **error_out) -{ - char 
*end; - int n, err = -EINVAL; - - n = simple_strtoul(str, &end, 0); - if (end == str) { - *error_out = "Bad device number"; - return err; - } - - str = end; - if (*str != '=') { - *error_out = "Expected '=' after device number"; - return err; - } - - str++; - if (find_device(n)) { - *error_out = "Device already configured"; - return err; - } - - *index_out = n; - *str_out = str; - return 0; -} - -struct eth_init { - struct list_head list; - char *init; - int index; -}; - -static DEFINE_SPINLOCK(transports_lock); -static LIST_HEAD(transports); - -/* Filled in during early boot */ -static LIST_HEAD(eth_cmd_line); - -static int check_transport(struct transport *transport, char *eth, int n, - void **init_out, char **mac_out, gfp_t gfp_mask) -{ - int len; - - len = strlen(transport->name); - if (strncmp(eth, transport->name, len)) - return 0; - - eth += len; - if (*eth == ',') - eth++; - else if (*eth != '\0') - return 0; - - *init_out = kmalloc(transport->setup_size, gfp_mask); - if (*init_out == NULL) - return 1; - - if (!transport->setup(eth, mac_out, *init_out)) { - kfree(*init_out); - *init_out = NULL; - } - return 1; -} - -void register_transport(struct transport *new) -{ - struct list_head *ele, *next; - struct eth_init *eth; - void *init; - char *mac = NULL; - int match; - - spin_lock(&transports_lock); - BUG_ON(!list_empty(&new->list)); - list_add(&new->list, &transports); - spin_unlock(&transports_lock); - - list_for_each_safe(ele, next, ð_cmd_line) { - eth = list_entry(ele, struct eth_init, list); - match = check_transport(new, eth->init, eth->index, &init, - &mac, GFP_KERNEL); - if (!match) - continue; - else if (init != NULL) { - eth_configure(eth->index, init, mac, new, GFP_KERNEL); - kfree(init); - } - list_del(ð->list); - } -} - -static int eth_setup_common(char *str, int index) -{ - struct list_head *ele; - struct transport *transport; - void *init; - char *mac = NULL; - int found = 0; - - spin_lock(&transports_lock); - list_for_each(ele, &transports) { 
- transport = list_entry(ele, struct transport, list); - if (!check_transport(transport, str, index, &init, - &mac, GFP_ATOMIC)) - continue; - if (init != NULL) { - eth_configure(index, init, mac, transport, GFP_ATOMIC); - kfree(init); - } - found = 1; - break; - } - - spin_unlock(&transports_lock); - return found; -} - -static int __init eth_setup(char *str) -{ - struct eth_init *new; - char *error; - int n, err; - - err = eth_parse(str, &n, &str, &error); - if (err) { - printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n", - str, error); - return 1; - } - - new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); - - INIT_LIST_HEAD(&new->list); - new->index = n; - new->init = str; - - list_add_tail(&new->list, ð_cmd_line); - return 1; -} - -__setup("eth", eth_setup); -__uml_help(eth_setup, -"eth[0-9]+=<transport>,<options>\n" -" Configure a network device.\n\n" -); - -static int net_config(char *str, char **error_out) -{ - int n, err; - - err = eth_parse(str, &n, &str, error_out); - if (err) - return err; - - /* This string is broken up and the pieces used by the underlying - * driver. So, it is freed only if eth_setup_common fails. 
- */ - str = kstrdup(str, GFP_KERNEL); - if (str == NULL) { - *error_out = "net_config failed to strdup string"; - return -ENOMEM; - } - err = !eth_setup_common(str, n); - if (err) - kfree(str); - return err; -} - -static int net_id(char **str, int *start_out, int *end_out) -{ - char *end; - int n; - - n = simple_strtoul(*str, &end, 0); - if ((*end != '\0') || (end == *str)) - return -1; - - *start_out = n; - *end_out = n; - *str = end; - return n; -} - -static int net_remove(int n, char **error_out) -{ - struct uml_net *device; - struct net_device *dev; - struct uml_net_private *lp; - - device = find_device(n); - if (device == NULL) - return -ENODEV; - - dev = device->dev; - lp = netdev_priv(dev); - if (lp->fd > 0) - return -EBUSY; - unregister_netdev(dev); - platform_device_unregister(&device->pdev); - - return 0; -} - -static struct mc_device net_mc = { - .list = LIST_HEAD_INIT(net_mc.list), - .name = "eth", - .config = net_config, - .get_config = NULL, - .id = net_id, - .remove = net_remove, -}; - -#ifdef CONFIG_INET -static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct in_ifaddr *ifa = ptr; - struct net_device *dev = ifa->ifa_dev->dev; - struct uml_net_private *lp; - void (*proc)(unsigned char *, unsigned char *, void *); - unsigned char addr_buf[4], netmask_buf[4]; - - if (dev->netdev_ops->ndo_open != uml_net_open) - return NOTIFY_DONE; - - lp = netdev_priv(dev); - - proc = NULL; - switch (event) { - case NETDEV_UP: - proc = lp->add_address; - break; - case NETDEV_DOWN: - proc = lp->delete_address; - break; - } - if (proc != NULL) { - memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf)); - memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf)); - (*proc)(addr_buf, netmask_buf, &lp->user); - } - return NOTIFY_DONE; -} - -/* uml_net_init shouldn't be called twice on two CPUs at the same time */ -static struct notifier_block uml_inetaddr_notifier = { - .notifier_call = uml_inetaddr_event, -}; - -static void 
inet_register(void) -{ - struct list_head *ele; - struct uml_net_private *lp; - struct in_device *ip; - struct in_ifaddr *in; - - register_inetaddr_notifier(¨_inetaddr_notifier); - - /* Devices may have been opened already, so the uml_inetaddr_notifier - * didn't get a chance to run for them. This fakes it so that - * addresses which have already been set up get handled properly. - */ - spin_lock(&opened_lock); - list_for_each(ele, &opened) { - lp = list_entry(ele, struct uml_net_private, list); - ip = lp->dev->ip_ptr; - if (ip == NULL) - continue; - in = ip->ifa_list; - while (in != NULL) { - uml_inetaddr_event(NULL, NETDEV_UP, in); - in = in->ifa_next; - } - } - spin_unlock(&opened_lock); -} -#else -static inline void inet_register(void) -{ -} -#endif - -static int uml_net_init(void) -{ - mconsole_register_dev(&net_mc); - inet_register(); - return 0; -} - -__initcall(uml_net_init); - -static void close_devices(void) -{ - struct list_head *ele; - struct uml_net_private *lp; - - spin_lock(&opened_lock); - list_for_each(ele, &opened) { - lp = list_entry(ele, struct uml_net_private, list); - um_free_irq(lp->dev->irq, lp->dev); - if ((lp->close != NULL) && (lp->fd >= 0)) - (*lp->close)(lp->fd, &lp->user); - if (lp->remove != NULL) - (*lp->remove)(&lp->user); - } - spin_unlock(&opened_lock); -} - -__uml_exitcall(close_devices); - -void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, - void *), - void *arg) -{ - struct net_device *dev = d; - struct in_device *ip = dev->ip_ptr; - struct in_ifaddr *in; - unsigned char address[4], netmask[4]; - - if (ip == NULL) return; - in = ip->ifa_list; - while (in != NULL) { - memcpy(address, &in->ifa_address, sizeof(address)); - memcpy(netmask, &in->ifa_mask, sizeof(netmask)); - (*cb)(address, netmask, arg); - in = in->ifa_next; - } -} - -int dev_netmask(void *d, void *m) -{ - struct net_device *dev = d; - struct in_device *ip = dev->ip_ptr; - struct in_ifaddr *in; - __be32 *mask_out = m; - - if (ip == NULL) - 
return 1; - - in = ip->ifa_list; - if (in == NULL) - return 1; - - *mask_out = in->ifa_mask; - return 0; -} - -void *get_output_buffer(int *len_out) -{ - void *ret; - - ret = (void *) __get_free_pages(GFP_KERNEL, 0); - if (ret) *len_out = PAGE_SIZE; - else *len_out = 0; - return ret; -} - -void free_output_buffer(void *buffer) -{ - free_pages((unsigned long) buffer, 0); -} - -int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, - char **gate_addr) -{ - char *remain; - - remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); - if (remain != NULL) { - printk(KERN_ERR "tap_setup_common - Extra garbage on " - "specification : '%s'\n", remain); - return 1; - } - - return 0; -} - -unsigned short eth_protocol(struct sk_buff *skb) -{ - return eth_type_trans(skb, skb->dev); -} diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c deleted file mode 100644 index e9f8445861dc..000000000000 --- a/arch/um/drivers/net_user.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include <stdio.h> -#include <unistd.h> -#include <stdarg.h> -#include <errno.h> -#include <stddef.h> -#include <string.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include <um_malloc.h> - -int tap_open_common(void *dev, char *gate_addr) -{ - int tap_addr[4]; - - if (gate_addr == NULL) - return 0; - if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], - &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) { - printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n", - gate_addr); - return -EINVAL; - } - return 0; -} - -void tap_check_ips(char *gate_addr, unsigned char *eth_addr) -{ - int tap_addr[4]; - - if ((gate_addr != NULL) && - (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], - &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && - (eth_addr[0] == tap_addr[0]) && - (eth_addr[1] == tap_addr[1]) && - (eth_addr[2] == tap_addr[2]) && 
- (eth_addr[3] == tap_addr[3])) { - printk(UM_KERN_ERR "The tap IP address and the UML eth IP " - "address must be different\n"); - } -} - -/* Do reliable error handling as this fails frequently enough. */ -void read_output(int fd, char *output, int len) -{ - int remain, ret, expected; - char c; - char *str; - - if (output == NULL) { - output = &c; - len = sizeof(c); - } - - *output = '\0'; - ret = read(fd, &remain, sizeof(remain)); - - if (ret != sizeof(remain)) { - if (ret < 0) - ret = -errno; - expected = sizeof(remain); - str = "length"; - goto err; - } - - while (remain != 0) { - expected = (remain < len) ? remain : len; - ret = read(fd, output, expected); - if (ret != expected) { - if (ret < 0) - ret = -errno; - str = "data"; - goto err; - } - remain -= ret; - } - - return; - -err: - if (ret < 0) - printk(UM_KERN_ERR "read_output - read of %s failed, " - "errno = %d\n", str, -ret); - else - printk(UM_KERN_ERR "read_output - read of %s failed, read only " - "%d of %d bytes\n", str, ret, expected); -} - -int net_read(int fd, void *buf, int len) -{ - int n; - - n = read(fd, buf, len); - - if ((n < 0) && (errno == EAGAIN)) - return 0; - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_recvfrom(int fd, void *buf, int len) -{ - int n; - - CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_write(int fd, void *buf, int len) -{ - int n; - - n = write(fd, buf, len); - - if ((n < 0) && (errno == EAGAIN)) - return 0; - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_send(int fd, void *buf, int len) -{ - int n; - - CATCH_EINTR(n = send(fd, buf, len, 0)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_sendto(int fd, void *buf, int len, void *to, int sock_len) -{ - int n; - - CATCH_EINTR(n = sendto(fd, buf, len, 0, 
(struct sockaddr *) to, - sock_len)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -struct change_pre_exec_data { - int close_me; - int stdout_fd; -}; - -static void change_pre_exec(void *arg) -{ - struct change_pre_exec_data *data = arg; - - close(data->close_me); - dup2(data->stdout_fd, 1); -} - -static int change_tramp(char **argv, char *output, int output_len) -{ - int pid, fds[2], err; - struct change_pre_exec_data pe_data; - - err = os_pipe(fds, 1, 0); - if (err < 0) { - printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n", - -err); - return err; - } - pe_data.close_me = fds[0]; - pe_data.stdout_fd = fds[1]; - pid = run_helper(change_pre_exec, &pe_data, argv); - - if (pid > 0) /* Avoid hang as we won't get data in failure case. */ - read_output(fds[0], output, output_len); - - close(fds[0]); - close(fds[1]); - - if (pid > 0) - helper_wait(pid); - return pid; -} - -static void change(char *dev, char *what, unsigned char *addr, - unsigned char *netmask) -{ - char addr_buf[sizeof("255.255.255.255\0")]; - char netmask_buf[sizeof("255.255.255.255\0")]; - char version[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version, what, dev, addr_buf, - netmask_buf, NULL }; - char *output; - int output_len, pid; - - sprintf(version, "%d", UML_NET_VERSION); - sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); - sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], - netmask[2], netmask[3]); - - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - if (output == NULL) - printk(UM_KERN_ERR "change : failed to allocate output " - "buffer\n"); - - pid = change_tramp(argv, output, output_len); - if (pid < 0) { - kfree(output); - return; - } - - if (output != NULL) { - printk("%s", output); - kfree(output); - } -} - -void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) -{ - change(arg, "add", addr, netmask); -} - 
-void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) -{ - change(arg, "del", addr, netmask); -} - -char *split_if_spec(char *str, ...) -{ - char **arg, *end, *ret = NULL; - va_list ap; - - va_start(ap, str); - while ((arg = va_arg(ap, char **)) != NULL) { - if (*str == '\0') - goto out; - end = strchr(str, ','); - if (end != str) - *arg = str; - if (end == NULL) - goto out; - *end++ = '\0'; - str = end; - } - ret = str; -out: - va_end(ap); - return ret; -} diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c index 10495747ce8e..30d59b8481b4 100644 --- a/arch/um/drivers/null.c +++ b/arch/um/drivers/null.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <stddef.h> @@ -28,7 +28,7 @@ static int null_open(int input, int output, int primary, void *d, return (fd < 0) ? -errno : fd; } -static int null_read(int fd, char *c_out, void *unused) +static int null_read(int fd, __u8 *c_out, void *unused) { return -ENODEV; } diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c deleted file mode 100644 index be0fb57bd1d7..000000000000 --- a/arch/um/drivers/pcap_kern.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. 
- */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "pcap_user.h" - -struct pcap_init { - char *host_if; - int promisc; - int optimize; - char *filter; -}; - -void pcap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct pcap_data *ppri; - struct pcap_init *init = data; - - pri = netdev_priv(dev); - ppri = (struct pcap_data *) pri->user; - ppri->host_if = init->host_if; - ppri->promisc = init->promisc; - ppri->optimize = init->optimize; - ppri->filter = init->filter; - - printk("pcap backend, host interface %s\n", ppri->host_if); -} - -static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return pcap_user_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER, - (struct pcap_data *) &lp->user); -} - -static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return -EPERM; -} - -static const struct net_kern_info pcap_kern_info = { - .init = pcap_init, - .protocol = eth_protocol, - .read = pcap_read, - .write = pcap_write, -}; - -int pcap_setup(char *str, char **mac_out, void *data) -{ - struct pcap_init *init = data; - char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; - int i; - - *init = ((struct pcap_init) - { .host_if = "eth0", - .promisc = 1, - .optimize = 0, - .filter = NULL }); - - remain = split_if_spec(str, &host_if, &init->filter, - &options[0], &options[1], mac_out, NULL); - if (remain != NULL) { - printk(KERN_ERR "pcap_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (host_if != NULL) - init->host_if = host_if; - - for (i = 0; i < ARRAY_SIZE(options); i++) { - if (options[i] == NULL) - continue; - if (!strcmp(options[i], "promisc")) - init->promisc = 1; - else if (!strcmp(options[i], "nopromisc")) - init->promisc = 0; - else if (!strcmp(options[i], "optimize")) - init->optimize = 1; - else if (!strcmp(options[i], "nooptimize")) - init->optimize = 0; - else { - 
printk(KERN_ERR "pcap_setup : bad option - '%s'\n", - options[i]); - return 0; - } - } - - return 1; -} - -static struct transport pcap_transport = { - .list = LIST_HEAD_INIT(pcap_transport.list), - .name = "pcap", - .setup = pcap_setup, - .user = &pcap_user_info, - .kern = &pcap_kern_info, - .private_size = sizeof(struct pcap_data), - .setup_size = sizeof(struct pcap_init), -}; - -static int register_pcap(void) -{ - register_transport(&pcap_transport); - return 0; -} - -late_initcall(register_pcap); diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c deleted file mode 100644 index c07b9c752c86..000000000000 --- a/arch/um/drivers/pcap_user.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. - */ - -#include <errno.h> -#include <pcap.h> -#include <string.h> -#include <asm/types.h> -#include <net_user.h> -#include "pcap_user.h" -#include <um_malloc.h> - -#define PCAP_FD(p) (*(int *)(p)) - -static int pcap_user_init(void *data, void *dev) -{ - struct pcap_data *pri = data; - pcap_t *p; - char errors[PCAP_ERRBUF_SIZE]; - - p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER, - pri->promisc, 0, errors); - if (p == NULL) { - printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - " - "'%s'\n", errors); - return -EINVAL; - } - - pri->dev = dev; - pri->pcap = p; - return 0; -} - -static int pcap_open(void *data) -{ - struct pcap_data *pri = data; - __u32 netmask; - int err; - - if (pri->pcap == NULL) - return -ENODEV; - - if (pri->filter != NULL) { - err = dev_netmask(pri->dev, &netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n"); - return -EIO; - } - - pri->compiled = uml_kmalloc(sizeof(struct bpf_program), - UM_GFP_KERNEL); - if (pri->compiled == NULL) { - printk(UM_KERN_ERR "pcap_open : kmalloc failed\n"); - return -ENOMEM; - } - - err = pcap_compile(pri->pcap, - (struct bpf_program *) pri->compiled, - 
pri->filter, pri->optimize, netmask); - if (err < 0) { - printk(UM_KERN_ERR "pcap_open : pcap_compile failed - " - "'%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - - err = pcap_setfilter(pri->pcap, pri->compiled); - if (err < 0) { - printk(UM_KERN_ERR "pcap_open : pcap_setfilter " - "failed - '%s'\n", pcap_geterr(pri->pcap)); - goto out; - } - } - - return PCAP_FD(pri->pcap); - - out: - kfree(pri->compiled); - return -EIO; -} - -static void pcap_remove(void *data) -{ - struct pcap_data *pri = data; - - if (pri->compiled != NULL) - pcap_freecode(pri->compiled); - - if (pri->pcap != NULL) - pcap_close(pri->pcap); -} - -struct pcap_handler_data { - char *buffer; - int len; -}; - -static void handler(u_char *data, const struct pcap_pkthdr *header, - const u_char *packet) -{ - int len; - - struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; - - len = hdata->len < header->caplen ? hdata->len : header->caplen; - memcpy(hdata->buffer, packet, len); - hdata->len = len; -} - -int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -{ - struct pcap_handler_data hdata = ((struct pcap_handler_data) - { .buffer = buffer, - .len = len }); - int n; - - n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); - if (n < 0) { - printk(UM_KERN_ERR "pcap_dispatch failed - %s\n", - pcap_geterr(pri->pcap)); - return -EIO; - } - else if (n == 0) - return 0; - return hdata.len; -} - -const struct net_user_info pcap_user_info = { - .init = pcap_user_init, - .open = pcap_open, - .close = NULL, - .remove = pcap_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h deleted file mode 100644 index 1ca7c764cc63..000000000000 --- a/arch/um/drivers/pcap_user.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <net_user.h> - 
-struct pcap_data { - char *host_if; - int promisc; - int optimize; - char *filter; - void *compiled; - void *pcap; - void *dev; -}; - -extern const struct net_user_info pcap_user_info; - -extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); - diff --git a/arch/um/drivers/port.h b/arch/um/drivers/port.h index 372a80c0556a..9085b336e683 100644 --- a/arch/um/drivers/port.h +++ b/arch/um/drivers/port.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __PORT_H__ diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c index b0e9ff35daee..a4508470df78 100644 --- a/arch/um/drivers/port_kern.c +++ b/arch/um/drivers/port_kern.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <linux/completion.h> @@ -45,15 +45,17 @@ struct connection { static irqreturn_t pipe_interrupt(int irq, void *data) { struct connection *conn = data; - int fd; + int n_fds = 1, fd = -1; + ssize_t ret; - fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); - if (fd < 0) { - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid, + sizeof(conn->helper_pid)); + if (ret != sizeof(conn->helper_pid)) { + if (ret == -EAGAIN) return IRQ_NONE; - printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", - -fd); + printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n", + ret); os_close_file(conn->fd); } @@ -100,7 +102,7 @@ static int port_accept(struct port_list *port) .port = port }); if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, - IRQF_SHARED, "telnetd", conn)) { + IRQF_SHARED, "telnetd", conn) < 0) { printk(KERN_ERR "port_accept : failed to get IRQ for " "telnetd\n"); goto out_free; @@ -144,7 +146,7 @@ static void port_work_proc(struct work_struct *unused) local_irq_restore(flags); } 
-DECLARE_WORK(port_work, port_work_proc); +static DECLARE_WORK(port_work, port_work_proc); static irqreturn_t port_interrupt(int irq, void *data) { @@ -182,7 +184,7 @@ void *port_data(int port_num) } if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, - IRQF_SHARED, "port", port)) { + IRQF_SHARED, "port", port) < 0) { printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); goto out_close; } diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c index 5f56d11b886f..3c62ae81df62 100644 --- a/arch/um/drivers/port_user.c +++ b/arch/um/drivers/port_user.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <errno.h> #include <termios.h> #include <unistd.h> @@ -167,14 +168,29 @@ static void port_pre_exec(void *arg) int port_connection(int fd, int *socket, int *pid_out) { int new, err; - char *argv[] = { "/usr/sbin/in.telnetd", "-L", + char *env; + char *argv[] = { "in.telnetd", "-L", OS_LIB_PATH "/uml/port-helper", NULL }; struct port_pre_exec_data data; + if ((env = getenv("UML_PORT_HELPER"))) + argv[2] = env; + new = accept(fd, NULL, 0); if (new < 0) return -errno; + err = os_access(argv[2], X_OK); + if (err < 0) { + printk(UM_KERN_ERR "port_connection : error accessing port-helper " + "executable at %s: %s\n", argv[2], strerror(-err)); + if (env == NULL) + printk(UM_KERN_ERR "Set UML_PORT_HELPER environment " + "variable to path to uml-utilities port-helper " + "binary\n"); + goto out_close; + } + err = os_pipe(socket, 0, 0); if (err < 0) goto out_close; diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c index f1fcc2cedb5e..39c60068cfdf 100644 --- a/arch/um/drivers/pty.c +++ b/arch/um/drivers/pty.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ 
#include <stdio.h> diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 1d5d3057e6f1..ca08c91f47a3 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/interrupt.h> #include <linux/miscdevice.h> +#include <linux/hw_random.h> #include <linux/delay.h> #include <linux/uaccess.h> #include <init.h> @@ -18,100 +19,45 @@ #include <os.h> /* - * core module and version information + * core module information */ -#define RNG_VERSION "1.0.0" #define RNG_MODULE_NAME "hw_random" -#define RNG_MISCDEV_MINOR 183 /* official */ - /* Changed at init time, in the non-modular case, and at module load * time, in the module case. Presumably, the module subsystem * protects against a module being loaded twice at the same time. */ static int random_fd = -1; -static DECLARE_WAIT_QUEUE_HEAD(host_read_wait); +static struct hwrng hwrng; +static DECLARE_COMPLETION(have_data); -static int rng_dev_open (struct inode *inode, struct file *filp) +static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block) { - /* enforce read-only access to this chrdev */ - if ((filp->f_mode & FMODE_READ) == 0) - return -EINVAL; - if ((filp->f_mode & FMODE_WRITE) != 0) - return -EINVAL; - - return 0; -} - -static atomic_t host_sleep_count = ATOMIC_INIT(0); - -static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, - loff_t *offp) -{ - u32 data; - int n, ret = 0, have_data; - - while (size) { - n = os_read_file(random_fd, &data, sizeof(data)); - if (n > 0) { - have_data = n; - while (have_data && size) { - if (put_user((u8) data, buf++)) { - ret = ret ? : -EFAULT; - break; - } - size--; - ret++; - have_data--; - data >>= 8; - } - } - else if (n == -EAGAIN) { - DECLARE_WAITQUEUE(wait, current); + int ret; - if (filp->f_flags & O_NONBLOCK) - return ret ? 
: -EAGAIN; - - atomic_inc(&host_sleep_count); + for (;;) { + ret = os_read_file(random_fd, buf, max); + if (block && ret == -EAGAIN) { add_sigio_fd(random_fd); - add_wait_queue(&host_read_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + ret = wait_for_completion_killable(&have_data); - schedule(); - remove_wait_queue(&host_read_wait, &wait); + ignore_sigio_fd(random_fd); + deactivate_fd(random_fd, RANDOM_IRQ); - if (atomic_dec_and_test(&host_sleep_count)) { - ignore_sigio_fd(random_fd); - deactivate_fd(random_fd, RANDOM_IRQ); - } + if (ret < 0) + break; + } else { + break; } - else - return n; - - if (signal_pending (current)) - return ret ? : -ERESTARTSYS; } - return ret; -} - -static const struct file_operations rng_chrdev_ops = { - .owner = THIS_MODULE, - .open = rng_dev_open, - .read = rng_dev_read, - .llseek = noop_llseek, -}; -/* rng_init shouldn't be called more than once at boot time */ -static struct miscdevice rng_miscdev = { - RNG_MISCDEV_MINOR, - RNG_MODULE_NAME, - &rng_chrdev_ops, -}; + return ret != -EAGAIN ? 
ret : 0; +} static irqreturn_t random_interrupt(int irq, void *data) { - wake_up(&host_read_wait); + complete(&have_data); return IRQ_HANDLED; } @@ -128,18 +74,18 @@ static int __init rng_init (void) goto out; random_fd = err; - err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt, 0, "random", NULL); - if (err) + if (err < 0) goto err_out_cleanup_hw; - sigio_broken(random_fd, 1); + sigio_broken(); + hwrng.name = RNG_MODULE_NAME; + hwrng.read = rng_dev_read; - err = misc_register (&rng_miscdev); + err = hwrng_register(&hwrng); if (err) { - printk (KERN_ERR RNG_MODULE_NAME ": misc device register " - "failed\n"); + pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err); goto err_out_cleanup_hw; } out: @@ -163,8 +109,8 @@ static void cleanup(void) static void __exit rng_cleanup(void) { + hwrng_unregister(&hwrng); os_close_file(random_fd); - misc_deregister (&rng_miscdev); } module_init (rng_init); diff --git a/arch/um/drivers/rtc.h b/arch/um/drivers/rtc.h new file mode 100644 index 000000000000..95e41c7d35c4 --- /dev/null +++ b/arch/um/drivers/rtc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#ifndef __UM_RTC_H__ +#define __UM_RTC_H__ + +int uml_rtc_start(bool timetravel); +int uml_rtc_enable_alarm(unsigned long long delta_seconds); +void uml_rtc_disable_alarm(void); +void uml_rtc_stop(bool timetravel); +void uml_rtc_send_timetravel_alarm(void); + +#endif /* __UM_RTC_H__ */ diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c new file mode 100644 index 000000000000..9158c936c128 --- /dev/null +++ b/arch/um/drivers/rtc_kern.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/platform_device.h> +#include <linux/time-internal.h> +#include <linux/suspend.h> +#include <linux/err.h> +#include 
<linux/rtc.h> +#include <kern_util.h> +#include <irq_kern.h> +#include <os.h> +#include "rtc.h" + +static time64_t uml_rtc_alarm_time; +static bool uml_rtc_alarm_enabled; +static struct rtc_device *uml_rtc; +static int uml_rtc_irq_fd, uml_rtc_irq; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + +static void uml_rtc_time_travel_alarm(struct time_travel_event *ev) +{ + uml_rtc_send_timetravel_alarm(); +} + +static struct time_travel_event uml_rtc_alarm_event = { + .fn = uml_rtc_time_travel_alarm, +}; +#endif + +static int uml_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct timespec64 ts; + + /* Use this to get correct time in time-travel mode */ + read_persistent_clock64(&ts); + rtc_time64_to_tm(timespec64_to_ktime(ts) / NSEC_PER_SEC, tm); + + return 0; +} + +static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + rtc_time64_to_tm(uml_rtc_alarm_time, &alrm->time); + alrm->enabled = uml_rtc_alarm_enabled; + + return 0; +} + +static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) +{ + struct timespec64 ts; + unsigned long long secs; + + if (!enable && !uml_rtc_alarm_enabled) + return 0; + + uml_rtc_alarm_enabled = enable; + + read_persistent_clock64(&ts); + secs = uml_rtc_alarm_time - ts.tv_sec; + + if (time_travel_mode == TT_MODE_OFF) { + if (!enable) { + uml_rtc_disable_alarm(); + return 0; + } + + /* enable or update */ + return uml_rtc_enable_alarm(secs); + } else { + time_travel_del_event(&uml_rtc_alarm_event); + + if (enable) + time_travel_add_event_rel(&uml_rtc_alarm_event, + secs * NSEC_PER_SEC - + ts.tv_nsec); + } + + return 0; +} + +static int uml_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + uml_rtc_alarm_irq_enable(dev, 0); + uml_rtc_alarm_time = rtc_tm_to_time64(&alrm->time); + uml_rtc_alarm_irq_enable(dev, alrm->enabled); + + return 0; +} + +static const struct rtc_class_ops uml_rtc_ops = { + .read_time = uml_rtc_read_time, + .read_alarm = uml_rtc_read_alarm, + .alarm_irq_enable =
uml_rtc_alarm_irq_enable, + .set_alarm = uml_rtc_set_alarm, +}; + +static irqreturn_t uml_rtc_interrupt(int irq, void *data) +{ + unsigned long long c = 0; + + /* alarm triggered, it's now off */ + uml_rtc_alarm_enabled = false; + + os_read_file(uml_rtc_irq_fd, &c, sizeof(c)); + WARN_ON(c == 0); + + pm_system_wakeup(); + rtc_update_irq(uml_rtc, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static int uml_rtc_setup(void) +{ + int err; + + err = uml_rtc_start(time_travel_mode != TT_MODE_OFF); + if (WARN(err < 0, "err = %d\n", err)) + return err; + + uml_rtc_irq_fd = err; + + err = um_request_irq(UM_IRQ_ALLOC, uml_rtc_irq_fd, IRQ_READ, + uml_rtc_interrupt, 0, "rtc", NULL); + if (err < 0) { + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); + return err; + } + + irq_set_irq_wake(err, 1); + + uml_rtc_irq = err; + return 0; +} + +static void uml_rtc_cleanup(void) +{ + um_free_irq(uml_rtc_irq, NULL); + uml_rtc_stop(time_travel_mode != TT_MODE_OFF); +} + +static int uml_rtc_probe(struct platform_device *pdev) +{ + int err; + + err = uml_rtc_setup(); + if (err) + return err; + + uml_rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(uml_rtc)) { + err = PTR_ERR(uml_rtc); + goto cleanup; + } + + uml_rtc->ops = &uml_rtc_ops; + + device_init_wakeup(&pdev->dev, 1); + + err = devm_rtc_register_device(uml_rtc); + if (err) + goto cleanup; + + return 0; +cleanup: + uml_rtc_cleanup(); + return err; +} + +static void uml_rtc_remove(struct platform_device *pdev) +{ + device_init_wakeup(&pdev->dev, 0); + uml_rtc_cleanup(); +} + +static struct platform_driver uml_rtc_driver = { + .probe = uml_rtc_probe, + .remove = uml_rtc_remove, + .driver = { + .name = "uml-rtc", + }, +}; + +static int __init uml_rtc_init(void) +{ + struct platform_device *pdev; + int err; + + err = platform_driver_register(&uml_rtc_driver); + if (err) + return err; + + pdev = platform_device_alloc("uml-rtc", 0); + if (!pdev) { + err = -ENOMEM; + goto unregister; + } + + err = platform_device_add(pdev); + if (err)
+ goto unregister; + return 0; + +unregister: + platform_device_put(pdev); + platform_driver_unregister(&uml_rtc_driver); + return err; +} +device_initcall(uml_rtc_init); diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c new file mode 100644 index 000000000000..67912fcf7b28 --- /dev/null +++ b/arch/um/drivers/rtc_user.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <stdbool.h> +#include <os.h> +#include <errno.h> +#include <sched.h> +#include <unistd.h> +#include <kern_util.h> +#include <sys/select.h> +#include <stdio.h> +#include <sys/timerfd.h> +#include "rtc.h" + +static int uml_rtc_irq_fds[2]; + +void uml_rtc_send_timetravel_alarm(void) +{ + unsigned long long c = 1; + + CATCH_EINTR(write(uml_rtc_irq_fds[1], &c, sizeof(c))); +} + +int uml_rtc_start(bool timetravel) +{ + int err; + + if (timetravel) { + err = os_pipe(uml_rtc_irq_fds, 1, 1); + if (err) + goto fail; + } else { + uml_rtc_irq_fds[0] = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); + if (uml_rtc_irq_fds[0] < 0) { + err = -errno; + goto fail; + } + + /* apparently timerfd won't send SIGIO, use workaround */ + sigio_broken(); + err = add_sigio_fd(uml_rtc_irq_fds[0]); + if (err < 0) { + close(uml_rtc_irq_fds[0]); + goto fail; + } + } + + return uml_rtc_irq_fds[0]; +fail: + uml_rtc_stop(timetravel); + return err; +} + +int uml_rtc_enable_alarm(unsigned long long delta_seconds) +{ + struct itimerspec it = { + .it_value = { + .tv_sec = delta_seconds, + }, + }; + + if (timerfd_settime(uml_rtc_irq_fds[0], 0, &it, NULL)) + return -errno; + return 0; +} + +void uml_rtc_disable_alarm(void) +{ + uml_rtc_enable_alarm(0); +} + +void uml_rtc_stop(bool timetravel) +{ + if (timetravel) + os_close_file(uml_rtc_irq_fds[1]); + else + ignore_sigio_fd(uml_rtc_irq_fds[0]); + os_close_file(uml_rtc_irq_fds[0]); +} diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h deleted file mode
100644 index 0f3b7ca99465..000000000000 --- a/arch/um/drivers/slip.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIP_H -#define __UM_SLIP_H - -#include "slip_common.h" - -struct slip_data { - void *dev; - char name[sizeof("slnnnnn\0")]; - char *addr; - char *gate_addr; - int slave; - struct slip_proto slip; -}; - -extern const struct net_user_info slip_user_info; - -extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); -extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); - -#endif diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c deleted file mode 100644 index 20fe4f42743d..000000000000 --- a/arch/um/drivers/slip_common.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <string.h> -#include "slip_common.h" -#include <net_user.h> - -int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip) -{ - int i, n, size, start; - - if(slip->more > 0){ - i = 0; - while(i < slip->more){ - size = slip_unesc(slip->ibuf[i++], slip->ibuf, - &slip->pos, &slip->esc); - if(size){ - memcpy(buf, slip->ibuf, size); - memmove(slip->ibuf, &slip->ibuf[i], - slip->more - i); - slip->more = slip->more - i; - return size; - } - } - slip->more = 0; - } - - n = net_read(fd, &slip->ibuf[slip->pos], - sizeof(slip->ibuf) - slip->pos); - if(n <= 0) - return n; - - start = slip->pos; - for(i = 0; i < n; i++){ - size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos, - &slip->esc); - if(size){ - memcpy(buf, slip->ibuf, size); - memmove(slip->ibuf, &slip->ibuf[start+i+1], - n - (i + 1)); - slip->more = n - (i + 1); - return size; - } - } - return 0; -} - -int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip) -{ - int actual, n; - - actual = slip_esc(buf, slip->obuf, len); - n = net_write(fd, slip->obuf, actual); - if(n < 0) - return n; - else return len; -} diff --git a/arch/um/drivers/slip_common.h 
b/arch/um/drivers/slip_common.h deleted file mode 100644 index d3798b5caf7f..000000000000 --- a/arch/um/drivers/slip_common.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIP_COMMON_H -#define __UM_SLIP_COMMON_H - -#define BUF_SIZE 1500 - /* two bytes each for a (pathological) max packet of escaped chars + * - * terminating END char + initial END char */ -#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) - -/* SLIP protocol characters. */ -#define SLIP_END 0300 /* indicates end of frame */ -#define SLIP_ESC 0333 /* indicates byte stuffing */ -#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ -#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ - -static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos, - int *esc) -{ - int ret; - - switch(c){ - case SLIP_END: - *esc = 0; - ret=*pos; - *pos=0; - return(ret); - case SLIP_ESC: - *esc = 1; - return(0); - case SLIP_ESC_ESC: - if(*esc){ - *esc = 0; - c = SLIP_ESC; - } - break; - case SLIP_ESC_END: - if(*esc){ - *esc = 0; - c = SLIP_END; - } - break; - } - buf[(*pos)++] = c; - return(0); -} - -static inline int slip_esc(unsigned char *s, unsigned char *d, int len) -{ - unsigned char *ptr = d; - unsigned char c; - - /* - * Send an initial END character to flush out any - * data that may have accumulated in the receiver - * due to line noise. - */ - - *ptr++ = SLIP_END; - - /* - * For each byte in the packet, send the appropriate - * character sequence, according to the SLIP protocol. 
- */ - - while (len-- > 0) { - switch(c = *s++) { - case SLIP_END: - *ptr++ = SLIP_ESC; - *ptr++ = SLIP_ESC_END; - break; - case SLIP_ESC: - *ptr++ = SLIP_ESC; - *ptr++ = SLIP_ESC_ESC; - break; - default: - *ptr++ = c; - break; - } - } - *ptr++ = SLIP_END; - return (ptr - d); -} - -struct slip_proto { - unsigned char ibuf[ENC_BUF_SIZE]; - unsigned char obuf[ENC_BUF_SIZE]; - int more; /* more data: do not read fd until ibuf has been drained */ - int pos; - int esc; -}; - -static inline void slip_proto_init(struct slip_proto * slip) -{ - memset(slip->ibuf, 0, sizeof(slip->ibuf)); - memset(slip->obuf, 0, sizeof(slip->obuf)); - slip->more = 0; - slip->pos = 0; - slip->esc = 0; -} - -extern int slip_proto_read(int fd, void *buf, int len, - struct slip_proto *slip); -extern int slip_proto_write(int fd, void *buf, int len, - struct slip_proto *slip); - -#endif diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c deleted file mode 100644 index ed5249fc0574..000000000000 --- a/arch/um/drivers/slip_kern.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. 
- */ - -#include <linux/if_arp.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "slip.h" - -struct slip_init { - char *gate_addr; -}; - -static void slip_init(struct net_device *dev, void *data) -{ - struct uml_net_private *private; - struct slip_data *spri; - struct slip_init *init = data; - - private = netdev_priv(dev); - spri = (struct slip_data *) private->user; - - memset(spri->name, 0, sizeof(spri->name)); - spri->addr = NULL; - spri->gate_addr = init->gate_addr; - spri->slave = -1; - spri->dev = dev; - - slip_proto_init(&spri->slip); - - dev->hard_header_len = 0; - dev->header_ops = NULL; - dev->addr_len = 0; - dev->type = ARPHRD_SLIP; - dev->tx_queue_len = 256; - dev->flags = IFF_NOARP; - printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); -} - -static unsigned short slip_protocol(struct sk_buff *skbuff) -{ - return htons(ETH_P_IP); -} - -static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu, - (struct slip_data *) &lp->user); -} - -static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slip_user_write(fd, skb->data, skb->len, - (struct slip_data *) &lp->user); -} - -static const struct net_kern_info slip_kern_info = { - .init = slip_init, - .protocol = slip_protocol, - .read = slip_read, - .write = slip_write, -}; - -static int slip_setup(char *str, char **mac_out, void *data) -{ - struct slip_init *init = data; - - *init = ((struct slip_init) { .gate_addr = NULL }); - - if (str[0] != '\0') - init->gate_addr = str; - return 1; -} - -static struct transport slip_transport = { - .list = LIST_HEAD_INIT(slip_transport.list), - .name = "slip", - .setup = slip_setup, - .user = &slip_user_info, - .kern = &slip_kern_info, - .private_size = sizeof(struct slip_data), - .setup_size = sizeof(struct slip_init), -}; - -static int register_slip(void) -{ - register_transport(&slip_transport); - return 
0; -} - -late_initcall(register_slip); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c deleted file mode 100644 index 0d6b66c64a81..000000000000 --- a/arch/um/drivers/slip_user.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <fcntl.h> -#include <string.h> -#include <sys/termios.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include "slip.h" -#include <um_malloc.h> - -static int slip_user_init(void *data, void *dev) -{ - struct slip_data *pri = data; - - pri->dev = dev; - return 0; -} - -static int set_up_tty(int fd) -{ - int i; - struct termios tios; - - if (tcgetattr(fd, &tios) < 0) { - printk(UM_KERN_ERR "could not get initial terminal " - "attributes\n"); - return -1; - } - - tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; - tios.c_iflag = IGNBRK | IGNPAR; - tios.c_oflag = 0; - tios.c_lflag = 0; - for (i = 0; i < NCCS; i++) - tios.c_cc[i] = 0; - tios.c_cc[VMIN] = 1; - tios.c_cc[VTIME] = 0; - - cfsetospeed(&tios, B38400); - cfsetispeed(&tios, B38400); - - if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { - printk(UM_KERN_ERR "failed to set terminal attributes\n"); - return -1; - } - return 0; -} - -struct slip_pre_exec_data { - int stdin_fd; - int stdout_fd; - int close_me; -}; - -static void slip_pre_exec(void *arg) -{ - struct slip_pre_exec_data *data = arg; - - if (data->stdin_fd >= 0) - dup2(data->stdin_fd, 0); - dup2(data->stdout_fd, 1); - if (data->close_me >= 0) - close(data->close_me); -} - -static int slip_tramp(char **argv, int fd) -{ - struct slip_pre_exec_data pe_data; - char *output; - int pid, fds[2], err, output_len; - - err = os_pipe(fds, 1, 0); - if (err < 0) { - printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n", - -err); - goto out; - } - - err = 0; - pe_data.stdin_fd = fd; - pe_data.stdout_fd = fds[1]; - pe_data.close_me = 
fds[0]; - err = run_helper(slip_pre_exec, &pe_data, argv); - if (err < 0) - goto out_close; - pid = err; - - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - if (output == NULL) { - printk(UM_KERN_ERR "slip_tramp : failed to allocate output " - "buffer\n"); - os_kill_process(pid, 1); - err = -ENOMEM; - goto out_close; - } - - close(fds[1]); - read_output(fds[0], output, output_len); - printk("%s", output); - - err = helper_wait(pid); - close(fds[0]); - - kfree(output); - return err; - -out_close: - close(fds[0]); - close(fds[1]); -out: - return err; -} - -static int slip_open(void *data) -{ - struct slip_data *pri = data; - char version_buf[sizeof("nnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, - NULL }; - int sfd, mfd, err; - - err = get_pty(); - if (err < 0) { - printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n", - -err); - goto out; - } - mfd = err; - - err = open(ptsname(mfd), O_RDWR, 0); - if (err < 0) { - printk(UM_KERN_ERR "Couldn't open tty for slip line, " - "err = %d\n", -err); - goto out_close; - } - sfd = err; - - if (set_up_tty(sfd)) - goto out_close2; - - pri->slave = sfd; - pri->slip.pos = 0; - pri->slip.esc = 0; - if (pri->gate_addr != NULL) { - sprintf(version_buf, "%d", UML_NET_VERSION); - strcpy(gate_buf, pri->gate_addr); - - err = slip_tramp(argv, sfd); - - if (err < 0) { - printk(UM_KERN_ERR "slip_tramp failed - err = %d\n", - -err); - goto out_close2; - } - err = os_get_ifname(pri->slave, pri->name); - if (err < 0) { - printk(UM_KERN_ERR "get_ifname failed, err = %d\n", - -err); - goto out_close2; - } - iter_addresses(pri->dev, open_addr, pri->name); - } - else { - err = os_set_slip(sfd); - if (err < 0) { - printk(UM_KERN_ERR "Failed to set slip discipline " - "encapsulation - err = %d\n", -err); - goto out_close2; - } - } - return mfd; -out_close2: - close(sfd); -out_close: - close(mfd); -out: - return err; -} - 
-static void slip_close(int fd, void *data) -{ - struct slip_data *pri = data; - char version_buf[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, - NULL }; - int err; - - if (pri->gate_addr != NULL) - iter_addresses(pri->dev, close_addr, pri->name); - - sprintf(version_buf, "%d", UML_NET_VERSION); - - err = slip_tramp(argv, pri->slave); - - if (err != 0) - printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err); - close(fd); - close(pri->slave); - pri->slave = -1; -} - -int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) -{ - return slip_proto_read(fd, buf, len, &pri->slip); -} - -int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) -{ - return slip_proto_write(fd, buf, len, &pri->slip); -} - -static void slip_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct slip_data *pri = data; - - if (pri->slave < 0) - return; - open_addr(addr, netmask, pri->name); -} - -static void slip_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct slip_data *pri = data; - - if (pri->slave < 0) - return; - close_addr(addr, netmask, pri->name); -} - -const struct net_user_info slip_user_info = { - .init = slip_user_init, - .open = slip_open, - .close = slip_close, - .remove = NULL, - .add_address = slip_add_addr, - .delete_address = slip_del_addr, - .mtu = BUF_SIZE, - .max_packet = BUF_SIZE, -}; diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h deleted file mode 100644 index 4aef2b88249a..000000000000 --- a/arch/um/drivers/slirp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIRP_H -#define __UM_SLIRP_H - -#include "slip_common.h" - -#define SLIRP_MAX_ARGS 100 -/* - * XXX this next definition is here because I don't understand why this - * initializer doesn't work in slirp_kern.c: - * - * argv : { init->argv[ 0 ... 
SLIRP_MAX_ARGS-1 ] }, - * - * or why I can't typecast like this: - * - * argv : (char* [SLIRP_MAX_ARGS])(init->argv), - */ -struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; - -struct slirp_data { - void *dev; - struct arg_list_dummy_wrapper argw; - int pid; - int slave; - struct slip_proto slip; -}; - -extern const struct net_user_info slirp_user_info; - -extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -extern int slirp_user_write(int fd, void *buf, int len, - struct slirp_data *pri); - -#endif diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c deleted file mode 100644 index 4ef11ca7cacf..000000000000 --- a/arch/um/drivers/slirp_kern.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. - */ - -#include <linux/if_arp.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <net_kern.h> -#include <net_user.h> -#include "slirp.h" - -struct slirp_init { - struct arg_list_dummy_wrapper argw; /* XXX should be simpler... 
*/ -}; - -void slirp_init(struct net_device *dev, void *data) -{ - struct uml_net_private *private; - struct slirp_data *spri; - struct slirp_init *init = data; - int i; - - private = netdev_priv(dev); - spri = (struct slirp_data *) private->user; - - spri->argw = init->argw; - spri->pid = -1; - spri->slave = -1; - spri->dev = dev; - - slip_proto_init(&spri->slip); - - dev->hard_header_len = 0; - dev->header_ops = NULL; - dev->addr_len = 0; - dev->type = ARPHRD_SLIP; - dev->tx_queue_len = 256; - dev->flags = IFF_NOARP; - printk("SLIRP backend - command line:"); - for (i = 0; spri->argw.argv[i] != NULL; i++) - printk(" '%s'",spri->argw.argv[i]); - printk("\n"); -} - -static unsigned short slirp_protocol(struct sk_buff *skbuff) -{ - return htons(ETH_P_IP); -} - -static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu, - (struct slirp_data *) &lp->user); -} - -static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slirp_user_write(fd, skb->data, skb->len, - (struct slirp_data *) &lp->user); -} - -const struct net_kern_info slirp_kern_info = { - .init = slirp_init, - .protocol = slirp_protocol, - .read = slirp_read, - .write = slirp_write, -}; - -static int slirp_setup(char *str, char **mac_out, void *data) -{ - struct slirp_init *init = data; - int i=0; - - *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } }); - - str = split_if_spec(str, mac_out, NULL); - - if (str == NULL) /* no command line given after MAC addr */ - return 1; - - do { - if (i >= SLIRP_MAX_ARGS - 1) { - printk(KERN_WARNING "slirp_setup: truncating slirp " - "arguments\n"); - break; - } - init->argw.argv[i++] = str; - while(*str && *str!=',') { - if (*str == '_') - *str=' '; - str++; - } - if (*str != ',') - break; - *str++ = '\0'; - } while (1); - - init->argw.argv[i] = NULL; - return 1; -} - -static struct transport slirp_transport = { - .list = 
LIST_HEAD_INIT(slirp_transport.list), - .name = "slirp", - .setup = slirp_setup, - .user = &slirp_user_info, - .kern = &slirp_kern_info, - .private_size = sizeof(struct slirp_data), - .setup_size = sizeof(struct slirp_init), -}; - -static int register_slirp(void) -{ - register_transport(&slirp_transport); - return 0; -} - -late_initcall(register_slirp); diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c deleted file mode 100644 index 98b6a41a254e..000000000000 --- a/arch/um/drivers/slirp_user.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL. - */ - -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include "slirp.h" - -static int slirp_user_init(void *data, void *dev) -{ - struct slirp_data *pri = data; - - pri->dev = dev; - return 0; -} - -struct slirp_pre_exec_data { - int stdin_fd; - int stdout_fd; -}; - -static void slirp_pre_exec(void *arg) -{ - struct slirp_pre_exec_data *data = arg; - - if (data->stdin_fd != -1) - dup2(data->stdin_fd, 0); - if (data->stdout_fd != -1) - dup2(data->stdout_fd, 1); -} - -static int slirp_tramp(char **argv, int fd) -{ - struct slirp_pre_exec_data pe_data; - int pid; - - pe_data.stdin_fd = fd; - pe_data.stdout_fd = fd; - pid = run_helper(slirp_pre_exec, &pe_data, argv); - - return pid; -} - -static int slirp_open(void *data) -{ - struct slirp_data *pri = data; - int fds[2], pid, err; - - err = os_pipe(fds, 1, 1); - if (err) - return err; - - err = slirp_tramp(pri->argw.argv, fds[1]); - if (err < 0) { - printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); - goto out; - } - pid = err; - - pri->slave = fds[1]; - pri->slip.pos = 0; - pri->slip.esc = 0; - pri->pid = err; - - return fds[0]; -out: - close(fds[0]); - close(fds[1]); - return err; -} - -static void slirp_close(int fd, void *data) -{ - struct slirp_data *pri = data; - int err; - - 
close(fd); - close(pri->slave); - - pri->slave = -1; - - if (pri->pid<1) { - printk(UM_KERN_ERR "slirp_close: no child process to shut " - "down\n"); - return; - } - -#if 0 - if (kill(pri->pid, SIGHUP)<0) { - printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed " - "(%d)\n", pri->pid, errno); - } -#endif - err = helper_wait(pri->pid); - if (err < 0) - return; - - pri->pid = -1; -} - -int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) -{ - return slip_proto_read(fd, buf, len, &pri->slip); -} - -int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) -{ - return slip_proto_write(fd, buf, len, &pri->slip); -} - -const struct net_user_info slirp_user_info = { - .init = slirp_user_init, - .open = slirp_open, - .close = slirp_close, - .remove = NULL, - .add_address = NULL, - .delete_address = NULL, - .mtu = BUF_SIZE, - .max_packet = BUF_SIZE, -}; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index b8d14fa52059..8006a5bd578c 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #include <linux/fs.h> @@ -12,7 +12,6 @@ #include <linux/console.h> #include <asm/termbits.h> #include <asm/irq.h> -#include "ssl.h" #include "chan.h" #include <init.h> #include <irq_user.h> @@ -48,9 +47,7 @@ static struct line_driver driver = { .minor_start = 64, .type = TTY_DRIVER_TYPE_SERIAL, .subtype = 0, - .read_irq = SSL_IRQ, .read_irq_name = "ssl", - .write_irq = SSL_WRITE_IRQ, .write_irq_name = "ssl-write", .mc = { .list = LIST_HEAD_INIT(driver.mc.list), @@ -96,12 +93,10 @@ static const struct tty_operations ssl_ops = { .open = line_open, .close = line_close, .write = line_write, - .put_char = line_put_char, .write_room = line_write_room, .chars_in_buffer = line_chars_in_buffer, .flush_buffer = line_flush_buffer, .flush_chars = line_flush_chars, - .set_termios = line_set_termios, 
.throttle = line_throttle, .unthrottle = line_unthrottle, .install = ssl_install, @@ -111,7 +106,7 @@ static const struct tty_operations ssl_ops = { /* Changed by ssl_init and referenced by ssl_exit, which are both serialized * by being an initcall and exitcall, respectively. */ -static int ssl_init_done = 0; +static int ssl_init_done; static void ssl_console_write(struct console *c, const char *string, unsigned len) @@ -197,3 +192,14 @@ static int ssl_chan_setup(char *str) __setup("ssl", ssl_chan_setup); __channel_help(ssl_chan_setup, "ssl"); + +static int ssl_non_raw_setup(char *str) +{ + opts.raw = 0; + return 1; +} +__setup("ssl-non-raw", ssl_non_raw_setup); +__uml_help(ssl_non_raw_setup, +"ssl-non-raw\n" +" Set serial lines to non-raw mode.\n\n" +); diff --git a/arch/um/drivers/ssl.h b/arch/um/drivers/ssl.h deleted file mode 100644 index 314d17725ce6..000000000000 --- a/arch/um/drivers/ssl.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __SSL_H__ -#define __SSL_H__ - -extern int ssl_read(int fd, int line); -extern void ssl_receive_char(int line, char ch); - -#endif - diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index c90817b04da9..1c239737d88e 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #include <linux/posix_types.h> @@ -53,9 +53,7 @@ static struct line_driver driver = { .minor_start = 0, .type = TTY_DRIVER_TYPE_CONSOLE, .subtype = SYSTEM_TYPE_CONSOLE, - .read_irq = CONSOLE_IRQ, .read_irq_name = "console", - .write_irq = CONSOLE_WRITE_IRQ, .write_irq_name = "console-write", .mc = { .list = LIST_HEAD_INIT(driver.mc.list), @@ -90,7 +88,7 @@ static int con_remove(int n, char **error_out) } /* Set in an initcall, checked in an exitcall */ -static int con_init_done = 0; 
+static int con_init_done; static int con_install(struct tty_driver *driver, struct tty_struct *tty) { @@ -102,12 +100,10 @@ static const struct tty_operations console_ops = { .install = con_install, .close = line_close, .write = line_write, - .put_char = line_put_char, .write_room = line_write_room, .chars_in_buffer = line_chars_in_buffer, .flush_buffer = line_flush_buffer, .flush_chars = line_flush_chars, - .set_termios = line_set_termios, .throttle = line_throttle, .unthrottle = line_unthrottle, .hangup = line_hangup, diff --git a/arch/um/drivers/stdio_console.h b/arch/um/drivers/stdio_console.h index 6d8275f71fd4..3a409ec23d63 100644 --- a/arch/um/drivers/stdio_console.h +++ b/arch/um/drivers/stdio_console.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __STDIO_CONSOLE_H diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c index eaa201bca5ed..884a762d21c7 100644 --- a/arch/um/drivers/tty.c +++ b/arch/um/drivers/tty.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) - * Licensed under the GPL */ #include <errno.h> diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h index cc1cc85f5afc..2985c14661f4 100644 --- a/arch/um/drivers/ubd.h +++ b/arch/um/drivers/ubd.h @@ -1,14 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) - * Licensed under the GPL */ #ifndef __UM_UBD_USER_H #define __UM_UBD_USER_H -extern int start_io_thread(unsigned long sp, int *fds_out); -extern int io_thread(void *arg); +#include <os.h> + +int start_io_thread(struct os_helper_thread **td_out, int *fd_out); +void *io_thread(void *arg); extern int kernel_fd; extern int ubd_read_poll(int timeout); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index a4a41421c5e2..37455e74d314 
100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 Cambridge Greys Ltd * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ /* 2001-09-28...2002-04-17 @@ -27,6 +27,7 @@ #include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> +#include <linux/major.h> #include <linux/cdrom.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -35,7 +36,6 @@ #include <linux/vmalloc.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> -#include <asm/tlbflush.h> #include <kern_util.h> #include "mconsole_kern.h" #include <init.h> @@ -47,18 +47,25 @@ /* Max request size is determined by sector mask - 32K */ #define UBD_MAX_REQUEST (8 * sizeof(long)) +struct io_desc { + char *buffer; + unsigned long length; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; +}; + struct io_thread_req { struct request *req; int fds[2]; unsigned long offsets[2]; unsigned long long offset; - unsigned long length; - char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned long bitmap_words[2]; int error; + + int desc_cnt; + /* io_desc has to be the last element of the struct */ + struct io_desc io_desc[]; }; @@ -98,29 +105,20 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) #define DRIVER_NAME "uml-blkdev" static DEFINE_MUTEX(ubd_lock); -static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ -static int ubd_open(struct block_device *bdev, fmode_t mode); -static void ubd_release(struct gendisk *disk, fmode_t mode); -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); -static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); +static int 
ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo); #define MAX_DEV (16) static const struct block_device_operations ubd_blops = { .owner = THIS_MODULE, - .open = ubd_open, - .release = ubd_release, .ioctl = ubd_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, .getgeo = ubd_getgeo, }; -/* Protected by ubd_lock */ -static int fake_major = UBD_MAJOR; -static struct gendisk *ubd_gendisk[MAX_DEV]; -static struct gendisk *fake_gendisk[MAX_DEV]; - #ifdef CONFIG_BLK_DEV_UBD_SYNC #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ .cl = 1 }) @@ -147,7 +145,7 @@ struct ubd { /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; - int count; + char *serial; int fd; __u64 size; struct openflags boot_openflags; @@ -157,7 +155,7 @@ struct ubd { unsigned no_trim:1; struct cow cow; struct platform_device pdev; - struct request_queue *queue; + struct gendisk *disk; struct blk_mq_tag_set tag_set; spinlock_t lock; }; @@ -172,7 +170,7 @@ struct ubd { #define DEFAULT_UBD { \ .file = NULL, \ - .count = 0, \ + .serial = NULL, \ .fd = -1, \ .size = -1, \ .boot_openflags = OPEN_FLAGS, \ @@ -187,54 +185,19 @@ struct ubd { /* Protected by ubd_lock */ static struct ubd ubd_devs[MAX_DEV] = { [0 ... 
MAX_DEV - 1] = DEFAULT_UBD }; -/* Only changed by fake_ide_setup which is a setup */ -static int fake_ide = 0; -static struct proc_dir_entry *proc_ide_root = NULL; -static struct proc_dir_entry *proc_ide = NULL; - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd); -static void make_proc_ide(void) -{ - proc_ide_root = proc_mkdir("ide", NULL); - proc_ide = proc_mkdir("ide0", proc_ide_root); -} - -static int fake_ide_media_proc_show(struct seq_file *m, void *v) -{ - seq_puts(m, "disk\n"); - return 0; -} - -static void make_ide_entries(const char *dev_name) -{ - struct proc_dir_entry *dir, *ent; - char name[64]; - - if(proc_ide_root == NULL) make_proc_ide(); - - dir = proc_mkdir(dev_name, proc_ide); - if(!dir) return; - - ent = proc_create_single("media", S_IRUGO, dir, - fake_ide_media_proc_show); - if(!ent) return; - snprintf(name, sizeof(name), "ide0/%s", dev_name); - proc_symlink(dev_name, proc_ide_root, name); -} - static int fake_ide_setup(char *str) { - fake_ide = 1; + pr_warn("The fake_ide option has been removed\n"); return 1; } - __setup("fake_ide", fake_ide_setup); __uml_help(fake_ide_setup, "fake_ide\n" -" Create ide0 entries that map onto ubd devices.\n\n" +" Obsolete stub.\n\n" ); static int parse_unit(char **ptr) @@ -264,42 +227,20 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) { struct ubd *ubd_dev; struct openflags flags = global_openflags; - char *backing_file; + char *file, *backing_file, *serial; int n, err = 0, i; if(index_out) *index_out = -1; n = *str; if(n == '='){ - char *end; - int major; - str++; if(!strcmp(str, "sync")){ global_openflags = of_sync(global_openflags); - goto out1; - } - - err = -EINVAL; - major = simple_strtoul(str, &end, 0); - if((*end != '\0') || (end == str)){ - *error_out = "Didn't parse major number"; - goto out1; + return err; } - mutex_lock(&ubd_lock); - if (fake_major != UBD_MAJOR) { - *error_out = "Can't assign a fake major twice"; - goto out1; 
- } - - fake_major = major; - - printk(KERN_INFO "Setting extra ubd major number to %d\n", - major); - err = 0; - out1: - mutex_unlock(&ubd_lock); - return err; + pr_warn("fake major not supported any more\n"); + return 0; } n = parse_unit(&str); @@ -360,24 +301,27 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) goto out; break_loop: - backing_file = strchr(str, ','); + file = strsep(&str, ",:"); + if (*file == '\0') + file = NULL; - if (backing_file == NULL) - backing_file = strchr(str, ':'); + backing_file = strsep(&str, ",:"); + if (backing_file && *backing_file == '\0') + backing_file = NULL; - if(backing_file != NULL){ - if(ubd_dev->no_cow){ - *error_out = "Can't specify both 'd' and a cow file"; - goto out; - } - else { - *backing_file = '\0'; - backing_file++; - } + serial = strsep(&str, ",:"); + if (serial && *serial == '\0') + serial = NULL; + + if (backing_file && ubd_dev->no_cow) { + *error_out = "Can't specify both 'd' and a cow file"; + goto out; } + err = 0; - ubd_dev->file = str; + ubd_dev->file = file; ubd_dev->cow.file = backing_file; + ubd_dev->serial = serial; ubd_dev->boot_openflags = flags; out: mutex_unlock(&ubd_lock); @@ -398,7 +342,7 @@ static int ubd_setup(char *str) __setup("ubd", ubd_setup); __uml_help(ubd_setup, -"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" +"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n" " This is used to associate a device with a file in the underlying\n" " filesystem. When specifying two filenames, the first one is the\n" " COW name and the second is the backing file name. 
As separator you can\n" @@ -421,6 +365,12 @@ __uml_help(ubd_setup, " UMLs and file locking will be turned off - this is appropriate for a\n" " cluster filesystem and inappropriate at almost all other times.\n\n" " 't' will disable trim/discard support on the device (enabled by default).\n\n" +" An optional device serial number can be exposed using the serial parameter\n" +" on the cmdline which is exposed as a sysfs entry. This is particularly\n" +" useful when a unique number should be given to the device. Note when\n" +" specifying a label, the filename2 must be also presented. It can be\n" +" an empty string, in which case the backing file is not used:\n" +" ubd0=File,,Serial\n\n" ); static int udb_setup(char *str) @@ -495,59 +445,41 @@ static int bulk_req_safe_read( return n; } -/* Called without dev->lock held, and only in interrupt context. */ -static void ubd_handler(void) +static void ubd_end_request(struct io_thread_req *io_req) { - int n; - int count; - - while(1){ - n = bulk_req_safe_read( - thread_fd, - irq_req_buffer, - &irq_remainder, - &irq_remainder_size, - UBD_REQ_BUFFER_SIZE - ); - if (n < 0) { - if(n == -EAGAIN) - break; - printk(KERN_ERR "spurious interrupt in ubd_handler, " - "err = %d\n", -n); - return; - } - for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - struct io_thread_req *io_req = (*irq_req_buffer)[count]; - - if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { - blk_queue_max_discard_sectors(io_req->req->q, 0); - blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); - } - if ((io_req->error) || (io_req->buffer == NULL)) - blk_mq_end_request(io_req->req, io_req->error); - else { - if (!blk_update_request(io_req->req, io_req->error, io_req->length)) - __blk_mq_end_request(io_req->req, io_req->error); - } - kfree(io_req); - } + if (io_req->error == BLK_STS_NOTSUPP) { + if (req_op(io_req->req) == REQ_OP_DISCARD) + 
blk_queue_disable_discard(io_req->req->q); + else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES) + blk_queue_disable_write_zeroes(io_req->req->q); } + blk_mq_end_request(io_req->req, io_req->error); + kfree(io_req); } static irqreturn_t ubd_intr(int irq, void *dev) { - ubd_handler(); + int len, i; + + while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer, + &irq_remainder, &irq_remainder_size, + UBD_REQ_BUFFER_SIZE)) >= 0) { + for (i = 0; i < len / sizeof(struct io_thread_req *); i++) + ubd_end_request((*irq_req_buffer)[i]); + } + + if (len < 0 && len != -EAGAIN) + pr_err("spurious interrupt in %s, err = %d\n", __func__, len); return IRQ_HANDLED; } /* Only changed by ubd_init, which is an initcall. */ -static int io_pid = -1; +static struct os_helper_thread *io_td; static void kill_io_thread(void) { - if(io_pid != -1) - os_kill_process(io_pid, 1); + if (io_td) + os_kill_helper_thread(io_td); } __uml_exitcall(kill_io_thread); @@ -561,7 +493,7 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) __u32 version; __u32 align; char *backing_file; - time_t mtime; + time64_t mtime; unsigned long long size; int sector_size; int bitmap_offset; @@ -600,9 +532,9 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) return 0; } -static int backing_file_mismatch(char *file, __u64 size, time_t mtime) +static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) { - unsigned long modtime; + time64_t modtime; unsigned long long actual; int err; @@ -628,7 +560,7 @@ static int backing_file_mismatch(char *file, __u64 size, time_t mtime) return -EINVAL; } if (modtime != mtime) { - printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " + printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " "backing file\n", mtime, modtime); return -EINVAL; } @@ -671,7 +603,7 @@ static int open_ubd_file(char *file, struct openflags *openflags, int shared, unsigned long *bitmap_len_out, int *data_offset_out, int *create_cow_out) { 
- time_t mtime; + time64_t mtime; unsigned long long size; __u32 version, align; char *backing_file; @@ -819,15 +751,12 @@ static int ubd_open_dev(struct ubd *ubd_dev) ubd_dev->fd = fd; if(ubd_dev->cow.file != NULL){ - blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); - err = -ENOMEM; ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); if(ubd_dev->cow.bitmap == NULL){ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); goto error; } - flush_tlb_kernel_vm(); err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset, @@ -842,14 +771,6 @@ static int ubd_open_dev(struct ubd *ubd_dev) if(err < 0) goto error; ubd_dev->cow.fd = err; } - if (ubd_dev->no_trim == 0) { - ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; - ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; - blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); - blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); - } - blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); return 0; error: os_close_file(ubd_dev->fd); @@ -858,50 +779,47 @@ static int ubd_open_dev(struct ubd *ubd_dev) static void ubd_device_release(struct device *dev) { - struct ubd *ubd_dev = dev_get_drvdata(dev); + struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev); - blk_cleanup_queue(ubd_dev->queue); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } -static int ubd_disk_register(int major, u64 size, int unit, - struct gendisk **disk_out) +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct device *parent = NULL; - struct gendisk *disk; + struct gendisk *disk = dev_to_disk(dev); + struct ubd *ubd_dev = disk->private_data; - disk = alloc_disk(1 << UBD_SHIFT); - if(disk == NULL) - return -ENOMEM; + if (!ubd_dev) + return 0; - disk->major = major; - disk->first_minor = unit << UBD_SHIFT; - disk->fops = &ubd_blops; - 
set_capacity(disk, size / 512); - if (major == UBD_MAJOR) - sprintf(disk->disk_name, "ubd%c", 'a' + unit); - else - sprintf(disk->disk_name, "ubd_fake%d", unit); - - /* sysfs register (not for ide fake devices) */ - if (major == UBD_MAJOR) { - ubd_devs[unit].pdev.id = unit; - ubd_devs[unit].pdev.name = DRIVER_NAME; - ubd_devs[unit].pdev.dev.release = ubd_device_release; - dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); - platform_device_register(&ubd_devs[unit].pdev); - parent = &ubd_devs[unit].pdev.dev; - } + return sprintf(buf, "%s", ubd_dev->serial); +} - disk->private_data = &ubd_devs[unit]; - disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk, NULL); +static DEVICE_ATTR_RO(serial); - *disk_out = disk; - return 0; +static struct attribute *ubd_attrs[] = { + &dev_attr_serial.attr, + NULL, +}; + +static umode_t ubd_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + return a->mode; } +static const struct attribute_group ubd_attr_group = { + .attrs = ubd_attrs, + .is_visible = ubd_attrs_are_visible, +}; + +static const struct attribute_group *ubd_attr_groups[] = { + &ubd_attr_group, + NULL, +}; + #define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) static const struct blk_mq_ops ubd_mq_ops = { @@ -911,66 +829,86 @@ static const struct blk_mq_ops ubd_mq_ops = { static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; + struct queue_limits lim = { + .max_segments = MAX_SG, + .seg_boundary_mask = PAGE_SIZE - 1, + .features = BLK_FEAT_WRITE_CACHE, + }; + struct gendisk *disk; int err = 0; if(ubd_dev->file == NULL) goto out; + if (ubd_dev->cow.file) + lim.max_hw_sectors = 8 * sizeof(long); + if (!ubd_dev->no_trim) { + lim.max_hw_discard_sectors = UBD_MAX_REQUEST; + lim.max_write_zeroes_sectors = UBD_MAX_REQUEST; + } + err = ubd_file_size(ubd_dev, &ubd_dev->size); if(err < 0){ *error_out = "Couldn't determine size of device's file"; goto out; } + err = ubd_open_dev(ubd_dev); + if 
(err) { + pr_err("ubd%c: Can't open \"%s\": errno = %d\n", + 'a' + n, ubd_dev->file, -err); + goto out; + } + ubd_dev->size = ROUND_BLOCK(ubd_dev->size); ubd_dev->tag_set.ops = &ubd_mq_ops; ubd_dev->tag_set.queue_depth = 64; ubd_dev->tag_set.numa_node = NUMA_NO_NODE; - ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ubd_dev->tag_set.driver_data = ubd_dev; ubd_dev->tag_set.nr_hw_queues = 1; err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); if (err) - goto out; + goto out_close; - ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); - if (IS_ERR(ubd_dev->queue)) { - err = PTR_ERR(ubd_dev->queue); - goto out_cleanup; + disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); + goto out_cleanup_tags; } - ubd_dev->queue->queuedata = ubd_dev; - blk_queue_write_cache(ubd_dev->queue, true, false); + disk->major = UBD_MAJOR; + disk->first_minor = n << UBD_SHIFT; + disk->minors = 1 << UBD_SHIFT; + disk->fops = &ubd_blops; + set_capacity(disk, ubd_dev->size / 512); + sprintf(disk->disk_name, "ubd%c", 'a' + n); + disk->private_data = ubd_dev; + set_disk_ro(disk, !ubd_dev->openflags.w); - blk_queue_max_segments(ubd_dev->queue, MAX_SG); - err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); - if(err){ - *error_out = "Failed to register device"; - goto out_cleanup_tags; - } + ubd_dev->pdev.id = n; + ubd_dev->pdev.name = DRIVER_NAME; + ubd_dev->pdev.dev.release = ubd_device_release; + dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev); + platform_device_register(&ubd_dev->pdev); - if (fake_major != UBD_MAJOR) - ubd_disk_register(fake_major, ubd_dev->size, n, - &fake_gendisk[n]); + err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups); + if (err) + goto out_cleanup_disk; - /* - * Perhaps this should also be under the "if (fake_major)" above - * using the fake_disk->disk_name - */ - if (fake_ide) - make_ide_entries(ubd_gendisk[n]->disk_name); + ubd_dev->disk = disk; - err = 0; -out: - return err; + return 0; 
+out_cleanup_disk: + put_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&ubd_dev->tag_set); -out_cleanup: - blk_cleanup_queue(ubd_dev->queue); - goto out; +out_close: + ubd_close_dev(ubd_dev); +out: + return err; } static int ubd_config(char *str, char **error_out) @@ -1054,7 +992,6 @@ static int ubd_id(char **str, int *start_out, int *end_out) static int ubd_remove(int n, char **error_out) { - struct gendisk *disk = ubd_gendisk[n]; struct ubd *ubd_dev; int err = -ENODEV; @@ -1065,21 +1002,15 @@ static int ubd_remove(int n, char **error_out) if(ubd_dev->file == NULL) goto out; - /* you cannot remove a open disk */ - err = -EBUSY; - if(ubd_dev->count > 0) - goto out; - - ubd_gendisk[n] = NULL; - if(disk != NULL){ - del_gendisk(disk); - put_disk(disk); - } + if (ubd_dev->disk) { + /* you cannot remove a open disk */ + err = -EBUSY; + if (disk_openers(ubd_dev->disk)) + goto out; - if(fake_gendisk[n] != NULL){ - del_gendisk(fake_gendisk[n]); - put_disk(fake_gendisk[n]); - fake_gendisk[n] = NULL; + del_gendisk(ubd_dev->disk); + ubd_close_dev(ubd_dev); + put_disk(ubd_dev->disk); } err = 0; @@ -1138,14 +1069,6 @@ static int __init ubd_init(void) if (register_blkdev(UBD_MAJOR, "ubd")) return -1; - if (fake_major != UBD_MAJOR) { - char name[sizeof("ubd_nnn\0")]; - - snprintf(name, sizeof(name), "ubd_%d", fake_major); - if (register_blkdev(fake_major, "ubd")) - return -1; - } - irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), GFP_KERNEL @@ -1154,7 +1077,7 @@ static int __init ubd_init(void) if (irq_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, sizeof(struct io_thread_req *), @@ -1165,7 +1088,7 @@ static int __init ubd_init(void) if (io_req_buffer == NULL) { printk(KERN_ERR "Failed to initialize ubd buffering\n"); - return -1; + return -ENOMEM; } platform_driver_register(&ubd_driver); mutex_lock(&ubd_lock); @@ -1181,8 
+1104,8 @@ static int __init ubd_init(void) late_initcall(ubd_init); -static int __init ubd_driver_init(void){ - unsigned long stack; +static int __init ubd_driver_init(void) +{ int err; /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ @@ -1191,64 +1114,22 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), - &thread_fd); - if(io_pid < 0){ + err = start_io_thread(&io_td, &thread_fd); + if (err < 0) { printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " - "falling back to synchronous I/O\n", -io_pid); - io_pid = -1; + "falling back to synchronous I/O\n", -err); return 0; } err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 0, "ubd", ubd_devs); - if(err != 0) + if(err < 0) printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); return 0; } device_initcall(ubd_driver_init); -static int ubd_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct ubd *ubd_dev = disk->private_data; - int err = 0; - - mutex_lock(&ubd_mutex); - if(ubd_dev->count == 0){ - err = ubd_open_dev(ubd_dev); - if(err){ - printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", - disk->disk_name, ubd_dev->file, -err); - goto out; - } - } - ubd_dev->count++; - set_disk_ro(disk, !ubd_dev->openflags.w); - - /* This should no more be needed. 
And it didn't work anyway to exclude - * read-write remounting of filesystems.*/ - /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ - if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); - err = -EROFS; - }*/ -out: - mutex_unlock(&ubd_mutex); - return err; -} - -static void ubd_release(struct gendisk *disk, fmode_t mode) -{ - struct ubd *ubd_dev = disk->private_data; - - mutex_lock(&ubd_mutex); - if(--ubd_dev->count == 0) - ubd_close_dev(ubd_dev); - mutex_unlock(&ubd_mutex); -} - static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, __u64 *cow_offset, unsigned long *bitmap, __u64 bitmap_offset, unsigned long *bitmap_words, @@ -1288,37 +1169,73 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, *cow_offset += bitmap_offset; } -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, +static void cowify_req(struct io_thread_req *req, struct io_desc *segment, + unsigned long offset, unsigned long *bitmap, __u64 bitmap_offset, __u64 bitmap_len) { - __u64 sector = req->offset >> SECTOR_SHIFT; + __u64 sector = offset >> SECTOR_SHIFT; int i; - if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) + if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) panic("Operation too long"); if (req_op(req->req) == REQ_OP_READ) { - for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { + for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) - &req->sector_mask); + &segment->sector_mask); + } + } else { + cowify_bitmap(offset, segment->length, &segment->sector_mask, + &segment->cow_offset, bitmap, bitmap_offset, + segment->bitmap_words, bitmap_len); + } +} + +static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, + struct request *req) +{ + struct bio_vec bvec; + struct req_iterator iter; + int i = 0; + unsigned long byte_offset = io_req->offset; + enum req_op op = req_op(req); 
+ + if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { + io_req->io_desc[0].buffer = NULL; + io_req->io_desc[0].length = blk_rq_bytes(req); + } else { + rq_for_each_segment(bvec, req, iter) { + BUG_ON(i >= io_req->desc_cnt); + + io_req->io_desc[i].buffer = bvec_virt(&bvec); + io_req->io_desc[i].length = bvec.bv_len; + i++; + } + } + + if (dev->cow.file) { + for (i = 0; i < io_req->desc_cnt; i++) { + cowify_req(io_req, &io_req->io_desc[i], byte_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + byte_offset += io_req->io_desc[i].length; } + } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); } -static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, - u64 off, struct bio_vec *bvec) +static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, + int desc_cnt) { - struct ubd *dev = hctx->queue->queuedata; struct io_thread_req *io_req; - int ret; + int i; - io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + io_req = kmalloc(sizeof(*io_req) + + (desc_cnt * sizeof(struct io_desc)), + GFP_ATOMIC); if (!io_req) - return -ENOMEM; + return NULL; io_req->req = req; if (dev->cow.file) @@ -1326,26 +1243,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, else io_req->fds[0] = dev->fd; io_req->error = 0; - - if (bvec != NULL) { - io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; - io_req->length = bvec->bv_len; - } else { - io_req->buffer = NULL; - io_req->length = blk_rq_bytes(req); - } - io_req->sectorsize = SECTOR_SIZE; io_req->fds[1] = dev->fd; - io_req->cow_offset = -1; - io_req->offset = off; - io_req->sector_mask = 0; + io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - if (dev->cow.file) - cowify_req(io_req, dev->cow.bitmap, - dev->cow.bitmap_offset, dev->cow.bitmap_len); + for 
(i = 0 ; i < desc_cnt; i++) { + io_req->io_desc[i].sector_mask = 0; + io_req->io_desc[i].cow_offset = -1; + } + + return io_req; +} + +static int ubd_submit_request(struct ubd *dev, struct request *req) +{ + int segs = 0; + struct io_thread_req *io_req; + int ret; + enum req_op op = req_op(req); + + if (op == REQ_OP_FLUSH) + segs = 0; + else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) + segs = 1; + else + segs = blk_rq_nr_phys_segments(req); + + io_req = ubd_alloc_req(dev, req, segs); + if (!io_req) + return -ENOMEM; + + io_req->desc_cnt = segs; + if (segs) + ubd_map_req(dev, io_req, req); ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { @@ -1356,22 +1288,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, return ret; } -static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) -{ - struct req_iterator iter; - struct bio_vec bvec; - int ret; - u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; - - rq_for_each_segment(bvec, req, iter) { - ret = ubd_queue_one_vec(hctx, req, off, &bvec); - if (ret < 0) - return ret; - off += bvec.bv_len; - } - return 0; -} - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1384,17 +1300,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&ubd_dev->lock); switch (req_op(req)) { - /* operations with no lentgth/offset arguments */ case REQ_OP_FLUSH: - ret = ubd_queue_one_vec(hctx, req, 0, NULL); - break; case REQ_OP_READ: case REQ_OP_WRITE: - ret = queue_rw_req(hctx, req); - break; case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); + ret = ubd_submit_request(ubd_dev, req); break; default: WARN_ON_ONCE(1); @@ -1403,15 +1314,19 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&ubd_dev->lock); - if (ret < 0) - blk_mq_requeue_request(req, true); + if (ret < 0) { + if (ret == 
-ENOMEM) + res = BLK_STS_RESOURCE; + else + res = BLK_STS_DEV_RESOURCE; + } return res; } -static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int ubd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct ubd *ubd_dev = bdev->bd_disk->private_data; + struct ubd *ubd_dev = disk->private_data; geo->heads = 128; geo->sectors = 32; @@ -1419,7 +1334,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct ubd *ubd_dev = bdev->bd_disk->private_data; @@ -1478,22 +1393,22 @@ static int map_error(int error_code) * will result in unpredictable behaviour and/or crashes. */ -static int update_bitmap(struct io_thread_req *req) +static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) { int n; - if(req->cow_offset == -1) + if (segment->cow_offset == -1) return map_error(0); - n = os_pwrite_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words), req->cow_offset); - if (n != sizeof(req->bitmap_words)) + n = os_pwrite_file(req->fds[1], &segment->bitmap_words, + sizeof(segment->bitmap_words), segment->cow_offset); + if (n != sizeof(segment->bitmap_words)) return map_error(-n); return map_error(0); } -static void do_io(struct io_thread_req *req) +static void do_io(struct io_thread_req *req, struct io_desc *desc) { char *buf = NULL; unsigned long len; @@ -1508,21 +1423,20 @@ static void do_io(struct io_thread_req *req) return; } - nsectors = req->length / req->sectorsize; + nsectors = desc->length / req->sectorsize; start = 0; do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); end = start; while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) + (ubd_test_bit(end, (unsigned char *) 
&desc->sector_mask) == bit)) end++; off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; - if (req->buffer != NULL) - buf = &req->buffer[start * req->sectorsize]; + if (desc->buffer != NULL) + buf = &desc->buffer[start * req->sectorsize]; switch (req_op(req->req)) { case REQ_OP_READ: @@ -1546,13 +1460,19 @@ static void do_io(struct io_thread_req *req) } break; case REQ_OP_DISCARD: - case REQ_OP_WRITE_ZEROES: n = os_falloc_punch(req->fds[bit], off, len); if (n) { req->error = map_error(-n); return; } break; + case REQ_OP_WRITE_ZEROES: + n = os_falloc_zeroes(req->fds[bit], off, len); + if (n) { + req->error = map_error(-n); + return; + } + break; default: WARN_ON_ONCE(1); req->error = BLK_STS_NOTSUPP; @@ -1562,7 +1482,8 @@ static void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - req->error = update_bitmap(req); + req->offset += len; + req->error = update_bitmap(req, desc); } /* Changed in start_io_thread, which is serialized by being called only @@ -1571,13 +1492,13 @@ static void do_io(struct io_thread_req *req) int kernel_fd = -1; /* Only changed by the io thread. XXX: currently unused. 
*/ -static int io_count = 0; +static int io_count; -int io_thread(void *arg) +void *io_thread(void *arg) { int n, count, written, res; - os_fix_helper_signals(); + os_fix_helper_thread_signals(); while(1){ n = bulk_req_safe_read( @@ -1587,22 +1508,29 @@ int io_thread(void *arg) &io_remainder_size, UBD_REQ_BUFFER_SIZE ); - if (n < 0) { - if (n == -EAGAIN) { + if (n <= 0) { + if (n == -EAGAIN) ubd_read_poll(-1); - continue; - } + + continue; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *req = (*io_req_buffer)[count]; + int i; + io_count++; - do_io((*io_req_buffer)[count]); + for (i = 0; !req->error && i < req->desc_cnt; i++) + do_io(req, &(req->io_desc[i])); + } written = 0; do { - res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); if (res >= 0) { written += res; } @@ -1612,5 +1540,5 @@ int io_thread(void *arg) } while (written < n); } - return 0; + return NULL; } diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index 6f744794d141..8e8a8bf518b6 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 Anton Ivanov (aivanov@brocade.com) * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) - * Licensed under the GPL */ #include <stddef.h> @@ -23,11 +23,11 @@ #include <os.h> #include <poll.h> -struct pollfd kernel_pollfd; +static struct pollfd kernel_pollfd; -int start_io_thread(unsigned long sp, int *fd_out) +int start_io_thread(struct os_helper_thread **td_out, int *fd_out) { - int pid, fds[2], err; + int fds[2], err; err = os_pipe(fds, 1, 1); if(err < 0){ @@ -41,20 +41,20 @@ int start_io_thread(unsigned long sp, int *fd_out) *fd_out = fds[1]; err = os_set_fd_block(*fd_out, 0); - err = os_set_fd_block(kernel_fd, 0); + err |= 
os_set_fd_block(kernel_fd, 0); if (err) { printk("start_io_thread - failed to set nonblocking I/O.\n"); goto out_close; } - pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); - if(pid < 0){ - err = -errno; - printk("start_io_thread - clone failed : errno = %d\n", errno); + err = os_run_helper_thread(td_out, io_thread, NULL); + if (err < 0) { + printk("%s - failed to run helper thread, err = %d\n", + __func__, -err); goto out_close; } - return(pid); + return 0; out_close: os_close_file(fds[0]); diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h deleted file mode 100644 index c190c6440911..000000000000 --- a/arch/um/drivers/umcast.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#ifndef __DRIVERS_UMCAST_H -#define __DRIVERS_UMCAST_H - -#include <net_user.h> - -struct umcast_data { - char *addr; - unsigned short lport; - unsigned short rport; - void *listen_addr; - void *remote_addr; - int ttl; - int unicast; - void *dev; -}; - -extern const struct net_user_info umcast_user_info; - -extern int umcast_user_write(int fd, void *buf, int len, - struct umcast_data *pri); - -#endif diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c deleted file mode 100644 index f5ba6e377913..000000000000 --- a/arch/um/drivers/umcast_kern.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * user-mode-linux networking multicast transport - * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * - * based on the existing uml-networking code, which is - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * - * Licensed under the GPL. 
- */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include "umcast.h" -#include <net_kern.h> - -struct umcast_init { - char *addr; - int lport; - int rport; - int ttl; - bool unicast; -}; - -static void umcast_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct umcast_data *dpri; - struct umcast_init *init = data; - - pri = netdev_priv(dev); - dpri = (struct umcast_data *) pri->user; - dpri->addr = init->addr; - dpri->lport = init->lport; - dpri->rport = init->rport; - dpri->unicast = init->unicast; - dpri->ttl = init->ttl; - dpri->dev = dev; - - if (dpri->unicast) { - printk(KERN_INFO "ucast backend address: %s:%u listen port: " - "%u\n", dpri->addr, dpri->rport, dpri->lport); - } else { - printk(KERN_INFO "mcast backend multicast address: %s:%u, " - "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl); - } -} - -static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return umcast_user_write(fd, skb->data, skb->len, - (struct umcast_data *) &lp->user); -} - -static const struct net_kern_info umcast_kern_info = { - .init = umcast_init, - .protocol = eth_protocol, - .read = umcast_read, - .write = umcast_write, -}; - -static int mcast_setup(char *str, char **mac_out, void *data) -{ - struct umcast_init *init = data; - char *port_str = NULL, *ttl_str = NULL, *remain; - char *last; - - *init = ((struct umcast_init) - { .addr = "239.192.168.1", - .lport = 1102, - .ttl = 1 }); - - remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, - NULL); - if (remain != NULL) { - printk(KERN_ERR "mcast_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (port_str != NULL) { - init->lport = simple_strtoul(port_str, &last, 10); - if ((*last != '\0') || (last == port_str)) { - printk(KERN_ERR 
"mcast_setup - Bad port : '%s'\n", - port_str); - return 0; - } - } - - if (ttl_str != NULL) { - init->ttl = simple_strtoul(ttl_str, &last, 10); - if ((*last != '\0') || (last == ttl_str)) { - printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", - ttl_str); - return 0; - } - } - - init->unicast = false; - init->rport = init->lport; - - printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, - init->lport, init->ttl); - - return 1; -} - -static int ucast_setup(char *str, char **mac_out, void *data) -{ - struct umcast_init *init = data; - char *lport_str = NULL, *rport_str = NULL, *remain; - char *last; - - *init = ((struct umcast_init) - { .addr = "", - .lport = 1102, - .rport = 1102 }); - - remain = split_if_spec(str, mac_out, &init->addr, - &lport_str, &rport_str, NULL); - if (remain != NULL) { - printk(KERN_ERR "ucast_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (lport_str != NULL) { - init->lport = simple_strtoul(lport_str, &last, 10); - if ((*last != '\0') || (last == lport_str)) { - printk(KERN_ERR "ucast_setup - Bad listen port : " - "'%s'\n", lport_str); - return 0; - } - } - - if (rport_str != NULL) { - init->rport = simple_strtoul(rport_str, &last, 10); - if ((*last != '\0') || (last == rport_str)) { - printk(KERN_ERR "ucast_setup - Bad remote port : " - "'%s'\n", rport_str); - return 0; - } - } - - init->unicast = true; - - printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n", - init->lport, init->addr, init->rport); - - return 1; -} - -static struct transport mcast_transport = { - .list = LIST_HEAD_INIT(mcast_transport.list), - .name = "mcast", - .setup = mcast_setup, - .user = &umcast_user_info, - .kern = &umcast_kern_info, - .private_size = sizeof(struct umcast_data), - .setup_size = sizeof(struct umcast_init), -}; - -static struct transport ucast_transport = { - .list = LIST_HEAD_INIT(ucast_transport.list), - .name = "ucast", - .setup = ucast_setup, - .user = &umcast_user_info, - .kern = 
&umcast_kern_info, - .private_size = sizeof(struct umcast_data), - .setup_size = sizeof(struct umcast_init), -}; - -static int register_umcast(void) -{ - register_transport(&mcast_transport); - register_transport(&ucast_transport); - return 0; -} - -late_initcall(register_umcast); diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c deleted file mode 100644 index 6074184bb51b..000000000000 --- a/arch/um/drivers/umcast_user.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * user-mode-linux networking multicast transport - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> - * - * based on the existing uml-networking code, which is - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * - * Licensed under the GPL. - * - */ - -#include <unistd.h> -#include <errno.h> -#include <netinet/in.h> -#include "umcast.h" -#include <net_user.h> -#include <um_malloc.h> - -static struct sockaddr_in *new_addr(char *addr, unsigned short port) -{ - struct sockaddr_in *sin; - - sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL); - if (sin == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in " - "failed\n"); - return NULL; - } - sin->sin_family = AF_INET; - if (addr) - sin->sin_addr.s_addr = in_aton(addr); - else - sin->sin_addr.s_addr = INADDR_ANY; - sin->sin_port = htons(port); - return sin; -} - -static int umcast_user_init(void *data, void *dev) -{ - struct umcast_data *pri = data; - - pri->remote_addr = new_addr(pri->addr, pri->rport); - if (pri->unicast) - pri->listen_addr = new_addr(NULL, pri->lport); - else - pri->listen_addr = pri->remote_addr; - pri->dev = dev; - return 0; -} - -static void umcast_remove(void *data) -{ - struct umcast_data *pri = data; - - kfree(pri->listen_addr); - if (pri->unicast) - kfree(pri->remote_addr); - 
pri->listen_addr = pri->remote_addr = NULL; -} - -static int umcast_open(void *data) -{ - struct umcast_data *pri = data; - struct sockaddr_in *lsin = pri->listen_addr; - struct sockaddr_in *rsin = pri->remote_addr; - struct ip_mreq mreq; - int fd, yes = 1, err = -EINVAL; - - - if ((!pri->unicast && lsin->sin_addr.s_addr == 0) || - (rsin->sin_addr.s_addr == 0) || - (lsin->sin_port == 0) || (rsin->sin_port == 0)) - goto out; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - - if (fd < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open : data socket failed, " - "errno = %d\n", errno); - goto out; - } - - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, " - "errno = %d\n", errno); - goto out_close; - } - - if (!pri->unicast) { - /* set ttl according to config */ - if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, - sizeof(pri->ttl)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL " - "failed, error = %d\n", errno); - goto out_close; - } - - /* set LOOP, so data does get fed back to local sockets */ - if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, - &yes, sizeof(yes)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP " - "failed, error = %d\n", errno); - goto out_close; - } - } - - /* bind socket to the address */ - if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open : data bind failed, " - "errno = %d\n", errno); - goto out_close; - } - - if (!pri->unicast) { - /* subscribe to the multicast group */ - mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; - mreq.imr_interface.s_addr = 0; - if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, - &mreq, sizeof(mreq)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP " - "failed, error = %d\n", errno); - printk(UM_KERN_ERR "There appears not to be a " - "multicast-capable network interface on the " - 
"host.\n"); - printk(UM_KERN_ERR "eth0 should be configured in order " - "to use the multicast transport.\n"); - goto out_close; - } - } - - return fd; - - out_close: - close(fd); - out: - return err; -} - -static void umcast_close(int fd, void *data) -{ - struct umcast_data *pri = data; - - if (!pri->unicast) { - struct ip_mreq mreq; - struct sockaddr_in *lsin = pri->listen_addr; - - mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; - mreq.imr_interface.s_addr = 0; - if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, - &mreq, sizeof(mreq)) < 0) { - printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP " - "failed, error = %d\n", errno); - } - } - - close(fd); -} - -int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri) -{ - struct sockaddr_in *data_addr = pri->remote_addr; - - return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); -} - -const struct net_user_info umcast_user_info = { - .init = umcast_user_init, - .open = umcast_open, - .close = umcast_close, - .remove = umcast_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h deleted file mode 100644 index fc3a05902ba1..000000000000 --- a/arch/um/drivers/vde.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - * Licensed under the GPL. 
- */ - -#ifndef __UM_VDE_H__ -#define __UM_VDE_H__ - -struct vde_data { - char *vde_switch; - char *descr; - void *args; - void *conn; - void *dev; -}; - -struct vde_init { - char *vde_switch; - char *descr; - int port; - char *group; - int mode; -}; - -extern const struct net_user_info vde_user_info; - -extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init); - -extern int vde_user_read(void *conn, void *buf, int len); -extern int vde_user_write(void *conn, void *buf, int len); - -#endif diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c deleted file mode 100644 index 6a365fadc7c4..000000000000 --- a/arch/um/drivers/vde_kern.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - * Licensed under the GPL. - * - * Transport usage: - * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description> - * - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include <net_user.h> -#include "vde.h" - -static void vde_init(struct net_device *dev, void *data) -{ - struct vde_init *init = data; - struct uml_net_private *pri; - struct vde_data *vpri; - - pri = netdev_priv(dev); - vpri = (struct vde_data *) pri->user; - - vpri->vde_switch = init->vde_switch; - vpri->descr = init->descr ? init->descr : "UML vde_transport"; - vpri->args = NULL; - vpri->conn = NULL; - vpri->dev = dev; - - printk("vde backend - %s, ", vpri->vde_switch ? 
- vpri->vde_switch : "(default socket)"); - - vde_init_libstuff(vpri, init); - - printk("\n"); -} - -static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - struct vde_data *pri = (struct vde_data *) &lp->user; - - if (pri->conn != NULL) - return vde_user_read(pri->conn, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); - - printk(KERN_ERR "vde_read - we have no VDECONN to read from"); - return -EBADF; -} - -static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - struct vde_data *pri = (struct vde_data *) &lp->user; - - if (pri->conn != NULL) - return vde_user_write((void *)pri->conn, skb->data, - skb->len); - - printk(KERN_ERR "vde_write - we have no VDECONN to write to"); - return -EBADF; -} - -static const struct net_kern_info vde_kern_info = { - .init = vde_init, - .protocol = eth_protocol, - .read = vde_read, - .write = vde_write, -}; - -static int vde_setup(char *str, char **mac_out, void *data) -{ - struct vde_init *init = data; - char *remain, *port_str = NULL, *mode_str = NULL, *last; - - *init = ((struct vde_init) - { .vde_switch = NULL, - .descr = NULL, - .port = 0, - .group = NULL, - .mode = 0 }); - - remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str, - &init->group, &mode_str, &init->descr, NULL); - - if (remain != NULL) - printk(KERN_WARNING "vde_setup - Ignoring extra data :" - "'%s'\n", remain); - - if (port_str != NULL) { - init->port = simple_strtoul(port_str, &last, 10); - if ((*last != '\0') || (last == port_str)) { - printk(KERN_ERR "vde_setup - Bad port : '%s'\n", - port_str); - return 0; - } - } - - if (mode_str != NULL) { - init->mode = simple_strtoul(mode_str, &last, 8); - if ((*last != '\0') || (last == mode_str)) { - printk(KERN_ERR "vde_setup - Bad mode : '%s'\n", - mode_str); - return 0; - } - } - - printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ? 
- init->vde_switch : "(default socket)"); - - return 1; -} - -static struct transport vde_transport = { - .list = LIST_HEAD_INIT(vde_transport.list), - .name = "vde", - .setup = vde_setup, - .user = &vde_user_info, - .kern = &vde_kern_info, - .private_size = sizeof(struct vde_data), - .setup_size = sizeof(struct vde_init), -}; - -static int register_vde(void) -{ - register_transport(&vde_transport); - return 0; -} - -late_initcall(register_vde); diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c deleted file mode 100644 index 64cb630d1157..000000000000 --- a/arch/um/drivers/vde_user.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - * Licensed under the GPL. - */ - -#include <stddef.h> -#include <errno.h> -#include <libvdeplug.h> -#include <net_user.h> -#include <um_malloc.h> -#include "vde.h" - -static int vde_user_init(void *data, void *dev) -{ - struct vde_data *pri = data; - VDECONN *conn = NULL; - int err = -EINVAL; - - pri->dev = dev; - - conn = vde_open(pri->vde_switch, pri->descr, pri->args); - - if (conn == NULL) { - err = -errno; - printk(UM_KERN_ERR "vde_user_init: vde_open failed, " - "errno = %d\n", errno); - return err; - } - - printk(UM_KERN_INFO "vde backend - connection opened\n"); - - pri->conn = conn; - - return 0; -} - -static int vde_user_open(void *data) -{ - struct vde_data *pri = data; - - if (pri->conn != NULL) - return vde_datafd(pri->conn); - - printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open"); - return -EINVAL; -} - -static void vde_remove(void *data) -{ - struct vde_data *pri = data; - - if (pri->conn != NULL) { - printk(UM_KERN_INFO "vde backend - closing connection\n"); - vde_close(pri->conn); - pri->conn = NULL; - kfree(pri->args); - pri->args = NULL; - return; - } - - printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove"); -} - -const struct net_user_info vde_user_info = { - .init = vde_user_init, - .open = vde_user_open, - .close = NULL, - 
.remove = vde_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; - -void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) -{ - struct vde_open_args *args; - - vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); - if (vpri->args == NULL) { - printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " - "allocation failed"); - return; - } - - args = vpri->args; - - args->port = init->port; - args->group = init->group; - args->mode = init->mode ? init->mode : 0700; - - args->port ? printk("port %d", args->port) : - printk("undefined port"); -} - -int vde_user_read(void *conn, void *buf, int len) -{ - VDECONN *vconn = conn; - int rv; - - if (vconn == NULL) - return 0; - - rv = vde_recv(vconn, buf, len, 0); - if (rv < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (rv == 0) - return -ENOTCONN; - - return rv; -} - -int vde_user_write(void *conn, void *buf, int len) -{ - VDECONN *vconn = conn; - - if (vconn == NULL) - return 0; - - return vde_send(vconn, buf, len, 0); -} - diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 046fa9ea0ccc..25d9258fa592 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1,14 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2017 - Cambridge Greys Limited + * Copyright (C) 2017 - 2019 Cambridge Greys Limited * Copyright (C) 2011 - 2014 Cisco Systems Inc * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and * James Leu (jleu@mindspring.net). * Copyright (C) 2001 by various other people who didn't put their name here. - * Licensed under the GPL. 
*/ -#include <linux/version.h> +#define pr_fmt(fmt) "uml-vector: " fmt + #include <linux/memblock.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -21,10 +22,13 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/interrupt.h> +#include <linux/firmware.h> +#include <linux/fs.h> +#include <asm/atomic.h> +#include <uapi/linux/filter.h> #include <init.h> #include <irq_kern.h> #include <irq_user.h> -#include <net_kern.h> #include <os.h> #include "mconsole_kern.h" #include "vector_user.h" @@ -43,7 +47,6 @@ #define DRIVER_NAME "uml-vector" -#define DRIVER_VERSION "01" struct vector_cmd_line_arg { struct list_head list; int unit; @@ -66,6 +69,7 @@ static LIST_HEAD(vector_devices); static int driver_registered; static void vector_eth_configure(int n, struct arglist *def); +static int vector_mmsg_rx(struct vector_private *vp, int budget); /* Argument accessors to set variables (and/or set default values) * mtu, buffer sizing, default headroom, etc @@ -100,18 +104,33 @@ static const struct { static void vector_reset_stats(struct vector_private *vp) { + /* We reuse the existing queue locks for stats */ + + /* RX stats are modified with RX head_lock held + * in vector_poll. + */ + + spin_lock(&vp->rx_queue->head_lock); vp->estats.rx_queue_max = 0; vp->estats.rx_queue_running_average = 0; - vp->estats.tx_queue_max = 0; - vp->estats.tx_queue_running_average = 0; vp->estats.rx_encaps_errors = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; + spin_unlock(&vp->rx_queue->head_lock); + + /* TX stats are modified with TX head_lock held + * in vector_send. 
+ */ + + spin_lock(&vp->tx_queue->head_lock); vp->estats.tx_timeout_count = 0; vp->estats.tx_restart_queue = 0; vp->estats.tx_kicks = 0; vp->estats.tx_flow_control_xon = 0; vp->estats.tx_flow_control_xoff = 0; - vp->estats.sg_ok = 0; - vp->estats.sg_linearized = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + spin_unlock(&vp->tx_queue->head_lock); } static int get_mtu(struct arglist *def) @@ -121,11 +140,29 @@ static int get_mtu(struct arglist *def) if (mtu != NULL) { if (kstrtoul(mtu, 10, &result) == 0) - return result; + if ((result < (1 << 16) - 1) && (result >= 576)) + return result; } return ETH_MAX_PACKET; } +static char *get_bpf_file(struct arglist *def) +{ + return uml_vector_fetch_arg(def, "bpffile"); +} + +static bool get_bpf_flash(struct arglist *def) +{ + char *allow = uml_vector_fetch_arg(def, "bpfflash"); + long result; + + if (allow != NULL) { + if (kstrtoul(allow, 10, &result) == 0) + return result > 0; + } + return false; +} + static int get_depth(struct arglist *def) { char *mtu = uml_vector_fetch_arg(def, "depth"); @@ -174,6 +211,10 @@ static int get_transport_options(struct arglist *def) int vec_rx = VECTOR_RX; int vec_tx = VECTOR_TX; long parsed; + int result = 0; + + if (transport == NULL) + return -EINVAL; if (vector != NULL) { if (kstrtoul(vector, 10, &parsed) == 0) { @@ -184,12 +225,16 @@ static int get_transport_options(struct arglist *def) } } + if (get_bpf_flash(def)) + result = VECTOR_BPF_FLASH; if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) - return (vec_rx | VECTOR_BPF); + return result; + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return (result | vec_rx | VECTOR_BPF); if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) - return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS); - return (vec_rx | vec_tx); + return (result | vec_rx | vec_tx | VECTOR_QDISC_BYPASS); + return (result | vec_rx | vec_tx); } @@ -204,12 +249,6 @@ static int get_transport_options(struct arglist *def) static 
char *drop_buffer; -/* Array backed queues optimized for bulk enqueue/dequeue and - * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. - * For more details and full design rationale see - * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt - */ - /* * Advance the mmsg queue head by n = advance. Resets the queue to @@ -219,27 +258,13 @@ static char *drop_buffer; static int vector_advancehead(struct vector_queue *qi, int advance) { - int queue_depth; - qi->head = (qi->head + advance) % qi->max_depth; - spin_lock(&qi->tail_lock); - qi->queue_depth -= advance; - - /* we are at 0, use this to - * reset head and tail so we can use max size vectors - */ - - if (qi->queue_depth == 0) { - qi->head = 0; - qi->tail = 0; - } - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - return queue_depth; + atomic_sub(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } /* Advance the queue tail by n = advance. @@ -249,16 +274,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance) static int vector_advancetail(struct vector_queue *qi, int advance) { - int queue_depth; - qi->tail = (qi->tail + advance) % qi->max_depth; - spin_lock(&qi->head_lock); - qi->queue_depth += advance; - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); - return queue_depth; + atomic_add(advance, &qi->queue_depth); + return atomic_read(&qi->queue_depth); } static int prep_msg(struct vector_private *vp, @@ -311,9 +331,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) int iov_count; spin_lock(&qi->tail_lock); - spin_lock(&qi->head_lock); - queue_depth = qi->queue_depth; - spin_unlock(&qi->head_lock); + queue_depth = atomic_read(&qi->queue_depth); if (skb) packet_len = skb->len; @@ -332,6 +350,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) mmsg_vector->msg_hdr.msg_iovlen = iov_count; mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; mmsg_vector->msg_hdr.msg_namelen = 
vp->fds->remote_addr_size; + wmb(); /* Make the packet visible to the NAPI poll thread */ queue_depth = vector_advancetail(qi, 1); } else goto drop; @@ -370,7 +389,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count) } /* - * Generic vector deque via sendmmsg with support for forming headers + * Generic vector dequeue via sendmmsg with support for forming headers * using transport specific callback. Allows GRE, L2TPv3, RAW and * other transports to use a common dequeue procedure in vector mode */ @@ -380,71 +399,64 @@ static int vector_send(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *send_from; - int result = 0, send_len, queue_depth = qi->max_depth; + int result = 0, send_len; if (spin_trylock(&qi->head_lock)) { - if (spin_trylock(&qi->tail_lock)) { - /* update queue_depth to current value */ - queue_depth = qi->queue_depth; - spin_unlock(&qi->tail_lock); - while (queue_depth > 0) { - /* Calculate the start of the vector */ - send_len = queue_depth; - send_from = qi->mmsg_vector; - send_from += qi->head; - /* Adjust vector size if wraparound */ - if (send_len + qi->head > qi->max_depth) - send_len = qi->max_depth - qi->head; - /* Try to TX as many packets as possible */ - if (send_len > 0) { - result = uml_vector_sendmmsg( - vp->fds->tx_fd, - send_from, - send_len, - 0 - ); - vp->in_write_poll = - (result != send_len); - } - /* For some of the sendmmsg error scenarios - * we may end being unsure in the TX success - * for all packets. It is safer to declare - * them all TX-ed and blame the network. - */ - if (result < 0) { - if (net_ratelimit()) - netdev_err(vp->dev, "sendmmsg err=%i\n", - result); - result = send_len; - } - if (result > 0) { - queue_depth = - consume_vector_skbs(qi, result); - /* This is equivalent to an TX IRQ. - * Restart the upper layers to feed us - * more packets. 
- */ - if (result > vp->estats.tx_queue_max) - vp->estats.tx_queue_max = result; - vp->estats.tx_queue_running_average = - (vp->estats.tx_queue_running_average + result) >> 1; - } - netif_trans_update(qi->dev); - netif_wake_queue(qi->dev); - /* if TX is busy, break out of the send loop, - * poll write IRQ will reschedule xmit for us + /* update queue_depth to current value */ + while (atomic_read(&qi->queue_depth) > 0) { + /* Calculate the start of the vector */ + send_len = atomic_read(&qi->queue_depth); + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end being unsure in the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + vp->in_error = true; + result = send_len; + } + if (result > 0) { + consume_vector_skbs(qi, result); + /* This is equivalent to an TX IRQ. + * Restart the upper layers to feed us + * more packets. */ - if (result != send_len) { - vp->estats.tx_restart_queue++; - break; - } + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us. + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; } } spin_unlock(&qi->head_lock); - } else { - tasklet_schedule(&vp->tx_poll); } - return queue_depth; + return atomic_read(&qi->queue_depth); } /* Queue destructor. 
Deliberately stateless so we can use @@ -563,7 +575,7 @@ static struct vector_queue *create_queue( } spin_lock_init(&result->head_lock); spin_lock_init(&result->tail_lock); - result->queue_depth = 0; + atomic_set(&result->queue_depth, 0); result->head = 0; result->tail = 0; return result; @@ -579,7 +591,7 @@ out_fail: /* * We do not use the RX queue as a proper wraparound queue for now - * This is not necessary because the consumption via netif_rx() + * This is not necessary because the consumption via napi_gro_receive() * happens in-line. While we can try using the return code of * netif_rx() for flow control there are no drivers doing this today. * For this RX specific use we ignore the tail/head locks and @@ -642,18 +654,27 @@ done: } -/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */ static void prep_queue_for_rx(struct vector_queue *qi) { struct vector_private *vp = netdev_priv(qi->dev); struct mmsghdr *mmsg_vector = qi->mmsg_vector; void **skbuff_vector = qi->skbuff_vector; - int i; + int i, queue_depth; + + queue_depth = atomic_read(&qi->queue_depth); - if (qi->queue_depth == 0) + if (queue_depth == 0) return; - for (i = 0; i < qi->queue_depth; i++) { + + /* RX is always emptied 100% during each cycle, so we do not + * have to do the tail wraparound math for it. 
+ */ + + qi->head = qi->tail = 0; + + for (i = 0; i < queue_depth; i++) { /* it is OK if allocation fails - recvmmsg with NULL data in * iov argument still performs an RX, just drops the packet * This allows us stop faffing around with a "drop buffer" @@ -663,7 +684,7 @@ static void prep_queue_for_rx(struct vector_queue *qi) skbuff_vector++; mmsg_vector++; } - qi->queue_depth = 0; + atomic_set(&qi->queue_depth, 0); } static struct vector_device *find_device(int n) @@ -686,11 +707,9 @@ static struct vector_device *find_device(int n) static int vector_parse(char *str, int *index_out, char **str_out, char **error_out) { - int n, len, err; + int n, err; char *start = str; - len = strlen(str); - while ((*str != ':') && (strlen(str) > 1)) str++; if (*str != ':') { @@ -741,6 +760,7 @@ static int vector_config(char *str, char **error_out) if (parsed == NULL) { *error_out = "vector_config failed to parse parameters"; + kfree(params); return -EINVAL; } @@ -796,7 +816,8 @@ static struct platform_driver uml_net_driver = { static void vector_device_release(struct device *dev) { - struct vector_device *device = dev_get_drvdata(dev); + struct vector_device *device = + container_of(dev, struct vector_device, pdev.dev); struct net_device *netdev = device->dev; list_del(&device->list); @@ -842,6 +863,10 @@ static int vector_legacy_rx(struct vector_private *vp) } pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0); + if (pkt_len < 0) { + vp->in_error = true; + return pkt_len; + } if (skb != NULL) { if (pkt_len > vp->header_size) { @@ -863,7 +888,7 @@ static int vector_legacy_rx(struct vector_private *vp) skb->protocol = eth_type_trans(skb, skb->dev); vp->dev->stats.rx_bytes += skb->len; vp->dev->stats.rx_packets++; - netif_rx(skb); + napi_gro_receive(&vp->napi, skb); } else { dev_kfree_skb_irq(skb); } @@ -888,12 +913,16 @@ static int writev_tx(struct vector_private *vp, struct sk_buff *skb) if (iov_count < 1) goto drop; + pkt_len = uml_vector_writev( vp->fds->tx_fd, (struct iovec 
*) &iov, iov_count ); + if (pkt_len < 0) + goto drop; + netif_trans_update(vp->dev); netif_wake_queue(vp->dev); @@ -908,6 +937,8 @@ static int writev_tx(struct vector_private *vp, struct sk_buff *skb) drop: vp->dev->stats.tx_dropped++; consume_skb(skb); + if (pkt_len < 0) + vp->in_error = true; return pkt_len; } @@ -916,7 +947,7 @@ drop: * mmsg vector matched to an skb vector which we prepared earlier. */ -static int vector_mmsg_rx(struct vector_private *vp) +static int vector_mmsg_rx(struct vector_private *vp, int budget) { int packet_count, i; struct vector_queue *qi = vp->rx_queue; @@ -933,8 +964,14 @@ static int vector_mmsg_rx(struct vector_private *vp) /* Fire the Lazy Gun - get as many packets as we can in one go. */ + if (budget > qi->max_depth) + budget = qi->max_depth; + packet_count = uml_vector_recvmmsg( - vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + vp->fds->rx_fd, qi->mmsg_vector, budget, 0); + + if (packet_count < 0) + vp->in_error = true; if (packet_count <= 0) return packet_count; @@ -944,7 +981,7 @@ static int vector_mmsg_rx(struct vector_private *vp) * many do we need to prep the next time prep_queue_for_rx() is called. */ - qi->queue_depth = packet_count; + atomic_add(packet_count, &qi->queue_depth); for (i = 0; i < packet_count; i++) { skb = (*skbuff_vector); @@ -979,7 +1016,7 @@ static int vector_mmsg_rx(struct vector_private *vp) */ vp->dev->stats.rx_bytes += skb->len; vp->dev->stats.rx_packets++; - netif_rx(skb); + napi_gro_receive(&vp->napi, skb); } else { /* Overlay header too short to do anything - discard. 
* We can actually keep this skb and reuse it, @@ -1002,25 +1039,18 @@ static int vector_mmsg_rx(struct vector_private *vp) return packet_count; } -static void vector_rx(struct vector_private *vp) -{ - int err; - - if ((vp->options & VECTOR_RX) > 0) - while ((err = vector_mmsg_rx(vp)) > 0) - ; - else - while ((err = vector_legacy_rx(vp)) > 0) - ; - if ((err != 0) && net_ratelimit()) - netdev_err(vp->dev, "vector_rx: error(%d)\n", err); -} - static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); int queue_depth = 0; + if (vp->in_error) { + deactivate_fd(vp->fds->rx_fd, vp->rx_irq); + if ((vp->fds->rx_fd != vp->fds->tx_fd) && (vp->tx_irq != 0)) + deactivate_fd(vp->fds->tx_fd, vp->tx_irq); + return NETDEV_TX_BUSY; + } + if ((vp->options & VECTOR_TX) == 0) { writev_tx(vp, skb); return NETDEV_TX_OK; @@ -1033,25 +1063,15 @@ static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_sent_queue(vp->dev, skb->len); queue_depth = vector_enqueue(vp->tx_queue, skb); - /* if the device queue is full, stop the upper layers and - * flush it. 
- */ - - if (queue_depth >= vp->tx_queue->max_depth - 1) { - vp->estats.tx_kicks++; - netif_stop_queue(dev); - vector_send(vp->tx_queue); - return NETDEV_TX_OK; - } - if (skb->xmit_more) { + if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) { mod_timer(&vp->tl, vp->coalesce); return NETDEV_TX_OK; + } else { + queue_depth = vector_send(vp->tx_queue); + if (queue_depth > 0) + napi_schedule(&vp->napi); } - if (skb->len < TX_SMALL_PACKET) { - vp->estats.tx_kicks++; - vector_send(vp->tx_queue); - } else - tasklet_schedule(&vp->tx_poll); + return NETDEV_TX_OK; } @@ -1062,7 +1082,7 @@ static irqreturn_t vector_rx_interrupt(int irq, void *dev_id) if (!netif_running(dev)) return IRQ_NONE; - vector_rx(vp); + napi_schedule(&vp->napi); return IRQ_HANDLED; } @@ -1081,8 +1101,7 @@ static irqreturn_t vector_tx_interrupt(int irq, void *dev_id) * tweaking the IRQ mask less costly */ - if (vp->in_write_poll) - tasklet_schedule(&vp->tx_poll); + napi_schedule(&vp->napi); return IRQ_HANDLED; } @@ -1092,10 +1111,11 @@ static int irq_rr; static int vector_net_close(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; netif_stop_queue(dev); - del_timer(&vp->tl); + timer_delete(&vp->tl); + + vp->opened = false; if (vp->fds == NULL) return 0; @@ -1109,8 +1129,11 @@ static int vector_net_close(struct net_device *dev) um_free_irq(vp->tx_irq, dev); vp->tx_irq = 0; } - tasklet_kill(&vp->tx_poll); + napi_disable(&vp->napi); + netif_napi_del(&vp->napi); if (vp->fds->rx_fd > 0) { + if (vp->bpf) + uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); os_close_file(vp->fds->rx_fd); vp->fds->rx_fd = -1; } @@ -1118,7 +1141,10 @@ static int vector_net_close(struct net_device *dev) os_close_file(vp->fds->tx_fd); vp->fds->tx_fd = -1; } + if (vp->bpf != NULL) + kfree(vp->bpf->filter); kfree(vp->bpf); + vp->bpf = NULL; kfree(vp->fds->remote_addr); kfree(vp->transport_data); kfree(vp->header_rxbuffer); @@ -1129,21 +1155,38 @@ static int 
vector_net_close(struct net_device *dev) destroy_queue(vp->tx_queue); kfree(vp->fds); vp->fds = NULL; - spin_lock_irqsave(&vp->lock, flags); - vp->opened = false; - spin_unlock_irqrestore(&vp->lock, flags); + vp->in_error = false; return 0; } -/* TX tasklet */ - -static void vector_tx_poll(unsigned long data) +static int vector_poll(struct napi_struct *napi, int budget) { - struct vector_private *vp = (struct vector_private *)data; + struct vector_private *vp = container_of(napi, struct vector_private, napi); + int work_done = 0; + int err; + bool tx_enqueued = false; - vp->estats.tx_kicks++; - vector_send(vp->tx_queue); + if ((vp->options & VECTOR_TX) != 0) + tx_enqueued = (vector_send(vp->tx_queue) > 0); + spin_lock(&vp->rx_queue->head_lock); + if ((vp->options & VECTOR_RX) > 0) + err = vector_mmsg_rx(vp, budget); + else { + err = vector_legacy_rx(vp); + if (err > 0) + err = 1; + } + spin_unlock(&vp->rx_queue->head_lock); + if (err > 0) + work_done += err; + + if (tx_enqueued || err > 0) + napi_schedule(napi); + if (work_done <= budget) + napi_complete_done(napi, work_done); + return work_done; } + static void vector_reset_tx(struct work_struct *work) { struct vector_private *vp = @@ -1152,20 +1195,18 @@ static void vector_reset_tx(struct work_struct *work) netif_start_queue(vp->dev); netif_wake_queue(vp->dev); } + static int vector_net_open(struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); - unsigned long flags; int err = -EINVAL; struct vector_device *vdevice; - spin_lock_irqsave(&vp->lock, flags); - if (vp->opened) { - spin_unlock_irqrestore(&vp->lock, flags); + if (vp->opened) return -ENXIO; - } vp->opened = true; - spin_unlock_irqrestore(&vp->lock, flags); + + vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed)); vp->fds = uml_vector_user_open(vp->unit, vp->parsed); @@ -1182,7 +1223,7 @@ static int vector_net_open(struct net_device *dev) vp->rx_header_size, MAX_IOV_SIZE ); - vp->rx_queue->queue_depth = get_depth(vp->parsed); + 
atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed)); } else { vp->header_rxbuffer = kmalloc( vp->rx_header_size, @@ -1204,12 +1245,16 @@ static int vector_net_open(struct net_device *dev) goto out_close; } + netif_napi_add_weight(vp->dev, &vp->napi, vector_poll, + get_depth(vp->parsed)); + napi_enable(&vp->napi); + /* READ IRQ */ err = um_request_irq( irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd, IRQ_READ, vector_rx_interrupt, IRQF_SHARED, dev->name, dev); - if (err != 0) { + if (err < 0) { netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err); err = -ENETUNREACH; goto out_close; @@ -1224,7 +1269,7 @@ static int vector_net_open(struct net_device *dev) irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd, IRQ_WRITE, vector_tx_interrupt, IRQF_SHARED, dev->name, dev); - if (err != 0) { + if (err < 0) { netdev_err(dev, "vector_open: failed to get tx irq(%d)\n", err); err = -ENETUNREACH; @@ -1238,19 +1283,22 @@ static int vector_net_open(struct net_device *dev) if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd)) vp->options |= VECTOR_BPF; } - if ((vp->options & VECTOR_BPF) != 0) - vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr); + if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL)) + vp->bpf = uml_vector_default_bpf(dev->dev_addr); + + if (vp->bpf != NULL) + uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); netif_start_queue(dev); + vector_reset_stats(vp); /* clear buffer - it can happen that the host side of the interface * is full when we get here. In this case, new data is never queued, * SIGIOs never arrive, and the net never works. 
*/ - vector_rx(vp); + napi_schedule(&vp->napi); - vector_reset_stats(vp); vdevice = find_device(vp->unit); vdevice->opened = 1; @@ -1269,7 +1317,7 @@ static void vector_net_set_multicast_list(struct net_device *dev) return; } -static void vector_net_tx_timeout(struct net_device *dev) +static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vector_private *vp = netdev_priv(dev); @@ -1314,12 +1362,67 @@ static void vector_net_poll_controller(struct net_device *dev) static void vector_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); + strscpy(info->driver, DRIVER_NAME); +} + +static int vector_net_load_bpf_flash(struct net_device *dev, + struct ethtool_flash *efl) +{ + struct vector_private *vp = netdev_priv(dev); + struct vector_device *vdevice; + const struct firmware *fw; + int result = 0; + + if (!(vp->options & VECTOR_BPF_FLASH)) { + netdev_err(dev, "loading firmware not permitted: %s\n", efl->data); + return -1; + } + + if (vp->bpf != NULL) { + if (vp->opened) + uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); + kfree(vp->bpf->filter); + vp->bpf->filter = NULL; + } else { + vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC); + if (vp->bpf == NULL) { + netdev_err(dev, "failed to allocate memory for firmware\n"); + goto flash_fail; + } + } + + vdevice = find_device(vp->unit); + + if (request_firmware(&fw, efl->data, &vdevice->pdev.dev)) + goto flash_fail; + + vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC); + if (!vp->bpf->filter) + goto free_buffer; + + vp->bpf->len = fw->size / sizeof(struct sock_filter); + release_firmware(fw); + + if (vp->opened) + result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); + + return result; + +free_buffer: + release_firmware(fw); + +flash_fail: + if (vp->bpf != NULL) + kfree(vp->bpf->filter); + kfree(vp->bpf); + vp->bpf = NULL; + 
return -1; } static void vector_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct vector_private *vp = netdev_priv(netdev); @@ -1362,11 +1465,23 @@ static void vector_get_ethtool_stats(struct net_device *dev, { struct vector_private *vp = netdev_priv(dev); + /* Stats are modified in the dequeue portions of + * rx/tx which are protected by the head locks + * grabbing these locks here ensures they are up + * to date. + */ + + spin_lock(&vp->tx_queue->head_lock); + spin_lock(&vp->rx_queue->head_lock); memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); + spin_unlock(&vp->rx_queue->head_lock); + spin_unlock(&vp->tx_queue->head_lock); } static int vector_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct vector_private *vp = netdev_priv(netdev); @@ -1375,7 +1490,9 @@ static int vector_get_coalesce(struct net_device *netdev, } static int vector_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct vector_private *vp = netdev_priv(netdev); @@ -1386,6 +1503,7 @@ static int vector_set_coalesce(struct net_device *netdev, } static const struct ethtool_ops vector_net_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_TX_USECS, .get_drvinfo = vector_net_get_drvinfo, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, @@ -1395,6 +1513,7 @@ static const struct ethtool_ops vector_net_ethtool_ops = { .get_ethtool_stats = vector_get_ethtool_stats, .get_coalesce = vector_get_coalesce, .set_coalesce = vector_set_coalesce, + .flash_device = vector_net_load_bpf_flash, }; @@ -1413,13 +1532,48 @@ static const struct 
net_device_ops vector_netdev_ops = { #endif }; - static void vector_timer_expire(struct timer_list *t) { - struct vector_private *vp = from_timer(vp, t, tl); + struct vector_private *vp = timer_container_of(vp, t, tl); vp->estats.tx_kicks++; - vector_send(vp->tx_queue); + napi_schedule(&vp->napi); +} + +static void vector_setup_etheraddr(struct net_device *dev, char *str) +{ + u8 addr[ETH_ALEN]; + + if (str == NULL) + goto random; + + if (!mac_pton(str, addr)) { + netdev_err(dev, + "Failed to parse '%s' as an ethernet address\n", str); + goto random; + } + if (is_multicast_ether_addr(addr)) { + netdev_err(dev, + "Attempt to assign a multicast ethernet address to a device disallowed\n"); + goto random; + } + if (!is_valid_ether_addr(addr)) { + netdev_err(dev, + "Attempt to assign an invalid ethernet address to a device disallowed\n"); + goto random; + } + if (!is_local_ether_addr(addr)) { + netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n"); + netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n"); + netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", + addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]); + } + eth_hw_addr_set(dev, addr); + return; + +random: + netdev_info(dev, "Choosing a random ethernet address\n"); + eth_hw_addr_random(dev); } static void vector_eth_configure( @@ -1434,14 +1588,12 @@ static void vector_eth_configure( device = kzalloc(sizeof(*device), GFP_KERNEL); if (device == NULL) { - printk(KERN_ERR "eth_configure failed to allocate struct " - "vector_device\n"); + pr_err("Failed to allocate struct vector_device for vec%d\n", n); return; } dev = alloc_etherdev(sizeof(struct vector_private)); if (dev == NULL) { - printk(KERN_ERR "eth_configure: failed to allocate struct " - "net_device for vec%d\n", n); + pr_err("Failed to allocate struct net_device for vec%d\n", n); goto out_free_device; } @@ -1455,7 +1607,7 @@ static void vector_eth_configure( * and fail. 
*/ snprintf(dev->name, sizeof(dev->name), "vec%d", n); - uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac")); + vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac")); vp = netdev_priv(dev); /* sysfs register */ @@ -1473,40 +1625,24 @@ static void vector_eth_configure( device->dev = dev; - *vp = ((struct vector_private) - { - .list = LIST_HEAD_INIT(vp->list), - .dev = dev, - .unit = n, - .options = get_transport_options(def), - .rx_irq = 0, - .tx_irq = 0, - .parsed = def, - .max_packet = get_mtu(def) + ETH_HEADER_OTHER, - /* TODO - we need to calculate headroom so that ip header - * is 16 byte aligned all the time - */ - .headroom = get_headroom(def), - .form_header = NULL, - .verify_header = NULL, - .header_rxbuffer = NULL, - .header_txbuffer = NULL, - .header_size = 0, - .rx_header_size = 0, - .rexmit_scheduled = false, - .opened = false, - .transport_data = NULL, - .in_write_poll = false, - .coalesce = 2, - .req_size = get_req_size(def) - }); + INIT_LIST_HEAD(&vp->list); + vp->dev = dev; + vp->unit = n; + vp->options = get_transport_options(def); + vp->parsed = def; + vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; + /* + * TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + vp->headroom = get_headroom(def); + vp->coalesce = 2; + vp->req_size = get_req_size(def); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); - tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp); INIT_WORK(&vp->reset_tx, vector_reset_tx); timer_setup(&vp->tl, vector_timer_expire, 0); - spin_lock_init(&vp->lock); /* FIXME */ dev->netdev_ops = &vector_netdev_ops; @@ -1571,11 +1707,10 @@ static int __init vector_setup(char *str) err = vector_parse(str, &n, &str, &error); if (err) { - printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n", - str, error); + pr_err("Couldn't parse '%s': %s\n", str, error); return 1; } - new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); + new = 
memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->unit = n; new->arguments = str; @@ -1586,7 +1721,7 @@ static int __init vector_setup(char *str) __setup("vec", vector_setup); __uml_help(vector_setup, "vec[0-9]+:<option>=<value>,<option>=<value>\n" -" Configure a vector io network device.\n\n" +" Configure a vector io network device.\n\n" ); late_initcall(vector_init); diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 0b0a767b9076..417834793658 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #ifndef __UM_VECTOR_KERN_H @@ -14,6 +14,8 @@ #include <linux/ctype.h> #include <linux/workqueue.h> #include <linux/interrupt.h> +#include <asm/atomic.h> + #include "vector_user.h" /* Queue structure specially adapted for multiple enqueue/dequeue @@ -29,10 +31,13 @@ #define VECTOR_TX (1 << 1) #define VECTOR_BPF (1 << 2) #define VECTOR_QDISC_BYPASS (1 << 3) +#define VECTOR_BPF_FLASH (1 << 4) #define ETH_MAX_PACKET 1500 #define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */ +#define MAX_FILTER_PROG (2 << 16) + struct vector_queue { struct mmsghdr *mmsg_vector; void **skbuff_vector; @@ -40,7 +45,8 @@ struct vector_queue { struct net_device *dev; spinlock_t head_lock; spinlock_t tail_lock; - int queue_depth, head, tail, max_depth, max_iov_frags; + atomic_t queue_depth; + int head, tail, max_depth, max_iov_frags; short options; }; @@ -67,8 +73,8 @@ struct vector_estats { struct vector_private { struct list_head list; - spinlock_t lock; struct net_device *dev; + struct napi_struct napi ____cacheline_aligned; int unit; @@ -112,17 +118,20 @@ struct vector_private { spinlock_t stats_lock; - struct tasklet_struct tx_poll; bool rexmit_scheduled; bool opened; bool in_write_poll; + bool in_error; + + /* guest 
allowed to use ethtool flash to load bpf */ + bool bpf_via_flash; /* ethtool stats */ struct vector_estats estats; - void *bpf; + struct sock_fprog *bpf; - char user[0]; + char user[]; }; extern int build_transport_data(struct vector_private *vp); diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c index 77e4ebc206ae..0794d23f07cb 100644 --- a/arch/um/drivers/vector_transports.c +++ b/arch/um/drivers/vector_transports.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 - Cambridge Greys Limited * Copyright (C) 2011 - 2014 Cisco Systems Inc - * Licensed under the GPL. */ #include <linux/etherdevice.h> @@ -418,7 +418,7 @@ static int build_raw_transport_data(struct vector_private *vp) return 0; } -static int build_tap_transport_data(struct vector_private *vp) +static int build_hybrid_transport_data(struct vector_private *vp) { if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) { vp->form_header = &raw_form_header; @@ -432,7 +432,7 @@ static int build_tap_transport_data(struct vector_private *vp) NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); netdev_info( vp->dev, - "tap/raw: using vnet headers for tso and tx/rx checksum" + "tap/raw hybrid: using vnet headers for tso and tx/rx checksum" ); } else { return 0; /* do not try to enable tap too if raw failed */ @@ -442,6 +442,38 @@ static int build_tap_transport_data(struct vector_private *vp) return -1; } +static int build_tap_transport_data(struct vector_private *vp) +{ + /* "Pure" tap uses the same fd for rx and tx */ + if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) { + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= + (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "tap: using 
vnet headers for tso and tx/rx checksum" + ); + return 0; + } + return -1; +} + + +static int build_bess_transport_data(struct vector_private *vp) +{ + vp->form_header = NULL; + vp->verify_header = NULL; + vp->header_size = 0; + vp->rx_header_size = 0; + return 0; +} + int build_transport_data(struct vector_private *vp) { char *transport = uml_vector_fetch_arg(vp->parsed, "transport"); @@ -454,6 +486,10 @@ int build_transport_data(struct vector_private *vp) return build_raw_transport_data(vp); if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) return build_tap_transport_data(vp); + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return build_hybrid_transport_data(vp); + if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0) + return build_bess_transport_data(vp); return 0; } diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index d2c17dd74620..2ea67e6fd067 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -1,8 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ +#include <stdbool.h> #include <stdio.h> #include <unistd.h> #include <stdarg.h> @@ -16,37 +17,49 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> -#include <sys/types.h> #include <sys/socket.h> -#include <net/ethernet.h> +#include <sys/un.h> #include <netinet/ip.h> -#include <netinet/ether.h> #include <linux/if_ether.h> #include <linux/if_packet.h> -#include <sys/socket.h> #include <sys/wait.h> #include <sys/uio.h> #include <linux/virtio_net.h> #include <netdb.h> #include <stdlib.h> #include <os.h> +#include <limits.h> #include <um_malloc.h> -#include <sys/uio.h> #include "vector_user.h" #define ID_GRE 0 #define ID_L2TPV3 1 -#define ID_MAX 1 +#define ID_BESS 2 +#define ID_MAX 2 #define TOKEN_IFNAME "ifname" +#define TOKEN_SCRIPT "ifup" #define TRANS_RAW "raw" #define TRANS_RAW_LEN strlen(TRANS_RAW) +#define TRANS_FD 
"fd" +#define TRANS_FD_LEN strlen(TRANS_FD) + +#define TRANS_VDE "vde" +#define TRANS_VDE_LEN strlen(TRANS_VDE) + #define VNET_HDR_FAIL "could not enable vnet headers on fd %d" #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" -#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n" +#define UNIX_BIND_FAIL "unix_open : could not bind socket err=%i" +#define BPF_ATTACH_FAIL "Failed to attach filter size %d prog %px to %d, err %d\n" +#define BPF_DETACH_FAIL "Failed to detach filter size %d prog %px to %d, err %d\n" + +#define MAX_UN_LEN 107 + +static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static const char *template = "tapXXXXXX"; /* This is very ugly and brute force lookup, but it is done * only once at initialization so not worth doing hashes or @@ -117,19 +130,88 @@ cleanup: #define PATH_NET_TUN "/dev/net/tun" -static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) + +static int create_tap_fd(char *iface) { struct ifreq ifr; int fd = -1; - struct sockaddr_ll sock; int err = -ENOMEM, offload; + + fd = open(PATH_NET_TUN, O_RDWR); + if (fd < 0) { + printk(UM_KERN_ERR "uml_tap: failed to open tun device\n"); + goto tap_fd_cleanup; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strscpy(ifr.ifr_name, iface); + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err != 0) { + printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n"); + goto tap_fd_cleanup; + } + + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; + ioctl(fd, TUNSETOFFLOAD, offload); + return fd; +tap_fd_cleanup: + if (fd >= 0) + os_close_file(fd); + return err; +} + +static int create_raw_fd(char *iface, int flags, int proto) +{ + struct ifreq ifr; + int fd = -1; + struct sockaddr_ll sock; + int err = -ENOMEM; + + fd = socket(AF_PACKET, SOCK_RAW, flags); + if (fd == -1) { + err = -errno; + goto raw_fd_cleanup; + } + 
memset(&ifr, 0, sizeof(ifr)); + strscpy(ifr.ifr_name, iface); + if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { + err = -errno; + goto raw_fd_cleanup; + } + + sock.sll_family = AF_PACKET; + sock.sll_protocol = htons(proto); + sock.sll_ifindex = ifr.ifr_ifindex; + + if (bind(fd, + (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { + err = -errno; + goto raw_fd_cleanup; + } + return fd; +raw_fd_cleanup: + printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); + if (fd >= 0) + os_close_file(fd); + return err; +} + + +static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) +{ + int fd = -1, i; char *iface; struct vector_fds *result = NULL; + bool dynamic = false; + char dynamic_ifname[IFNAMSIZ]; + char *argv[] = {NULL, NULL, NULL, NULL}; iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); if (iface == NULL) { - printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n"); - goto tap_cleanup; + dynamic = true; + iface = dynamic_ifname; + srand(getpid()); } result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); @@ -143,118 +225,318 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) result->remote_addr_size = 0; /* TAP */ + do { + if (dynamic) { + strcpy(iface, template); + for (i = 0; i < strlen(iface); i++) { + if (iface[i] == 'X') { + iface[i] = padchar[rand() % strlen(padchar)]; + } + } + } + fd = create_tap_fd(iface); + if ((fd < 0) && (!dynamic)) { + printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n"); + goto tap_cleanup; + } + result->tx_fd = fd; + result->rx_fd = fd; + } while (fd < 0); - fd = open(PATH_NET_TUN, O_RDWR); - if (fd < 0) { - printk(UM_KERN_ERR "uml_tap: failed to open tun device\n"); - goto tap_cleanup; + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); } - result->tx_fd = fd; - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - strncpy((char *)&ifr.ifr_name, iface, 
sizeof(ifr.ifr_name) - 1); - err = ioctl(fd, TUNSETIFF, (void *) &ifr); - if (err != 0) { - printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n"); - goto tap_cleanup; + return result; +tap_cleanup: + printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd); + kfree(result); + return NULL; +} + +static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec) +{ + char *iface; + struct vector_fds *result = NULL; + char *argv[] = {NULL, NULL, NULL, NULL}; + + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); + if (iface == NULL) { + printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n"); + goto hybrid_cleanup; } - offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; - ioctl(fd, TUNSETOFFLOAD, offload); + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n"); + goto hybrid_cleanup; + } + result->rx_fd = -1; + result->tx_fd = -1; + result->remote_addr = NULL; + result->remote_addr_size = 0; + + /* TAP */ + + result->tx_fd = create_tap_fd(iface); + if (result->tx_fd < 0) { + printk(UM_KERN_ERR "uml_tap: failed to create tun interface: %i\n", result->tx_fd); + goto hybrid_cleanup; + } /* RAW */ - fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd == -1) { + result->rx_fd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL); + if (result->rx_fd == -1) { printk(UM_KERN_ERR - "uml_tap: failed to create socket: %i\n", -errno); - goto tap_cleanup; + "uml_tap: failed to create paired raw socket: %i\n", result->rx_fd); + goto hybrid_cleanup; } - result->rx_fd = fd; - memset(&ifr, 0, sizeof(ifr)); - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); - if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { - printk(UM_KERN_ERR - "uml_tap: failed to set interface: %i\n", -errno); - goto tap_cleanup; + + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); } + return 
result; +hybrid_cleanup: + printk(UM_KERN_ERR "user_init_hybrid: init failed"); + kfree(result); + return NULL; +} - sock.sll_family = AF_PACKET; - sock.sll_protocol = htons(ETH_P_ALL); - sock.sll_ifindex = ifr.ifr_ifindex; +static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id) +{ + int fd = -1; + int socktype; + char *src, *dst; + struct vector_fds *result = NULL; + struct sockaddr_un *local_addr = NULL, *remote_addr = NULL; - if (bind(fd, - (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { + src = uml_vector_fetch_arg(ifspec, "src"); + dst = uml_vector_fetch_arg(ifspec, "dst"); + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "unix open:cannot allocate remote addr"); + goto unix_cleanup; + } + remote_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (remote_addr == NULL) { + printk(UM_KERN_ERR "unix open:cannot allocate remote addr"); + goto unix_cleanup; + } + + switch (id) { + case ID_BESS: + socktype = SOCK_SEQPACKET; + if ((src != NULL) && (strlen(src) <= MAX_UN_LEN)) { + local_addr = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); + if (local_addr == NULL) { + printk(UM_KERN_ERR "bess open:cannot allocate local addr"); + goto unix_cleanup; + } + local_addr->sun_family = AF_UNIX; + memcpy(local_addr->sun_path, src, strlen(src) + 1); + } + if ((dst == NULL) || (strlen(dst) > MAX_UN_LEN)) + goto unix_cleanup; + remote_addr->sun_family = AF_UNIX; + memcpy(remote_addr->sun_path, dst, strlen(dst) + 1); + break; + default: + printk(KERN_ERR "Unsupported unix socket type\n"); + return NULL; + } + + fd = socket(AF_UNIX, socktype, 0); + if (fd == -1) { printk(UM_KERN_ERR - "user_init_tap: failed to bind raw pair, err %d\n", - -errno); - goto tap_cleanup; + "unix open: could not open socket, error = %d", + -errno + ); + goto unix_cleanup; + } + if (local_addr != NULL) { + if (bind(fd, (struct sockaddr *) local_addr, sizeof(struct sockaddr_un))) { + 
printk(UM_KERN_ERR UNIX_BIND_FAIL, errno); + goto unix_cleanup; + } + } + switch (id) { + case ID_BESS: + if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) { + printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno); + goto unix_cleanup; + } + break; } + result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr_size = sizeof(struct sockaddr_un); + result->remote_addr = remote_addr; return result; -tap_cleanup: - printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err); - if (result != NULL) { - if (result->rx_fd >= 0) - os_close_file(result->rx_fd); - if (result->tx_fd >= 0) - os_close_file(result->tx_fd); - kfree(result); +unix_cleanup: + if (fd >= 0) + os_close_file(fd); + kfree(remote_addr); + kfree(result); + return NULL; +} + +static int strtofd(const char *nptr) +{ + long fd; + char *endptr; + + if (nptr == NULL) + return -1; + + errno = 0; + fd = strtol(nptr, &endptr, 10); + if (nptr == endptr || + errno != 0 || + *endptr != '\0' || + fd < 0 || + fd > INT_MAX) { + return -1; + } + return fd; +} + +static struct vector_fds *user_init_fd_fds(struct arglist *ifspec) +{ + int fd = -1; + char *fdarg = NULL; + struct vector_fds *result = NULL; + + fdarg = uml_vector_fetch_arg(ifspec, "fd"); + fd = strtofd(fdarg); + if (fd == -1) { + printk(UM_KERN_ERR "fd open: bad or missing fd argument"); + goto fd_cleanup; + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto fd_cleanup; } + + result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + +fd_cleanup: + if (fd >= 0) + os_close_file(fd); + kfree(result); return NULL; } +/* enough char to store an int type */ +#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2) +#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3) +/* vde_plug --descr xx --port2 xx --mod2 
xx --group2 xx seqpacket://NN vnl (NULL) */ +#define VDE_MAX_ARGC 12 +#define VDE_SEQPACKET_HEAD "seqpacket://" +#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1) +#define VDE_DEFAULT_DESCRIPTION "UML" + +static struct vector_fds *user_init_vde_fds(struct arglist *ifspec) +{ + char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1]; + char *argv[VDE_MAX_ARGC] = {"vde_plug"}; + int argc = 1; + int rv; + int sv[2]; + struct vector_fds *result = NULL; + + char *vnl = uml_vector_fetch_arg(ifspec,"vnl"); + char *descr = uml_vector_fetch_arg(ifspec,"descr"); + char *port = uml_vector_fetch_arg(ifspec,"port"); + char *mode = uml_vector_fetch_arg(ifspec,"mode"); + char *group = uml_vector_fetch_arg(ifspec,"group"); + if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION; + + argv[argc++] = "--descr"; + argv[argc++] = descr; + if (port != NULL) { + argv[argc++] = "--port2"; + argv[argc++] = port; + } + if (mode != NULL) { + argv[argc++] = "--mod2"; + argv[argc++] = mode; + } + if (group != NULL) { + argv[argc++] = "--group2"; + argv[argc++] = group; + } + argv[argc++] = seqpacketvnl; + argv[argc++] = vnl; + argv[argc++] = NULL; + + rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno); + return NULL; + } + rv = os_set_exec_close(sv[0]); + if (rv < 0) { + printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno); + goto vde_cleanup_sv; + } + snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]); + + run_helper(NULL, NULL, argv); + + close(sv[1]); + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result == NULL) { + printk(UM_KERN_ERR "fd open: allocation failed"); + goto vde_cleanup; + } + + result->rx_fd = sv[0]; + result->tx_fd = sv[0]; + result->remote_addr_size = 0; + result->remote_addr = NULL; + return result; + +vde_cleanup_sv: + close(sv[1]); +vde_cleanup: + close(sv[0]); + return NULL; +} static struct vector_fds 
*user_init_raw_fds(struct arglist *ifspec) { - struct ifreq ifr; int rxfd = -1, txfd = -1; - struct sockaddr_ll sock; int err = -ENOMEM; char *iface; struct vector_fds *result = NULL; + char *argv[] = {NULL, NULL, NULL, NULL}; iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); if (iface == NULL) - goto cleanup; + goto raw_cleanup; - rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL); + rxfd = create_raw_fd(iface, ETH_P_ALL, ETH_P_ALL); if (rxfd == -1) { err = -errno; - goto cleanup; + goto raw_cleanup; } - txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */ + txfd = create_raw_fd(iface, 0, ETH_P_IP); /* Turn off RX on this fd */ if (txfd == -1) { err = -errno; - goto cleanup; - } - memset(&ifr, 0, sizeof(ifr)); - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); - if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) { - err = -errno; - goto cleanup; - } - - sock.sll_family = AF_PACKET; - sock.sll_protocol = htons(ETH_P_ALL); - sock.sll_ifindex = ifr.ifr_ifindex; - - if (bind(rxfd, - (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { - err = -errno; - goto cleanup; + goto raw_cleanup; } - - sock.sll_family = AF_PACKET; - sock.sll_protocol = htons(ETH_P_IP); - sock.sll_ifindex = ifr.ifr_ifindex; - - if (bind(txfd, - (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { - err = -errno; - goto cleanup; - } - result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); if (result != NULL) { result->rx_fd = rxfd; @@ -262,13 +544,14 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) result->remote_addr = NULL; result->remote_addr_size = 0; } + argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT); + if (argv[0]) { + argv[1] = iface; + run_helper(NULL, NULL, argv); + } return result; -cleanup: +raw_cleanup: printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); - if (rxfd >= 0) - os_close_file(rxfd); - if (txfd >= 0) - os_close_file(txfd); kfree(result); return NULL; } @@ -459,12 +742,20 @@ struct 
vector_fds *uml_vector_user_open( } if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) return user_init_raw_fds(parsed); + if (strncmp(transport, TRANS_HYBRID, TRANS_HYBRID_LEN) == 0) + return user_init_hybrid_fds(parsed); if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) return user_init_tap_fds(parsed); if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0) return user_init_socket_fds(parsed, ID_GRE); if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0) return user_init_socket_fds(parsed, ID_L2TPV3); + if (strncmp(transport, TRANS_BESS, TRANS_BESS_LEN) == 0) + return user_init_unix_fds(parsed, ID_BESS); + if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) + return user_init_fd_fds(parsed); + if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0) + return user_init_vde_fds(parsed); return NULL; } @@ -485,8 +776,9 @@ int uml_vector_sendmsg(int fd, void *hdr, int flags) int uml_vector_recvmsg(int fd, void *hdr, int flags) { int n; + struct msghdr *msg = (struct msghdr *) hdr; - CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags)); + CATCH_EINTR(n = readv(fd, msg->msg_iov, msg->msg_iovlen)); if ((n < 0) && (errno == EAGAIN)) return 0; if (n >= 0) @@ -500,7 +792,7 @@ int uml_vector_writev(int fd, void *hdr, int iovcount) int n; CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount)); - if ((n < 0) && (errno == EAGAIN)) + if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS))) return 0; if (n >= 0) return n; @@ -517,7 +809,7 @@ int uml_vector_sendmmsg( int n; CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags)); - if ((n < 0) && (errno == EAGAIN)) + if ((n < 0) && ((errno == EAGAIN) || (errno == ENOBUFS))) return 0; if (n >= 0) return n; @@ -542,31 +834,44 @@ int uml_vector_recvmmsg( else return -errno; } -int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len) +int uml_vector_attach_bpf(int fd, void *bpf) { - int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len); + struct sock_fprog *prog = bpf; + + int 
err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, sizeof(struct sock_fprog)); if (err < 0) - printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno); + printk(KERN_ERR BPF_ATTACH_FAIL, prog->len, prog->filter, fd, -errno); return err; } -#define DEFAULT_BPF_LEN 6 +int uml_vector_detach_bpf(int fd, void *bpf) +{ + struct sock_fprog *prog = bpf; -void *uml_vector_default_bpf(int fd, void *mac) + int err = setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, bpf, sizeof(struct sock_fprog)); + if (err < 0) + printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno); + return err; +} +void *uml_vector_default_bpf(const void *mac) { struct sock_filter *bpf; uint32_t *mac1 = (uint32_t *)(mac + 2); uint16_t *mac2 = (uint16_t *) mac; - struct sock_fprog bpf_prog = { - .len = 6, - .filter = NULL, - }; + struct sock_fprog *bpf_prog; + bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL); + if (bpf_prog) { + bpf_prog->len = DEFAULT_BPF_LEN; + bpf_prog->filter = NULL; + } else { + return NULL; + } bpf = uml_kmalloc( sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL); - if (bpf != NULL) { - bpf_prog.filter = bpf; + if (bpf) { + bpf_prog->filter = bpf; /* ld [8] */ bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 }; /* jeq #0xMAC[2-6] jt 2 jf 5*/ @@ -579,12 +884,58 @@ void *uml_vector_default_bpf(int fd, void *mac) bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 }; /* ret #0x40000 */ bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 }; - if (uml_vector_attach_bpf( - fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) { - kfree(bpf); - bpf = NULL; - } + } else { + kfree(bpf_prog); + bpf_prog = NULL; } - return bpf; + return bpf_prog; } +/* Note - this function requires a valid mac being passed as an arg */ + +void *uml_vector_user_bpf(char *filename) +{ + struct sock_filter *bpf; + struct sock_fprog *bpf_prog; + struct stat statbuf; + int res, ffd = -1; + + if (filename == NULL) + return NULL; + + if (stat(filename, &statbuf) < 0) { + 
printk(KERN_ERR "Error %d reading bpf file", -errno); + return false; + } + bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL); + if (bpf_prog == NULL) { + printk(KERN_ERR "Failed to allocate bpf prog buffer"); + return NULL; + } + bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter); + bpf_prog->filter = NULL; + ffd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if (ffd < 0) { + printk(KERN_ERR "Error %d opening bpf file", -errno); + goto bpf_failed; + } + bpf = uml_kmalloc(statbuf.st_size, UM_GFP_KERNEL); + if (bpf == NULL) { + printk(KERN_ERR "Failed to allocate bpf buffer"); + goto bpf_failed; + } + bpf_prog->filter = bpf; + res = os_read_file(ffd, bpf, statbuf.st_size); + if (res < statbuf.st_size) { + printk(KERN_ERR "Failed to read bpf program %s, error %d", filename, res); + kfree(bpf); + goto bpf_failed; + } + os_close_file(ffd); + return bpf_prog; +bpf_failed: + if (ffd > 0) + os_close_file(ffd); + kfree(bpf_prog); + return NULL; +} diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h index d7cbff73b7ff..59ed5f9e6e41 100644 --- a/arch/um/drivers/vector_user.h +++ b/arch/um/drivers/vector_user.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #ifndef __UM_VECTOR_USER_H @@ -16,13 +16,20 @@ #define TRANS_TAP "tap" #define TRANS_TAP_LEN strlen(TRANS_TAP) - #define TRANS_GRE "gre" -#define TRANS_GRE_LEN strlen(TRANS_RAW) +#define TRANS_GRE_LEN strlen(TRANS_GRE) #define TRANS_L2TPV3 "l2tpv3" #define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3) +#define TRANS_HYBRID "hybrid" +#define TRANS_HYBRID_LEN strlen(TRANS_HYBRID) + +#define TRANS_BESS "bess" +#define TRANS_BESS_LEN strlen(TRANS_BESS) + +#define DEFAULT_BPF_LEN 6 + #ifndef IPPROTO_GRE #define IPPROTO_GRE 0x2F #endif @@ -61,8 +68,6 @@ struct vector_fds { }; #define VECTOR_READ 1 -#define VECTOR_WRITE (1 < 1) -#define VECTOR_HEADERS (1 < 2) extern struct 
arglist *uml_parse_vector_ifspec(char *arg); @@ -90,8 +95,10 @@ extern int uml_vector_recvmmsg( unsigned int vlen, unsigned int flags ); -extern void *uml_vector_default_bpf(int fd, void *mac); -extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len); +extern void *uml_vector_default_bpf(const void *mac); +extern void *uml_vector_user_bpf(char *filename); +extern int uml_vector_attach_bpf(int fd, void *bpf); +extern int uml_vector_detach_bpf(int fd, void *bpf); extern bool uml_raw_enable_qdisc_bypass(int fd); extern bool uml_raw_enable_vnet_headers(int fd); extern bool uml_tap_enable_vnet_headers(int fd); diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c new file mode 100644 index 000000000000..915812a79bfc --- /dev/null +++ b/arch/um/drivers/vfio_kern.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ + +#define pr_fmt(fmt) "vfio-uml: " fmt + +#include <linux/module.h> +#include <linux/logic_iomem.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/unaligned.h> +#include <irq_kern.h> +#include <init.h> +#include <os.h> + +#include "mconsole_kern.h" +#include "virt-pci.h" +#include "vfio_user.h" + +#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev) + +struct uml_vfio_intr_ctx { + struct uml_vfio_device *dev; + int irq; +}; + +struct uml_vfio_device { + const char *name; + int group; + + struct um_pci_device pdev; + struct uml_vfio_user_device udev; + struct uml_vfio_intr_ctx *intr_ctx; + + int msix_cap; + int msix_bar; + int msix_offset; + int msix_size; + u32 *msix_data; + + struct list_head list; +}; + +struct uml_vfio_group { + int id; + int fd; + int users; + struct list_head list; +}; + +static struct { + int fd; + int users; +} uml_vfio_container = { .fd = -1 }; +static DEFINE_MUTEX(uml_vfio_container_mtx); + +static LIST_HEAD(uml_vfio_groups); +static 
DEFINE_MUTEX(uml_vfio_groups_mtx); + +static LIST_HEAD(uml_vfio_devices); +static DEFINE_MUTEX(uml_vfio_devices_mtx); + +static int uml_vfio_set_container(int group_fd) +{ + int err; + + guard(mutex)(&uml_vfio_container_mtx); + + err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd); + if (err) + return err; + + uml_vfio_container.users++; + if (uml_vfio_container.users > 1) + return 0; + + err = uml_vfio_user_setup_iommu(uml_vfio_container.fd); + if (err) { + uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd); + uml_vfio_container.users--; + } + return err; +} + +static void uml_vfio_unset_container(int group_fd) +{ + guard(mutex)(&uml_vfio_container_mtx); + + uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd); + uml_vfio_container.users--; +} + +static int uml_vfio_open_group(int group_id) +{ + struct uml_vfio_group *group; + int err; + + guard(mutex)(&uml_vfio_groups_mtx); + + list_for_each_entry(group, &uml_vfio_groups, list) { + if (group->id == group_id) { + group->users++; + return group->fd; + } + } + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + group->fd = uml_vfio_user_open_group(group_id); + if (group->fd < 0) { + err = group->fd; + goto free_group; + } + + err = uml_vfio_set_container(group->fd); + if (err) + goto close_group; + + group->id = group_id; + group->users = 1; + + list_add(&group->list, &uml_vfio_groups); + + return group->fd; + +close_group: + os_close_file(group->fd); +free_group: + kfree(group); + return err; +} + +static int uml_vfio_release_group(int group_fd) +{ + struct uml_vfio_group *group; + + guard(mutex)(&uml_vfio_groups_mtx); + + list_for_each_entry(group, &uml_vfio_groups, list) { + if (group->fd == group_fd) { + group->users--; + if (group->users == 0) { + uml_vfio_unset_container(group_fd); + os_close_file(group_fd); + list_del(&group->list); + kfree(group); + } + return 0; + } + } + + return -ENOENT; +} + +static irqreturn_t uml_vfio_interrupt(int unused, void *opaque) +{ 
+ struct uml_vfio_intr_ctx *ctx = opaque; + struct uml_vfio_device *dev = ctx->dev; + int index = ctx - dev->intr_ctx; + int irqfd = dev->udev.irqfd[index]; + int irq = dev->msix_data[index]; + uint64_t v; + int r; + + do { + r = os_read_file(irqfd, &v, sizeof(v)); + if (r == sizeof(v)) + generic_handle_irq(irq); + } while (r == sizeof(v) || r == -EINTR); + WARN(r != -EAGAIN, "read returned %d\n", r); + + return IRQ_HANDLED; +} + +static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index) +{ + struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index]; + int err, irqfd; + + if (ctx->irq >= 0) + return 0; + + irqfd = uml_vfio_user_activate_irq(&dev->udev, index); + if (irqfd < 0) + return irqfd; + + ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ, + uml_vfio_interrupt, 0, + "vfio-uml", ctx); + if (ctx->irq < 0) { + err = ctx->irq; + goto deactivate; + } + + err = add_sigio_fd(irqfd); + if (err) + goto free_irq; + + return 0; + +free_irq: + um_free_irq(ctx->irq, ctx); + ctx->irq = -1; +deactivate: + uml_vfio_user_deactivate_irq(&dev->udev, index); + return err; +} + +static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index) +{ + struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index]; + + if (ctx->irq >= 0) { + ignore_sigio_fd(dev->udev.irqfd[index]); + um_free_irq(ctx->irq, ctx); + uml_vfio_user_deactivate_irq(&dev->udev, index); + ctx->irq = -1; + } + return 0; +} + +static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + /* + * Here, we handle only the operations we care about, + * ignoring the rest. 
+ */ + if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) { + switch (val & ~PCI_MSIX_FLAGS_QSIZE) { + case PCI_MSIX_FLAGS_ENABLE: + case 0: + return uml_vfio_user_update_irqs(&dev->udev); + } + } + return 0; +} + +static int uml_vfio_update_msix_table(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + int index; + + /* + * Here, we handle only the operations we care about, + * ignoring the rest. + */ + offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA; + + if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0) + return 0; + + index = offset / PCI_MSIX_ENTRY_SIZE; + if (index >= dev->udev.irq_count) + return -EINVAL; + + dev->msix_data[index] = val; + + return val ? uml_vfio_activate_irq(dev, index) : + uml_vfio_deactivate_irq(dev, index); +} + +static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev, + unsigned int offset, int size) +{ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + return __uml_vfio_cfgspace_read(dev, offset, size); +} + +static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + u8 data[8]; + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, 
data, size)); +} + +static void uml_vfio_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF && + offset + size > dev->msix_cap) + WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val)); + + __uml_vfio_cfgspace_write(dev, offset, size, val); +} + +static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar, + void *buffer, unsigned int offset, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + memset(buffer, 0xff, size); + uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size); +} + +static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + u8 data[8]; + + uml_vfio_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar, + unsigned int offset, const void *buffer, + int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size); +} + +static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + u8 data[8]; + + if (bar == dev->msix_bar && offset + size > dev->msix_offset && + offset < dev->msix_offset + dev->msix_size) + WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val)); + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } 
+ + uml_vfio_bar_copy_to(pdev, bar, offset, data, size); +} + +static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + int i; + + for (i = 0; i < size; i++) + uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1); +} + +static const struct um_pci_ops uml_vfio_um_pci_ops = { + .cfgspace_read = uml_vfio_cfgspace_read, + .cfgspace_write = uml_vfio_cfgspace_write, + .bar_read = uml_vfio_bar_read, + .bar_write = uml_vfio_bar_write, + .bar_copy_from = uml_vfio_bar_copy_from, + .bar_copy_to = uml_vfio_bar_copy_to, + .bar_set = uml_vfio_bar_set, +}; + +static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap) +{ + u8 id, pos; + u16 ent; + int ttl = 48; /* PCI_FIND_CAP_TTL */ + + pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos)); + + while (pos && ttl--) { + ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent)); + + id = ent & 0xff; + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos = ent >> 8; + } + + return 0; +} + +static int uml_vfio_read_msix_table(struct uml_vfio_device *dev) +{ + unsigned int off; + u16 flags; + u32 tbl; + + off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX); + if (!off) + return -ENOTSUPP; + + dev->msix_cap = off; + + tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl)); + flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags)); + + dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR; + dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET; + dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE; + + dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL); + if (!dev->msix_data) + return -ENOMEM; + + return 0; +} + +static void uml_vfio_open_device(struct uml_vfio_device *dev) +{ + struct uml_vfio_intr_ctx *ctx; + int err, group_id, i; + + group_id = uml_vfio_user_get_group_id(dev->name); + if (group_id < 0) { + pr_err("Failed to get group id (%s), 
error %d\n", + dev->name, group_id); + goto free_dev; + } + + dev->group = uml_vfio_open_group(group_id); + if (dev->group < 0) { + pr_err("Failed to open group %d (%s), error %d\n", + group_id, dev->name, dev->group); + goto free_dev; + } + + err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name); + if (err) { + pr_err("Failed to setup device (%s), error %d\n", + dev->name, err); + goto release_group; + } + + err = uml_vfio_read_msix_table(dev); + if (err) { + pr_err("Failed to read MSI-X table (%s), error %d\n", + dev->name, err); + goto teardown_udev; + } + + dev->intr_ctx = kmalloc_array(dev->udev.irq_count, + sizeof(struct uml_vfio_intr_ctx), + GFP_KERNEL); + if (!dev->intr_ctx) { + pr_err("Failed to allocate interrupt context (%s)\n", + dev->name); + goto free_msix; + } + + for (i = 0; i < dev->udev.irq_count; i++) { + ctx = &dev->intr_ctx[i]; + ctx->dev = dev; + ctx->irq = -1; + } + + dev->pdev.ops = &uml_vfio_um_pci_ops; + + err = um_pci_device_register(&dev->pdev); + if (err) { + pr_err("Failed to register UM PCI device (%s), error %d\n", + dev->name, err); + goto free_intr_ctx; + } + + return; + +free_intr_ctx: + kfree(dev->intr_ctx); +free_msix: + kfree(dev->msix_data); +teardown_udev: + uml_vfio_user_teardown_device(&dev->udev); +release_group: + uml_vfio_release_group(dev->group); +free_dev: + list_del(&dev->list); + kfree(dev->name); + kfree(dev); +} + +static void uml_vfio_release_device(struct uml_vfio_device *dev) +{ + int i; + + for (i = 0; i < dev->udev.irq_count; i++) + uml_vfio_deactivate_irq(dev, i); + uml_vfio_user_update_irqs(&dev->udev); + + um_pci_device_unregister(&dev->pdev); + kfree(dev->intr_ctx); + kfree(dev->msix_data); + uml_vfio_user_teardown_device(&dev->udev); + uml_vfio_release_group(dev->group); + list_del(&dev->list); + kfree(dev->name); + kfree(dev); +} + +static struct uml_vfio_device *uml_vfio_find_device(const char *device) +{ + struct uml_vfio_device *dev; + + list_for_each_entry(dev, &uml_vfio_devices, list) { + 
if (!strcmp(dev->name, device)) + return dev; + } + return NULL; +} + +static struct uml_vfio_device *uml_vfio_add_device(const char *device) +{ + struct uml_vfio_device *dev; + int fd; + + guard(mutex)(&uml_vfio_devices_mtx); + + if (uml_vfio_container.fd < 0) { + fd = uml_vfio_user_open_container(); + if (fd < 0) + return ERR_PTR(fd); + uml_vfio_container.fd = fd; + } + + if (uml_vfio_find_device(device)) + return ERR_PTR(-EEXIST); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->name = kstrdup(device, GFP_KERNEL); + if (!dev->name) { + kfree(dev); + return ERR_PTR(-ENOMEM); + } + + list_add_tail(&dev->list, &uml_vfio_devices); + return dev; +} + +static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp) +{ + struct uml_vfio_device *dev; + + dev = uml_vfio_add_device(device); + if (IS_ERR(dev)) + return PTR_ERR(dev); + return 0; +} + +static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp) +{ + return 0; +} + +static const struct kernel_param_ops uml_vfio_cmdline_param_ops = { + .set = uml_vfio_cmdline_set, + .get = uml_vfio_cmdline_get, +}; + +device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400); +__uml_help(uml_vfio_cmdline_param_ops, +"vfio_uml.device=<domain:bus:slot.function>\n" +" Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n" +" capable devices are supported, and it is assumed that drivers will\n" +" use MSI-X. 
This parameter can be specified multiple times to pass\n" +" through multiple PCI devices to UML.\n\n" +); + +static int uml_vfio_mc_config(char *str, char **error_out) +{ + struct uml_vfio_device *dev; + + if (*str != '=') { + *error_out = "Invalid config"; + return -EINVAL; + } + str += 1; + + dev = uml_vfio_add_device(str); + if (IS_ERR(dev)) + return PTR_ERR(dev); + uml_vfio_open_device(dev); + return 0; +} + +static int uml_vfio_mc_id(char **str, int *start_out, int *end_out) +{ + return -EOPNOTSUPP; +} + +static int uml_vfio_mc_remove(int n, char **error_out) +{ + return -EOPNOTSUPP; +} + +static struct mc_device uml_vfio_mc = { + .list = LIST_HEAD_INIT(uml_vfio_mc.list), + .name = "vfio_uml.device", + .config = uml_vfio_mc_config, + .get_config = NULL, + .id = uml_vfio_mc_id, + .remove = uml_vfio_mc_remove, +}; + +static int __init uml_vfio_init(void) +{ + struct uml_vfio_device *dev, *n; + + sigio_broken(); + + /* If the opening fails, the device will be released. */ + list_for_each_entry_safe(dev, n, &uml_vfio_devices, list) + uml_vfio_open_device(dev); + + mconsole_register_dev(&uml_vfio_mc); + + return 0; +} +late_initcall(uml_vfio_init); + +static void __exit uml_vfio_exit(void) +{ + struct uml_vfio_device *dev, *n; + + list_for_each_entry_safe(dev, n, &uml_vfio_devices, list) + uml_vfio_release_device(dev); + + if (uml_vfio_container.fd >= 0) + os_close_file(uml_vfio_container.fd); +} +module_exit(uml_vfio_exit); diff --git a/arch/um/drivers/vfio_user.c b/arch/um/drivers/vfio_user.c new file mode 100644 index 000000000000..6a45d8e14582 --- /dev/null +++ b/arch/um/drivers/vfio_user.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/eventfd.h> +#include <linux/limits.h> +#include 
<linux/vfio.h> +#include <linux/pci_regs.h> +#include <as-layout.h> +#include <um_malloc.h> + +#include "vfio_user.h" + +int uml_vfio_user_open_container(void) +{ + int r, fd; + + fd = open("/dev/vfio/vfio", O_RDWR); + if (fd < 0) + return -errno; + + r = ioctl(fd, VFIO_GET_API_VERSION); + if (r != VFIO_API_VERSION) { + r = r < 0 ? -errno : -EINVAL; + goto error; + } + + r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); + if (r <= 0) { + r = r < 0 ? -errno : -EINVAL; + goto error; + } + + return fd; + +error: + close(fd); + return r; +} + +int uml_vfio_user_setup_iommu(int container) +{ + /* + * This is a bit tricky. See the big comment in + * vhost_user_set_mem_table() in virtio_uml.c. + */ + unsigned long reserved = uml_reserved - uml_physmem; + struct vfio_iommu_type1_dma_map dma_map = { + .argsz = sizeof(dma_map), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = uml_reserved, + .iova = reserved, + .size = physmem_size - reserved, + }; + + if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) + return -errno; + + if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0) + return -errno; + + return 0; +} + +int uml_vfio_user_get_group_id(const char *device) +{ + char *path, *buf, *end; + const char *name; + int r; + + path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + if (!path) + return -ENOMEM; + + sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device); + + buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL); + if (!buf) { + r = -ENOMEM; + goto free_path; + } + + r = readlink(path, buf, PATH_MAX); + if (r < 0) { + r = -errno; + goto free_buf; + } + buf[r] = '\0'; + + name = basename(buf); + + r = strtoul(name, &end, 10); + if (*end != '\0' || end == name) { + r = -EINVAL; + goto free_buf; + } + +free_buf: + kfree(buf); +free_path: + kfree(path); + return r; +} + +int uml_vfio_user_open_group(int group_id) +{ + char *path; + int fd; + + path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + if (!path) + return -ENOMEM; + + sprintf(path, 
"/dev/vfio/%d", group_id); + + fd = open(path, O_RDWR); + if (fd < 0) { + fd = -errno; + goto out; + } + +out: + kfree(path); + return fd; +} + +int uml_vfio_user_set_container(int container, int group) +{ + if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0) + return -errno; + return 0; +} + +int uml_vfio_user_unset_container(int container, int group) +{ + if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0) + return -errno; + return 0; +} + +static int vfio_set_irqs(int device, int start, int count, int *irqfd) +{ + struct vfio_irq_set *irq_set; + int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count; + int err = 0; + + irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL); + if (!irq_set) + return -ENOMEM; + + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = start; + irq_set->count = count; + memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count); + + if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { + err = -errno; + goto out; + } + +out: + kfree(irq_set); + return err; +} + +int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, + int group, const char *device) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + int err, i; + + dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device); + if (dev->device < 0) + return -errno; + + if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) { + err = -errno; + goto close_device; + } + + dev->num_regions = device_info.num_regions; + if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1) + dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1; + + dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions, + UM_GFP_KERNEL); + if (!dev->region) { + err = -ENOMEM; + goto close_device; + } + + for (i = 0; i < dev->num_regions; i++) { + struct vfio_region_info region = { + .argsz = 
sizeof(region), + .index = i, + }; + if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) { + err = -errno; + goto free_region; + } + dev->region[i].size = region.size; + dev->region[i].offset = region.offset; + } + + /* Only MSI-X is supported currently. */ + irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX; + if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { + err = -errno; + goto free_region; + } + + dev->irq_count = irq_info.count; + + dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL); + if (!dev->irqfd) { + err = -ENOMEM; + goto free_region; + } + + memset(dev->irqfd, -1, sizeof(int) * dev->irq_count); + + err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); + if (err) + goto free_irqfd; + + return 0; + +free_irqfd: + kfree(dev->irqfd); +free_region: + kfree(dev->region); +close_device: + close(dev->device); + return err; +} + +void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev) +{ + kfree(dev->irqfd); + kfree(dev->region); + close(dev->device); +} + +int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index) +{ + int irqfd; + + irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (irqfd < 0) + return -errno; + + dev->irqfd[index] = irqfd; + return irqfd; +} + +void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index) +{ + close(dev->irqfd[index]); + dev->irqfd[index] = -1; +} + +int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev) +{ + return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); +} + +static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index, + uint64_t offset, void *buf, uint64_t size) +{ + if (index >= dev->num_regions || offset + size > dev->region[index].size) + return -EINVAL; + + if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0) + return -errno; + + return 0; +} + +static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index, + uint64_t offset, 
const void *buf, uint64_t size) +{ + if (index >= dev->num_regions || offset + size > dev->region[index].size) + return -EINVAL; + + if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0) + return -errno; + + return 0; +} + +int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, + unsigned int offset, void *buf, int size) +{ + return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX, + offset, buf, size); +} + +int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, + unsigned int offset, const void *buf, int size) +{ + return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX, + offset, buf, size); +} + +int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, void *buf, int size) +{ + return vfio_region_read(dev, bar, offset, buf, size); +} + +int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, const void *buf, int size) +{ + return vfio_region_write(dev, bar, offset, buf, size); +} diff --git a/arch/um/drivers/vfio_user.h b/arch/um/drivers/vfio_user.h new file mode 100644 index 000000000000..75535e05059b --- /dev/null +++ b/arch/um/drivers/vfio_user.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VFIO_USER_H +#define __UM_VFIO_USER_H + +struct uml_vfio_user_device { + int device; + + struct { + uint64_t size; + uint64_t offset; + } *region; + int num_regions; + + int32_t *irqfd; + int irq_count; +}; + +int uml_vfio_user_open_container(void); +int uml_vfio_user_setup_iommu(int container); + +int uml_vfio_user_get_group_id(const char *device); +int uml_vfio_user_open_group(int group_id); +int uml_vfio_user_set_container(int container, int group); +int uml_vfio_user_unset_container(int container, int group); + +int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, + int group, const char *device); +void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev); + +int uml_vfio_user_activate_irq(struct 
uml_vfio_user_device *dev, int index); +void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index); +int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev); + +int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, + unsigned int offset, void *buf, int size); +int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, + unsigned int offset, const void *buf, int size); + +int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, void *buf, int size); +int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, const void *buf, int size); + +#endif /* __UM_VFIO_USER_H */ diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h new file mode 100644 index 000000000000..fcfa3b7e021b --- /dev/null +++ b/arch/um/drivers/vhost_user.h @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Vhost-user protocol */ + +#ifndef __VHOST_USER_H__ +#define __VHOST_USER_H__ + +/* Message flags */ +#define VHOST_USER_FLAG_REPLY BIT(2) +#define VHOST_USER_FLAG_NEED_REPLY BIT(3) +/* Feature bits */ +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 +/* Vring state index masks */ +#define VHOST_USER_VRING_INDEX_MASK 0xff +#define VHOST_USER_VRING_POLL_MASK BIT(8) + +/* Supported version */ +#define VHOST_USER_VERSION 1 +/* Supported transport features */ +#define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) +/* Supported protocol features */ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ + 
BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) + +enum vhost_user_request { + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SEND_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_VRING_KICK = 35, +}; + +enum vhost_user_slave_request { + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, +}; + +struct vhost_user_header { + /* + * Use enum vhost_user_request for outgoing messages, + * uses enum vhost_user_slave_request for incoming ones. 
+ */ + u32 request; + u32 flags; + u32 size; +} __packed; + +struct vhost_user_config { + u32 offset; + u32 size; + u32 flags; + u8 payload[]; /* Variable length */ +} __packed; + +struct vhost_user_vring_state { + u32 index; + u32 num; +} __packed; + +struct vhost_user_vring_addr { + u32 index; + u32 flags; + u64 desc, used, avail, log; +} __packed; + +struct vhost_user_mem_region { + u64 guest_addr; + u64 size; + u64 user_addr; + u64 mmap_offset; +} __packed; + +struct vhost_user_mem_regions { + u32 num; + u32 padding; + struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */ +} __packed; + +union vhost_user_payload { + u64 integer; + struct vhost_user_config config; + struct vhost_user_vring_state vring_state; + struct vhost_user_vring_addr vring_addr; + struct vhost_user_mem_regions mem_regions; +}; + +struct vhost_user_msg { + struct vhost_user_header header; + union vhost_user_payload payload; +} __packed; + +#endif diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c new file mode 100644 index 000000000000..557d93aea00a --- /dev/null +++ b/arch/um/drivers/virt-pci.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/logic_iomem.h> +#include <linux/of_platform.h> +#include <linux/irqchip/irq-msi-lib.h> +#include <linux/irqdomain.h> +#include <linux/msi.h> +#include <linux/unaligned.h> +#include <irq_kern.h> + +#include "virt-pci.h" + +#define MAX_DEVICES 8 +#define MAX_MSI_VECTORS 32 +#define CFG_SPACE_SIZE 4096 + +struct um_pci_device_reg { + struct um_pci_device *dev; + void __iomem *iomem; +}; + +static struct pci_host_bridge *bridge; +static DEFINE_MUTEX(um_pci_mtx); +static struct um_pci_device *um_pci_platform_device; +static struct um_pci_device_reg um_pci_devices[MAX_DEVICES]; +static struct fwnode_handle *um_pci_fwnode; +static struct irq_domain 
*um_pci_inner_domain; +static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; + +static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, + int size) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + + if (!dev) + return ULONG_MAX; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space read size %d\n", size); + return ULONG_MAX; + } + + return dev->ops->cfgspace_read(dev, offset, size); +} + +static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + + if (!dev) + return; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space write size %d\n", size); + return; + } + + dev->ops->cfgspace_write(dev, offset, size, val); +} + +static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { + .read = um_pci_cfgspace_read, + .write = um_pci_cfgspace_write, +}; + +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid bar read size %d\n", size); + return ULONG_MAX; + } + + return dev->ops->bar_read(dev, bar, offset, size); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid bar write size %d\n", size); + 
return; + } + + dev->ops->bar_write(dev, bar, offset, size, val); +} + +static void um_pci_bar_copy_from(void *priv, void *buffer, + unsigned int offset, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + dev->ops->bar_copy_from(dev, bar, buffer, offset, size); +} + +static void um_pci_bar_copy_to(void *priv, unsigned int offset, + const void *buffer, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + dev->ops->bar_copy_to(dev, bar, offset, buffer, size); +} + +static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + u8 bar = *resptr; + + dev->ops->bar_set(dev, bar, offset, value, size); +} + +static const struct logic_iomem_ops um_pci_device_bar_ops = { + .read = um_pci_bar_read, + .write = um_pci_bar_write, + .set = um_pci_bar_set, + .copy_from = um_pci_bar_copy_from, + .copy_to = um_pci_bar_copy_to, +}; + +static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn, + int where) +{ + struct um_pci_device_reg *dev; + unsigned int busn = bus->number; + + if (busn > 0) + return NULL; + + /* not allowing functions for now ... 
*/ + if (devfn % 8) + return NULL; + + if (devfn / 8 >= ARRAY_SIZE(um_pci_devices)) + return NULL; + + dev = &um_pci_devices[devfn / 8]; + if (!dev) + return NULL; + + return (void __iomem *)((unsigned long)dev->iomem + where); +} + +static struct pci_ops um_pci_ops = { + .map_bus = um_pci_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void um_pci_rescan(void) +{ + pci_lock_rescan_remove(); + pci_rescan_bus(bridge->bus); + pci_unlock_rescan_remove(); +} + +#ifdef CONFIG_OF +/* Copied from arch/x86/kernel/devicetree.c */ +struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) +{ + struct device_node *np; + + for_each_node_by_type(np, "pci") { + const void *prop; + unsigned int bus_min; + + prop = of_get_property(np, "bus-range", NULL); + if (!prop) + continue; + bus_min = be32_to_cpup(prop); + if (bus->number == bus_min) + return np; + } + return NULL; +} +#endif + +static struct resource virt_cfgspace_resource = { + .name = "PCI config space", + .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, + .end = 0xf0000000 - 1, + .flags = IORESOURCE_MEM, +}; + +static long um_pci_map_cfgspace(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE)) + return -EINVAL; + + if (offset / CFG_SPACE_SIZE < MAX_DEVICES) { + *ops = &um_pci_device_cfgspace_ops; + *priv = &um_pci_devices[offset / CFG_SPACE_SIZE]; + return 0; + } + + WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size); + return -ENOENT; +} + +static const struct logic_iomem_region_ops um_pci_cfgspace_ops = { + .map = um_pci_map_cfgspace, +}; + +static struct resource virt_iomem_resource = { + .name = "PCI iomem", + .start = 0xf0000000, + .end = 0xffffffff, + .flags = IORESOURCE_MEM, +}; + +struct um_pci_map_iomem_data { + unsigned long offset; + size_t size; + const struct logic_iomem_ops **ops; + void **priv; + long ret; +}; + +static int 
um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data) +{ + struct um_pci_map_iomem_data *data = _data; + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + struct um_pci_device *dev; + int i; + + if (!reg->dev) + return 0; + + for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) { + struct resource *r = &pdev->resource[i]; + + if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM) + continue; + + /* + * must be the whole or part of the resource, + * not allowed to only overlap + */ + if (data->offset < r->start || data->offset > r->end) + continue; + if (data->offset + data->size - 1 > r->end) + continue; + + dev = reg->dev; + *data->ops = &um_pci_device_bar_ops; + dev->resptr[i] = i; + *data->priv = &dev->resptr[i]; + data->ret = data->offset - r->start; + + /* no need to continue */ + return 1; + } + + return 0; +} + +static long um_pci_map_iomem(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + struct um_pci_map_iomem_data data = { + /* we want the full address here */ + .offset = offset + virt_iomem_resource.start, + .size = size, + .ops = ops, + .priv = priv, + .ret = -ENOENT, + }; + + pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data); + return data.ret; +} + +static const struct logic_iomem_region_ops um_pci_iomem_ops = { + .map = um_pci_map_iomem, +}; + +static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + /* + * This is a very low address and not actually valid 'physical' memory + * in UML, so we can simply map MSI(-X) vectors to there, it cannot be + * legitimately written to by the device in any other way. + * We use the (virtual) IRQ number here as the message to simplify the + * code that receives the message, where for now we simply trust the + * device to send the correct message. 
+ */ + msg->address_hi = 0; + msg->address_lo = 0xa0000; + msg->data = data->irq; +} + +static struct irq_chip um_pci_msi_bottom_irq_chip = { + .name = "UM virtual MSI", + .irq_compose_msi_msg = um_pci_compose_msi_msg, +}; + +static int um_pci_inner_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *args) +{ + unsigned long bit; + + WARN_ON(nr_irqs != 1); + + mutex_lock(&um_pci_mtx); + bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS); + if (bit >= MAX_MSI_VECTORS) { + mutex_unlock(&um_pci_mtx); + return -ENOSPC; + } + + set_bit(bit, um_pci_msi_used); + mutex_unlock(&um_pci_mtx); + + irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip, + domain->host_data, handle_simple_irq, + NULL, NULL); + + return 0; +} + +static void um_pci_inner_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + + mutex_lock(&um_pci_mtx); + + if (!test_bit(d->hwirq, um_pci_msi_used)) + pr_err("trying to free unused MSI#%lu\n", d->hwirq); + else + __clear_bit(d->hwirq, um_pci_msi_used); + + mutex_unlock(&um_pci_mtx); +} + +static const struct irq_domain_ops um_pci_inner_domain_ops = { + .select = msi_lib_irq_domain_select, + .alloc = um_pci_inner_domain_alloc, + .free = um_pci_inner_domain_free, +}; + +#define UM_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_NO_AFFINITY) +#define UM_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX) + +static const struct msi_parent_ops um_pci_msi_parent_ops = { + .required_flags = UM_PCI_MSI_FLAGS_REQUIRED, + .supported_flags = UM_PCI_MSI_FLAGS_SUPPORTED, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI, + .prefix = "UM-virtual-", + .init_dev_msi_info = msi_lib_init_dev_msi_info, +}; + +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUS, +}; 
+ +static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +{ + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + + if (WARN_ON(!reg->dev)) + return -EINVAL; + + /* Yes, we map all pins to the same IRQ ... doesn't matter for now. */ + return reg->dev->irq; +} + +void *pci_root_bus_fwnode(struct pci_bus *bus) +{ + return um_pci_fwnode; +} + +static long um_pci_map_platform(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + if (!um_pci_platform_device) + return -ENOENT; + + *ops = &um_pci_device_bar_ops; + *priv = &um_pci_platform_device->resptr[0]; + + return offset; +} + +static const struct logic_iomem_region_ops um_pci_platform_ops = { + .map = um_pci_map_platform, +}; + +static struct resource virt_platform_resource = { + .name = "platform", + .start = 0x10000000, + .end = 0x1fffffff, + .flags = IORESOURCE_MEM, +}; + +int um_pci_device_register(struct um_pci_device *dev) +{ + int i, free = -1; + int err = 0; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) { + err = -ENOSPC; + goto out; + } + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto out; + } + + um_pci_devices[free].dev = dev; + +out: + mutex_unlock(&um_pci_mtx); + if (!err) + um_pci_rescan(); + return err; +} + +void um_pci_device_unregister(struct um_pci_device *dev) +{ + int i; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + break; + } + mutex_unlock(&um_pci_mtx); + + if (i < MAX_DEVICES) { + struct pci_dev *pci_dev; + + pci_dev = pci_get_slot(bridge->bus, i); + if (pci_dev) + pci_stop_and_remove_bus_device_locked(pci_dev); + } +} + +int um_pci_platform_device_register(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device) + 
return -EBUSY; + um_pci_platform_device = dev; + return 0; +} + +void um_pci_platform_device_unregister(struct um_pci_device *dev) +{ + guard(mutex)(&um_pci_mtx); + if (um_pci_platform_device == dev) + um_pci_platform_device = NULL; +} + +static int __init um_pci_init(void) +{ + int err, i; + + WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, + &um_pci_cfgspace_ops)); + WARN_ON(logic_iomem_add_region(&virt_iomem_resource, + &um_pci_iomem_ops)); + WARN_ON(logic_iomem_add_region(&virt_platform_resource, + &um_pci_platform_ops)); + + bridge = pci_alloc_host_bridge(0); + if (!bridge) { + err = -ENOMEM; + goto free; + } + + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); + if (!um_pci_fwnode) { + err = -ENOMEM; + goto free; + } + + struct irq_domain_info info = { + .fwnode = um_pci_fwnode, + .ops = &um_pci_inner_domain_ops, + .size = MAX_MSI_VECTORS, + }; + + um_pci_inner_domain = msi_create_parent_irq_domain(&info, &um_pci_msi_parent_ops); + if (!um_pci_inner_domain) { + err = -ENOMEM; + goto free; + } + + pci_add_resource(&bridge->windows, &virt_iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->ops = &um_pci_ops; + bridge->map_irq = um_pci_map_irq; + + for (i = 0; i < MAX_DEVICES; i++) { + resource_size_t start; + + start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE; + um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE); + if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) { + err = -ENOMEM; + goto free; + } + } + + err = pci_host_probe(bridge); + if (err) + goto free; + + return 0; + +free: + if (um_pci_inner_domain) + irq_domain_remove(um_pci_inner_domain); + if (um_pci_fwnode) + irq_domain_free_fwnode(um_pci_fwnode); + if (bridge) { + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + } + return err; +} +device_initcall(um_pci_init); + +static void __exit um_pci_exit(void) +{ + irq_domain_remove(um_pci_inner_domain); + pci_free_resource_list(&bridge->windows); + 
pci_free_host_bridge(bridge); +} +module_exit(um_pci_exit); diff --git a/arch/um/drivers/virt-pci.h b/arch/um/drivers/virt-pci.h new file mode 100644 index 000000000000..b20d1475d1eb --- /dev/null +++ b/arch/um/drivers/virt-pci.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VIRT_PCI_H +#define __UM_VIRT_PCI_H + +#include <linux/pci.h> + +struct um_pci_device { + const struct um_pci_ops *ops; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + int irq; +}; + +struct um_pci_ops { + unsigned long (*cfgspace_read)(struct um_pci_device *dev, + unsigned int offset, int size); + void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset, + int size, unsigned long val); + + unsigned long (*bar_read)(struct um_pci_device *dev, int bar, + unsigned int offset, int size); + void (*bar_write)(struct um_pci_device *dev, int bar, + unsigned int offset, int size, unsigned long val); + + void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer, + unsigned int offset, int size); + void (*bar_copy_to)(struct um_pci_device *dev, int bar, + unsigned int offset, const void *buffer, int size); + void (*bar_set)(struct um_pci_device *dev, int bar, + unsigned int offset, u8 value, int size); +}; + +int um_pci_device_register(struct um_pci_device *dev); +void um_pci_device_unregister(struct um_pci_device *dev); + +int um_pci_platform_device_register(struct um_pci_device *dev); +void um_pci_platform_device_unregister(struct um_pci_device *dev); + +#endif /* __UM_VIRT_PCI_H */ diff --git a/arch/um/drivers/virtio_pcidev.c b/arch/um/drivers/virtio_pcidev.c new file mode 100644 index 000000000000..f9b4b6f7582c --- /dev/null +++ b/arch/um/drivers/virtio_pcidev.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/virtio.h> +#include 
<linux/virtio_config.h> +#include <linux/logic_iomem.h> +#include <linux/of_platform.h> +#include <linux/irqdomain.h> +#include <linux/virtio_pcidev.h> +#include <linux/virtio-uml.h> +#include <linux/delay.h> +#include <linux/msi.h> +#include <linux/unaligned.h> +#include <irq_kern.h> + +#include "virt-pci.h" + +#define to_virtio_pcidev(_pdev) \ + container_of(_pdev, struct virtio_pcidev_device, pdev) + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +struct virtio_pcidev_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +struct virtio_pcidev_device { + struct um_pci_device pdev; + struct virtio_device *vdev; + + struct virtqueue *cmd_vq, *irq_vq; + +#define VIRTIO_PCIDEV_WRITE_BUFS 20 + struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS); + +#define VIRTIO_PCIDEV_STAT_WAITING 0 + unsigned long status; + + bool platform; +}; + +static unsigned int virtio_pcidev_max_delay_us = 40000; +module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644); + +static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } + + *posted = false; + return VIRTIO_PCIDEV_WRITE_BUFS; +} + +static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) { + kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]); + dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} + +static int 
virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? &in_sg : NULL, + }; + struct virtio_pcidev_message_buffer *buf; + int delay_count = 0; + bool bounce_out; + int ret, len; + int buf_idx; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); + + buf_idx = virtio_pcidev_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + /* + * The room left behind the header is sizeof(*buf) - cmd_size (the + * message buffer, not the pointer to it); cmd_size was checked + * against sizeof(*buf) above, so the subtraction cannot wrap. + */ + if (posted && extra && extra_size > sizeof(*buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + virtio_pcidev_free_buf(dev, buf); + return -ENOMEM; + } + extra = dev->extra_ptrs[buf_idx]; + /* + * Posted commands return before completion, so queue the + * copy in buf rather than the caller's header; buf is also + * the token virtio_pcidev_free_buf() expects back. + */ + cmd = (void *)buf; + } else if (extra && extra_size <= sizeof(*buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; + } else { + cmd = (void *)buf; + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 
1 : 0, + cmd, GFP_ATOMIC); + if (ret) { + virtio_pcidev_free_buf(dev, buf); + return ret; + } + + if (posted) { + virtqueue_kick(dev->cmd_vq); + return 0; + } + + /* kick and poll for getting a response on the queue */ + set_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + ret = 0; + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == buf) + break; + + if (completed) + virtio_pcidev_free_buf(dev, completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > virtio_pcidev_max_delay_us, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status); + + if (bounce_out) + memcpy(out, buf->data, out_size); + + virtio_pcidev_free_buf(dev, buf); + + return ret; +} + +static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* max 8, we might not use it all */ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + /* size has been checked in um_pci_cfgspace_read() */ + if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = size, + 
.addr = offset, + }, + }; + + /* size has been checked in um_pci_cfgspace_write() */ + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + } + + WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev, + int bar, void *buffer, + unsigned int offset, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = bar, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_read() */ + virtio_pcidev_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev, + int bar, unsigned int offset, + const void *buffer, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = bar, + .size = size, + .addr = offset, + }; + + virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - 
we may only use parts of it */ + u8 data[8]; + + /* size has been checked in um_pci_bar_write() */ + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + virtio_pcidev_bar_copy_to(pdev, bar, offset, data, size); +} + +/* Fill size bytes of a BAR at offset with value, as a posted command. */ +static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + /* BAR fill with one byte: MMIO memset, not a config write */ + .op = VIRTIO_PCIDEV_OP_MMIO_MEMSET, + .bar = bar, + .size = size, + .addr = offset, + }, + .data = value, + }; + + virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct um_pci_ops virtio_pcidev_um_pci_ops = { + .cfgspace_read = virtio_pcidev_cfgspace_read, + .cfgspace_write = virtio_pcidev_cfgspace_write, + .bar_read = virtio_pcidev_bar_read, + .bar_write = virtio_pcidev_bar_write, + .bar_copy_from = virtio_pcidev_bar_copy_from, + .bar_copy_to = virtio_pcidev_bar_copy_to, + .bar_set = virtio_pcidev_bar_set, +}; + +static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void virtio_pcidev_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + + if (!dev->pdev.irq) + return; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->pdev.irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the 
interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pcidev_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(VIRTIO_PCIDEV_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) + virtio_pcidev_free_buf(dev, cmd); +} + +static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + virtio_pcidev_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + virtio_pcidev_irq_vq_addbuf(vq, msg, true); + } +} + +static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev) +{ + struct virtqueue_info vqs_info[] = { + { "cmd", virtio_pcidev_cmd_vq_cb }, + { "irq", virtio_pcidev_irq_vq_cb }, + }; + struct virtqueue *vqs[2]; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + virtio_device_ready(dev->vdev); + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev, + struct virtio_pcidev_device *dev) +{ + um_pci_platform_device_unregister(&dev->pdev); + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev, + struct 
virtio_pcidev_device *dev) +{ + int err; + + dev->platform = true; + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_platform_device_register(&dev->pdev); + if (err) + goto err_reset; + + err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev); + if (err) + goto err_unregister; + + return 0; + +err_unregister: + um_pci_platform_device_unregister(&dev->pdev); +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static int virtio_pcidev_virtio_probe(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + dev->pdev.ops = &virtio_pcidev_um_pci_ops; + + if (of_device_is_compatible(vdev->dev.of_node, "simple-bus")) + return virtio_pcidev_virtio_platform_probe(vdev, dev); + + err = virtio_pcidev_init_vqs(dev); + if (err) + goto err_free; + + err = um_pci_device_register(&dev->pdev); + if (err) + goto err_reset; + + device_set_wakeup_enable(&vdev->dev, true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. 
+ */ + virtio_uml_set_no_vq_suspend(vdev, true); + + return 0; + +err_reset: + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +err_free: + kfree(dev); + return err; +} + +static void virtio_pcidev_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_pcidev_device *dev = vdev->priv; + + if (dev->platform) { + of_platform_depopulate(&vdev->dev); + __virtio_pcidev_virtio_platform_remove(vdev, dev); + return; + } + + device_set_wakeup_enable(&vdev->dev, false); + + um_pci_device_unregister(&dev->pdev); + + /* Stop all virtqueues */ + virtio_reset_device(vdev); + dev->cmd_vq = NULL; + dev->irq_vq = NULL; + vdev->config->del_vqs(vdev); + + kfree(dev); +} + +static void virtio_pcidev_virtio_shutdown(struct virtio_device *vdev) +{ + /* nothing to do, we just don't want queue shutdown */ +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver virtio_pcidev_virtio_driver = { + .driver.name = "virtio-pci", + .id_table = id_table, + .probe = virtio_pcidev_virtio_probe, + .remove = virtio_pcidev_virtio_remove, + .shutdown = virtio_pcidev_virtio_shutdown, +}; + +static int __init virtio_pcidev_init(void) +{ + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + return register_virtio_driver(&virtio_pcidev_virtio_driver); +} +late_initcall(virtio_pcidev_init); + +static void __exit virtio_pcidev_exit(void) +{ + unregister_virtio_driver(&virtio_pcidev_virtio_driver); +} +module_exit(virtio_pcidev_exit); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c new file mode 100644 index 000000000000..6cf1152a1a4e --- /dev/null +++ b/arch/um/drivers/virtio_uml.c @@ -0,0 +1,1495 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtio vhost-user driver + * + * Copyright(c) 2019 Intel Corporation + * + * This driver allows 
virtio devices to be used over a vhost-user socket. + * + * Guest devices can be instantiated by kernel module or command line + * parameters. One device will be created for each parameter. Syntax: + * + * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>] + * where: + * <socket> := vhost-user socket path to connect + * <virtio_id> := virtio device id (as in virtio_ids.h) + * <platform_id> := (optional) platform device id + * + * example: + * virtio_uml.device=/var/uml.socket:1 + * + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. + */ +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/string_choices.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/time-internal.h> +#include <linux/virtio-uml.h> +#include <shared/as-layout.h> +#include <irq_kern.h> +#include <init.h> +#include <os.h> +#include "vhost_user.h" + +#define MAX_SUPPORTED_QUEUE_SIZE 256 + +#define to_virtio_uml_device(_vdev) \ + container_of(_vdev, struct virtio_uml_device, vdev) + +struct virtio_uml_platform_data { + u32 virtio_device_id; + const char *socket_path; + struct work_struct conn_broken_wk; + struct platform_device *pdev; +}; + +struct virtio_uml_device { + struct virtio_device vdev; + struct platform_device *pdev; + struct virtio_uml_platform_data *pdata; + + raw_spinlock_t sock_lock; + int sock, req_fd, irq; + u64 features; + u64 protocol_features; + u64 max_vqs; + u8 status; + u8 registered:1; + u8 suspended:1; + u8 no_vq_suspend:1; + + u8 config_changed_irq:1; + uint64_t vq_irq_vq_map; + int recv_rc; +}; + +struct virtio_uml_vq_info { + int kick_fd, call_fd; + char name[32]; + bool suspended; +}; + +#define vu_err(vu_dev, ...) 
dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) + +/* Vhost-user protocol */ + +static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, + const int *fds, unsigned int fds_num) +{ + int rc; + + do { + rc = os_sendmsg_fds(fd, buf, len, fds, fds_num); + if (rc > 0) { + buf += rc; + len -= rc; + fds = NULL; + fds_num = 0; + } + } while (len && (rc >= 0 || rc == -EINTR)); + + if (rc < 0) + return rc; + return 0; +} + +static int full_read(int fd, void *buf, int len, bool abortable) +{ + int rc; + + if (!len) + return 0; + + do { + rc = os_read_file(fd, buf, len); + if (rc > 0) { + buf += rc; + len -= rc; + } + } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); + + if (rc < 0) + return rc; + if (rc == 0) + return -ECONNRESET; + return 0; +} + +static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) +{ + return full_read(fd, msg, sizeof(msg->header), true); +} + +static int vhost_user_recv(struct virtio_uml_device *vu_dev, + int fd, struct vhost_user_msg *msg, + size_t max_payload_size, bool wait) +{ + size_t size; + int rc; + + /* + * In virtio time-travel mode, we're handling all the vhost-user + * FDs by polling them whenever appropriate. However, we may get + * into a situation where we're sending out an interrupt message + * to a device (e.g. a net device) and need to handle a simulation + * time message while doing so, e.g. one that tells us to update + * our idea of how long we can run without scheduling. + * + * Thus, we need to not just read() from the given fd, but need + * to also handle messages for the simulation time - this function + * does that for us while waiting for the given fd to be readable. 
+ */ + if (wait) + time_travel_wait_readable(fd); + + rc = vhost_user_recv_header(fd, msg); + + if (rc) + return rc; + size = msg->header.size; + if (size > max_payload_size) + return -EPROTO; + return full_read(fd, &msg->payload, size, false); +} + +static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, + int rc) +{ + struct virtio_uml_platform_data *pdata = vu_dev->pdata; + + if (rc != -ECONNRESET) + return; + + if (!vu_dev->registered) + return; + + vu_dev->registered = 0; + + schedule_work(&pdata->conn_broken_wk); +} + +static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, + max_payload_size, true); + + if (rc) { + vhost_user_check_reset(vu_dev, rc); + return rc; + } + + if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) + return -EPROTO; + + return 0; +} + +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev, + u64 *value) +{ + struct vhost_user_msg msg; + int rc = vhost_user_recv_resp(vu_dev, &msg, + sizeof(msg.payload.integer)); + + if (rc) + return rc; + if (msg.header.size != sizeof(msg.payload.integer)) + return -EPROTO; + *value = msg.payload.integer; + return 0; +} + +static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, + max_payload_size, false); + + if (rc) + return rc; + + if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) != + VHOST_USER_VERSION) + return -EPROTO; + + return 0; +} + +static int vhost_user_send(struct virtio_uml_device *vu_dev, + bool need_response, struct vhost_user_msg *msg, + int *fds, size_t num_fds) +{ + size_t size = sizeof(msg->header) + msg->header.size; + unsigned long flags; + bool request_ack; + int rc; + + msg->header.flags |= VHOST_USER_VERSION; + + /* + * The need_response flag indicates that we already need a response, + 
* e.g. to read the features. In these cases, don't request an ACK as + * it is meaningless. Also request an ACK only if supported. + */ + request_ack = !need_response; + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK))) + request_ack = false; + + if (request_ack) + msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; + + raw_spin_lock_irqsave(&vu_dev->sock_lock, flags); + rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); + if (rc < 0) + goto out; + + if (request_ack) { + uint64_t status; + + rc = vhost_user_recv_u64(vu_dev, &status); + if (rc) + goto out; + + if (status) { + vu_err(vu_dev, "slave reports error: %llu\n", status); + rc = -EIO; + goto out; + } + } + +out: + raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + return rc; +} + +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, + bool need_response, u32 request) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, need_response, &msg, NULL, 0); +} + +static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev, + u32 request, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev, + u32 request, u64 value) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = value, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev) +{ + return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER); +} + +static int vhost_user_get_features(struct virtio_uml_device *vu_dev, + u64 *features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, features); +} + +static int 
vhost_user_set_features(struct virtio_uml_device *vu_dev, + u64 features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev, + u64 *protocol_features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_PROTOCOL_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, protocol_features); +} + +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, + u64 protocol_features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES, + protocol_features); +} + +static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev, + u64 *queue_num) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_QUEUE_NUM); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, queue_num); +} + +static void vhost_user_reply(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, int response) +{ + struct vhost_user_msg reply = { + .payload.integer = response, + }; + size_t size = sizeof(reply.header) + sizeof(reply.payload.integer); + int rc; + + reply.header = msg->header; + reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY; + reply.header.flags |= VHOST_USER_FLAG_REPLY; + reply.header.size = sizeof(reply.payload.integer); + + rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0); + + if (rc) + vu_err(vu_dev, + "sending reply to slave request failed: %d (size %zu)\n", + rc, size); +} + +static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, + struct time_travel_event *ev) +{ + struct virtqueue *vq; + int response = 1; + struct { + struct vhost_user_msg msg; + u8 extra_payload[512]; + } msg; + int rc; + irqreturn_t irq_rc = IRQ_NONE; + + while (1) { + rc = vhost_user_recv_req(vu_dev, &msg.msg, + sizeof(msg.msg.payload) + + sizeof(msg.extra_payload)); + if (rc) + break; + + switch (msg.msg.header.request) { + case 
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: + vu_dev->config_changed_irq = true; + response = 0; + break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); + break; + } + } + break; + case VHOST_USER_SLAVE_IOTLB_MSG: + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ + case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ + default: + vu_err(vu_dev, "unexpected slave request %d\n", + msg.msg.header.request); + } + + if (ev && !vu_dev->suspended) + time_travel_add_irq_event(ev); + + if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) + vhost_user_reply(vu_dev, &msg.msg, response); + irq_rc = IRQ_HANDLED; + } + /* mask EAGAIN as we try non-blocking read until socket is empty */ + vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc; + return irq_rc; +} + +static irqreturn_t vu_req_interrupt(int irq, void *data) +{ + struct virtio_uml_device *vu_dev = data; + irqreturn_t ret = IRQ_HANDLED; + + if (!um_irq_timetravel_handler_used()) + ret = vu_req_read_message(vu_dev, NULL); + + if (vu_dev->recv_rc) { + vhost_user_check_reset(vu_dev, vu_dev->recv_rc); + } else if (vu_dev->vq_irq_vq_map) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) + vring_interrupt(0 /* ignored */, vq); + } + vu_dev->vq_irq_vq_map = 0; + } else if (vu_dev->config_changed_irq) { + virtio_config_changed(&vu_dev->vdev); + vu_dev->config_changed_irq = false; + } + + return ret; +} + +static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, + struct time_travel_event *ev) +{ + vu_req_read_message(data, ev); +} + +static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) +{ + int rc, req_fds[2]; + + /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */ + rc = os_pipe(req_fds, true, true); + if 
(rc < 0) + return rc; + vu_dev->req_fd = req_fds[0]; + + rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, + vu_req_interrupt, IRQF_SHARED, + vu_dev->pdev->name, vu_dev, + vu_req_interrupt_comm_handler); + if (rc < 0) + goto err_close; + + vu_dev->irq = rc; + + rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, + req_fds[1]); + if (rc) + goto err_free_irq; + + goto out; + +err_free_irq: + um_free_irq(vu_dev->irq, vu_dev); +err_close: + os_close_file(req_fds[0]); +out: + /* Close unused write end of request fds */ + os_close_file(req_fds[1]); + return rc; +} + +static int vhost_user_init(struct virtio_uml_device *vu_dev) +{ + int rc = vhost_user_set_owner(vu_dev); + + if (rc) + return rc; + rc = vhost_user_get_features(vu_dev, &vu_dev->features); + if (rc) + return rc; + + if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) { + rc = vhost_user_get_protocol_features(vu_dev, + &vu_dev->protocol_features); + if (rc) + return rc; + vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F; + rc = vhost_user_set_protocol_features(vu_dev, + vu_dev->protocol_features); + if (rc) + return rc; + } + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + rc = vhost_user_init_slave_req(vu_dev); + if (rc) + return rc; + } + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) { + rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs); + if (rc) + return rc; + } else { + vu_dev->max_vqs = U64_MAX; + } + + return 0; +} + +static void vhost_user_get_config(struct virtio_uml_device *vu_dev, + u32 offset, void *buf, u32 len) +{ + u32 cfg_size = offset + len; + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + cfg_size; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = 
VHOST_USER_GET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = 0; + msg->payload.config.size = cfg_size; + + rc = vhost_user_send(vu_dev, true, msg, NULL, 0); + if (rc) { + vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n", + rc); + goto free; + } + + rc = vhost_user_recv_resp(vu_dev, msg, msg_size); + if (rc) { + vu_err(vu_dev, + "receiving VHOST_USER_GET_CONFIG response failed: %d\n", + rc); + goto free; + } + + if (msg->header.size != payload_size || + msg->payload.config.size != cfg_size) { + rc = -EPROTO; + vu_err(vu_dev, + "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n", + msg->header.size, payload_size, + msg->payload.config.size, cfg_size); + goto free; + } + memcpy(buf, msg->payload.config.payload + offset, len); + +free: + kfree(msg); +} + +static void vhost_user_set_config(struct virtio_uml_device *vu_dev, + u32 offset, const void *buf, u32 len) +{ + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + len; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = VHOST_USER_SET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = offset; + msg->payload.config.size = len; + memcpy(msg->payload.config.payload, buf, len); + + rc = vhost_user_send(vu_dev, false, msg, NULL, 0); + if (rc) + vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n", + rc); + + kfree(msg); +} + +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out, + struct vhost_user_mem_region *region_out) +{ + unsigned long long mem_offset; + int rc = phys_mapping(addr, &mem_offset); + + if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc)) + return -EFAULT; + *fd_out = rc; + region_out->guest_addr = addr; + region_out->user_addr = addr; + 
region_out->size = size; + region_out->mmap_offset = mem_offset; + + /* Ensure mapping is valid for the entire region */ + rc = phys_mapping(addr + size - 1, &mem_offset); + if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n", + addr + size - 1, rc, *fd_out)) + return -EFAULT; + return 0; +} + +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_MEM_TABLE, + .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]), + .payload.mem_regions.num = 1, + }; + unsigned long reserved = uml_reserved - uml_physmem; + int fds[2]; + int rc; + + /* + * This is a bit tricky, see also the comment with setup_physmem(). + * + * Essentially, setup_physmem() uses a file to mmap() our physmem, + * but the code and data we *already* have is omitted. To us, this + * is no difference, since they both become part of our address + * space and memory consumption. To somebody looking in from the + * outside, however, it is different because the part of our memory + * consumption that's already part of the binary (code/data) is not + * mapped from the file, so it's not visible to another mmap from + * the file descriptor. + * + * Thus, don't advertise this space to the vhost-user slave. This + * means that the slave will likely abort or similar when we give + * it an address from the hidden range, since it's not marked as + * a valid address, but at least that way we detect the issue and + * don't just have the slave read an all-zeroes buffer from the + * shared memory file, or write something there that we can never + * see (depending on the direction of the virtqueue traffic.) 
+ * + * Since we usually don't want to use .text for virtio buffers, + * this effectively means that you cannot use + * 1) global variables, which are in the .bss and not in the shm + * file-backed memory + * 2) the stack in some processes, depending on where they have + * their stack (or maybe only no interrupt stack?) + * + * The stack is already not typically valid for DMA, so this isn't + * much of a restriction, but global variables might be encountered. + * + * It might be possible to fix it by copying around the data that's + * between bss_start and where we map the file now, but it's not + * something that you typically encounter with virtio drivers, so + * it didn't seem worthwhile. + */ + rc = vhost_user_init_mem_region(reserved, physmem_size - reserved, + &fds[0], + &msg.payload.mem_regions.regions[0]); + + if (rc < 0) + return rc; + + return vhost_user_send(vu_dev, false, &msg, fds, + msg.payload.mem_regions.num); +} + +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev, + u32 request, u32 index, u32 num) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.vring_state), + .payload.vring_state.index = index, + .payload.vring_state.num = num, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev, + u32 index, u32 num) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM, + index, num); +} + +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev, + u32 index, u32 offset) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE, + index, offset); +} + +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev, + u32 index, u64 desc, u64 used, u64 avail, + u64 log) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_VRING_ADDR, + .header.size = sizeof(msg.payload.vring_addr), + .payload.vring_addr.index = index, + 
.payload.vring_addr.desc = desc, + .payload.vring_addr.used = used, + .payload.vring_addr.avail = avail, + .payload.vring_addr.log = log, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev, + u32 request, int index, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = index, + }; + + if (index & ~VHOST_USER_VRING_INDEX_MASK) + return -EINVAL; + if (fd < 0) { + msg.payload.integer |= VHOST_USER_VRING_POLL_MASK; + return vhost_user_send(vu_dev, false, &msg, NULL, 0); + } + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL, + index, fd); +} + +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK, + index, fd); +} + +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev, + u32 index, bool enable) +{ + if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))) + return 0; + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE, + index, enable); +} + + +/* Virtio interface */ + +static bool vu_notify(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + const uint64_t n = 1; + int rc; + + if (info->suspended) + return true; + + time_travel_propagate_time(); + + if (info->kick_fd < 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, + vq->index, 0) == 0; + } + + do { + rc = os_write_file(info->kick_fd, &n, sizeof(n)); + } while (rc == -EINTR); + return !WARN(rc != sizeof(n), "write returned %d\n", rc); +} + +static irqreturn_t vu_interrupt(int irq, void *opaque) +{ 
+ struct virtqueue *vq = opaque; + struct virtio_uml_vq_info *info = vq->priv; + uint64_t n; + int rc; + irqreturn_t ret = IRQ_NONE; + + do { + rc = os_read_file(info->call_fd, &n, sizeof(n)); + if (rc == sizeof(n)) + ret |= vring_interrupt(irq, vq); + } while (rc == sizeof(n) || rc == -EINTR); + WARN(rc != -EAGAIN, "read returned %d\n", rc); + return ret; +} + + +static void vu_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_get_config(vu_dev, offset, buf, len); +} + +static void vu_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_set_config(vu_dev, offset, buf, len); +} + +static u8 vu_get_status(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->status; +} + +static void vu_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = status; +} + +static void vu_reset(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = 0; +} + +static void vu_del_vq(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + + if (info->call_fd >= 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } + + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); + + vring_del_virtqueue(vq); + kfree(info); +} + +static void vu_del_vqs(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct virtqueue *vq, *n; + u64 features; + + /* Note: reverse order as a workaround to a decoding bug in snabb */ + list_for_each_entry_reverse(vq, &vdev->vqs, list) + WARN_ON(vhost_user_set_vring_enable(vu_dev, 
vq->index, false)); + + /* Ensure previous messages have been processed */ + WARN_ON(vhost_user_get_features(vu_dev, &features)); + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vu_del_vq(vq); +} + +static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, + struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + int call_fds[2]; + int rc, irq; + + /* no call FD needed/desired in this case */ + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + info->call_fd = -1; + return 0; + } + + /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ + rc = os_pipe(call_fds, true, true); + if (rc < 0) + return rc; + + info->call_fd = call_fds[0]; + irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (irq < 0) { + rc = irq; + goto close_both; + } + + rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); + if (rc) + goto release_irq; + + vu_dev->irq = irq; + + goto out; + +release_irq: + um_free_irq(irq, vq); +close_both: + os_close_file(call_fds[0]); +out: + /* Close (unused) write end of call fds */ + os_close_file(call_fds[1]); + + return rc; +} + +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, + unsigned index, vq_callback_t *callback, + const char *name, bool ctx) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct platform_device *pdev = vu_dev->pdev; + struct virtio_uml_vq_info *info; + struct virtqueue *vq; + int num = MAX_SUPPORTED_QUEUE_SIZE; + int rc; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto error_kzalloc; + } + snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, + pdev->id, name); + + vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, + ctx, vu_notify, callback, info->name); + if (!vq) { + rc = -ENOMEM; + goto error_create; 
+ } + vq->priv = info; + vq->num_max = num; + num = virtqueue_get_vring_size(vq); + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { + info->kick_fd = -1; + } else { + rc = os_eventfd(0, 0); + if (rc < 0) + goto error_kick; + info->kick_fd = rc; + } + + rc = vu_setup_vq_call_fd(vu_dev, vq); + if (rc) + goto error_call; + + rc = vhost_user_set_vring_num(vu_dev, index, num); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_base(vu_dev, index, 0); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_addr(vu_dev, index, + virtqueue_get_desc_addr(vq), + virtqueue_get_used_addr(vq), + virtqueue_get_avail_addr(vq), + (u64) -1); + if (rc) + goto error_setup; + + return vq; + +error_setup: + if (info->call_fd >= 0) { + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } +error_call: + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); +error_kick: + vring_del_virtqueue(vq); +error_create: + kfree(info); +error_kzalloc: + return ERR_PTR(rc); +} + +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], + struct irq_affinity *desc) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + int i, queue_idx = 0, rc; + struct virtqueue *vq; + + /* not supported for now */ + if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs, + "%d VQs requested, only up to 64 or %lld supported\n", + nvqs, vu_dev->max_vqs)) + return -EINVAL; + + rc = vhost_user_set_mem_table(vu_dev); + if (rc) + return rc; + + for (i = 0; i < nvqs; ++i) { + struct virtqueue_info *vqi = &vqs_info[i]; + + if (!vqi->name) { + vqs[i] = NULL; + continue; + } + + vqs[i] = vu_setup_vq(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx); + if (IS_ERR(vqs[i])) { + rc = PTR_ERR(vqs[i]); + goto error_setup; + } + } + + list_for_each_entry(vq, &vdev->vqs, list) { + struct virtio_uml_vq_info *info = vq->priv; + + if (info->kick_fd >= 0) { + rc = 
vhost_user_set_vring_kick(vu_dev, vq->index, + info->kick_fd); + if (rc) + goto error_setup; + } + + rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); + if (rc) + goto error_setup; + } + + return 0; + +error_setup: + vu_del_vqs(vdev); + return rc; +} + +static u64 vu_get_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->features; +} + +static int vu_finalize_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + u64 supported = vdev->features & VHOST_USER_SUPPORTED_F; + + vring_transport_features(vdev); + vu_dev->features = vdev->features | supported; + + return vhost_user_set_features(vu_dev, vu_dev->features); +} + +static const char *vu_bus_name(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->pdev->name; +} + +static const struct virtio_config_ops virtio_uml_config_ops = { + .get = vu_get, + .set = vu_set, + .get_status = vu_get_status, + .set_status = vu_set_status, + .reset = vu_reset, + .find_vqs = vu_find_vqs, + .del_vqs = vu_del_vqs, + .get_features = vu_get_features, + .finalize_features = vu_finalize_features, + .bus_name = vu_bus_name, +}; + +static void virtio_uml_release_dev(struct device *d) +{ + struct virtio_device *vdev = + container_of(d, struct virtio_device, dev); + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + time_travel_propagate_time(); + + /* might not have been opened due to not negotiating the feature */ + if (vu_dev->req_fd >= 0) { + um_free_irq(vu_dev->irq, vu_dev); + os_close_file(vu_dev->req_fd); + } + + os_close_file(vu_dev->sock); + kfree(vu_dev); +} + +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + if (WARN_ON(vdev->config != &virtio_uml_config_ops)) + return; + + vu_dev->no_vq_suspend = no_vq_suspend; + 
dev_info(&vdev->dev, "%s VQ suspend\n", str_disabled_enabled(no_vq_suspend)); +} + +static void vu_of_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + /* + * We can't remove the device from the devicetree so the only thing we + * can do is warn. + */ + WARN_ON(1); +} + +/* Platform device */ + +static struct virtio_uml_platform_data * +virtio_uml_create_pdata(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct virtio_uml_platform_data *pdata; + int ret; + + if (!np) + return ERR_PTR(-EINVAL); + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken); + pdata->pdev = pdev; + + ret = of_property_read_string(np, "socket-path", &pdata->socket_path); + if (ret) + return ERR_PTR(ret); + + ret = of_property_read_u32(np, "virtio-device-id", + &pdata->virtio_device_id); + if (ret) + return ERR_PTR(ret); + + return pdata; +} + +static int virtio_uml_probe(struct platform_device *pdev) +{ + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + struct virtio_uml_device *vu_dev; + int rc; + + if (!pdata) { + pdata = virtio_uml_create_pdata(pdev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + } + + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); + if (!vu_dev) + return -ENOMEM; + + vu_dev->pdata = pdata; + vu_dev->vdev.dev.parent = &pdev->dev; + vu_dev->vdev.dev.release = virtio_uml_release_dev; + vu_dev->vdev.config = &virtio_uml_config_ops; + vu_dev->vdev.id.device = pdata->virtio_device_id; + vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; + vu_dev->pdev = pdev; + vu_dev->req_fd = -1; + vu_dev->irq = UM_IRQ_ALLOC; + + time_travel_propagate_time(); + + do { + rc = 
os_connect_socket(pdata->socket_path); + } while (rc == -EINTR); + if (rc < 0) + goto error_free; + vu_dev->sock = rc; + + raw_spin_lock_init(&vu_dev->sock_lock); + + rc = vhost_user_init(vu_dev); + if (rc) + goto error_init; + + platform_set_drvdata(pdev, vu_dev); + + device_set_wakeup_capable(&vu_dev->vdev.dev, true); + + rc = register_virtio_device(&vu_dev->vdev); + if (rc) { + put_device(&vu_dev->vdev.dev); + return rc; + } + vu_dev->registered = 1; + return 0; + +error_init: + os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); + return rc; +} + +static void virtio_uml_remove(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + unregister_virtio_device(&vu_dev->vdev); +} + +/* Command line device list */ + +static void vu_cmdline_release_dev(struct device *d) +{ +} + +static struct device vu_cmdline_parent = { + .init_name = "virtio-uml-cmdline", + .release = vu_cmdline_release_dev, +}; + +static bool vu_cmdline_parent_registered; +static int vu_cmdline_id; + +static int vu_unregister_cmdline_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + + kfree(pdata->socket_path); + platform_device_unregister(pdev); + return 0; +} + +static void vu_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + vu_unregister_cmdline_device(&pdata->pdev->dev, NULL); +} + +static int vu_cmdline_set(const char *device, const struct kernel_param *kp) +{ + const char *ids = strchr(device, ':'); + unsigned int virtio_device_id; + int processed, consumed, err; + char *socket_path; + struct virtio_uml_platform_data pdata, *ppdata; + struct platform_device *pdev; + + if (!ids || ids == 
device) + return -EINVAL; + + processed = sscanf(ids, ":%u%n:%d%n", + &virtio_device_id, &consumed, + &vu_cmdline_id, &consumed); + + if (processed < 1 || ids[consumed]) + return -EINVAL; + + if (!vu_cmdline_parent_registered) { + err = device_register(&vu_cmdline_parent); + if (err) { + pr_err("Failed to register parent device!\n"); + put_device(&vu_cmdline_parent); + return err; + } + vu_cmdline_parent_registered = true; + } + + socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL); + if (!socket_path) + return -ENOMEM; + + pdata.virtio_device_id = (u32) virtio_device_id; + pdata.socket_path = socket_path; + + pr_info("Registering device virtio-uml.%d id=%d at %s\n", + vu_cmdline_id, virtio_device_id, socket_path); + + pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml", + vu_cmdline_id++, &pdata, + sizeof(pdata)); + err = PTR_ERR_OR_ZERO(pdev); + if (err) + goto free; + + ppdata = pdev->dev.platform_data; + ppdata->pdev = pdev; + INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken); + + return 0; + +free: + kfree(socket_path); + return err; +} + +static int vu_cmdline_get_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + char *buffer = data; + unsigned int len = strlen(buffer); + + snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n", + pdata->socket_path, pdata->virtio_device_id, pdev->id); + return 0; +} + +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp) +{ + buffer[0] = '\0'; + if (vu_cmdline_parent_registered) + device_for_each_child(&vu_cmdline_parent, buffer, + vu_cmdline_get_device); + return strlen(buffer) + 1; +} + +static const struct kernel_param_ops vu_cmdline_param_ops = { + .set = vu_cmdline_set, + .get = vu_cmdline_get, +}; + +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR); +__uml_help(vu_cmdline_param_ops, +"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n" +" 
Configure a virtio device over a vhost-user socket.\n" +" See virtio_ids.h for a list of possible virtio device id values.\n" +" Optionally use a specific platform_device id.\n\n" +); + + +static void vu_unregister_cmdline_devices(void) +{ + if (vu_cmdline_parent_registered) { + device_for_each_child(&vu_cmdline_parent, NULL, + vu_unregister_cmdline_device); + device_unregister(&vu_cmdline_parent); + vu_cmdline_parent_registered = false; + } +} + +/* Platform driver */ + +static const struct of_device_id virtio_uml_match[] = { + { .compatible = "virtio,uml", }, + { } +}; +MODULE_DEVICE_TABLE(of, virtio_uml_match); + +static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = true; + vhost_user_set_vring_enable(vu_dev, vq->index, false); + } + } + + if (!device_may_wakeup(&vu_dev->vdev.dev)) { + vu_dev->suspended = true; + return 0; + } + + return irq_set_irq_wake(vu_dev->irq, 1); +} + +static int virtio_uml_resume(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = false; + vhost_user_set_vring_enable(vu_dev, vq->index, true); + } + } + + vu_dev->suspended = false; + + if (!device_may_wakeup(&vu_dev->vdev.dev)) + return 0; + + return irq_set_irq_wake(vu_dev->irq, 0); +} + +static struct platform_driver virtio_uml_driver = { + .probe = virtio_uml_probe, + .remove = virtio_uml_remove, + .driver = { + .name = "virtio-uml", + .of_match_table = virtio_uml_match, + }, + .suspend = virtio_uml_suspend, + .resume = virtio_uml_resume, +}; + +static int __init virtio_uml_init(void) +{ + 
return platform_driver_register(&virtio_uml_driver); +} + +static void __exit virtio_uml_exit(void) +{ + platform_driver_unregister(&virtio_uml_driver); + vu_unregister_cmdline_devices(); +} + +module_init(virtio_uml_init); +module_exit(virtio_uml_exit); +__uml_exitcall(virtio_uml_exit); + +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index 20e30be44795..d05918e422f9 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <stddef.h> @@ -18,6 +18,7 @@ struct xterm_chan { int pid; int helper_pid; + int chan_fd; char *title; int device; int raw; @@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts) return NULL; *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, + .chan_fd = -1, .device = device, .title = opts->xterm_title, .raw = opts->raw } ); @@ -40,7 +42,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts) } /* Only changed by xterm_setup, which is a setup */ -static char *terminal_emulator = "xterm"; +static char *terminal_emulator = CONFIG_XTERM_CHAN_DEFAULT_EMULATOR; static char *title_switch = "-T"; static char *exec_switch = "-e"; @@ -77,8 +79,9 @@ __uml_setup("xterm=", xterm_setup, " respectively. The title switch must have the form '<switch> title',\n" " not '<switch>=title'. Similarly, the exec switch must have the form\n" " '<switch> command arg1 arg2 ...'.\n" -" The default values are 'xterm=xterm,-T,-e'. 
Values for gnome-terminal\n" -" are 'xterm=gnome-terminal,-t,-x'.\n\n" +" The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR + ",-T,-e'.\n" +" Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n" ); static int xterm_open(int input, int output, int primary, void *d, @@ -94,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d, if (access(argv[4], X_OK) < 0) argv[4] = "port-helper"; - /* - * Check that DISPLAY is set, this doesn't guarantee the xterm - * will work but w/o it we can be pretty sure it won't. - */ - if (getenv("DISPLAY") == NULL) { - printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n"); + /* Ensure we are running on Xorg or Wayland. */ + if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) { + printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n"); return -ENODEV; } @@ -149,10 +149,11 @@ static int xterm_open(int input, int output, int primary, void *d, goto out_kill; } + data->chan_fd = fd; new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; - printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", + printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n", -err); goto out_kill; } @@ -206,6 +207,8 @@ static void xterm_close(int fd, void *d) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; + if (data->chan_fd != -1) + os_close_file(data->chan_fd); os_close_file(fd); } diff --git a/arch/um/drivers/xterm.h b/arch/um/drivers/xterm.h index 56b9c4aba423..5968da3a6aba 100644 --- a/arch/um/drivers/xterm.h +++ b/arch/um/drivers/xterm.h @@ -1,6 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL */ #ifndef __XTERM_H__ diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index e8f9957bfbf6..3971252cb1a6 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 
2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL */ #include <linux/slab.h> @@ -9,6 +9,7 @@ #include <asm/irq.h> #include <irq_kern.h> #include <os.h> +#include "xterm.h" struct xterm_wait { struct completion ready; @@ -20,12 +21,19 @@ struct xterm_wait { static irqreturn_t xterm_interrupt(int irq, void *data) { struct xterm_wait *xterm = data; - int fd; + int fd = -1, n_fds = 1; + ssize_t ret; - fd = os_rcv_fd(xterm->fd, &xterm->pid); - if (fd == -EAGAIN) + ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds, + &xterm->pid, sizeof(xterm->pid)); + if (ret == -EAGAIN) return IRQ_NONE; + if (ret < 0) + fd = ret; + else if (ret != sizeof(xterm->pid)) + fd = -EMSGSIZE; + xterm->new_fd = fd; complete(&xterm->ready); @@ -51,7 +59,7 @@ int xterm_fd(int socket, int *pid_out) err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, IRQF_SHARED, "xterm", data); - if (err) { + if (err < 0) { printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " "err = %d\n", err); ret = err; |
