diff options
Diffstat (limited to 'net/core/pktgen.c')
| -rw-r--r-- | net/core/pktgen.c | 1734 |
1 files changed, 1074 insertions, 660 deletions
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9640972ec50e..d41b03fd1f63 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Authors: * Copyright 2001, 2002 by Robert Olsson <robert.olsson@its.uu.se> @@ -8,12 +9,6 @@ * Ben Greear <greearb@candelatech.com> * Jens Låås <jens.laas@data.slu.se> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * * A tool for loading the network with preconfigurated packets. * The tool is implemented as a linux module. Parameters are output * device, delay (to hard_xmit), number of packets, and whether @@ -60,21 +55,21 @@ * * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) * - * * 021124 Finished major redesign and rewrite for new functionality. - * See Documentation/networking/pktgen.txt for how to use this. + * See Documentation/networking/pktgen.rst for how to use this. * * The new operation: * For each CPU one thread/process is created at start. This process checks * for running devices in the if_list and sends packets until count is 0 it * also the thread checks the thread->control which is used for inter-process * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. + * way. + * The if_list is RCU protected, and the if_lock remains to protect updating + * of if_list, from "add_device" as it invoked from userspace (via proc write). * * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" - * to /proc gives result code thats should be read be the "writer". 
+ * to /proc gives result code that should be read be the "writer". * For practical use this should be no problem. * * Note when adding devices to a specific CPU there good idea to also assign @@ -96,7 +91,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * <shemminger@osdl.org> 040923 * - * Randy Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler warning * * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 @@ -113,13 +108,13 @@ * * Fixed src_mac command to set source mac of packet to value specified in * command by Adit Ranadive <adit.262@gmail.com> - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/sys.h> #include <linux/types.h> +#include <linux/minmax.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> @@ -157,13 +152,14 @@ #include <linux/etherdevice.h> #include <linux/kthread.h> #include <linux/prefetch.h> +#include <linux/mmzone.h> #include <net/net_namespace.h> #include <net/checksum.h> #include <net/ipv6.h> +#include <net/udp.h> +#include <net/ip6_checksum.h> #include <net/addrconf.h> -#ifdef CONFIG_XFRM #include <net/xfrm.h> -#endif #include <net/netns/generic.h> #include <asm/byteorder.h> #include <linux/rcupdate.h> @@ -174,30 +170,55 @@ #include <asm/dma.h> #include <asm/div64.h> /* do_div */ -#define VERSION "2.74" +#define VERSION "2.75" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) - -#define func_enter() pr_debug("entering %s\n", __func__); +/* Max number of internet mix entries that can be specified in imix_weights. 
*/ +#define MAX_IMIX_ENTRIES 20 +#define IMIX_PRECISION 100 /* Precision of IMIX distribution */ + +#define func_enter() pr_debug("entering %s\n", __func__) + +#define PKT_FLAGS \ + pf(IPV6) /* Interface in IPV6 Mode */ \ + pf(IPSRC_RND) /* IP-Src Random */ \ + pf(IPDST_RND) /* IP-Dst Random */ \ + pf(TXSIZE_RND) /* Transmit size is random */ \ + pf(UDPSRC_RND) /* UDP-Src Random */ \ + pf(UDPDST_RND) /* UDP-Dst Random */ \ + pf(UDPCSUM) /* Include UDP checksum */ \ + pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \ + pf(MPLS_RND) /* Random MPLS labels */ \ + pf(QUEUE_MAP_RND) /* queue map Random */ \ + pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \ + pf(FLOW_SEQ) /* Sequential flows */ \ + pf(IPSEC) /* ipsec on for flows */ \ + pf(MACSRC_RND) /* MAC-Src Random */ \ + pf(MACDST_RND) /* MAC-Dst Random */ \ + pf(VID_RND) /* Random VLAN ID */ \ + pf(SVID_RND) /* Random SVLAN ID */ \ + pf(NODE) /* Node memory alloc*/ \ + pf(SHARED) /* Shared SKB */ \ + +#define pf(flag) flag##_SHIFT, +enum pkt_flags { + PKT_FLAGS +}; +#undef pf /* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ -#define F_FLOW_SEQ (1<<11) /* Sequential flows */ -#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ -#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ -#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ -#define F_NODE (1<<15) /* Node memory alloc*/ +#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT); 
+PKT_FLAGS +#undef pf + +#define pf(flag) __stringify(flag), +static char *pkt_flag_names[] = { + PKT_FLAGS +}; +#undef pf + +#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names) /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -205,9 +226,14 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ -/* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +/* Xmit modes */ +#define M_START_XMIT 0 /* Default normal TX */ +#define M_NETIF_RECEIVE 1 /* Inject packets into stack */ +#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ + +/* If lock -- protects updating of if_list */ +#define if_lock(t) mutex_lock(&(t->if_lock)) +#define if_unlock(t) mutex_unlock(&(t->if_lock)) /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -219,6 +245,12 @@ #define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4) #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) +struct imix_pkt { + u64 size; + u64 weight; + u64 count_so_far; +}; + struct flow_state { __be32 cur_daddr; int count; @@ -238,6 +270,7 @@ struct pktgen_dev { struct proc_dir_entry *entry; /* proc file */ struct pktgen_thread *pg_thread;/* the owner */ struct list_head list; /* chaining in the thread's run-queue */ + struct rcu_head rcu; /* freed by RCU */ int running; /* if false, the test will stop */ @@ -245,13 +278,15 @@ struct pktgen_dev { * we will do a random selection from within the range. 
*/ __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - + int xmit_mode; int min_pkt_size; int max_pkt_size; int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + int removal_mark; /* non-zero => the device is marked for + * removal by worker thread + */ + struct page *page; u64 delay; /* nano-seconds */ @@ -262,7 +297,6 @@ struct pktgen_dev { /* runtime counters relating to clone_skb */ - __u64 allocated_skbs; __u32 clone_count; int last_ok; /* Was last skb sent? * Or a failed transmit of some sort? @@ -314,10 +348,18 @@ struct pktgen_dev { __u16 udp_dst_max; /* exclusive, dest UDP port */ /* DSCP + ECN */ - __u8 tos; /* six MSB of (former) IPv4 TOS - are for dscp codepoint */ - __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 - (see RFC 3260, sec. 4) */ + __u8 tos; /* six MSB of (former) IPv4 TOS + * are for dscp codepoint + */ + __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 + * (see RFC 3260, sec. 4) + */ + + /* IMIX */ + unsigned int n_imix_entries; + struct imix_pkt imix_entries[MAX_IMIX_ENTRIES]; + /* Maps 0-IMIX_PRECISION range to imix_entry based on probability*/ + __u8 imix_distribution[IMIX_PRECISION]; /* MPLS */ unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */ @@ -351,12 +393,12 @@ struct pktgen_dev { __u8 hh[14]; /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; + * 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, + * + * We fill in SRC address later + * 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + * 0x08, 0x00 + * }; */ __u16 pad; /* pad out the hh struct to an even 16 bytes */ @@ -371,6 +413,7 @@ struct pktgen_dev { * device name (not when the inject is * started as it used to do.) 
*/ + netdevice_tracker dev_tracker; char odevname[32]; struct flow_state *flows; unsigned int cflows; /* Concurrent flows (config) */ @@ -381,11 +424,15 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; __u32 skb_priority; /* skb priority field */ + unsigned int burst; /* number of duplicated packets to burst */ int node; /* Memory node */ #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ + __u32 spi; + struct xfrm_dst xdst; + struct dst_ops dstops; #endif char result[512]; }; @@ -398,7 +445,7 @@ struct pktgen_hdr { }; -static int pg_net_id __read_mostly; +static unsigned int pg_net_id __read_mostly; struct pktgen_net { struct net *net; @@ -408,14 +455,15 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; char result[512]; /* Field for thread to receive "posted" events terminate, - stop ifs etc. */ + * stop ifs etc. + */ u32 control; int cpu; @@ -429,8 +477,7 @@ struct pktgen_thread { #define FIND 0 static const char version[] = - "Packet Generator for packet performance testing. " - "Version: " VERSION "\n"; + "Packet Generator for packet performance testing. 
Version: " VERSION "\n"; static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); @@ -439,10 +486,11 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(struct pktgen_net *pn); static void pktgen_reset_all_threads(struct pktgen_net *pn); -static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn); +static void pktgen_stop_all_threads(struct pktgen_net *pn); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); +static void fill_imix_distribution(struct pktgen_dev *pkt_dev); /* Module parameters, defaults. */ static int pg_count_d __read_mostly = 1000; @@ -470,60 +518,55 @@ static int pgctrl_show(struct seq_file *seq, void *v) static ssize_t pgctrl_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err = 0; char data[128]; + size_t max; struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); - if (!capable(CAP_NET_ADMIN)) { - err = -EPERM; - goto out; - } + if (!capable(CAP_NET_ADMIN)) + return -EPERM; - if (count > sizeof(data)) - count = sizeof(data); + if (count < 1) + return -EINVAL; - if (copy_from_user(data, buf, count)) { - err = -EFAULT; - goto out; - } - data[count - 1] = 0; /* Make string */ + max = min(count, sizeof(data) - 1); + if (copy_from_user(data, buf, max)) + return -EFAULT; - if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(pn); + if (data[max - 1] == '\n') + data[max - 1] = 0; /* strip trailing '\n', terminate string */ + else + data[max] = 0; /* terminate string */ + if (!strcmp(data, "stop")) + pktgen_stop_all_threads(pn); else if (!strcmp(data, "start")) pktgen_run_all_threads(pn); - else if (!strcmp(data, "reset")) pktgen_reset_all_threads(pn); - else - pr_warning("Unknown command: %s\n", 
data); - - err = count; + return -EINVAL; -out: - return err; + return count; } static int pgctrl_open(struct inode *inode, struct file *file) { - return single_open(file, pgctrl_show, PDE_DATA(inode)); + return single_open(file, pgctrl_show, pde_data(inode)); } -static const struct file_operations pktgen_fops = { - .owner = THIS_MODULE, - .open = pgctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pgctrl_write, - .release = single_release, +static const struct proc_ops pktgen_proc_ops = { + .proc_open = pgctrl_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pgctrl_write, + .proc_release = single_release, }; static int pktgen_if_show(struct seq_file *seq, void *v) { const struct pktgen_dev *pkt_dev = seq->private; ktime_t stopped; + unsigned int i; u64 idle; seq_printf(seq, @@ -531,6 +574,16 @@ static int pktgen_if_show(struct seq_file *seq, void *v) (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); + if (pkt_dev->n_imix_entries > 0) { + seq_puts(seq, " imix_weights: "); + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + seq_printf(seq, "%llu,%llu ", + pkt_dev->imix_entries[i].size, + pkt_dev->imix_entries[i].weight); + } + seq_puts(seq, "\n"); + } + seq_printf(seq, " frags: %d delay: %llu clone_skb: %d ifname: %s\n", pkt_dev->nfrags, (unsigned long long) pkt_dev->delay, @@ -561,7 +614,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) " dst_min: %s dst_max: %s\n", pkt_dev->dst_min, pkt_dev->dst_max); seq_printf(seq, - " src_min: %s src_max: %s\n", + " src_min: %s src_max: %s\n", pkt_dev->src_min, pkt_dev->src_max); } @@ -571,12 +624,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) is_zero_ether_addr(pkt_dev->src_mac) ? 
pkt_dev->odev->dev_addr : pkt_dev->src_mac); - seq_printf(seq, "dst_mac: "); + seq_puts(seq, "dst_mac: "); seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, - " udp_src_min: %d udp_src_max: %d" - " udp_dst_min: %d udp_dst_max: %d\n", + " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, pkt_dev->udp_dst_max); @@ -585,8 +637,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->nr_labels) { - unsigned int i; - seq_printf(seq, " mpls: "); + seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), i == pkt_dev->nr_labels-1 ? "\n" : ", "); @@ -608,64 +659,34 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->traffic_class) seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); + if (pkt_dev->burst > 1) + seq_printf(seq, " burst: %d\n", pkt_dev->burst); + if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); - seq_printf(seq, " Flags: "); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) + seq_puts(seq, " xmit_mode: netif_receive\n"); + else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) + seq_puts(seq, " xmit_mode: xmit_queue\n"); - if (pkt_dev->flags & F_IPV6) - seq_printf(seq, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - seq_printf(seq, "IPSRC_RND "); - - if (pkt_dev->flags & F_IPDST_RND) - seq_printf(seq, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - seq_printf(seq, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - seq_printf(seq, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - seq_printf(seq, "UDPDST_RND "); - - if (pkt_dev->flags & F_MPLS_RND) - seq_printf(seq, "MPLS_RND "); - - if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_printf(seq, "QUEUE_MAP_RND "); + seq_puts(seq, " Flags: "); - if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_printf(seq, "QUEUE_MAP_CPU "); - - if 
(pkt_dev->cflows) { - if (pkt_dev->flags & F_FLOW_SEQ) - seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ - else - seq_printf(seq, "FLOW_RND "); - } + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (i == FLOW_SEQ_SHIFT) + if (!pkt_dev->cflows) + continue; + if (pkt_dev->flags & (1 << i)) { + seq_printf(seq, "%s ", pkt_flag_names[i]); #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) - seq_printf(seq, "IPSEC "); + if (i == IPSEC_SHIFT && pkt_dev->spi) + seq_printf(seq, "spi:%u ", pkt_dev->spi); #endif - - if (pkt_dev->flags & F_MACSRC_RND) - seq_printf(seq, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - seq_printf(seq, "MACDST_RND "); - - if (pkt_dev->flags & F_VID_RND) - seq_printf(seq, "VID_RND "); - - if (pkt_dev->flags & F_SVID_RND) - seq_printf(seq, "SVID_RND "); - - if (pkt_dev->flags & F_NODE) - seq_printf(seq, "NODE_ALLOC "); + } else if (i == FLOW_SEQ_SHIFT) { + seq_puts(seq, "FLOW_RND "); + } + } seq_puts(seq, "\n"); @@ -679,6 +700,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) (unsigned long long)pkt_dev->sofar, (unsigned long long)pkt_dev->errors); + if (pkt_dev->n_imix_entries > 0) { + int i; + + seq_puts(seq, " imix_size_counts: "); + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + seq_printf(seq, "%llu,%llu ", + pkt_dev->imix_entries[i].size, + pkt_dev->imix_entries[i].count_so_far); + } + seq_puts(seq, "\n"); + } + seq_printf(seq, " started: %lluus stopped: %lluus idle: %lluus\n", (unsigned long long) ktime_to_us(pkt_dev->started_at), @@ -708,40 +741,43 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->result[0]) seq_printf(seq, "Result: %s\n", pkt_dev->result); else - seq_printf(seq, "Result: Idle\n"); + seq_puts(seq, "Result: Idle\n"); return 0; } -static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, - __u32 *num) +static ssize_t hex32_arg(const char __user *user_buffer, size_t maxlen, + __u32 *num) { - int i = 0; + size_t i = 0; + *num = 0; for (; i < maxlen; i++) { int 
value; char c; - *num <<= 4; + if (get_user(c, &user_buffer[i])) return -EFAULT; value = hex_to_bin(c); - if (value >= 0) + if (value >= 0) { + *num <<= 4; *num |= value; - else + } else { break; + } } return i; } -static int count_trail_chars(const char __user * user_buffer, - unsigned int maxlen) +static ssize_t count_trail_chars(const char __user *user_buffer, size_t maxlen) { - int i; + size_t i; for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; switch (c) { @@ -760,14 +796,15 @@ done: return i; } -static long num_arg(const char __user *user_buffer, unsigned long maxlen, - unsigned long *num) +static ssize_t num_arg(const char __user *user_buffer, size_t maxlen, + unsigned long *num) { - int i; + size_t i; *num = 0; for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; if ((c >= '0') && (c <= '9')) { @@ -779,12 +816,13 @@ static long num_arg(const char __user *user_buffer, unsigned long maxlen, return i; } -static int strn_len(const char __user * user_buffer, unsigned int maxlen) +static ssize_t strn_len(const char __user *user_buffer, size_t maxlen) { - int i; + size_t i; for (i = 0; i < maxlen; i++) { char c; + if (get_user(c, &user_buffer[i])) return -EFAULT; switch (c) { @@ -793,8 +831,8 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen) case '\r': case '\t': case ' ': + case '=': goto done_str; - break; default: break; } @@ -803,66 +841,176 @@ done_str: return i; } -static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) +/* Parses imix entries from user buffer. + * The user buffer should consist of imix entries separated by spaces + * where each entry consists of size and weight delimited by commas. + * "size1,weight_1 size2,weight_2 ... size_n,weight_n" for example. 
+ */ +static ssize_t get_imix_entries(const char __user *buffer, + size_t maxlen, + struct pktgen_dev *pkt_dev) +{ + size_t i = 0, max; + ssize_t len; + char c; + + pkt_dev->n_imix_entries = 0; + + do { + unsigned long weight; + unsigned long size; + + if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) + return -E2BIG; + + if (i >= maxlen) + return -EINVAL; + + max = min(10, maxlen - i); + len = num_arg(&buffer[i], max, &size); + if (len < 0) + return len; + i += len; + if (i >= maxlen) + return -EINVAL; + if (get_user(c, &buffer[i])) + return -EFAULT; + /* Check for comma between size_i and weight_i */ + if (c != ',') + return -EINVAL; + i++; + if (i >= maxlen) + return -EINVAL; + + if (size < 14 + 20 + 8) + size = 14 + 20 + 8; + + max = min(10, maxlen - i); + len = num_arg(&buffer[i], max, &weight); + if (len < 0) + return len; + if (weight <= 0) + return -EINVAL; + + pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size; + pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight; + + i += len; + pkt_dev->n_imix_entries++; + + if (i >= maxlen) + break; + if (get_user(c, &buffer[i])) + return -EFAULT; + i++; + } while (c == ' '); + + return i; +} + +static ssize_t get_labels(const char __user *buffer, + size_t maxlen, struct pktgen_dev *pkt_dev) { unsigned int n = 0; + size_t i = 0, max; + ssize_t len; char c; - ssize_t i = 0; - int len; pkt_dev->nr_labels = 0; do { __u32 tmp; - len = hex32_arg(&buffer[i], 8, &tmp); - if (len <= 0) + + if (n >= MAX_MPLS_LABELS) + return -E2BIG; + + if (i >= maxlen) + return -EINVAL; + + max = min(8, maxlen - i); + len = hex32_arg(&buffer[i], max, &tmp); + if (len < 0) return len; + + /* return empty list in case of invalid input or zero value */ + if (len == 0 || tmp == 0) + return maxlen; + pkt_dev->labels[n] = htonl(tmp); if (pkt_dev->labels[n] & MPLS_STACK_BOTTOM) pkt_dev->flags |= F_MPLS_RND; i += len; + n++; + if (i >= maxlen) + break; if (get_user(c, &buffer[i])) return -EFAULT; i++; - n++; - if (n >= 
MAX_MPLS_LABELS) - return -E2BIG; } while (c == ','); pkt_dev->nr_labels = n; return i; } +static __u32 pktgen_read_flag(const char *f, bool *disable) +{ + __u32 i; + + if (f[0] == '!') { + *disable = true; + f++; + } + + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT) + continue; + + /* allow only disabling ipv6 flag */ + if (!*disable && i == IPV6_SHIFT) + continue; + + if (strcmp(f, pkt_flag_names[i]) == 0) + return 1 << i; + } + + if (strcmp(f, "FLOW_RND") == 0) { + *disable = !*disable; + return F_FLOW_SEQ; + } + + return 0; +} + static ssize_t pktgen_if_write(struct file *file, - const char __user * user_buffer, size_t count, - loff_t * offset) + const char __user *user_buffer, size_t count, + loff_t *offset) { struct seq_file *seq = file->private_data; struct pktgen_dev *pkt_dev = seq->private; - int i, max, len; + size_t i, max; + ssize_t len; char name[16], valstr[32]; unsigned long value = 0; char *pg_result = NULL; - int tmp = 0; char buf[128]; pg_result = &(pkt_dev->result[0]); if (count < 1) { - pr_warning("wrong command format\n"); + pr_warn("wrong command format\n"); return -EINVAL; } max = count; - tmp = count_trail_chars(user_buffer, max); - if (tmp < 0) { - pr_warning("illegal format\n"); - return tmp; + len = count_trail_chars(user_buffer, max); + if (len < 0) { + pr_warn("illegal format\n"); + return len; } - i = tmp; + i = len; /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); + max = min(sizeof(name) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -879,45 +1027,46 @@ static ssize_t pktgen_if_write(struct file *file, i += len; if (debug) { - size_t copy = min_t(size_t, count, 1023); - char tb[copy + 1]; - if (copy_from_user(tb, user_buffer, copy)) - return -EFAULT; - tb[copy] = 0; - pr_debug("%s,%lu buffer -:%s:-\n", - name, (unsigned long)count, tb); + size_t copy = min_t(size_t, count + 1, 1024); + char *tp = strndup_user(user_buffer, 
copy); + + if (IS_ERR(tp)) + return PTR_ERR(tp); + + pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp); + kfree(tp); } if (!strcmp(name, "min_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->min_pkt_size) { pkt_dev->min_pkt_size = value; pkt_dev->cur_pkt_size = value; } - sprintf(pg_result, "OK: min_pkt_size=%u", + sprintf(pg_result, "OK: min_pkt_size=%d", pkt_dev->min_pkt_size); return count; } if (!strcmp(name, "max_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->max_pkt_size) { pkt_dev->max_pkt_size = value; pkt_dev->cur_pkt_size = value; } - sprintf(pg_result, "OK: max_pkt_size=%u", + sprintf(pg_result, "OK: max_pkt_size=%d", pkt_dev->max_pkt_size); return count; } @@ -925,11 +1074,11 @@ static ssize_t pktgen_if_write(struct file *file, /* Shortcut for min = max */ if (!strcmp(name, "pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value < 14 + 20 + 8) value = 14 + 20 + 8; if (value != pkt_dev->min_pkt_size) { @@ -937,37 +1086,51 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->max_pkt_size = value; pkt_dev->cur_pkt_size = value; } - sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size); + sprintf(pg_result, "OK: pkt_size=%d", pkt_dev->min_pkt_size); + return count; + } + + if (!strcmp(name, "imix_weights")) { + if (pkt_dev->clone_skb > 0) + return -EINVAL; + + max = count - i; + len = get_imix_entries(&user_buffer[i], max, pkt_dev); + if (len < 0) + return len; + + fill_imix_distribution(pkt_dev); + return count; } if (!strcmp(name, 
"debug")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; debug = value; sprintf(pg_result, "OK: debug=%u", debug); return count; } if (!strcmp(name, "frags")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->nfrags = value; - sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags); + sprintf(pg_result, "OK: frags=%d", pkt_dev->nfrags); return count; } if (!strcmp(name, "delay")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value == 0x7FFFFFFF) pkt_dev->delay = ULLONG_MAX; else @@ -978,13 +1141,13 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "rate")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (!value) - return len; + return -EINVAL; pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; if (debug) pr_info("Delay set at: %llu ns\n", pkt_dev->delay); @@ -993,13 +1156,13 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "ratep")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (!value) - return len; + return -EINVAL; pkt_dev->delay = NSEC_PER_SEC/value; if (debug) pr_info("Delay set at: %llu ns\n", pkt_dev->delay); @@ -1008,11 +1171,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_src_min")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != 
pkt_dev->udp_src_min) { pkt_dev->udp_src_min = value; pkt_dev->cur_udp_src = value; @@ -1021,11 +1184,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_dst_min")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_dst_min) { pkt_dev->udp_dst_min = value; pkt_dev->cur_udp_dst = value; @@ -1034,11 +1197,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_src_max")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_src_max) { pkt_dev->udp_src_max = value; pkt_dev->cur_udp_src = value; @@ -1047,11 +1210,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "udp_dst_max")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value != pkt_dev->udp_dst_max) { pkt_dev->udp_dst_max = value; pkt_dev->cur_udp_dst = value; @@ -1060,35 +1223,43 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "clone_skb")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; + /* clone_skb is not supported for netif_receive xmit_mode and + * IMIX mode. 
+ */ if ((value > 0) && - (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) - return -ENOTSUPP; - i += len; + ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || + !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -EOPNOTSUPP; + if (value > 0 && (pkt_dev->n_imix_entries > 0 || + !(pkt_dev->flags & F_SHARED))) + return -EINVAL; + pkt_dev->clone_skb = value; sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); return count; } if (!strcmp(name, "count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->count = value; sprintf(pg_result, "OK: count=%llu", (unsigned long long)pkt_dev->count); return count; } if (!strcmp(name, "src_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (pkt_dev->src_mac_count != value) { pkt_dev->src_mac_count = value; pkt_dev->cur_src_mac_offset = 0; @@ -1098,11 +1269,11 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (pkt_dev->dst_mac_count != value) { pkt_dev->dst_mac_count = value; pkt_dev->cur_dst_mac_offset = 0; @@ -1111,12 +1282,30 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->dst_mac_count); return count; } - if (!strcmp(name, "node")) { - len = num_arg(&user_buffer[i], 10, &value); + if (!strcmp(name, "burst")) { + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; + if ((value > 1) && + ((pkt_dev->xmit_mode == M_QUEUE_XMIT) || + ((pkt_dev->xmit_mode == M_START_XMIT) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) + return -EOPNOTSUPP; + + if (value > 1 && !(pkt_dev->flags & 
F_SHARED)) + return -EINVAL; + + pkt_dev->burst = value < 1 ? 1 : value; + sprintf(pg_result, "OK: burst=%u", pkt_dev->burst); + return count; + } + if (!strcmp(name, "node")) { + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); + if (len < 0) + return len; if (node_possible(value)) { pkt_dev->node = value; @@ -1125,122 +1314,105 @@ static ssize_t pktgen_if_write(struct file *file, put_page(pkt_dev->page); pkt_dev->page = NULL; } - } - else + } else { sprintf(pg_result, "ERROR: node not possible"); + } return count; } - if (!strcmp(name, "flag")) { + if (!strcmp(name, "xmit_mode")) { char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); + + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; + memset(f, 0, sizeof(f)); if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; - i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; + if (strcmp(f, "start_xmit") == 0) { + pkt_dev->xmit_mode = M_START_XMIT; + } else if (strcmp(f, "netif_receive") == 0) { + /* clone_skb set earlier, not supported in this mode */ + if (pkt_dev->clone_skb > 0) + return -EOPNOTSUPP; - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; + pkt_dev->xmit_mode = M_NETIF_RECEIVE; - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= 
F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, "MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else if (strcmp(f, "MPLS_RND") == 0) - pkt_dev->flags |= F_MPLS_RND; - - else if (strcmp(f, "!MPLS_RND") == 0) - pkt_dev->flags &= ~F_MPLS_RND; - - else if (strcmp(f, "VID_RND") == 0) - pkt_dev->flags |= F_VID_RND; - - else if (strcmp(f, "!VID_RND") == 0) - pkt_dev->flags &= ~F_VID_RND; - - else if (strcmp(f, "SVID_RND") == 0) - pkt_dev->flags |= F_SVID_RND; - - else if (strcmp(f, "!SVID_RND") == 0) - pkt_dev->flags &= ~F_SVID_RND; - - else if (strcmp(f, "FLOW_SEQ") == 0) - pkt_dev->flags |= F_FLOW_SEQ; - - else if (strcmp(f, "QUEUE_MAP_RND") == 0) - pkt_dev->flags |= F_QUEUE_MAP_RND; - - else if (strcmp(f, "!QUEUE_MAP_RND") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_RND; + /* make sure new packet is allocated every time + * pktgen_xmit() is called + */ + pkt_dev->last_ok = 1; + } else if (strcmp(f, "queue_xmit") == 0) { + pkt_dev->xmit_mode = M_QUEUE_XMIT; + pkt_dev->last_ok = 1; + } else { + sprintf(pg_result, + "xmit_mode -:%s:- unknown\nAvailable modes: %s", + f, "start_xmit, netif_receive\n"); + return count; + } + sprintf(pg_result, "OK: xmit_mode=%s", f); + return count; + } + if (!strcmp(name, "flag")) { + bool disable = false; + __u32 flag; + char f[32]; + char *end; - else if (strcmp(f, "QUEUE_MAP_CPU") == 0) - pkt_dev->flags |= F_QUEUE_MAP_CPU; + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); + if (len < 0) + return len; - else if (strcmp(f, "!QUEUE_MAP_CPU") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; -#ifdef CONFIG_XFRM - else if (strcmp(f, "IPSEC") == 0) - pkt_dev->flags |= F_IPSEC_ON; -#endif + memset(f, 0, 32); + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; - else if (strcmp(f, "!IPV6") == 0) - pkt_dev->flags &= ~F_IPV6; + flag = pktgen_read_flag(f, 
&disable); + if (flag) { + if (disable) { + /* If "clone_skb", or "burst" parameters are + * configured, it means that the skb still + * needs to be referenced by the pktgen, so + * the skb must be shared. + */ + if (flag == F_SHARED && (pkt_dev->clone_skb || + pkt_dev->burst > 1)) + return -EINVAL; + pkt_dev->flags &= ~flag; + } else { + pkt_dev->flags |= flag; + } - else if (strcmp(f, "NODE_ALLOC") == 0) - pkt_dev->flags |= F_NODE; + sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); + return count; + } - else if (strcmp(f, "!NODE_ALLOC") == 0) - pkt_dev->flags &= ~F_NODE; + /* Unknown flag */ + end = pkt_dev->result + sizeof(pkt_dev->result); + pg_result += sprintf(pg_result, + "Flag -:%s:- unknown\n" + "Available flags, (prepend ! to un-set flag):\n", f); - else { - sprintf(pg_result, - "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", - f, - "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); - return count; + for (int n = 0; n < NR_PKT_FLAGS && pg_result < end; n++) { + if (!IS_ENABLED(CONFIG_XFRM) && n == IPSEC_SHIFT) + continue; + pg_result += snprintf(pg_result, end - pg_result, + "%s, ", pkt_flag_names[n]); + } + if (!WARN_ON_ONCE(pg_result >= end)) { + /* Remove the comma and whitespace at the end */ + *(pg_result - 2) = '\0'; } - sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); + return count; } if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); + max = min(sizeof(pkt_dev->dst_min) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1248,41 +1420,39 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->dst_min) != 0) { - memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min)); - strncpy(pkt_dev->dst_min, buf, len); + strscpy_pad(pkt_dev->dst_min, buf); 
pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); pkt_dev->cur_daddr = pkt_dev->daddr_min; } if (debug) pr_debug("dst_min set to: %s\n", pkt_dev->dst_min); - i += len; + sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); return count; } if (!strcmp(name, "dst_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); + max = min(sizeof(pkt_dev->dst_max) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; - if (copy_from_user(buf, &user_buffer[i], len)) return -EFAULT; - buf[len] = 0; if (strcmp(buf, pkt_dev->dst_max) != 0) { - memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max)); - strncpy(pkt_dev->dst_max, buf, len); + strscpy_pad(pkt_dev->dst_max, buf); pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); pkt_dev->cur_daddr = pkt_dev->daddr_max; } if (debug) pr_debug("dst_max set to: %s\n", pkt_dev->dst_max); - i += len; + sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); return count; } if (!strcmp(name, "dst6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1300,12 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("dst6 set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6=%s", buf); return count; } if (!strcmp(name, "dst6_min")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1322,12 +1492,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("dst6_min set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6_min=%s", buf); return count; } if (!strcmp(name, "dst6_max")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1343,12 +1513,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) 
pr_debug("dst6_max set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: dst6_max=%s", buf); return count; } if (!strcmp(name, "src6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); + max = min(sizeof(buf) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1366,12 +1536,12 @@ static ssize_t pktgen_if_write(struct file *file, if (debug) pr_debug("src6 set to: %s\n", buf); - i += len; sprintf(pg_result, "OK: src6=%s", buf); return count; } if (!strcmp(name, "src_min")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); + max = min(sizeof(pkt_dev->src_min) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1379,19 +1549,19 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->src_min) != 0) { - memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min)); - strncpy(pkt_dev->src_min, buf, len); + strscpy_pad(pkt_dev->src_min, buf); pkt_dev->saddr_min = in_aton(pkt_dev->src_min); pkt_dev->cur_saddr = pkt_dev->saddr_min; } if (debug) pr_debug("src_min set to: %s\n", pkt_dev->src_min); - i += len; + sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); return count; } if (!strcmp(name, "src_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); + max = min(sizeof(pkt_dev->src_max) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1399,19 +1569,19 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; if (strcmp(buf, pkt_dev->src_max) != 0) { - memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max)); - strncpy(pkt_dev->src_max, buf, len); + strscpy_pad(pkt_dev->src_max, buf); pkt_dev->saddr_max = in_aton(pkt_dev->src_max); pkt_dev->cur_saddr = pkt_dev->saddr_max; } if (debug) pr_debug("src_max set to: %s\n", pkt_dev->src_max); - i += len; + sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); return count; } if (!strcmp(name, "dst_mac")) { - len = 
strn_len(&user_buffer[i], sizeof(valstr) - 1); + max = min(sizeof(valstr) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1422,13 +1592,14 @@ static ssize_t pktgen_if_write(struct file *file, if (!mac_pton(valstr, pkt_dev->dst_mac)) return -EINVAL; /* Set up Dest MAC */ - memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); + ether_addr_copy(&pkt_dev->hh[0], pkt_dev->dst_mac); sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); return count; } if (!strcmp(name, "src_mac")) { - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); + max = min(sizeof(valstr) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1439,7 +1610,7 @@ static ssize_t pktgen_if_write(struct file *file, if (!mac_pton(valstr, pkt_dev->src_mac)) return -EINVAL; /* Set up Src MAC */ - memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); + ether_addr_copy(&pkt_dev->hh[6], pkt_dev->src_mac); sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); return count; @@ -1452,11 +1623,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "flows")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value > MAX_CFLOWS) value = MAX_CFLOWS; @@ -1464,35 +1635,46 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); return count; } +#ifdef CONFIG_XFRM + if (!strcmp(name, "spi")) { + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); + if (len < 0) + return len; + pkt_dev->spi = value; + sprintf(pg_result, "OK: spi=%u", pkt_dev->spi); + return count; + } +#endif if (!strcmp(name, "flowlen")) { - len = num_arg(&user_buffer[i], 10, &value); + max = min(10, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->lflow = value; sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); return 
count; } if (!strcmp(name, "queue_map_min")) { - len = num_arg(&user_buffer[i], 5, &value); + max = min(5, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->queue_map_min = value; sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); return count; } if (!strcmp(name, "queue_map_max")) { - len = num_arg(&user_buffer[i], 5, &value); + max = min(5, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->queue_map_max = value; sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); return count; @@ -1501,10 +1683,11 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "mpls")) { unsigned int n, cnt; - len = get_labels(&user_buffer[i], pkt_dev); + max = count - i; + len = get_labels(&user_buffer[i], max, pkt_dev); if (len < 0) return len; - i += len; + cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) cnt += sprintf(pg_result + cnt, @@ -1522,11 +1705,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_id")) { - len = num_arg(&user_buffer[i], 4, &value); + max = min(4, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if (value <= 4095) { pkt_dev->vlan_id = value; /* turn on VLAN */ @@ -1549,11 +1732,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_p")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_p = value; sprintf(pg_result, "OK: vlan_p=%u", pkt_dev->vlan_p); @@ -1564,11 +1747,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "vlan_cfi")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i 
+= len; if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) { pkt_dev->vlan_cfi = value; sprintf(pg_result, "OK: vlan_cfi=%u", pkt_dev->vlan_cfi); @@ -1579,11 +1762,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_id")) { - len = num_arg(&user_buffer[i], 4, &value); + max = min(4, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) { pkt_dev->svlan_id = value; /* turn on SVLAN */ @@ -1606,11 +1789,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_p")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_p = value; sprintf(pg_result, "OK: svlan_p=%u", pkt_dev->svlan_p); @@ -1621,11 +1804,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "svlan_cfi")) { - len = num_arg(&user_buffer[i], 1, &value); + max = min(1, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) { pkt_dev->svlan_cfi = value; sprintf(pg_result, "OK: svlan_cfi=%u", pkt_dev->svlan_cfi); @@ -1636,12 +1819,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "tos")) { - __u32 tmp_value = 0; - len = hex32_arg(&user_buffer[i], 2, &tmp_value); + __u32 tmp_value; + + max = min(2, count - i); + len = hex32_arg(&user_buffer[i], max, &tmp_value); if (len < 0) return len; - i += len; if (len == 2) { pkt_dev->tos = tmp_value; sprintf(pg_result, "OK: tos=0x%02x", pkt_dev->tos); @@ -1652,12 +1836,13 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "traffic_class")) { - __u32 tmp_value = 0; - len = hex32_arg(&user_buffer[i], 2, &tmp_value); + __u32 tmp_value; + + max = min(2, count - i); + len = 
hex32_arg(&user_buffer[i], max, &tmp_value); if (len < 0) return len; - i += len; if (len == 2) { pkt_dev->traffic_class = tmp_value; sprintf(pg_result, "OK: traffic_class=0x%02x", pkt_dev->traffic_class); @@ -1668,11 +1853,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "skb_priority")) { - len = num_arg(&user_buffer[i], 9, &value); + max = min(9, count - i); + len = num_arg(&user_buffer[i], max, &value); if (len < 0) return len; - i += len; pkt_dev->skb_priority = value; sprintf(pg_result, "OK: skb_priority=%i", pkt_dev->skb_priority); @@ -1685,16 +1870,15 @@ static ssize_t pktgen_if_write(struct file *file, static int pktgen_if_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_if_show, PDE_DATA(inode)); + return single_open(file, pktgen_if_show, pde_data(inode)); } -static const struct file_operations pktgen_if_fops = { - .owner = THIS_MODULE, - .open = pktgen_if_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pktgen_if_write, - .release = single_release, +static const struct proc_ops pktgen_if_proc_ops = { + .proc_open = pktgen_if_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pktgen_if_write, + .proc_release = single_release, }; static int pktgen_thread_show(struct seq_file *seq, void *v) @@ -1704,36 +1888,37 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Running: "); + seq_puts(seq, "Running: "); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); - seq_printf(seq, "\nStopped: "); + seq_puts(seq, "\nStopped: "); - list_for_each_entry(pkt_dev, &t->if_list, list) + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (!pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); if (t->result[0]) seq_printf(seq, "\nResult: %s\n", t->result); else - seq_printf(seq, 
"\nResult: NA\n"); + seq_puts(seq, "\nResult: NA\n"); - if_unlock(t); + rcu_read_unlock(); return 0; } static ssize_t pktgen_thread_write(struct file *file, - const char __user * user_buffer, - size_t count, loff_t * offset) + const char __user *user_buffer, + size_t count, loff_t *offset) { struct seq_file *seq = file->private_data; struct pktgen_thread *t = seq->private; - int i, max, len, ret; + size_t i, max; + ssize_t len, ret; char name[40]; char *pg_result; @@ -1750,8 +1935,8 @@ static ssize_t pktgen_thread_write(struct file *file, i = len; /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); + max = min(sizeof(name) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) return len; @@ -1780,15 +1965,17 @@ static ssize_t pktgen_thread_write(struct file *file, if (!strcmp(name, "add_device")) { char f[32]; + memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); + max = min(sizeof(f) - 1, count - i); + len = strn_len(&user_buffer[i], max); if (len < 0) { ret = len; goto out; } if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; - i += len; + mutex_lock(&pktgen_thread_lock); ret = pktgen_add_device(t, f); mutex_unlock(&pktgen_thread_lock); @@ -1823,16 +2010,15 @@ out: static int pktgen_thread_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_thread_show, PDE_DATA(inode)); + return single_open(file, pktgen_thread_show, pde_data(inode)); } -static const struct file_operations pktgen_thread_fops = { - .owner = THIS_MODULE, - .open = pktgen_thread_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pktgen_thread_write, - .release = single_release, +static const struct proc_ops pktgen_thread_proc_ops = { + .proc_open = pktgen_thread_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pktgen_thread_write, + .proc_release = single_release, }; /* Think find or remove for NN */ @@ -1847,10 +2033,8 @@ static struct pktgen_dev 
*__pktgen_NN_threads(const struct pktgen_net *pn, pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } @@ -1897,9 +2081,12 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; + if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -1908,14 +2095,16 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d pkt_dev->entry = proc_create_data(dev->name, 0600, pn->proc_dir, - &pktgen_if_fops, + &pktgen_if_proc_ops, pkt_dev); if (!pkt_dev->entry) pr_err("can't move proc entry for '%s'\n", dev->name); break; } + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -1973,7 +2162,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put(pkt_dev->odev); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } @@ -1983,14 +2172,15 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, return -ENODEV; } - if (odev->type != ARPHRD_ETHER) { - pr_err("not an ethernet device: \"%s\"\n", ifname); + if (odev->type != ARPHRD_ETHER && odev->type != ARPHRD_LOOPBACK) { + pr_err("not an ethernet or loopback device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { pr_err("device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; + netdev_tracker_alloc(odev, &pkt_dev->dev_tracker, GFP_KERNEL); return 0; } @@ -2016,25 +2206,25 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) ntxq = pkt_dev->odev->real_num_tx_queues; if (ntxq <= pkt_dev->queue_map_min) { - pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds 
valid range [0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warn("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_min = (ntxq ?: 1) - 1; } if (pkt_dev->queue_map_max >= ntxq) { - pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warn("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_max = (ntxq ?: 1) - 1; } /* Default to the interface's mac if not explicitly set. */ if (is_zero_ether_addr(pkt_dev->src_mac)) - memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); + ether_addr_copy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr); /* Set up Dest MAC */ - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); + ether_addr_copy(&(pkt_dev->hh[0]), pkt_dev->dst_mac); if (pkt_dev->flags & F_IPV6) { int i, set = 0, err = 1; @@ -2047,7 +2237,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) + pkt_dev->pkt_overhead; } - for (i = 0; i < IN6_ADDR_HSIZE; i++) + for (i = 0; i < sizeof(struct in6_addr); i++) if (pkt_dev->cur_in6_saddr.s6_addr[i]) { set = 1; break; @@ -2098,9 +2288,11 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) rcu_read_lock(); in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { - if (in_dev->ifa_list) { - pkt_dev->saddr_min = - in_dev->ifa_list->ifa_address; + const struct in_ifaddr *ifa; + + ifa = rcu_dereference(in_dev->ifa_list); + if (ifa) { + pkt_dev->saddr_min = ifa->ifa_address; pkt_dev->saddr_max = pkt_dev->saddr_min; } } @@ -2134,14 +2326,12 @@ static void spin(struct pktgen_dev 
*pkt_dev, ktime_t spin_until) s64 remaining; struct hrtimer_sleeper t; - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_setup_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); - if (remaining <= 0) { - pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); - return; - } + if (remaining <= 0) + goto out; start_time = ktime_get(); if (remaining < 100000) { @@ -2150,13 +2340,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_get(); } while (ktime_compare(end_time, spin_until) < 0); } else { - /* see do_nanosleep */ - hrtimer_init_sleeper(&t, current); do { set_current_state(TASK_INTERRUPTIBLE); - hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&t.timer)) - t.task = NULL; + hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS); if (likely(t.task)) schedule(); @@ -2168,7 +2354,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); +out: pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); + destroy_hrtimer_on_stack(&t.timer); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) @@ -2198,7 +2386,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) pkt_dev->curfl = 0; /*reset */ } } else { - flow = prandom_u32() % pkt_dev->cflows; + flow = get_random_u32_below(pkt_dev->cflows); pkt_dev->curfl = flow; if (pkt_dev->flows[flow].count > pkt_dev->lflow) { @@ -2211,23 +2399,32 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) } -#ifdef CONFIG_XFRM /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... 
-*/ + */ #define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { +#ifdef CONFIG_XFRM struct xfrm_state *x = pkt_dev->flows[flow].x; struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); + if (!x) { - /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(pn->net, DUMMY_MARK, - (xfrm_address_t *)&pkt_dev->cur_daddr, - (xfrm_address_t *)&pkt_dev->cur_saddr, - AF_INET, - pkt_dev->ipsmode, - pkt_dev->ipsproto, 0); + + if (pkt_dev->spi) { + /* We need as quick as possible to find the right SA + * Searching with minimum criteria to achieve, this. + */ + x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET); + } else { + /* slow path: we don't already have xfrm_state */ + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0, + (xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + } if (x) { pkt_dev->flows[flow].x = x; set_pkt_overhead(pkt_dev); @@ -2235,21 +2432,19 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } -} #endif +} static void set_cur_queue_map(struct pktgen_dev *pkt_dev) { - if (pkt_dev->flags & F_QUEUE_MAP_CPU) pkt_dev->cur_queue_map = smp_processor_id(); else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = prandom_u32() % - (pkt_dev->queue_map_max - - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; + t = get_random_u32_inclusive(pkt_dev->queue_map_min, + pkt_dev->queue_map_max); } else { t = pkt_dev->cur_queue_map + 1; if (t > pkt_dev->queue_map_max) @@ -2278,7 +2473,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACSRC_RND) - mc = prandom_u32() % pkt_dev->src_mac_count; + mc = get_random_u32_below(pkt_dev->src_mac_count); else { mc = pkt_dev->cur_src_mac_offset++; if (pkt_dev->cur_src_mac_offset >= @@ -2304,7 +2499,7 @@ static void mod_cur_headers(struct 
pktgen_dev *pkt_dev) __u32 tmp; if (pkt_dev->flags & F_MACDST_RND) - mc = prandom_u32() % pkt_dev->dst_mac_count; + mc = get_random_u32_below(pkt_dev->dst_mac_count); else { mc = pkt_dev->cur_dst_mac_offset++; @@ -2328,26 +2523,26 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->flags & F_MPLS_RND) { unsigned int i; + for (i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - ((__force __be32)prandom_u32() & + ((__force __be32)get_random_u32() & htonl(0x000fffff)); } if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { - pkt_dev->vlan_id = prandom_u32() & (4096 - 1); + pkt_dev->vlan_id = get_random_u32_below(4096); } if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { - pkt_dev->svlan_id = prandom_u32() & (4096 - 1); + pkt_dev->svlan_id = get_random_u32_below(4096); } if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { if (pkt_dev->flags & F_UDPSRC_RND) - pkt_dev->cur_udp_src = prandom_u32() % - (pkt_dev->udp_src_max - pkt_dev->udp_src_min) - + pkt_dev->udp_src_min; + pkt_dev->cur_udp_src = get_random_u32_inclusive(pkt_dev->udp_src_min, + pkt_dev->udp_src_max - 1); else { pkt_dev->cur_udp_src++; @@ -2358,9 +2553,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { if (pkt_dev->flags & F_UDPDST_RND) { - pkt_dev->cur_udp_dst = prandom_u32() % - (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) - + pkt_dev->udp_dst_min; + pkt_dev->cur_udp_dst = get_random_u32_inclusive(pkt_dev->udp_dst_min, + pkt_dev->udp_dst_max - 1); } else { pkt_dev->cur_udp_dst++; if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) @@ -2374,8 +2568,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) imx = ntohl(pkt_dev->saddr_max); if (imn < imx) { __u32 t; + if (pkt_dev->flags & F_IPSRC_RND) - t = prandom_u32() % (imx - imn) + imn; + t = get_random_u32_inclusive(imn, imx - 1); else { t = ntohl(pkt_dev->cur_saddr); 
t++; @@ -2394,11 +2589,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (imn < imx) { __u32 t; __be32 s; + if (pkt_dev->flags & F_IPDST_RND) { do { - t = prandom_u32() % - (imx - imn) + imn; + t = get_random_u32_inclusive(imn, imx - 1); s = htonl(t); } while (ipv4_is_loopback(s) || ipv4_is_multicast(s) || @@ -2419,10 +2614,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].flags |= F_INIT; pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; -#ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) + if (pkt_dev->flags & F_IPSEC) get_ipsec_sa(pkt_dev, flow); -#endif pkt_dev->nflows++; } } @@ -2435,7 +2628,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - (((__force __be32)prandom_u32() | + (((__force __be32)get_random_u32() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2444,16 +2637,24 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { __u32 t; + if (pkt_dev->flags & F_TXSIZE_RND) { - t = prandom_u32() % - (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size) - + pkt_dev->min_pkt_size; + t = get_random_u32_inclusive(pkt_dev->min_pkt_size, + pkt_dev->max_pkt_size - 1); } else { t = pkt_dev->cur_pkt_size + 1; if (t > pkt_dev->max_pkt_size) t = pkt_dev->min_pkt_size; } pkt_dev->cur_pkt_size = t; + } else if (pkt_dev->n_imix_entries > 0) { + struct imix_pkt *entry; + __u32 t = get_random_u32_below(IMIX_PRECISION); + __u8 entry_index = pkt_dev->imix_distribution[t]; + + entry = &pkt_dev->imix_entries[entry_index]; + entry->count_so_far++; + pkt_dev->cur_pkt_size = entry->size; } set_cur_queue_map(pkt_dev); @@ -2461,33 +2662,76 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } +static void fill_imix_distribution(struct pktgen_dev *pkt_dev) +{ + int cumulative_probabilites[MAX_IMIX_ENTRIES]; + int j = 0; + __u64 
cumulative_prob = 0; + __u64 total_weight = 0; + int i = 0; + + for (i = 0; i < pkt_dev->n_imix_entries; i++) + total_weight += pkt_dev->imix_entries[i].weight; + + /* Fill cumulative_probabilites with sum of normalized probabilities */ + for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) { + cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight * + IMIX_PRECISION, + total_weight); + cumulative_probabilites[i] = cumulative_prob; + } + cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100; + + for (i = 0; i < IMIX_PRECISION; i++) { + if (i == cumulative_probabilites[j]) + j++; + pkt_dev->imix_distribution[i] = j; + } +} #ifdef CONFIG_XFRM +static u32 pktgen_dst_metrics[RTAX_MAX + 1] = { + + [RTAX_HOPLIMIT] = 0x5, /* Set a static hoplimit */ +}; + static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int err = 0; + struct net *net = dev_net(pkt_dev->odev); if (!x) return 0; /* XXX: we dont support tunnel mode for now until - * we resolve the dst issue */ - if (x->props.mode != XFRM_MODE_TRANSPORT) + * we resolve the dst issue + */ + if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0)) return 0; - spin_lock(&x->lock); - - err = x->outer_mode->output(x, skb); - if (err) + /* But when user specify an valid SPI, transformation + * supports both transport/tunnel mode + ESP/AH type. 
+ */ + if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0)) + skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF; + + rcu_read_lock_bh(); + err = pktgen_xfrm_outer_mode_output(x, skb); + rcu_read_unlock_bh(); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error; + } err = x->type->output(x, skb); - if (err) + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error; - + } + spin_lock_bh(&x->lock); x->curlft.bytes += skb->len; x->curlft.packets++; + spin_unlock_bh(&x->lock); error: - spin_unlock(&x->lock); return err; } @@ -2496,8 +2740,10 @@ static void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i; + for (i = 0; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; + if (x) { xfrm_state_put(x); pkt_dev->flows[i].x = NULL; @@ -2509,12 +2755,15 @@ static void free_SAs(struct pktgen_dev *pkt_dev) static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { - if (pkt_dev->flags & F_IPSEC_ON) { + if (pkt_dev->flags & F_IPSEC) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int nhead = 0; + if (x) { + struct ethhdr *eth; + struct iphdr *iph; int ret; - __u8 *eth; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2533,9 +2782,14 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, goto err; } /* restore ll */ - eth = (__u8 *) skb_push(skb, ETH_HLEN); - memcpy(eth, pkt_dev->hh, 12); - *(u16 *) ð[12] = protocol; + eth = skb_push(skb, ETH_HLEN); + memcpy(eth, pkt_dev->hh, 2 * ETH_ALEN); + eth->h_proto = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; @@ -2548,6 +2802,7 @@ err: static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) { unsigned int i; + for (i = 0; i < 
pkt_dev->nr_labels; i++) *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM; @@ -2564,14 +2819,14 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, int datalen) { - struct timeval timestamp; + struct timespec64 timestamp; struct pktgen_hdr *pgh; - pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + pgh = skb_put(skb, sizeof(*pgh)); datalen -= sizeof(*pgh); if (pkt_dev->nfrags <= 0) { - memset(skb_put(skb, datalen), 0, datalen); + skb_put_zero(skb, datalen); } else { int frags = pkt_dev->nfrags; int i, len; @@ -2582,13 +2837,12 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, frags = MAX_SKB_FRAGS; len = datalen - frags * PAGE_SIZE; if (len > 0) { - memset(skb_put(skb, len), 0, len); + skb_put_zero(skb, len); datalen = frags * PAGE_SIZE; } i = 0; - frag_len = (datalen/frags) < PAGE_SIZE ? - (datalen/frags) : PAGE_SIZE; + frag_len = min_t(int, datalen / frags, PAGE_SIZE); while (datalen > 0) { if (unlikely(!pkt_dev->page)) { int node = numa_node_id(); @@ -2600,14 +2854,16 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, break; } get_page(pkt_dev->page); - skb_frag_set_page(skb, i, pkt_dev->page); - skb_shinfo(skb)->frags[i].page_offset = 0; + /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_frag_size_set(&skb_shinfo(skb)->frags[i], - (datalen < PAGE_SIZE ? 
datalen : PAGE_SIZE)); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, + min(datalen, PAGE_SIZE)); else - skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); @@ -2622,19 +2878,32 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->pgh_magic = htonl(PKTGEN_MAGIC); pgh->seq_num = htonl(pkt_dev->seq_num); - do_gettimeofday(&timestamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); + if (pkt_dev->flags & F_NO_TIMESTAMP) { + pgh->tv_sec = 0; + pgh->tv_usec = 0; + } else { + /* + * pgh->tv_sec wraps in y2106 when interpreted as unsigned + * as done by wireshark, or y2038 when interpreted as signed. + * This is probably harmless, but if anyone wants to improve + * it, we could introduce a variant that puts 64-bit nanoseconds + * into the respective header bytes. + * This would also be slightly faster to read. + */ + ktime_get_real_ts64(&timestamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC); + } } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ? 
pkt_dev->node : numa_node_id(); @@ -2644,9 +2913,13 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb->dev = dev; } } else { - skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ + if (likely(skb)) + skb_reserve(skb, extralen - 16); + return skb; } @@ -2678,51 +2951,50 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ - eth = (__u8 *) skb_push(skb, 14); - mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); + eth = skb_push(skb, 14); + mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); if (pkt_dev->vlan_id != 0xffff) { if (pkt_dev->svlan_id != 0xffff) { - svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_tci = skb_put(skb, sizeof(__be16)); *svlan_tci = build_tci(pkt_dev->svlan_id, pkt_dev->svlan_cfi, pkt_dev->svlan_p); - svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_encapsulated_proto = skb_put(skb, + sizeof(__be16)); *svlan_encapsulated_proto = htons(ETH_P_8021Q); } - vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_tci = skb_put(skb, sizeof(__be16)); *vlan_tci = build_tci(pkt_dev->vlan_id, pkt_dev->vlan_cfi, pkt_dev->vlan_p); - vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_encapsulated_proto = skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb_set_mac_header(skb, 0); + skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); - iph = (struct 
iphdr *) skb_put(skb, sizeof(struct iphdr)); + iph = skb_put(skb, sizeof(struct iphdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; memcpy(eth, pkt_dev->hh, 12); - *(__be16 *) & eth[12] = protocol; + *(__be16 *)&eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - @@ -2733,7 +3005,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, udph->source = htons(pkt_dev->cur_udp_src); udph->dest = htons(pkt_dev->cur_udp_dst); udph->len = htons(datalen + 8); /* DATA + udphdr */ - udph->check = 0; /* No checksum */ + udph->check = 0; iph->ihl = 5; iph->version = 4; @@ -2747,13 +3019,30 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->frag_off = 0; iplen = 20 + 8 + datalen; iph->tot_len = htons(iplen); - iph->check = 0; - iph->check = ip_fast_csum((void *)iph, iph->ihl); + ip_send_check(iph); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { + skb->ip_summed = CHECKSUM_NONE; + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum = 0; + udp4_hwcsum(skb, iph->saddr, iph->daddr); + } else { + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), datalen + 8, 0); + + /* add protocol-dependent pseudo-header */ + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen + 8, IPPROTO_UDP, csum); + + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + } + #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) return NULL; @@ -2768,7 +3057,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct sk_buff *skb = NULL; __u8 *eth; struct udphdr *udph; - int datalen; + int datalen, udplen; struct ipv6hdr *iph; 
__be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; @@ -2790,7 +3079,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2800,34 +3089,35 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ - eth = (__u8 *) skb_push(skb, 14); - mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); + eth = skb_push(skb, 14); + mpls = skb_put(skb, pkt_dev->nr_labels * sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); if (pkt_dev->vlan_id != 0xffff) { if (pkt_dev->svlan_id != 0xffff) { - svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_tci = skb_put(skb, sizeof(__be16)); *svlan_tci = build_tci(pkt_dev->svlan_id, pkt_dev->svlan_cfi, pkt_dev->svlan_p); - svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + svlan_encapsulated_proto = skb_put(skb, + sizeof(__be16)); *svlan_encapsulated_proto = htons(ETH_P_8021Q); } - vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_tci = skb_put(skb, sizeof(__be16)); *vlan_tci = build_tci(pkt_dev->vlan_id, pkt_dev->vlan_cfi, pkt_dev->vlan_p); - vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + vlan_encapsulated_proto = skb_put(skb, sizeof(__be16)); *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb_set_mac_header(skb, 0); + skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); - iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + iph = skb_put(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; @@ -2844,10 +3134,11 @@ static struct sk_buff 
*fill_packet_ipv6(struct net_device *odev, net_info_ratelimited("increased datalen to %d\n", datalen); } + udplen = datalen + sizeof(struct udphdr); udph->source = htons(pkt_dev->cur_udp_src); udph->dest = htons(pkt_dev->cur_udp_dst); - udph->len = htons(datalen + sizeof(struct udphdr)); - udph->check = 0; /* No checksum */ + udph->len = htons(udplen); + udph->check = 0; *(__be32 *) iph = htonl(0x60000000); /* Version + flow */ @@ -2858,7 +3149,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->hop_limit = 32; - iph->payload_len = htons(sizeof(struct udphdr) + datalen); + iph->payload_len = htons(udplen); iph->nexthdr = IPPROTO_UDP; iph->daddr = pkt_dev->cur_in6_daddr; @@ -2870,6 +3161,23 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { + skb->ip_summed = CHECKSUM_NONE; + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0); + } else { + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), udplen, 0); + + /* add protocol-dependent pseudo-header */ + udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum); + + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + } + return skb; } @@ -2900,8 +3208,8 @@ static void pktgen_run(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { /* * setup odev and create initial packet. @@ -2910,61 +3218,70 @@ static void pktgen_run(struct pktgen_thread *t) if (pkt_dev->odev) { pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! 
*/ pkt_dev->skb = NULL; pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); - strcpy(pkt_dev->result, "Starting"); + strscpy(pkt_dev->result, "Starting"); + pkt_dev->running = 1; /* Cranke yeself! */ started++; } else - strcpy(pkt_dev->result, "Error starting"); + strscpy(pkt_dev->result, "Error starting"); } - if_unlock(t); + rcu_read_unlock(); if (started) t->control &= ~(T_STOP); } -static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn) +static void pktgen_handle_all_threads(struct pktgen_net *pn, u32 flags) { struct pktgen_thread *t; - func_enter(); - mutex_lock(&pktgen_thread_lock); list_for_each_entry(t, &pn->pktgen_threads, th_list) - t->control |= T_STOP; + t->control |= (flags); mutex_unlock(&pktgen_thread_lock); } +static void pktgen_stop_all_threads(struct pktgen_net *pn) +{ + func_enter(); + + pktgen_handle_all_threads(pn, T_STOP); +} + static int thread_is_running(const struct pktgen_thread *t) { const struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) + if (pkt_dev->running) { + rcu_read_unlock(); return 1; + } + rcu_read_unlock(); return 0; } static int pktgen_wait_thread_run(struct pktgen_thread *t) { - if_lock(t); - while (thread_is_running(t)) { - if_unlock(t); - + /* note: 't' will still be around even after the unlock/lock + * cycle because pktgen_thread threads are only cleared at + * net exit + */ + mutex_unlock(&pktgen_thread_lock); msleep_interruptible(100); + mutex_lock(&pktgen_thread_lock); if (signal_pending(current)) goto signal; - if_lock(t); } - if_unlock(t); return 1; signal: return 0; @@ -2975,6 +3292,10 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) struct pktgen_thread *t; int sig = 1; + /* prevent from racing with rmmod */ + if (!try_module_get(THIS_MODULE)) + return sig; + mutex_lock(&pktgen_thread_lock); list_for_each_entry(t, &pn->pktgen_threads, 
th_list) { @@ -2988,21 +3309,15 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); + module_put(THIS_MODULE); return sig; } static void pktgen_run_all_threads(struct pktgen_net *pn) { - struct pktgen_thread *t; - func_enter(); - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pn->pktgen_threads, th_list) - t->control |= (T_RUN); - - mutex_unlock(&pktgen_thread_lock); + pktgen_handle_all_threads(pn, T_RUN); /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); @@ -3012,16 +3327,9 @@ static void pktgen_run_all_threads(struct pktgen_net *pn) static void pktgen_reset_all_threads(struct pktgen_net *pn) { - struct pktgen_thread *t; - func_enter(); - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pn->pktgen_threads, th_list) - t->control |= (T_REMDEVALL); - - mutex_unlock(&pktgen_thread_lock); + pktgen_handle_all_threads(pn, T_REMDEVALL); /* Propagate thread->control */ schedule_timeout_interruptible(msecs_to_jiffies(125)); @@ -3047,7 +3355,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC, ktime_to_ns(elapsed)); - bps = pps * 8 * pkt_dev->cur_pkt_size; + if (pkt_dev->n_imix_entries > 0) { + int i; + struct imix_pkt *entry; + + bps = 0; + for (i = 0; i < pkt_dev->n_imix_entries; i++) { + entry = &pkt_dev->imix_entries[i]; + bps += entry->size * entry->count_so_far; + } + bps = div64_u64(bps * 8 * NSEC_PER_SEC, ktime_to_ns(elapsed)); + } else { + bps = pps * 8 * pkt_dev->cur_pkt_size; + } mbps = bps; do_div(mbps, 1000000); @@ -3064,15 +3384,15 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? 
skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - pr_warning("interface: %s is already stopped\n", - pkt_dev->odevname); + pr_warn("interface: %s is already stopped\n", + pkt_dev->odevname); return -EINVAL; } + pkt_dev->running = 0; kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; pkt_dev->stopped_at = ktime_get(); - pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3083,9 +3403,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev, *best = NULL; - if_lock(t); - - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; if (best == NULL) @@ -3093,7 +3412,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } - if_unlock(t); + rcu_read_unlock(); + return best; } @@ -3103,13 +3423,13 @@ static void pktgen_stop(struct pktgen_thread *t) func_enter(); - if_lock(t); + rcu_read_lock(); - list_for_each_entry(pkt_dev, &t->if_list, list) { + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); } - if_unlock(t); + rcu_read_unlock(); } /* @@ -3123,8 +3443,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3138,8 +3456,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } - - if_unlock(t); } static void pktgen_rem_all_ifs(struct pktgen_thread *t) @@ -3151,8 +3467,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3161,8 +3475,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -3174,6 
+3486,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_get(); + schedule(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } @@ -3182,7 +3495,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_get(); - while (atomic_read(&(pkt_dev->skb->users)) != 1) { + while (refcount_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) break; @@ -3196,13 +3509,24 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { + bool skb_shared = !!(READ_ONCE(pkt_dev->flags) & F_SHARED); struct net_device *odev = pkt_dev->odev; - netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) - = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; - u16 queue_map; + unsigned int burst = 1; + struct sk_buff *skb; + int clone_skb = 0; int ret; + /* If 'skb_shared' is false, the read of possible + * new values (if any) for 'burst' and 'clone_skb' will be skipped to + * prevent some concurrent changes from slipping in. And the stabilized + * config will be read in during the next run of pktgen_xmit. 
+ */ + if (skb_shared) { + burst = READ_ONCE(pkt_dev->burst); + clone_skb = READ_ONCE(pkt_dev->clone_skb); + } + /* If device is offline, then don't send */ if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) { pktgen_stop_device(pkt_dev); @@ -3219,7 +3543,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* If no skb or clone count exhausted then get new one */ if (!pkt_dev->skb || (pkt_dev->last_ok && - ++pkt_dev->clone_count >= pkt_dev->clone_skb)) { + ++pkt_dev->clone_count >= clone_skb)) { /* build a new pkt */ kfree_skb(pkt_dev->skb); @@ -3231,37 +3555,111 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) return; } pkt_dev->last_pkt_size = pkt_dev->skb->len; - pkt_dev->allocated_skbs++; pkt_dev->clone_count = 0; /* reset counter */ } if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); - queue_map = skb_get_queue_mapping(pkt_dev->skb); - txq = netdev_get_tx_queue(odev, queue_map); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { + skb = pkt_dev->skb; + skb->protocol = eth_type_trans(skb, skb->dev); + if (skb_shared) + refcount_add(burst, &skb->users); + local_bh_disable(); + do { + ret = netif_receive_skb(skb); + if (ret == NET_RX_DROP) + pkt_dev->errors++; + pkt_dev->sofar++; + pkt_dev->seq_num++; + if (unlikely(!skb_shared)) { + pkt_dev->skb = NULL; + break; + } + if (refcount_read(&skb->users) != burst) { + /* skb was queued by rps/rfs or taps, + * so cannot reuse this skb + */ + WARN_ON(refcount_sub_and_test(burst - 1, &skb->users)); + /* get out of the loop and wait + * until skb is consumed + */ + break; + } + /* skb was 'freed' by stack, so clean few + * bits and reuse it + */ + skb_reset_redirect(skb); + } while (--burst > 0); + goto out; /* Skips xmit_mode M_START_XMIT */ + } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { + local_bh_disable(); + if (skb_shared) + refcount_inc(&pkt_dev->skb->users); + + ret = dev_queue_xmit(pkt_dev->skb); + + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; + + 
switch (ret) { + case NET_XMIT_SUCCESS: + pkt_dev->sofar++; + pkt_dev->seq_num++; + pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + break; + case NET_XMIT_DROP: + case NET_XMIT_CN: + /* These are all valid return codes for a qdisc but + * indicate packets are being dropped or will likely + * be dropped soon. + */ + case NETDEV_TX_BUSY: + /* qdisc may call dev_hard_start_xmit directly in cases + * where no queues exist e.g. loopback device, virtual + * devices, etc. In this case we need to handle + * NETDEV_TX_ codes. + */ + default: + pkt_dev->errors++; + net_info_ratelimited("%s xmit error: %d\n", + pkt_dev->odevname, ret); + break; + } + goto out; + } + + txq = skb_get_tx_queue(odev, pkt_dev->skb); + + local_bh_disable(); - __netif_tx_lock_bh(txq); + HARD_TX_LOCK(odev, txq, smp_processor_id()); - if (unlikely(netif_xmit_frozen_or_stopped(txq))) { - ret = NETDEV_TX_BUSY; + if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) { pkt_dev->last_ok = 0; goto unlock; } - atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + if (skb_shared) + refcount_add(burst, &pkt_dev->skb->users); + +xmit_more: + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); + + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; switch (ret) { case NETDEV_TX_OK: - txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + if (burst > 0 && !netif_xmit_frozen_or_drv_stopped(txq)) + goto xmit_more; break; case NET_XMIT_DROP: case NET_XMIT_CN: - case NET_XMIT_POLICED: /* skb has been consumed */ pkt_dev->errors++; break; @@ -3269,19 +3667,25 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) net_info_ratelimited("%s xmit error: %d\n", pkt_dev->odevname, ret); pkt_dev->errors++; - /* fallthru */ - case NETDEV_TX_LOCKED: + fallthrough; case NETDEV_TX_BUSY: /* Retry it next time */ - atomic_dec(&(pkt_dev->skb->users)); + if (skb_shared) + refcount_dec(&pkt_dev->skb->users); 
pkt_dev->last_ok = 0; } + if (unlikely(burst)) + WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users)); unlock: - __netif_tx_unlock_bh(txq); + HARD_TX_UNLOCK(odev, txq); + +out: + local_bh_enable(); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - pktgen_wait_for_skb(pkt_dev); + if (pkt_dev->skb) + pktgen_wait_for_skb(pkt_dev); /* Done with this */ pktgen_stop_device(pkt_dev); @@ -3294,20 +3698,17 @@ unlock: static int pktgen_thread_worker(void *arg) { - DEFINE_WAIT(wait); struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - BUG_ON(smp_processor_id() != cpu); + WARN_ON_ONCE(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); - set_current_state(TASK_INTERRUPTIBLE); - set_freezable(); while (!kthread_should_stop()) { @@ -3316,15 +3717,11 @@ static int pktgen_thread_worker(void *arg) if (unlikely(!pkt_dev && t->control == 0)) { if (t->net->pktgen_exiting) break; - wait_event_interruptible_timeout(t->queue, - t->control != 0, - HZ/10); - try_to_freeze(); + wait_event_freezable_timeout(t->queue, + t->control != 0, HZ / 10); continue; } - __set_current_state(TASK_RUNNING); - if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); @@ -3355,8 +3752,6 @@ static int pktgen_thread_worker(void *arg) } try_to_freeze(); - - set_current_state(TASK_INTERRUPTIBLE); } pr_debug("%s stopping all device\n", t->tsk->comm); @@ -3368,13 +3763,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3384,8 +3772,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, struct pktgen_dev *p, *pkt_dev = NULL; size_t len = strlen(ifname); - 
if_lock(t); - list_for_each_entry(p, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(p, &t->if_list, list) if (strncmp(p->odevname, ifname, len) == 0) { if (p->odevname[len]) { if (exact || p->odevname[len] != '@') @@ -3395,7 +3783,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, break; } - if_unlock(t); + rcu_read_unlock(); pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3409,6 +3797,13 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; + /* This function cannot be called concurrently, as its called + * under pktgen_thread_lock mutex, but it can run from + * userspace on another CPU than the kthread. The if_lock() + * is used here to sync with concurrent instances of + * _rem_dev_from_if_list() invoked via kthread, which is also + * updating the if_list + */ if_lock(t); if (pkt_dev->pg_thread) { @@ -3417,9 +3812,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, goto out; } - list_add(&pkt_dev->list, &t->if_list); - pkt_dev->pg_thread = t; pkt_dev->running = 0; + pkt_dev->pg_thread = t; + list_add_rcu(&pkt_dev->list, &t->if_list); out: if_unlock(t); @@ -3446,8 +3841,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) if (!pkt_dev) return -ENOMEM; - strcpy(pkt_dev->odevname, ifname); - pkt_dev->flows = vzalloc_node(MAX_CFLOWS * sizeof(struct flow_state), + strscpy(pkt_dev->odevname, ifname); + pkt_dev->flows = vzalloc_node(array_size(MAX_CFLOWS, + sizeof(struct flow_state)), node); if (pkt_dev->flows == NULL) { kfree(pkt_dev); @@ -3469,7 +3865,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_p = 0; pkt_dev->svlan_cfi = 0; pkt_dev->svlan_id = 0xffff; - pkt_dev->node = -1; + pkt_dev->burst = 1; + pkt_dev->node = NUMA_NO_NODE; + pkt_dev->flags = F_SHARED; /* SKB shared by default */ err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) @@ -3478,7 +3876,7 @@ static int pktgen_add_device(struct 
pktgen_thread *t, const char *ifname) pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, - &pktgen_if_fops, pkt_dev); + &pktgen_if_proc_ops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, ifname); @@ -3488,11 +3886,22 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) #ifdef CONFIG_XFRM pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; pkt_dev->ipsproto = IPPROTO_ESP; + + /* xfrm tunnel mode needs additional dst to extract outer + * ip header protocol/ttl/id field, here create a phony one. + * instead of looking for a valid rt, which definitely hurting + * performance under such circumstance. + */ + pkt_dev->dstops.family = AF_INET; + pkt_dev->xdst.u.dst.dev = pkt_dev->odev; + dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false); + pkt_dev->xdst.child = &pkt_dev->xdst.u.dst; + pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops; #endif return add_dev_to_thread(t, pkt_dev); out2: - dev_put(pkt_dev->odev); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3515,7 +3924,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); @@ -3523,21 +3932,18 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); - p = kthread_create_on_node(pktgen_thread_worker, - t, - cpu_to_node(cpu), - "kpktgend_%d", cpu); + p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu, "kpktgend_%d"); if (IS_ERR(p)) { - pr_err("kernel_thread() failed for cpu %d\n", t->cpu); + pr_err("kthread_create_on_node() failed for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); } - kthread_bind(p, cpu); + t->tsk = p; pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, - 
&pktgen_thread_fops, t); + &pktgen_thread_proc_ops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, t->tsk->comm); @@ -3548,6 +3954,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3563,11 +3970,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, struct list_head *q, *n; struct pktgen_dev *p; + if_lock(t); list_for_each_safe(q, n, &t->if_list) { p = list_entry(q, struct pktgen_dev, list); if (p == pkt_dev) - list_del(&p->list); + list_del_rcu(&p->list); } + if_unlock(t); } static int pktgen_remove_device(struct pktgen_thread *t, @@ -3576,31 +3985,33 @@ static int pktgen_remove_device(struct pktgen_thread *t, pr_debug("remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - pr_warning("WARNING: trying to remove a running interface, stopping it now\n"); + pr_warn("WARNING: trying to remove a running interface, stopping it now\n"); pktgen_stop_device(pkt_dev); } /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put(pkt_dev->odev); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } - /* And update the thread if_list */ + /* Remove proc before if_list entry, because add_device uses + * list to determine if interface already exist, avoid race + * with proc_create_data() + */ + proc_remove(pkt_dev->entry); + /* And update the thread if_list */ _rem_dev_from_if_list(t, pkt_dev); - if (pkt_dev->entry) - proc_remove(pkt_dev->entry); - #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); if (pkt_dev->page) put_page(pkt_dev->page); - kfree(pkt_dev); + kfree_rcu(pkt_dev, rcu); return 0; } @@ -3618,13 +4029,14 @@ static int __net_init pg_net_init(struct net *net) pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; } - pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); + pe = proc_create(PGCTRL, 0600, pn->proc_dir, 
&pktgen_proc_ops); if (pe == NULL) { pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; goto remove; } + cpus_read_lock(); for_each_online_cpu(cpu) { int err; @@ -3633,6 +4045,7 @@ static int __net_init pg_net_init(struct net *net) pr_warn("Cannot create thread for cpu %d (%d)\n", cpu, err); } + cpus_read_unlock(); if (list_empty(&pn->pktgen_threads)) { pr_err("Initialization failed for all threads\n"); @@ -3666,7 +4079,7 @@ static void __net_exit pg_net_exit(struct net *net) list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); - kthread_stop(t->tsk); + kthread_stop_put(t->tsk); kfree(t); } @@ -3700,6 +4113,7 @@ static void __exit pg_cleanup(void) { unregister_netdevice_notifier(&pktgen_notifier_block); unregister_pernet_subsys(&pg_net_ops); + /* Don't need rcu_barrier() due to use of kfree_rcu() */ } module_init(pg_init); |
