diff options
Diffstat (limited to 'net')
48 files changed, 836 insertions, 473 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c0762a302162..8f528e783a6c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1023,7 +1023,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (addr == NULL) + if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index bc8807d9281f..f4fea28e05da 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -451,13 +451,13 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, char str[16]; BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9); - sprintf(str, "ring-ref%u", i); + sprintf(str, "ring-ref%d", i); ret = xenbus_printf(xbt, dev->nodename, str, "%d", priv->rings[i].ref); if (ret) goto error_xenbus; - sprintf(str, "event-channel-%u", i); + sprintf(str, "event-channel-%d", i); ret = xenbus_printf(xbt, dev->nodename, str, "%u", priv->rings[i].evtchn); if (ret) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 12a4f4d93681..3fda71a8579d 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -21,7 +21,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 0cad62a4052b..307790562b49 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -21,7 +21,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_redirect_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; if (xt_hooknum(par) != NF_BR_BROUTING) diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 27443bf229a3..7dfbcdfc30e5 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -22,7 +22,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - if (skb_ensure_writable(skb, ETH_ALEN * 2)) + if (skb_ensure_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_source, info->mac); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d4d7a0e52491..af0f1fa24937 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2016,11 +2016,11 @@ static int process_banner(struct ceph_connection *con) sizeof(con->peer_addr)) != 0 && !(addr_is_blank(&con->actual_peer_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_warn("wrong peer, want %s/%d, got %s/%d\n", + pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), - (int)le32_to_cpu(con->peer_addr.nonce), + le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->actual_peer_addr), - (int)le32_to_cpu(con->actual_peer_addr.nonce)); + le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } @@ -2811,13 +2811,13 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay) return -ENOENT; } + dout("%s %p %lu\n", __func__, con, delay); if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { dout("%s %p - already queued\n", __func__, con); con->ops->put(con); return -EBUSY; } - dout("%s %p %lu\n", __func__, con, delay); return 0; } @@ -2998,6 +2998,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d633a0aeaa55..c4cf2529d08b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -896,8 +896,9 @@ bad: ceph_msg_dump(msg); } -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, - struct ceph_entity_addr *client_addr) +static __printf(2, 0) +int do_mon_command_vargs(struct ceph_mon_client *monc, const char *fmt, + va_list ap) { struct ceph_mon_generic_request *req; struct ceph_mon_command *h; @@ -925,29 +926,65 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; h->num_strs = cpu_to_le32(1); - len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \ - \"blacklistop\": \"add\", \ - \"addr\": \"%pISpc/%u\" }", - &client_addr->in_addr, le32_to_cpu(client_addr->nonce)); + len = vsprintf(h->str, fmt, ap); h->str_len = cpu_to_le32(len); send_generic_request(monc, req); mutex_unlock(&monc->mutex); ret = wait_generic_request(req); - if (!ret) - /* - * Make sure we have the osdmap that includes the blacklist - * entry. This is needed to ensure that the OSDs pick up the - * new blacklist before processing any future requests from - * this client. - */ - ret = ceph_wait_for_latest_osdmap(monc->client, 0); - out: put_generic_request(req); return ret; } -EXPORT_SYMBOL(ceph_monc_blacklist_add); + +static __printf(2, 3) +int do_mon_command(struct ceph_mon_client *monc, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = do_mon_command_vargs(monc, fmt, ap); + va_end(ap); + return ret; +} + +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, + struct ceph_entity_addr *client_addr) +{ + int ret; + + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blocklist\", \ + \"blocklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + if (ret == -EINVAL) { + /* + * The monitor returns EINVAL on an unrecognized command. + * Try the legacy command -- it is exactly the same except + * for the name. + */ + ret = do_mon_command(monc, + "{ \"prefix\": \"osd blacklist\", \ + \"blacklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, + le32_to_cpu(client_addr->nonce)); + } + if (ret) + return ret; + + /* + * Make sure we have the osdmap that includes the blocklist + * entry. This is needed to ensure that the OSDs pick up the + * new blocklist before processing any future requests from + * this client. + */ + return ceph_wait_for_latest_osdmap(monc->client, 0); +} +EXPORT_SYMBOL(ceph_monc_blocklist_add); /* * Resend pending generic requests. diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 96c25f5e064a..fa08c15be0c0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -965,6 +965,143 @@ bad: } /* + * CRUSH workspaces + * + * workspace_manager framework borrowed from fs/btrfs/compression.c. + * Two simplifications: there is only one type of workspace and there + * is always at least one workspace. + */ +static struct crush_work *alloc_workspace(const struct crush_map *c) +{ + struct crush_work *work; + size_t work_size; + + WARN_ON(!c->working_size); + work_size = crush_work_size(c, CEPH_PG_MAX_SIZE); + dout("%s work_size %zu bytes\n", __func__, work_size); + + work = ceph_kvmalloc(work_size, GFP_NOIO); + if (!work) + return NULL; + + INIT_LIST_HEAD(&work->item); + crush_init_workspace(c, work); + return work; +} + +static void free_workspace(struct crush_work *work) +{ + WARN_ON(!list_empty(&work->item)); + kvfree(work); +} + +static void init_workspace_manager(struct workspace_manager *wsm) +{ + INIT_LIST_HEAD(&wsm->idle_ws); + spin_lock_init(&wsm->ws_lock); + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; + init_waitqueue_head(&wsm->ws_wait); +} + +static void add_initial_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + WARN_ON(!list_empty(&wsm->idle_ws)); + + list_add(&work->item, &wsm->idle_ws); + atomic_set(&wsm->total_ws, 1); + wsm->free_ws = 1; +} + +static void cleanup_workspace_manager(struct workspace_manager *wsm) +{ + struct crush_work *work; + + while (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + free_workspace(work); + } + atomic_set(&wsm->total_ws, 0); + wsm->free_ws = 0; +} + +/* + * Finds an available workspace or allocates a new one. If it's not + * possible to allocate a new one, waits until there is one. + */ +static struct crush_work *get_workspace(struct workspace_manager *wsm, + const struct crush_map *c) +{ + struct crush_work *work; + int cpus = num_online_cpus(); + +again: + spin_lock(&wsm->ws_lock); + if (!list_empty(&wsm->idle_ws)) { + work = list_first_entry(&wsm->idle_ws, struct crush_work, + item); + list_del_init(&work->item); + wsm->free_ws--; + spin_unlock(&wsm->ws_lock); + return work; + + } + if (atomic_read(&wsm->total_ws) > cpus) { + DEFINE_WAIT(wait); + + spin_unlock(&wsm->ws_lock); + prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws) + schedule(); + finish_wait(&wsm->ws_wait, &wait); + goto again; + } + atomic_inc(&wsm->total_ws); + spin_unlock(&wsm->ws_lock); + + work = alloc_workspace(c); + if (!work) { + atomic_dec(&wsm->total_ws); + wake_up(&wsm->ws_wait); + + /* + * Do not return the error but go back to waiting. We + * have the inital workspace and the CRUSH computation + * time is bounded so we will get it eventually. + */ + WARN_ON(atomic_read(&wsm->total_ws) < 1); + goto again; + } + return work; +} + +/* + * Puts a workspace back on the list or frees it if we have enough + * idle ones sitting around. + */ +static void put_workspace(struct workspace_manager *wsm, + struct crush_work *work) +{ + spin_lock(&wsm->ws_lock); + if (wsm->free_ws <= num_online_cpus()) { + list_add(&work->item, &wsm->idle_ws); + wsm->free_ws++; + spin_unlock(&wsm->ws_lock); + goto wake; + } + spin_unlock(&wsm->ws_lock); + + free_workspace(work); + atomic_dec(&wsm->total_ws); +wake: + if (wq_has_sleeper(&wsm->ws_wait)) + wake_up(&wsm->ws_wait); +} + +/* * osd map */ struct ceph_osdmap *ceph_osdmap_alloc(void) @@ -981,7 +1118,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) map->primary_temp = RB_ROOT; map->pg_upmap = RB_ROOT; map->pg_upmap_items = RB_ROOT; - mutex_init(&map->crush_workspace_mutex); + + init_workspace_manager(&map->crush_wsm); return map; } @@ -989,8 +1127,11 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) void ceph_osdmap_destroy(struct ceph_osdmap *map) { dout("osdmap_destroy %p\n", map); + if (map->crush) crush_destroy(map->crush); + cleanup_workspace_manager(&map->crush_wsm); + while (!RB_EMPTY_ROOT(&map->pg_temp)) { struct ceph_pg_mapping *pg = rb_entry(rb_first(&map->pg_temp), @@ -1029,7 +1170,6 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kvfree(map->osd_weight); kvfree(map->osd_addr); kvfree(map->osd_primary_affinity); - kvfree(map->crush_workspace); kfree(map); } @@ -1104,26 +1244,22 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) { - void *workspace; - size_t work_size; + struct crush_work *work; if (IS_ERR(crush)) return PTR_ERR(crush); - work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); - dout("%s work_size %zu bytes\n", __func__, work_size); - workspace = ceph_kvmalloc(work_size, GFP_NOIO); - if (!workspace) { + work = alloc_workspace(crush); + if (!work) { crush_destroy(crush); return -ENOMEM; } - crush_init_workspace(crush, workspace); if (map->crush) crush_destroy(map->crush); - kvfree(map->crush_workspace); + cleanup_workspace_manager(&map->crush_wsm); map->crush = crush; - map->crush_workspace = workspace; + add_initial_workspace(&map->crush_wsm, work); return 0; } @@ -2322,6 +2458,7 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, s64 choose_args_index) { struct crush_choose_arg_map *arg_map; + struct crush_work *work; int r; BUG_ON(result_max > CEPH_PG_MAX_SIZE); @@ -2332,12 +2469,11 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, CEPH_DEFAULT_CHOOSE_ARGS); - mutex_lock(&map->crush_workspace_mutex); + work = get_workspace(&map->crush_wsm, map->crush); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_workspace, + weight, weight_max, work, arg_map ? arg_map->args : NULL); - mutex_unlock(&map->crush_workspace_mutex); - + put_workspace(&map->crush_wsm, work); return r; } diff --git a/net/core/dev.c b/net/core/dev.c index 751e5264fd49..82dc6b48e45f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include <linux/indirect_call_wrapper.h> #include <net/devlink.h> #include <linux/pm_runtime.h> +#include <linux/prandom.h> #include "net-sysfs.h" @@ -3558,6 +3559,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); len = skb->len; + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); trace_net_dev_start_xmit(skb, dev); rc = netdev_start_xmit(skb, dev, txq, more); trace_net_dev_xmit(skb, rc, dev, len); @@ -4130,6 +4132,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (!skb) goto out; + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -4195,6 +4198,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) skb_set_queue_mapping(skb, queue_id); txq = skb_get_tx_queue(dev, skb); + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); local_bh_disable(); @@ -10213,7 +10217,7 @@ void netdev_run_todo(void) struct net_device *dev = list_first_entry(&unlink_list, struct net_device, unlink_list); - list_del(&dev->unlink_list); + list_del_init(&dev->unlink_list); dev->nested_level = dev->lower_level - 1; } #endif diff --git a/net/core/filter.c b/net/core/filter.c index c5e2a1c5fd8d..2ca5eecebacf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *nexthop; + struct dst_entry *dst = NULL; struct neighbour *neigh; if (dev_xmit_recursion()) { @@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), - &ipv6_hdr(skb)->daddr); + if (!nh) { + dst = skb_dst(skb); + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + &ipv6_hdr(skb)->daddr); + } else { + nexthop = &nh->ipv6_nh; + } neigh = ip_neigh_gw6(dev, nexthop); if (likely(!IS_ERR(neigh))) { int ret; @@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) return ret; } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + if (dst) + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; } -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct dst_entry *dst; - struct flowi6 fl6 = { - .flowi6_flags = FLOWI_FLAG_ANYSRC, - .flowi6_mark = skb->mark, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_oif = dev->ifindex, - .flowi6_proto = ip6h->nexthdr, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst)) - goto out_drop; + if (!nh) { + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowi6_mark = skb->mark, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_oif = dev->ifindex, + .flowi6_proto = ip6h->nexthdr, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + }; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + goto out_drop; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst); + } else if (nh->nh_family != AF_INET6) { + goto out_drop; + } - err = bpf_out_neigh_v6(net, skb); + err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2252,7 +2264,8 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; @@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) #endif /* CONFIG_IPV6 */ #if IS_ENABLED(CONFIG_INET) -static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + if (!nh) { + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = container_of(dst, struct rtable, dst); + + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + } else if (nh->nh_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); + is_v6gw = true; + } else if (nh->nh_family == AF_INET) { + neigh = ip_neigh_gw4(dev, nh->ipv4_nh); + } else { + rcu_read_unlock_bh(); + goto out_drop; + } + if (likely(!IS_ERR(neigh))) { int ret; @@ -2309,33 +2334,37 @@ out_drop: return -ENETDOWN; } -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct iphdr *ip4h = ip_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct rtable *rt; - struct flowi4 fl4 = { - .flowi4_flags = FLOWI_FLAG_ANYSRC, - .flowi4_mark = skb->mark, - .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_oif = dev->ifindex, - .flowi4_proto = ip4h->protocol, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, - }; - rt = ip_route_output_flow(net, &fl4, NULL); - if (IS_ERR(rt)) - goto out_drop; - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { - ip_rt_put(rt); - goto out_drop; - } + if (!nh) { + struct flowi4 fl4 = { + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_oif = dev->ifindex, + .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + struct rtable *rt; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out_drop; + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto out_drop; + } - skb_dst_set(skb, &rt->dst); + skb_dst_set(skb, &rt->dst); + } - err = bpf_out_neigh_v4(net, skb); + err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2348,14 +2377,16 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_INET */ -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { struct ethhdr *ethh = eth_hdr(skb); @@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb); if (skb->protocol == htons(ETH_P_IP)) - return __bpf_redirect_neigh_v4(skb, dev); + return __bpf_redirect_neigh_v4(skb, dev, nh); else if (skb->protocol == htons(ETH_P_IPV6)) - return __bpf_redirect_neigh_v6(skb, dev); + return __bpf_redirect_neigh_v6(skb, dev, nh); out: kfree_skb(skb); return -ENOTSUPP; @@ -2382,7 +2413,8 @@ out: enum { BPF_F_NEIGH = (1ULL << 1), BPF_F_PEER = (1ULL << 2), -#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER) + BPF_F_NEXTHOP = (1ULL << 3), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) @@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EAGAIN; } return flags & BPF_F_NEIGH ? - __bpf_redirect_neigh(skb, dev) : + __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? + &ri->nh : NULL) : __bpf_redirect(skb, dev, flags); out_drop: kfree_skb(skb); @@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, + int, plen, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; - ri->flags = BPF_F_NEIGH; + ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0); ri->tgt_index = ifindex; + BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); + if (plen) + memcpy(&ri->nh, params, sizeof(ri->nh)); + return TC_ACT_REDIRECT; } @@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) @@ -4693,7 +4733,8 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); - sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; + sk->sk_max_pacing_rate = (val == ~0U) ? + ~0UL : (unsigned int)val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); break; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 68e0682450c6..7d7223691783 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3709,13 +3709,13 @@ static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); } -static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) +static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - struct net_device *dev; + size_t min_ifinfo_dump_size = 0; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; - u16 min_ifinfo_dump_size = 0; + struct net_device *dev; int hdrlen; /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ @@ -3735,9 +3735,8 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) */ rcu_read_lock(); for_each_netdev_rcu(net, dev) { - min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, - if_nlmsg_size(dev, - ext_filter_mask)); + min_ifinfo_dump_size = max(min_ifinfo_dump_size, + if_nlmsg_size(dev, ext_filter_mask)); } rcu_read_unlock(); @@ -5494,7 +5493,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; - u16 min_dump_alloc = 0; + u32 min_dump_alloc = 0; link = rtnl_get_link(family, type); if (!link || !link->dumpit) { diff --git a/net/core/sock.c b/net/core/sock.c index 4e8729357122..727ea1cc633c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1163,7 +1163,7 @@ set_sndbuf: case SO_MAX_PACING_RATE: { - unsigned long ulval = (val == ~0U) ? ~0UL : val; + unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; if (sizeof(ulval) != sizeof(val) && optlen >= sizeof(ulval) && diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 945a9bd5ba35..0a5aa982c60d 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -123,6 +123,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = { .xmit = ksz8795_xmit, .rcv = ksz8795_rcv, .overhead = KSZ_INGRESS_TAG_LEN, + .tail_tag = true, }; DSA_TAG_DRIVER(ksz8795_netdev_ops); @@ -199,6 +200,7 @@ static const struct dsa_device_ops ksz9477_netdev_ops = { .xmit = ksz9477_xmit, .rcv = ksz9477_rcv, .overhead = KSZ9477_INGRESS_TAG_LEN, + .tail_tag = true, }; DSA_TAG_DRIVER(ksz9477_netdev_ops); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 07f67ced962a..005faea415a4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -239,7 +239,7 @@ static struct { /** * icmp_global_allow - Are we allowed to send one more ICMP message ? * - * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec. + * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. * Note: called with BH disabled */ @@ -267,7 +267,10 @@ bool icmp_global_allow(void) } credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); if (credit) { - credit--; + /* We want to use a credit of one in average, but need to randomize + * it for security reasons. + */ + credit = max_t(int, credit - prandom_u32_max(3), 0); rc = true; } WRITE_ONCE(icmp_global.credit, credit); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 8c0f17c6863c..0dc43ad28eb9 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -845,7 +845,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, remove_nh_grp_entry(net, nhge, nlinfo); /* make sure all see the newly published array before releasing rtnl */ - synchronize_rcu(); + synchronize_net(); } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67f10d3ec240..fc445833b5e5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5827,6 +5827,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) tcp_data_snd_check(sk); if (!inet_csk_ack_scheduled(sk)) goto no_ack; + } else { + tcp_update_wl(tp, TCP_SKB_CB(skb)->seq); } __tcp_ack_snd_check(sk, 0); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index fed9666a2f7d..054d287eb13d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -355,6 +355,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, ipv6_hdr(skb)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (skb->ip_summed == CHECKSUM_COMPLETE) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 2def85718d94..ef59e25dc482 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -300,5 +300,6 @@ static void __exit mpls_iptunnel_exit(void) module_exit(mpls_iptunnel_exit); MODULE_ALIAS_RTNL_LWT(MPLS); +MODULE_SOFTDEP("post: mpls_gso"); MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); MODULE_LICENSE("GPL v2"); diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 698bc3525160..a014149aa323 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -19,14 +19,11 @@ config INET_MPTCP_DIAG config MPTCP_IPV6 bool "MPTCP: IPv6 support for Multipath TCP" - select IPV6 + depends on IPV6=y default y -endif - config MPTCP_KUNIT_TESTS tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS - select MPTCP depends on KUNIT default KUNIT_ALL_TESTS help @@ -39,3 +36,4 @@ config MPTCP_KUNIT_TESTS If unsure, say N. +endif diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 092a2d48bfd3..a044dd43411d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -241,7 +241,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, } mp_opt->add_addr = 1; - mp_opt->port = 0; mp_opt->addr_id = *ptr++; pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo); if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { @@ -297,6 +296,8 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->mp_capable = 0; mp_opt->mp_join = 0; mp_opt->add_addr = 0; + mp_opt->ahmac = 0; + mp_opt->port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dc2e7da2742a..7da51390cea6 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -539,8 +539,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, if (new_state != cp->state) { struct ip_vs_dest *dest = cp->dest; - IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" - "%s:%d state: %s->%s conn->refcnt:%d\n", + IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] c:%s:%d v:%s:%d " + "d:%s:%d state: %s->%s conn->refcnt:%d\n", pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), @@ -548,10 +548,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, th->fin ? 'F' : '.', th->ack ? 'A' : '.', th->rst ? 'R' : '.', - IP_VS_DBG_ADDR(cp->daf, &cp->daddr), - ntohs(cp->dport), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), + ntohs(cp->dport), tcp_state_name(cp->state), tcp_state_name(new_state), refcount_read(&cp->refcnt)); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e8c86ee4c1c4..c8fb2187ad4b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -541,13 +541,20 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; - /* - * We haven't seen traffic in the other direction yet - * but we have to tweak window tracking to pass III - * and IV until that happens. - */ - if (receiver->td_maxwin == 0) + if (receiver->td_maxwin == 0) { + /* We haven't seen traffic in the other + * direction yet but we have to tweak window + * tracking to pass III and IV until that + * happens. + */ receiver->td_end = receiver->td_maxend = sack; + } else if (sack == receiver->td_end + 1) { + /* Likely a reply to a keepalive. + * Needed for III. + */ + receiver->td_end++; + } + } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 2b01a151eaa8..a579e59ee5c5 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,6 +19,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) skb_push(skb, skb->mac_len); skb->dev = dev; + skb->tstamp = 0; dev_queue_xmit(skb); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9957e0ed8658..65cb8e3c13d9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -302,7 +302,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->activate) expr->ops->activate(ctx, expr); @@ -317,7 +317,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { if (expr->ops->deactivate) expr->ops->deactivate(ctx, expr, phase); @@ -3080,7 +3080,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, * is called on error from nf_tables_newrule(). */ expr = nft_expr_first(rule); - while (expr != nft_expr_last(rule) && expr->ops) { + while (nft_expr_more(rule, expr)) { next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); expr = next; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 7c7e06624dc3..9f625724a20f 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, struct nft_expr *expr; expr = nft_expr_first(rule); - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) num_actions++; @@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, ctx->net = net; ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; - while (expr->ops && expr != nft_expr_last(rule)) { + while (nft_expr_more(rule, expr)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 3087e23297db..b77985986b24 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -138,6 +138,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, return; skb->dev = dev; + skb->tstamp = 0; neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index e894254c17d4..8709f3d4e7c4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1217,7 +1217,7 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) u32 idx; char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 87c286ad660e..f3486a37361a 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -175,7 +175,7 @@ static struct table_instance *table_instance_alloc(int new_size) static void __mask_array_destroy(struct mask_array *ma) { - free_percpu(ma->masks_usage_cntr); + free_percpu(ma->masks_usage_stats); kfree(ma); } @@ -199,15 +199,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma) ma->masks_usage_zero_cntr[i] = 0; for_each_possible_cpu(cpu) { - u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, - cpu); + struct mask_array_stats *stats; unsigned int start; u64 counter; + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&ma->syncp); - counter = usage_counters[i]; - } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + start = u64_stats_fetch_begin_irq(&stats->syncp); + counter = stats->usage_cntrs[i]; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ma->masks_usage_zero_cntr[i] += counter; } @@ -230,9 +230,10 @@ static struct mask_array *tbl_mask_array_alloc(int size) sizeof(struct sw_flow_mask *) * size); - new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size, - __alignof__(u64)); - if (!new->masks_usage_cntr) { + new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + + sizeof(u64) * size, + __alignof__(u64)); + if (!new->masks_usage_stats) { kfree(new); return NULL; } @@ -722,6 +723,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, /* Flow lookup does full lookup on flow table. It starts with * mask from index passed in *index. + * This function MUST be called with BH disabled due to the use + * of CPU specific variables. */ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct table_instance *ti, @@ -731,7 +734,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *n_cache_hit, u32 *index) { - u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr); + struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats); struct sw_flow *flow; struct sw_flow_mask *mask; int i; @@ -741,9 +744,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { - u64_stats_update_begin(&ma->syncp); - usage_counters[*index]++; - u64_stats_update_end(&ma->syncp); + u64_stats_update_begin(&stats->syncp); + stats->usage_cntrs[*index]++; + u64_stats_update_end(&stats->syncp); (*n_cache_hit)++; return flow; } @@ -762,9 +765,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ *index = i; - u64_stats_update_begin(&ma->syncp); - usage_counters[*index]++; - u64_stats_update_end(&ma->syncp); + u64_stats_update_begin(&stats->syncp); + stats->usage_cntrs[*index]++; + u64_stats_update_end(&stats->syncp); return flow; } } @@ -850,9 +853,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); u32 __always_unused n_mask_hit; u32 __always_unused n_cache_hit; + struct sw_flow *flow; u32 index = 0; - return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); + /* This function gets called trough the netlink interface and therefore + * is preemptible. However, flow_lookup() function needs to be called + * with BH disabled due to CPU specific variables. + */ + local_bh_disable(); + flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); + local_bh_enable(); + return flow; } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, @@ -1109,7 +1120,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table) for (i = 0; i < ma->max; i++) { struct sw_flow_mask *mask; - unsigned int start; int cpu; mask = rcu_dereference_ovsl(ma->masks[i]); @@ -1120,14 +1130,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table) masks_and_count[i].counter = 0; for_each_possible_cpu(cpu) { - u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, - cpu); + struct mask_array_stats *stats; + unsigned int start; u64 counter; + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&ma->syncp); - counter = usage_counters[i]; - } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + start = u64_stats_fetch_begin_irq(&stats->syncp); + counter = stats->usage_cntrs[i]; + } while (u64_stats_fetch_retry_irq(&stats->syncp, + start)); masks_and_count[i].counter += counter; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index d8fb7a3a3dfd..9e659db78c05 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -38,12 +38,16 @@ struct mask_count { u64 counter; }; +struct mask_array_stats { + struct u64_stats_sync syncp; + u64 usage_cntrs[]; +}; + struct mask_array { struct rcu_head rcu; int count, max; - u64 __percpu *masks_usage_cntr; + struct mask_array_stats __percpu *masks_usage_stats; u64 *masks_usage_zero_cntr; - struct u64_stats_sync syncp; struct sw_flow_mask __rcu *masks[]; }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9c79fb92c2da..aba3cd85f284 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -156,11 +156,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple, __be16 target_dst = target.dst.u.udp.port; if (target_src != tuple->src.u.udp.port) - tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP, offsetof(struct udphdr, source), 0xFFFF, be16_to_cpu(target_src)); if (target_dst != tuple->dst.u.udp.port) - tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP, offsetof(struct udphdr, dest), 0xFFFF, be16_to_cpu(target_dst)); } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index a229751ee8c4..85c0d0d5b9da 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -459,7 +459,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port, 0, flags, - key_id, 0); + key_id, opts_len); } else { NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst"); ret = -EINVAL; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 41a55c6cbeb8..faeabff283a2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3712,7 +3712,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->gate.num_entries = tcf_gate_num_entries(act); err = tcf_gate_get_entries(entry, act); if (err) - goto err_out; + goto err_out_locked; } else { err = -EOPNOTSUPP; goto err_out_locked; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 258b04372f85..bd4678db9d76 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1147,9 +1147,9 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, struct gssp_in_token *in_token) { struct kvec *argv = &rqstp->rq_arg.head[0]; - unsigned int page_base, length; - int pages, i, res; - size_t inlen; + unsigned int length, pgto_offs, pgfrom_offs; + int pages, i, res, pgto, pgfrom; + size_t inlen, to_offs, from_offs; res = gss_read_common_verf(gc, argv, authp, in_handle); if (res) @@ -1177,17 +1177,24 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, memcpy(page_address(in_token->pages[0]), argv->iov_base, length); inlen -= length; - i = 1; - page_base = rqstp->rq_arg.page_base; + to_offs = length; + from_offs = rqstp->rq_arg.page_base; while (inlen) { - length = min_t(unsigned int, inlen, PAGE_SIZE); - memcpy(page_address(in_token->pages[i]), - page_address(rqstp->rq_arg.pages[i]) + page_base, + pgto = to_offs >> PAGE_SHIFT; + pgfrom = from_offs >> PAGE_SHIFT; + pgto_offs = to_offs & ~PAGE_MASK; + pgfrom_offs = from_offs & ~PAGE_MASK; + + length = min_t(unsigned int, inlen, + min_t(unsigned int, PAGE_SIZE - pgto_offs, + PAGE_SIZE - pgfrom_offs)); + memcpy(page_address(in_token->pages[pgto]) + pgto_offs, + page_address(rqstp->rq_arg.pages[pgfrom]) + pgfrom_offs, length); + to_offs += length; + from_offs += length; inlen -= length; - page_base = 0; - i++; } return 0; } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 195b40c5dae4..22a2c235abf1 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -5,7 +5,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -111,7 +111,7 @@ out_free: * by the backchannel. This function can be called multiple times * when creating new sessions that use the same rpc_xprt. The * preallocated buffers are added to the pool of resources used by - * the rpc_xprt. Anyone of these resources may be used used by an + * the rpc_xprt. Any one of these resources may be used by an * incoming callback request. It's up to the higher levels in the * stack to enforce that the maximum number of session slots is not * being exceeded. diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index baef5ee43dbb..20c93b68505e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -498,16 +498,17 @@ static int cache_clean(void) */ static void do_cache_clean(struct work_struct *work) { - int delay = 5; - if (cache_clean() == -1) - delay = round_jiffies_relative(30*HZ); + int delay; if (list_empty(&cache_list)) - delay = 0; + return; + + if (cache_clean() == -1) + delay = round_jiffies_relative(30*HZ); + else + delay = 5; - if (delay) - queue_delayed_work(system_power_efficient_wq, - &cache_cleaner, delay); + queue_delayed_work(system_power_efficient_wq, &cache_cleaner, delay); } @@ -908,7 +909,7 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, static ssize_t cache_slow_downcall(const char __user *buf, size_t count, struct cache_detail *cd) { - static char write_buf[8192]; /* protected by queue_io_mutex */ + static char write_buf[32768]; /* protected by queue_io_mutex */ ssize_t ret = -EINVAL; if (count >= sizeof(write_buf)) @@ -1436,10 +1437,10 @@ static int c_show(struct seq_file *m, void *p) cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ - seq_printf(m, "# "); + seq_puts(m, "# "); else { if (cache_is_expired(cd, cp)) - seq_printf(m, "# "); + seq_puts(m, "# "); cache_put(cp, cd); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62e0b6c1e8cf..3259120462ed 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -47,10 +47,6 @@ # define RPCDBG_FACILITY RPCDBG_CALL #endif -#define dprint_status(t) \ - dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ - __func__, t->tk_status) - /* * All RPC clients are linked into this list */ @@ -1639,10 +1635,6 @@ call_start(struct rpc_task *task) int idx = task->tk_msg.rpc_proc->p_statidx; trace_rpc_request(task); - dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, - clnt->cl_program->name, clnt->cl_vers, - rpc_proc_name(task), - (RPC_IS_ASYNC(task) ? "async" : "sync")); /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) @@ -1658,8 +1650,6 @@ call_start(struct rpc_task *task) static void call_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -1675,8 +1665,6 @@ call_reserveresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - /* * After a call to xprt_reserve(), we must have either * a request slot or else an error status. @@ -1717,8 +1705,6 @@ call_reserveresult(struct rpc_task *task) static void call_retry_reserve(struct rpc_task *task) { - dprint_status(task); - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); @@ -1730,8 +1716,6 @@ call_retry_reserve(struct rpc_task *task) static void call_refresh(struct rpc_task *task) { - dprint_status(task); - task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1746,8 +1730,6 @@ call_refreshresult(struct rpc_task *task) { int status = task->tk_status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_refresh; switch (status) { @@ -1770,12 +1752,10 @@ call_refreshresult(struct rpc_task *task) if (!task->tk_cred_retry) break; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); return; } - dprintk("RPC: %5u %s: refresh creds failed with error %d\n", - task->tk_pid, __func__, status); + trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); } @@ -1792,8 +1772,6 @@ call_allocate(struct rpc_task *task) const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; int status; - dprint_status(task); - task->tk_status = 0; task->tk_action = call_encode; @@ -1823,6 +1801,7 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize <<= 2; status = xprt->ops->buf_alloc(task); + trace_rpc_buf_alloc(task, status); xprt_inject_disconnect(xprt); if (status == 0) return; @@ -1831,8 +1810,6 @@ call_allocate(struct rpc_task *task) return; } - dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); @@ -1883,7 +1860,7 @@ call_encode(struct rpc_task *task) { if (!rpc_task_need_encode(task)) goto out; - dprint_status(task); + /* Dequeue task from the receive queue while we're encoding */ xprt_request_dequeue_xprt(task); /* Encode here so that rpcsec_gss can use correct sequence number. */ @@ -1902,8 +1879,7 @@ call_encode(struct rpc_task *task) } else { task->tk_action = call_refresh; task->tk_cred_retry--; - dprintk("RPC: %5u %s: retry refresh creds\n", - task->tk_pid, __func__); + trace_rpc_retry_refresh_status(task); } break; default: @@ -1960,8 +1936,6 @@ call_bind(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_bind_status; if (!xprt_prepare_transmit(task)) return; @@ -1983,8 +1957,6 @@ call_bind_status(struct rpc_task *task) return; } - dprint_status(task); - trace_rpc_bind_status(task); if (task->tk_status >= 0) goto out_next; if (xprt_bound(xprt)) { @@ -1994,12 +1966,10 @@ call_bind_status(struct rpc_task *task) switch (task->tk_status) { case -ENOMEM: - dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); rpc_delay(task, HZ >> 2); goto retry_timeout; case -EACCES: - dprintk("RPC: %5u remote rpcbind: RPC program/version " - "unavailable\n", task->tk_pid); + trace_rpcb_prog_unavail_err(task); /* fail immediately if this is an RPC ping */ if (task->tk_msg.rpc_proc->p_proc == 0) { status = -EOPNOTSUPP; @@ -2016,17 +1986,14 @@ call_bind_status(struct rpc_task *task) case -EAGAIN: goto retry_timeout; case -ETIMEDOUT: - dprintk("RPC: %5u rpcbind request timed out\n", - task->tk_pid); + trace_rpcb_timeout_err(task); goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u unrecognized remote rpcbind service\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", - task->tk_pid); + trace_rpcb_bind_version_err(task); goto retry_timeout; case -ECONNREFUSED: /* connection problems */ case -ECONNRESET: @@ -2037,8 +2004,7 @@ call_bind_status(struct rpc_task *task) case -EHOSTUNREACH: case -ENETUNREACH: case -EPIPE: - dprintk("RPC: %5u remote rpcbind unreachable: %d\n", - task->tk_pid, task->tk_status); + trace_rpcb_unreachable_err(task); if (!RPC_IS_SOFTCONN(task)) { rpc_delay(task, 5*HZ); goto retry_timeout; @@ -2046,8 +2012,7 @@ call_bind_status(struct rpc_task *task) status = task->tk_status; break; default: - dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", - task->tk_pid, -task->tk_status); + trace_rpcb_unrecognized_err(task); } rpc_call_rpcerror(task, status); @@ -2079,10 +2044,6 @@ call_connect(struct rpc_task *task) return; } - dprintk("RPC: %5u call_connect xprt %p %s connected\n", - task->tk_pid, xprt, - (xprt_connected(xprt) ? "is" : "is not")); - task->tk_action = call_connect_status; if (task->tk_status < 0) return; @@ -2110,7 +2071,6 @@ call_connect_status(struct rpc_task *task) return; } - dprint_status(task); trace_rpc_connect_status(task); if (task->tk_status == 0) { @@ -2178,8 +2138,6 @@ call_transmit(struct rpc_task *task) return; } - dprint_status(task); - task->tk_action = call_transmit_status; if (!xprt_prepare_transmit(task)) return; @@ -2214,7 +2172,6 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { default: - dprint_status(task); break; case -EBADMSG: task->tk_status = 0; @@ -2296,8 +2253,6 @@ call_bc_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) task->tk_status = 0; - dprint_status(task); - switch (task->tk_status) { case 0: /* Success */ @@ -2357,8 +2312,6 @@ call_status(struct rpc_task *task) if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, task->tk_status); - dprint_status(task); - status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; @@ -2405,7 +2358,8 @@ call_status(struct rpc_task *task) goto out_exit; } task->tk_action = call_encode; - rpc_check_timeout(task); + if (status != -ECONNRESET && status != -ECONNABORTED) + rpc_check_timeout(task); return; out_exit: rpc_call_rpcerror(task, status); @@ -2433,7 +2387,7 @@ rpc_check_timeout(struct rpc_task *task) if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; - dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); + trace_rpc_timeout_status(task); task->tk_timeouts++; if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { @@ -2492,8 +2446,6 @@ call_decode(struct rpc_task *task) struct xdr_stream xdr; int err; - dprint_status(task); - if (!task->tk_msg.rpc_proc->p_decode) { task->tk_action = rpc_exit_task; return; @@ -2537,8 +2489,6 @@ out: case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); - dprintk("RPC: %5u %s result %d\n", - task->tk_pid, __func__, task->tk_status); return; case -EAGAIN: task->tk_status = 0; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a67685c83eb..38fe2ce8a5aa 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -31,11 +31,9 @@ #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> -#include "netns.h" +#include <trace/events/sunrpc.h> -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_BIND -#endif +#include "netns.h" #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" @@ -216,10 +214,6 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; - dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %x%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net->ns.inum, (net == &init_net) ? " (init_net)" : ""); } /* @@ -261,19 +255,13 @@ static int rpcb_create_local_unix(struct net *net) */ clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create AF_LOCAL rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, true); @@ -309,8 +297,6 @@ static int rpcb_create_local_net(struct net *net) clnt = rpc_create(&args); if (IS_ERR(clnt)) { - dprintk("RPC: failed to create local rpcbind " - "client (errno %ld).\n", PTR_ERR(clnt)); result = PTR_ERR(clnt); goto out; } @@ -321,12 +307,8 @@ static int rpcb_create_local_net(struct net *net) * v4 upcalls. */ clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); - if (IS_ERR(clnt4)) { - dprintk("RPC: failed to bind second program to " - "rpcbind v4 client (errno %ld).\n", - PTR_ERR(clnt4)); + if (IS_ERR(clnt4)) clnt4 = NULL; - } rpcb_set_local(net, clnt, clnt4, false); @@ -403,11 +385,8 @@ static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, stru msg->rpc_resp = &result; error = rpc_call_sync(clnt, msg, flags); - if (error < 0) { - dprintk("RPC: failed to contact local rpcbind " - "server (errno %d).\n", -error); + if (error < 0) return error; - } if (!result) return -EACCES; @@ -461,9 +440,7 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); bool is_set = false; - dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " - "rpcbind\n", (port ? "" : "un"), - prog, vers, prot, port); + trace_pmap_register(prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; if (port != 0) { @@ -489,11 +466,6 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -520,11 +492,6 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); - dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " - "local rpcbind\n", (port ? "" : "un"), - map->r_prog, map->r_vers, - map->r_addr, map->r_netid); - msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; @@ -541,9 +508,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, { struct rpcbind_args *map = msg->rpc_argp; - dprintk("RPC: unregistering [%u, %u, '%s'] with " - "local rpcbind\n", - map->r_prog, map->r_vers, map->r_netid); + trace_rpcb_unregister(map->r_prog, map->r_vers, map->r_netid); map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; @@ -615,6 +580,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version, if (address == NULL) return rpcb_unregister_all_protofamilies(sn, &msg); + trace_rpcb_register(map.r_prog, map.r_vers, map.r_addr, map.r_netid); + switch (address->sa_family) { case AF_INET: return rpcb_register_inet4(sn, address, &msg); @@ -693,18 +660,12 @@ void rpcb_getport_async(struct rpc_task *task) rcu_read_unlock(); xprt = xprt_get(task->tk_xprt); - dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", - task->tk_pid, __func__, - xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on_timeout(&xprt->binding, task, NULL, jiffies + xprt->bind_timeout); if (xprt_test_and_set_binding(xprt)) { - dprintk("RPC: %5u %s: waiting for another binder\n", - task->tk_pid, __func__); xprt_put(xprt); return; } @@ -712,8 +673,6 @@ void rpcb_getport_async(struct rpc_task *task) /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { status = 0; - dprintk("RPC: %5u %s: already bound\n", - task->tk_pid, __func__); goto bailout_nofree; } @@ -732,20 +691,15 @@ void rpcb_getport_async(struct rpc_task *task) break; default: status = -EAFNOSUPPORT; - dprintk("RPC: %5u %s: bad address family\n", - task->tk_pid, __func__); goto bailout_nofree; } if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; - dprintk("RPC: %5u %s: no more getport versions available\n", - task->tk_pid, __func__); goto bailout_nofree; } - dprintk("RPC: %5u %s: trying rpcbind version %u\n", - task->tk_pid, __func__, bind_version); + trace_rpcb_getport(clnt, task, bind_version); rpcb_clnt = rpcb_create(xprt->xprt_net, clnt->cl_nodename, @@ -754,16 +708,12 @@ void rpcb_getport_async(struct rpc_task *task) clnt->cl_cred); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __func__, PTR_ERR(rpcb_clnt)); goto bailout_nofree; } map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); if (!map) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_release_client; } map->r_prog = clnt->cl_prog; @@ -780,8 +730,6 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); if (!map->r_addr) { status = -ENOMEM; - dprintk("RPC: %5u %s: no memory available\n", - task->tk_pid, __func__); goto bailout_free_args; } map->r_owner = ""; @@ -818,34 +766,33 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) { struct rpcbind_args *map = data; struct rpc_xprt *xprt = map->r_xprt; - int status = child->tk_status; + + map->r_status = child->tk_status; /* Garbage reply: retry with a lesser rpcbind version */ - if (status == -EIO) - status = -EPROTONOSUPPORT; + if (map->r_status == -EIO) + map->r_status = -EPROTONOSUPPORT; /* rpcbind server doesn't support this rpcbind protocol version */ - if (status == -EPROTONOSUPPORT) + if (map->r_status == -EPROTONOSUPPORT) xprt->bind_index++; - if (status < 0) { + if (map->r_status < 0) { /* rpcbind server not available on remote host? */ - xprt->ops->set_port(xprt, 0); + map->r_port = 0; + } else if (map->r_port == 0) { /* Requested RPC service wasn't registered on remote host */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; + map->r_status = -EACCES; } else { /* Succeeded */ - xprt->ops->set_port(xprt, map->r_port); - xprt_set_bound(xprt); - status = 0; + map->r_status = 0; } - dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->r_port); - - map->r_status = status; + trace_rpcb_setport(child, map->r_status, map->r_port); + xprt->ops->set_port(xprt, map->r_port); + if (map->r_port) + xprt_set_bound(xprt); } /* @@ -858,11 +805,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p++ = cpu_to_be32(rpcb->r_vers); @@ -884,8 +826,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr, return -EIO; port = be32_to_cpup(p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, port); if (unlikely(port > USHRT_MAX)) return -EIO; @@ -906,11 +846,6 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr, *boolp = 0; if (*p != xdr_zero) *boolp = 1; - - dprintk("RPC: %5u RPCB_%s call %s\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - (*boolp ? "succeeded" : "failed")); return 0; } @@ -935,12 +870,6 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, const struct rpcbind_args *rpcb = data; __be32 *p; - dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, - rpcb->r_netid, rpcb->r_addr); - p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2); *p++ = cpu_to_be32(rpcb->r_prog); *p = cpu_to_be32(rpcb->r_vers); @@ -970,11 +899,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, * If the returned universal address is a null string, * the requested RPC service was not registered. */ - if (len == 0) { - dprintk("RPC: %5u RPCB reply: program not registered\n", - req->rq_task->tk_pid); + if (len == 0) return 0; - } if (unlikely(len > RPCBIND_MAXUADDRLEN)) goto out_fail; @@ -982,8 +908,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) @@ -993,9 +917,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, return 0; out_fail: - dprintk("RPC: %5u malformed RPCB_%s reply\n", - req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name); return -EIO; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7eba20a88438..f06d7c315017 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -27,10 +27,6 @@ #include "sunrpc.h" -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -#define RPCDBG_FACILITY RPCDBG_SCHED -#endif - #define CREATE_TRACE_POINTS #include <trace/events/sunrpc.h> @@ -85,7 +81,6 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (list_empty(&task->u.tk_wait.timer_list)) return; - dprintk("RPC: %5u disabling timer\n", task->tk_pid); task->tk_timeout = 0; list_del(&task->u.tk_wait.timer_list); if (list_empty(&queue->timer_list.list)) @@ -111,9 +106,6 @@ static void __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned long timeout) { - dprintk("RPC: %5u setting alarm for %u ms\n", - task->tk_pid, jiffies_to_msecs(timeout - jiffies)); - task->tk_timeout = timeout; if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) rpc_set_queue_timer(queue, timeout); @@ -216,9 +208,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, /* barrier matches the read in rpc_wake_up_task_queue_locked() */ smp_wmb(); rpc_set_queued(task); - - dprintk("RPC: %5u added to queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } /* @@ -241,8 +230,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas else list_del(&task->u.tk_wait.list); queue->qlen--; - dprintk("RPC: %5u removed from queue %p \"%s\"\n", - task->tk_pid, queue, rpc_qname(queue)); } static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) @@ -382,13 +369,9 @@ static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, unsigned char queue_priority) { - dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", - task->tk_pid, rpc_qname(q), jiffies); - trace_rpc_task_sleep(task, q); __rpc_add_wait_queue(q, task, queue_priority); - } static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, @@ -510,9 +493,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", - task->tk_pid, jiffies); - /* Has the task been executed yet? If not, we cannot wake it up! */ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -524,8 +504,6 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq, __rpc_remove_wait_queue(queue, task); rpc_make_runnable(wq, task); - - dprintk("RPC: __rpc_wake_up_task done\n"); } /* @@ -663,8 +641,6 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_first(%p \"%s\")\n", - queue, rpc_qname(queue)); spin_lock(&queue->lock); task = __rpc_find_next_queued(queue); if (task != NULL) @@ -770,7 +746,7 @@ static void __rpc_queue_timer_fn(struct work_struct *work) list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { timeo = task->tk_timeout; if (time_after_eq(now, timeo)) { - dprintk("RPC: %5u timeout\n", task->tk_pid); + trace_rpc_task_timeout(task, task->tk_action); task->tk_status = -ETIMEDOUT; rpc_wake_up_task_queue_locked(queue, task); continue; @@ -885,9 +861,6 @@ static void __rpc_execute(struct rpc_task *task) int task_is_async = RPC_IS_ASYNC(task); int status = 0; - dprintk("RPC: %5u __rpc_execute flags=0x%x\n", - task->tk_pid, task->tk_flags); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) return; @@ -947,7 +920,7 @@ static void __rpc_execute(struct rpc_task *task) return; /* sync task: sleep here */ - dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); + trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, TASK_KILLABLE); @@ -963,11 +936,9 @@ static void __rpc_execute(struct rpc_task *task) task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); } - dprintk("RPC: %5u sync task resuming\n", task->tk_pid); + trace_rpc_task_sync_wake(task, task->tk_action); } - dprintk("RPC: %5u return %d, status %d\n", task->tk_pid, status, - task->tk_status); /* Release all resources associated with the task */ rpc_release_task(task); } @@ -1036,8 +1007,6 @@ int rpc_malloc(struct rpc_task *task) return -ENOMEM; buf->len = size; - dprintk("RPC: %5u allocated buffer of size %zu at %p\n", - task->tk_pid, size, buf); rqst->rq_buffer = buf->data; rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; @@ -1058,9 +1027,6 @@ void rpc_free(struct rpc_task *task) buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; - dprintk("RPC: freeing buffer of size %zu at %p\n", - size, buf); - if (size <= RPC_BUFFER_MAXSIZE) mempool_free(buf, rpc_buffer_mempool); else @@ -1095,9 +1061,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; rpc_init_task_statistics(task); - - dprintk("RPC: new task initialized, procpid %u\n", - task_pid_nr(current)); } static struct rpc_task * @@ -1121,7 +1084,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) rpc_init_task(task, setup_data); task->tk_flags |= flags; - dprintk("RPC: allocated task %p\n", task); return task; } @@ -1151,10 +1113,8 @@ static void rpc_free_task(struct rpc_task *task) put_rpccred(task->tk_op_cred); rpc_release_calldata(task->tk_ops, task->tk_calldata); - if (tk_flags & RPC_TASK_DYNAMIC) { - dprintk("RPC: %5u freeing task\n", task->tk_pid); + if (tk_flags & RPC_TASK_DYNAMIC) mempool_free(task, rpc_task_mempool); - } } static void rpc_async_release(struct work_struct *work) @@ -1208,8 +1168,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task_async); static void rpc_release_task(struct rpc_task *task) { - dprintk("RPC: %5u release task\n", task->tk_pid); - WARN_ON_ONCE(RPC_IS_QUEUED(task)); rpc_release_resources_task(task); @@ -1250,7 +1208,6 @@ static int rpciod_start(void) /* * Create the rpciod thread and wait for it to start. */ - dprintk("RPC: creating workqueue rpciod\n"); wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!wq) goto out_failed; @@ -1275,7 +1232,6 @@ static void rpciod_stop(void) if (rpciod_workqueue == NULL) return; - dprintk("RPC: destroying workqueue rpciod\n"); wq = rpciod_workqueue; rpciod_workqueue = NULL; diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index f6fe2e6cd65a..2f59464e6524 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 6c86e2a7d942..a18b36b5422d 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -70,7 +70,13 @@ static int proc_do_xprt(struct ctl_table *table, int write, return 0; } len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); - return memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); + *lenp = memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); + + if (*lenp < 0) { + *lenp = 0; + return -EINVAL; + } + return 0; } static int diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index be11d672b5b9..71e03b930b70 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -19,6 +19,9 @@ #include <linux/bvec.h> #include <trace/events/sunrpc.h> +static void _copy_to_pages(struct page **, size_t, const char *, size_t); + + /* * XDR functions for basic NFS types */ @@ -202,6 +205,88 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); */ /** + * _shift_data_left_pages + * @pages: vector of pages containing both the source and dest memory area. + * @pgto_base: page vector address of destination + * @pgfrom_base: page vector address of source + * @len: number of bytes to copy + * + * Note: the addresses pgto_base and pgfrom_base are both calculated in + * the same way: + * if a memory area starts at byte 'base' in page 'pages[i]', + * then its address is given as (i << PAGE_CACHE_SHIFT) + base + * Alse note: pgto_base must be < pgfrom_base, but the memory areas + * they point to may overlap. + */ +static void +_shift_data_left_pages(struct page **pages, size_t pgto_base, + size_t pgfrom_base, size_t len) +{ + struct page **pgfrom, **pgto; + char *vfrom, *vto; + size_t copy; + + BUG_ON(pgfrom_base <= pgto_base); + + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); + + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; + + do { + if (pgto_base >= PAGE_SIZE) { + pgto_base = 0; + pgto++; + } + if (pgfrom_base >= PAGE_SIZE){ + pgfrom_base = 0; + pgfrom++; + } + + copy = len; + if (copy > (PAGE_SIZE - pgto_base)) + copy = PAGE_SIZE - pgto_base; + if (copy > (PAGE_SIZE - pgfrom_base)) + copy = PAGE_SIZE - pgfrom_base; + + vto = kmap_atomic(*pgto); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); + flush_dcache_page(*pgto); + kunmap_atomic(vto); + + pgto_base += copy; + pgfrom_base += copy; + + } while ((len -= copy) != 0); +} + +static void +_shift_data_left_tail(struct xdr_buf *buf, unsigned int pgto, size_t len) +{ + struct kvec *tail = buf->tail; + + if (len > tail->iov_len) + len = tail->iov_len; + + _copy_to_pages(buf->pages, + buf->page_base + pgto, + (char *)tail->iov_base, + len); + tail->iov_len -= len; + + if (tail->iov_len > 0) + memmove((char *)tail->iov_base, + tail->iov_base + len, + tail->iov_len); +} + +/** * _shift_data_right_pages * @pages: vector of pages containing both the source and dest memory area. * @pgto_base: page vector address of destination @@ -266,6 +351,46 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, } while ((len -= copy) != 0); } +static unsigned int +_shift_data_right_tail(struct xdr_buf *buf, unsigned int pgfrom, size_t len) +{ + struct kvec *tail = buf->tail; + unsigned int tailbuf_len; + unsigned int result = 0; + size_t copy; + + tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; + + /* Shift the tail first */ + if (tailbuf_len != 0) { + unsigned int free_space = tailbuf_len - tail->iov_len; + + if (len < free_space) + free_space = len; + if (len > free_space) + len = free_space; + + tail->iov_len += free_space; + copy = len; + + if (tail->iov_len > len) { + char *p = (char *)tail->iov_base + len; + memmove(p, tail->iov_base, tail->iov_len - free_space); + result += tail->iov_len - free_space; + } else + copy = tail->iov_len; + + /* Copy from the inlined pages into the tail */ + _copy_from_pages((char *)tail->iov_base, + buf->pages, + buf->page_base + pgfrom, + copy); + result += copy; + } + + return result; +} + /** * _copy_to_pages * @pages: array of pages @@ -351,6 +476,38 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) EXPORT_SYMBOL_GPL(_copy_from_pages); /** + * _zero_pages + * @pages: array of pages + * @pgbase: beginning page vector address + * @len: length + */ +static void +_zero_pages(struct page **pages, size_t pgbase, size_t len) +{ + struct page **page; + char *vpage; + size_t zero; + + page = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; + + do { + zero = PAGE_SIZE - pgbase; + if (zero > len) + zero = len; + + vpage = kmap_atomic(*page); + memset(vpage + pgbase, 0, zero); + kunmap_atomic(vpage); + + flush_dcache_page(*page); + pgbase = 0; + page++; + + } while ((len -= zero) != 0); +} + +/** * xdr_shrink_bufhead * @buf: xdr_buf * @len: bytes to remove from buf->head[0] @@ -446,39 +603,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) { - struct kvec *tail; - size_t copy; unsigned int pglen = buf->page_len; - unsigned int tailbuf_len; unsigned int result; - result = 0; - tail = buf->tail; if (len > buf->page_len) len = buf-> page_len; - tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; - - /* Shift the tail first */ - if (tailbuf_len != 0) { - unsigned int free_space = tailbuf_len - tail->iov_len; - if (len < free_space) - free_space = len; - tail->iov_len += free_space; - - copy = len; - if (tail->iov_len > len) { - char *p = (char *)tail->iov_base + len; - memmove(p, tail->iov_base, tail->iov_len - len); - result += tail->iov_len - len; - } else - copy = tail->iov_len; - /* Copy from the inlined pages into the tail */ - _copy_from_pages((char *)tail->iov_base, - buf->pages, buf->page_base + pglen - len, - copy); - result += copy; - } + result = _shift_data_right_tail(buf, pglen - len, len); buf->page_len -= len; buf->buflen -= len; /* Have we truncated the message? */ @@ -506,6 +637,19 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr) EXPORT_SYMBOL_GPL(xdr_stream_pos); /** + * xdr_page_pos - Return the current offset from the start of the xdr pages + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_page_pos(const struct xdr_stream *xdr) +{ + unsigned int pos = xdr_stream_pos(xdr); + + WARN_ON(pos < xdr->buf->head[0].iov_len); + return pos - xdr->buf->head[0].iov_len; +} +EXPORT_SYMBOL_GPL(xdr_page_pos); + +/** * xdr_init_encode - Initialize a struct xdr_stream for sending data. * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer in which to encode data @@ -648,6 +792,51 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); + +/** + * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending + * @xdr: pointer to xdr_stream + * @vec: pointer to a kvec array + * @nbytes: number of bytes to reserve + * + * Reserves enough buffer space to encode 'nbytes' of data and stores the + * pointers in 'vec'. The size argument passed to xdr_reserve_space() is + * determined based on the number of bytes remaining in the current page to + * avoid invalidating iov_base pointers when xdr_commit_encode() is called. + */ +int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes) +{ + int thislen; + int v = 0; + __be32 *p; + + /* + * svcrdma requires every READ payload to start somewhere + * in xdr->pages. + */ + if (xdr->iov == xdr->buf->head) { + xdr->iov = NULL; + xdr->end = xdr->p; + } + + while (nbytes) { + thislen = xdr->buf->page_len % PAGE_SIZE; + thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen); + + p = xdr_reserve_space(xdr, thislen); + if (!p) + return -EIO; + + vec[v].iov_base = p; + vec[v].iov_len = thislen; + v++; + nbytes -= thislen; + } + + return v; +} +EXPORT_SYMBOL_GPL(xdr_reserve_space_vec); + /** * xdr_truncate_encode - truncate an encode buffer * @xdr: pointer to xdr_stream @@ -658,7 +847,7 @@ EXPORT_SYMBOL_GPL(xdr_reserve_space); * head, tail, and page lengths are adjusted to correspond. * * If this means moving xdr->p to a different buffer, we assume that - * that the end pointer should be set to the end of the current page, + * the end pointer should be set to the end of the current page, * except in the case of the head buffer when we assume the head * buffer's current length represents the end of the available buffer. * @@ -825,6 +1014,13 @@ static int xdr_set_page_base(struct xdr_stream *xdr, return 0; } +static void xdr_set_page(struct xdr_stream *xdr, unsigned int base, + unsigned int len) +{ + if (xdr_set_page_base(xdr, base, len) < 0) + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); +} + static void xdr_set_next_page(struct xdr_stream *xdr) { unsigned int newbase; @@ -832,8 +1028,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; newbase -= xdr->buf->page_base; - if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, newbase, PAGE_SIZE); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -841,8 +1036,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) if (xdr->page_ptr != NULL) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { - if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); + xdr_set_page(xdr, 0, PAGE_SIZE); } return xdr->p != xdr->end; } @@ -979,26 +1173,33 @@ out_overflow: } EXPORT_SYMBOL_GPL(xdr_inline_decode); -static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +static void xdr_realign_pages(struct xdr_stream *xdr) { struct xdr_buf *buf = xdr->buf; - struct kvec *iov; - unsigned int nwords = XDR_QUADLEN(len); + struct kvec *iov = buf->head; unsigned int cur = xdr_stream_pos(xdr); unsigned int copied, offset; - if (xdr->nwords == 0) - return 0; - /* Realign pages to current pointer position */ - iov = buf->head; if (iov->iov_len > cur) { offset = iov->iov_len - cur; copied = xdr_shrink_bufhead(buf, offset); trace_rpc_xdr_alignment(xdr, offset, copied); xdr->nwords = XDR_QUADLEN(buf->len - cur); } +} + +static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int nwords = XDR_QUADLEN(len); + unsigned int cur = xdr_stream_pos(xdr); + unsigned int copied, offset; + + if (xdr->nwords == 0) + return 0; + xdr_realign_pages(xdr); if (nwords > xdr->nwords) { nwords = xdr->nwords; len = nwords << 2; @@ -1057,6 +1258,79 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_read_pages); +uint64_t xdr_align_data(struct xdr_stream *xdr, uint64_t offset, uint32_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int from, bytes; + unsigned int shift = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + if (length < bytes) + bytes = length; + + /* Move page data to the left */ + if (from > offset) { + shift = min_t(unsigned int, bytes, buf->page_len - from); + _shift_data_left_pages(buf->pages, + buf->page_base + offset, + buf->page_base + from, + shift); + bytes -= shift; + + /* Move tail data into the pages, if necessary */ + if (bytes > 0) + _shift_data_left_tail(buf, offset + shift, bytes); + } + + xdr->nwords -= XDR_QUADLEN(length); + xdr_set_page(xdr, from + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_align_data); + +uint64_t xdr_expand_hole(struct xdr_stream *xdr, uint64_t offset, uint64_t length) +{ + struct xdr_buf *buf = xdr->buf; + unsigned int bytes; + unsigned int from; + unsigned int truncated = 0; + + if ((offset + length) < offset || + (offset + length) > buf->page_len) + length = buf->page_len - offset; + + xdr_realign_pages(xdr); + from = xdr_page_pos(xdr); + bytes = xdr->nwords << 2; + + if (offset + length + bytes > buf->page_len) { + unsigned int shift = (offset + length + bytes) - buf->page_len; + unsigned int res = _shift_data_right_tail(buf, from + bytes - shift, shift); + truncated = shift - res; + xdr->nwords -= XDR_QUADLEN(truncated); + bytes -= shift; + } + + /* Now move the page data over and zero pages */ + if (bytes > 0) + _shift_data_right_pages(buf->pages, + buf->page_base + offset + length, + buf->page_base + from, + bytes); + _zero_pages(buf->pages, buf->page_base + offset, length); + + buf->len += length - (from - offset) - truncated; + xdr_set_page(xdr, offset + length, PAGE_SIZE); + return length; +} +EXPORT_SYMBOL_GPL(xdr_expand_hole); + /** * xdr_enter_page - decode data from the XDR page * @xdr: pointer to xdr_stream struct diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5a8e47bbfb9f..f6c17e75f20e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -834,8 +834,7 @@ void xprt_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid, - xprt, (xprt_connected(xprt) ? "is" : "is not")); + trace_xprt_connect(xprt); if (!xprt_bound(xprt)) { task->tk_status = -EAGAIN; @@ -1131,8 +1130,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - trace_xprt_complete_rqst(xprt, req->rq_xid, copied); - xprt->stat.recvs++; req->rq_private_buf.len = copied; @@ -1269,7 +1266,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 1); goto out; } } else if (RPC_IS_SWAPPER(task)) { @@ -1281,7 +1277,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) /* Note: req is added _before_ pos */ list_add_tail(&req->rq_xmit, &pos->rq_xmit); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 2); goto out; } } else if (!req->rq_seqno) { @@ -1290,13 +1285,11 @@ xprt_request_enqueue_transmit(struct rpc_task *task) continue; list_add_tail(&req->rq_xmit2, &pos->rq_xmit2); INIT_LIST_HEAD(&req->rq_xmit); - trace_xprt_enq_xmit(task, 3); goto out; } } list_add_tail(&req->rq_xmit, &xprt->xmit_queue); INIT_LIST_HEAD(&req->rq_xmit2); - trace_xprt_enq_xmit(task, 4); out: set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate); spin_unlock(&xprt->queue_lock); @@ -1414,9 +1407,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); - if (!xprt_lock_write(xprt, task)) { + trace_xprt_transmit_queued(xprt, task); + /* Race breaker: someone may have transmitted us */ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) rpc_wake_up_queued_task_set_status(&xprt->sending, @@ -1520,10 +1513,13 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status; + int counter, status; spin_lock(&xprt->queue_lock); + counter = 0; while (!list_empty(&xprt->xmit_queue)) { + if (++counter == 20) + break; next = list_first_entry(&xprt->xmit_queue, struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); @@ -1531,7 +1527,6 @@ xprt_transmit(struct rpc_task *task) status = xprt_request_transmit(next, task); if (status == -EBADMSG && next != req) status = 0; - cond_resched(); spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); if (status == 0) { @@ -1747,8 +1742,8 @@ xprt_request_init(struct rpc_task *task) req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; xprt_init_majortimeo(task, req); - dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, - req, ntohl(req->rq_xid)); + + trace_xprt_reserve(req); } static void @@ -1838,7 +1833,6 @@ void xprt_release(struct rpc_task *task) if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); - dprintk("RPC: %5u release request %p\n", task->tk_pid, req); if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 7f94c9a19fd3..44888f5badef 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,7 +124,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) if (IS_ERR(frmr)) goto out_mr_err; - sg = kcalloc(depth, sizeof(*sg), GFP_NOFS); + sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); if (!sg) goto out_list_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index fe54cbe97a46..80a0c0e87590 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -137,7 +137,7 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, } /* A chunk context tracks all I/O for moving one Read or Write - * chunk. This is a a set of rdma_rw's that handle data movement + * chunk. This is a set of rdma_rw's that handle data movement * for all segments of one chunk. * * These are small, acquired with a single allocator call, and diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7b94d971feb3..c3d588b149aa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -638,10 +638,11 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - pageoff, remaining); - memcpy(dst, page_address(*ppages), len); + memcpy(dst, page_address(*ppages) + pageoff, len); remaining -= len; dst += len; pageoff = 0; + ppages++; } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 053c8ab1265a..8915e42240d3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -413,9 +413,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); snprintf(buf, sizeof(buf), "%4hx", port); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); - - trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, - rx_xprt)); } /** @@ -586,11 +583,9 @@ xprt_rdma_allocate(struct rpc_task *task) rqst->rq_buffer = rdmab_data(req->rl_sendbuf); rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf); - trace_xprtrdma_op_allocate(task, req); return 0; out_fail: - trace_xprtrdma_op_allocate(task, NULL); return -ENOMEM; } @@ -607,8 +602,6 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - trace_xprtrdma_op_free(task, req); - if (!list_empty(&req->rl_registered)) frwr_unmap_sync(r_xprt, req); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 554e1bb4c1c7..7090bbee0ec5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -762,10 +762,7 @@ static int xs_nospace(struct rpc_rqst *req) struct sock *sk = transport->inet; int ret = -EAGAIN; - dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", - req->rq_task->tk_pid, - req->rq_slen - transport->xmit.offset, - req->rq_slen); + trace_rpc_socket_nospace(req, transport); /* Protect against races with write_space */ spin_lock(&xprt->transport_lock); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 940d176e0e87..d4beca895992 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -108,6 +108,8 @@ static void tipc_bcbase_select_primary(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int all_dests = tipc_link_bc_peers(bb->link); + int max_win = tipc_link_max_win(bb->link); + int min_win = tipc_link_min_win(bb->link); int i, mtu, prim; bb->primary_bearer = INVALID_BEARER_ID; @@ -121,8 +123,12 @@ static void tipc_bcbase_select_primary(struct net *net) continue; mtu = tipc_bearer_mtu(net, i); - if (mtu < tipc_link_mtu(bb->link)) + if (mtu < tipc_link_mtu(bb->link)) { tipc_link_set_mtu(bb->link, mtu); + tipc_link_set_queue_limits(bb->link, + min_win, + max_win); + } bb->bcast_support &= tipc_bearer_bcast_support(net, i); if (bb->dests[i] < all_dests) continue; @@ -585,7 +591,7 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) if (max_win > TIPC_MAX_LINK_WIN) return -EINVAL; tipc_bcast_lock(net); - tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win); + tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win); tipc_bcast_unlock(net); return 0; } |