Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.h | 28
-rw-r--r--  net/Kconfig | 1
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/ax25/af_ax25.c | 2
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 399
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.c | 58
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.h | 10
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 160
-rw-r--r--  net/batman-adv/gateway_client.c | 41
-rw-r--r--  net/batman-adv/hard-interface.c | 85
-rw-r--r--  net/batman-adv/hard-interface.h | 2
-rw-r--r--  net/batman-adv/icmp_socket.c | 128
-rw-r--r--  net/batman-adv/icmp_socket.h | 2
-rw-r--r--  net/batman-adv/main.c | 37
-rw-r--r--  net/batman-adv/main.h | 23
-rw-r--r--  net/batman-adv/network-coding.c | 8
-rw-r--r--  net/batman-adv/originator.c | 343
-rw-r--r--  net/batman-adv/originator.h | 13
-rw-r--r--  net/batman-adv/packet.h | 39
-rw-r--r--  net/batman-adv/routing.c | 187
-rw-r--r--  net/batman-adv/routing.h | 3
-rw-r--r--  net/batman-adv/send.c | 98
-rw-r--r--  net/batman-adv/send.h | 51
-rw-r--r--  net/batman-adv/soft-interface.c | 235
-rw-r--r--  net/batman-adv/soft-interface.h | 4
-rw-r--r--  net/batman-adv/sysfs.c | 178
-rw-r--r--  net/batman-adv/sysfs.h | 10
-rw-r--r--  net/batman-adv/translation-table.c | 1384
-rw-r--r--  net/batman-adv/translation-table.h | 26
-rw-r--r--  net/batman-adv/types.h | 192
-rw-r--r--  net/bluetooth/hidp/hidp.h | 4
-rw-r--r--  net/bridge/br_device.c | 2
-rw-r--r--  net/bridge/br_fdb.c | 4
-rw-r--r--  net/bridge/br_input.c | 2
-rw-r--r--  net/bridge/br_mdb.c | 2
-rw-r--r--  net/bridge/br_multicast.c | 82
-rw-r--r--  net/bridge/br_netfilter.c | 2
-rw-r--r--  net/bridge/br_netlink.c | 2
-rw-r--r--  net/bridge/br_private.h | 307
-rw-r--r--  net/bridge/br_private_stp.h | 24
-rw-r--r--  net/bridge/br_stp_if.c | 2
-rw-r--r--  net/bridge/br_vlan.c | 125
-rw-r--r--  net/bridge/netfilter/ebt_ulog.c | 9
-rw-r--r--  net/can/af_can.h | 6
-rw-r--r--  net/ceph/auth_none.h | 2
-rw-r--r--  net/ceph/auth_x.h | 2
-rw-r--r--  net/ceph/crypto.h | 48
-rw-r--r--  net/core/datagram.c | 2
-rw-r--r--  net/core/dev.c | 41
-rw-r--r--  net/core/dev_addr_lists.c | 4
-rw-r--r--  net/core/ethtool.c | 2
-rw-r--r--  net/core/flow_dissector.c | 36
-rw-r--r--  net/core/iovec.c | 2
-rw-r--r--  net/core/net-sysfs.c | 2
-rw-r--r--  net/core/netpoll.c | 31
-rw-r--r--  net/core/rtnetlink.c | 9
-rw-r--r--  net/core/secure_seq.c | 18
-rw-r--r--  net/core/skbuff.c | 53
-rw-r--r--  net/core/sock.c | 33
-rw-r--r--  net/core/utils.c | 49
-rw-r--r--  net/dccp/ackvec.h | 21
-rw-r--r--  net/dccp/ccid.h | 18
-rw-r--r--  net/dccp/ccids/lib/loss_interval.h | 8
-rw-r--r--  net/dccp/ccids/lib/packet_history.h | 25
-rw-r--r--  net/dccp/ccids/lib/tfrc.h | 22
-rw-r--r--  net/dccp/dccp.h | 186
-rw-r--r--  net/dccp/feat.h | 26
-rw-r--r--  net/hsr/Kconfig | 27
-rw-r--r--  net/hsr/Makefile | 7
-rw-r--r--  net/hsr/hsr_device.c | 596
-rw-r--r--  net/hsr/hsr_device.h | 29
-rw-r--r--  net/hsr/hsr_framereg.c | 503
-rw-r--r--  net/hsr/hsr_framereg.h | 53
-rw-r--r--  net/hsr/hsr_main.c | 469
-rw-r--r--  net/hsr/hsr_main.h | 166
-rw-r--r--  net/hsr/hsr_netlink.c | 457
-rw-r--r--  net/hsr/hsr_netlink.h | 30
-rw-r--r--  net/ieee802154/6lowpan.c | 46
-rw-r--r--  net/ipv4/af_inet.c | 91
-rw-r--r--  net/ipv4/esp4.c | 49
-rw-r--r--  net/ipv4/fib_frontend.c | 2
-rw-r--r--  net/ipv4/fib_lookup.h | 26
-rw-r--r--  net/ipv4/fib_semantics.c | 2
-rw-r--r--  net/ipv4/gre_demux.c | 29
-rw-r--r--  net/ipv4/gre_offload.c | 3
-rw-r--r--  net/ipv4/inet_fragment.c | 3
-rw-r--r--  net/ipv4/inet_hashtables.c | 25
-rw-r--r--  net/ipv4/ip_fragment.c | 1
-rw-r--r--  net/ipv4/ip_output.c | 25
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 33
-rw-r--r--  net/ipv4/ip_vti.c | 14
-rw-r--r--  net/ipv4/ipip.c | 11
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 5
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 5
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 110
-rw-r--r--  net/ipv4/netfilter/ipt_ULOG.c | 7
-rw-r--r--  net/ipv4/route.c | 16
-rw-r--r--  net/ipv4/syncookies.c | 15
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 90
-rw-r--r--  net/ipv4/tcp.c | 12
-rw-r--r--  net/ipv4/tcp_fastopen.c | 27
-rw-r--r--  net/ipv4/tcp_input.c | 38
-rw-r--r--  net/ipv4/tcp_ipv4.c | 1
-rw-r--r--  net/ipv4/tcp_memcontrol.c | 90
-rw-r--r--  net/ipv4/tcp_metrics.c | 5
-rw-r--r--  net/ipv4/tcp_offload.c | 21
-rw-r--r--  net/ipv4/tcp_output.c | 27
-rw-r--r--  net/ipv4/tcp_timer.c | 6
-rw-r--r--  net/ipv4/tcp_vegas.h | 10
-rw-r--r--  net/ipv4/udp.c | 20
-rw-r--r--  net/ipv4/udp_impl.h | 36
-rw-r--r--  net/ipv4/udp_offload.c | 1
-rw-r--r--  net/ipv4/xfrm4_policy.c | 7
-rw-r--r--  net/ipv6/Kconfig | 29
-rw-r--r--  net/ipv6/Makefile | 1
-rw-r--r--  net/ipv6/addrconf.c | 41
-rw-r--r--  net/ipv6/af_inet6.c | 7
-rw-r--r--  net/ipv6/ah6.c | 3
-rw-r--r--  net/ipv6/esp6.c | 51
-rw-r--r--  net/ipv6/inet6_hashtables.c | 33
-rw-r--r--  net/ipv6/ip6_gre.c | 3
-rw-r--r--  net/ipv6/ip6_offload.c | 40
-rw-r--r--  net/ipv6/ip6_output.c | 29
-rw-r--r--  net/ipv6/ip6_vti.c | 1056
-rw-r--r--  net/ipv6/ipcomp6.c | 3
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 5
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c | 7
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 16
-rw-r--r--  net/ipv6/reassembly.c | 12
-rw-r--r--  net/ipv6/route.c | 57
-rw-r--r--  net/ipv6/sit.c | 28
-rw-r--r--  net/ipv6/syncookies.c | 12
-rw-r--r--  net/ipv6/tcp_ipv6.c | 1
-rw-r--r--  net/ipv6/tcpv6_offload.c | 2
-rw-r--r--  net/ipv6/udp.c | 36
-rw-r--r--  net/ipv6/udp_impl.h | 41
-rw-r--r--  net/ipv6/udp_offload.c | 2
-rw-r--r--  net/ipv6/xfrm6_mode_tunnel.c | 69
-rw-r--r--  net/ipv6/xfrm6_policy.c | 7
-rw-r--r--  net/irda/irnet/irnet.h | 15
-rw-r--r--  net/key/af_key.c | 3
-rw-r--r--  net/l2tp/l2tp_core.h | 57
-rw-r--r--  net/l2tp/l2tp_ppp.c | 4
-rw-r--r--  net/mac80211/cfg.c | 94
-rw-r--r--  net/mac80211/chan.c | 5
-rw-r--r--  net/mac80211/debugfs.c | 55
-rw-r--r--  net/mac80211/driver-ops.h | 27
-rw-r--r--  net/mac80211/ibss.c | 608
-rw-r--r--  net/mac80211/ieee80211_i.h | 33
-rw-r--r--  net/mac80211/iface.c | 4
-rw-r--r--  net/mac80211/key.c | 2
-rw-r--r--  net/mac80211/mlme.c | 334
-rw-r--r--  net/mac80211/offchannel.c | 2
-rw-r--r--  net/mac80211/rate.h | 12
-rw-r--r--  net/mac80211/rc80211_minstrel.c | 14
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c | 23
-rw-r--r--  net/mac80211/rc80211_pid_debugfs.c | 26
-rw-r--r--  net/mac80211/rx.c | 39
-rw-r--r--  net/mac80211/scan.c | 22
-rw-r--r--  net/mac80211/spectmgmt.c | 162
-rw-r--r--  net/mac80211/status.c | 3
-rw-r--r--  net/mac80211/trace.h | 35
-rw-r--r--  net/mac80211/tx.c | 42
-rw-r--r--  net/mac80211/util.c | 166
-rw-r--r--  net/mac80211/vht.c | 4
-rw-r--r--  net/mac802154/ieee802154_dev.c | 6
-rw-r--r--  net/mac802154/wpan.c | 2
-rw-r--r--  net/mpls/mpls_gso.c | 1
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h | 11
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c | 2
-rw-r--r--  net/netfilter/ipset/ip_set_core.c | 70
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 21
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netnet.c | 22
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netportnet.c | 22
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c | 11
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 6
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 48
-rw-r--r--  net/netfilter/ipvs/ip_vs_sh.c | 39
-rw-r--r--  net/netfilter/nf_conntrack_acct.c | 12
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 16
-rw-r--r--  net/netfilter/nf_conntrack_h323_main.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 51
-rw-r--r--  net/netfilter/nf_internals.h | 28
-rw-r--r--  net/netfilter/x_tables.c | 7
-rw-r--r--  net/netfilter/xt_NFQUEUE.c | 7
-rw-r--r--  net/netfilter/xt_connbytes.c | 6
-rw-r--r--  net/netfilter/xt_socket.c | 13
-rw-r--r--  net/openvswitch/Makefile | 2
-rw-r--r--  net/openvswitch/datapath.c | 668
-rw-r--r--  net/openvswitch/datapath.h | 9
-rw-r--r--  net/openvswitch/dp_notify.c | 7
-rw-r--r--  net/openvswitch/flow.c | 1605
-rw-r--r--  net/openvswitch/flow.h | 132
-rw-r--r--  net/openvswitch/flow_netlink.c | 1630
-rw-r--r--  net/openvswitch/flow_netlink.h | 60
-rw-r--r--  net/openvswitch/flow_table.c | 592
-rw-r--r--  net/openvswitch/flow_table.h | 81
-rw-r--r--  net/openvswitch/vport-gre.c | 2
-rw-r--r--  net/openvswitch/vport-internal_dev.c | 2
-rw-r--r--  net/openvswitch/vport-netdev.c | 16
-rw-r--r--  net/openvswitch/vport-netdev.h | 1
-rw-r--r--  net/openvswitch/vport-vxlan.c | 1
-rw-r--r--  net/rds/connection.c | 12
-rw-r--r--  net/rds/rds.h | 2
-rw-r--r--  net/rxrpc/ar-internal.h | 150
-rw-r--r--  net/sched/Kconfig | 10
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/cls_bpf.c | 385
-rw-r--r--  net/sched/em_ipset.c | 4
-rw-r--r--  net/sched/sch_fq.c | 1
-rw-r--r--  net/sched/sch_netem.c | 18
-rw-r--r--  net/sctp/associola.c | 4
-rw-r--r--  net/sctp/auth.c | 14
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/ipv6.c | 4
-rw-r--r--  net/sctp/output.c | 12
-rw-r--r--  net/sctp/sm_make_chunk.c | 29
-rw-r--r--  net/sctp/sm_sideeffect.c | 1
-rw-r--r--  net/tipc/bearer.c | 18
-rw-r--r--  net/tipc/bearer.h | 10
-rw-r--r--  net/tipc/core.h | 28
-rw-r--r--  net/tipc/eth_media.c | 68
-rw-r--r--  net/tipc/ib_media.c | 58
-rw-r--r--  net/tipc/link.c | 340
-rw-r--r--  net/tipc/link.h | 4
-rw-r--r--  net/tipc/msg.c | 27
-rw-r--r--  net/tipc/msg.h | 3
-rw-r--r--  net/tipc/port.c | 66
-rw-r--r--  net/tipc/port.h | 16
-rw-r--r--  net/tipc/socket.c | 12
-rw-r--r--  net/unix/af_unix.c | 10
-rw-r--r--  net/wimax/wimax-internal.h | 18
-rw-r--r--  net/wireless/chan.c | 1
-rw-r--r--  net/wireless/core.c | 2
-rw-r--r--  net/wireless/core.h | 18
-rw-r--r--  net/wireless/debugfs.c | 24
-rw-r--r--  net/wireless/genregdb.awk | 6
-rw-r--r--  net/wireless/nl80211.c | 52
-rw-r--r--  net/wireless/radiotap.c | 7
-rw-r--r--  net/wireless/reg.c | 14
-rw-r--r--  net/wireless/sysfs.h | 4
-rw-r--r--  net/wireless/util.c | 9
-rw-r--r--  net/x25/Kconfig | 4
-rw-r--r--  net/xfrm/xfrm_hash.h | 4
-rw-r--r--  net/xfrm/xfrm_ipcomp.c | 18
-rw-r--r--  net/xfrm/xfrm_policy.c | 35
-rw-r--r--  net/xfrm/xfrm_replay.c | 54
-rw-r--r--  net/xfrm/xfrm_state.c | 4
-rw-r--r--  net/xfrm/xfrm_user.c | 5
251 files changed, 12561 insertions, 5833 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index ba5983f34c42..a2caf00b82cc 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -196,12 +196,12 @@ static inline u32 vlan_get_ingress_priority(struct net_device *dev,
}
#ifdef CONFIG_VLAN_8021Q_GVRP
-extern int vlan_gvrp_request_join(const struct net_device *dev);
-extern void vlan_gvrp_request_leave(const struct net_device *dev);
-extern int vlan_gvrp_init_applicant(struct net_device *dev);
-extern void vlan_gvrp_uninit_applicant(struct net_device *dev);
-extern int vlan_gvrp_init(void);
-extern void vlan_gvrp_uninit(void);
+int vlan_gvrp_request_join(const struct net_device *dev);
+void vlan_gvrp_request_leave(const struct net_device *dev);
+int vlan_gvrp_init_applicant(struct net_device *dev);
+void vlan_gvrp_uninit_applicant(struct net_device *dev);
+int vlan_gvrp_init(void);
+void vlan_gvrp_uninit(void);
#else
static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; }
static inline void vlan_gvrp_request_leave(const struct net_device *dev) {}
@@ -212,12 +212,12 @@ static inline void vlan_gvrp_uninit(void) {}
#endif
#ifdef CONFIG_VLAN_8021Q_MVRP
-extern int vlan_mvrp_request_join(const struct net_device *dev);
-extern void vlan_mvrp_request_leave(const struct net_device *dev);
-extern int vlan_mvrp_init_applicant(struct net_device *dev);
-extern void vlan_mvrp_uninit_applicant(struct net_device *dev);
-extern int vlan_mvrp_init(void);
-extern void vlan_mvrp_uninit(void);
+int vlan_mvrp_request_join(const struct net_device *dev);
+void vlan_mvrp_request_leave(const struct net_device *dev);
+int vlan_mvrp_init_applicant(struct net_device *dev);
+void vlan_mvrp_uninit_applicant(struct net_device *dev);
+int vlan_mvrp_init(void);
+void vlan_mvrp_uninit(void);
#else
static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; }
static inline void vlan_mvrp_request_leave(const struct net_device *dev) {}
@@ -229,8 +229,8 @@ static inline void vlan_mvrp_uninit(void) {}
extern const char vlan_fullname[];
extern const char vlan_version[];
-extern int vlan_netlink_init(void);
-extern void vlan_netlink_fini(void);
+int vlan_netlink_init(void);
+void vlan_netlink_fini(void);
extern struct rtnl_link_ops vlan_link_ops;
diff --git a/net/Kconfig b/net/Kconfig
index b50dacc072f0..0715db64a5c3 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -220,6 +220,7 @@ source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
+source "net/hsr/Kconfig"
config RPS
boolean
diff --git a/net/Makefile b/net/Makefile
index 9492e8cb64e9..8fa2f91517f1 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -71,3 +71,4 @@ obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_NET_MPLS_GSO) += mpls/
+obj-$(CONFIG_HSR) += hsr/
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 4b4d2b779ec1..a00123ebb0ae 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1735,7 +1735,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
res = -EFAULT;
break;
}
- if (amount > AX25_NOUID_BLOCK) {
+ if (amount < 0 || amount > AX25_NOUID_BLOCK) {
res = -EINVAL;
break;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 97b42d3c4bef..a2b480a90872 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -87,22 +87,198 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
return (uint8_t)(sum / count);
}
+/**
+ * batadv_iv_ogm_orig_free - free the private resources allocated for this
+ * orig_node
+ * @orig_node: the orig_node for which the resources have to be free'd
+ */
+static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
+{
+ kfree(orig_node->bat_iv.bcast_own);
+ kfree(orig_node->bat_iv.bcast_own_sum);
+}
+
+/**
+ * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to
+ * include the new hard-interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current number of interfaces
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
+ int max_if_num)
+{
+ void *data_ptr;
+ size_t data_size, old_size;
+ int ret = -ENOMEM;
+
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
+ old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
+ data_ptr = kmalloc(data_size, GFP_ATOMIC);
+ if (!data_ptr)
+ goto unlock;
+
+ memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
+ kfree(orig_node->bat_iv.bcast_own);
+ orig_node->bat_iv.bcast_own = data_ptr;
+
+ data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+ if (!data_ptr) {
+ kfree(orig_node->bat_iv.bcast_own);
+ goto unlock;
+ }
+
+ memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+ (max_if_num - 1) * sizeof(uint8_t));
+ kfree(orig_node->bat_iv.bcast_own_sum);
+ orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+ ret = 0;
+
+unlock:
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ return ret;
+}
+
+/**
+ * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
+ * exclude the removed interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current number of interfaces
+ * @del_if_num: the index of the interface being removed
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
+ int max_if_num, int del_if_num)
+{
+ int chunk_size, ret = -ENOMEM, if_offset;
+ void *data_ptr = NULL;
+
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ /* last interface was removed */
+ if (max_if_num == 0)
+ goto free_bcast_own;
+
+ chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
+ data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
+ if (!data_ptr)
+ goto unlock;
+
+ /* copy first part */
+ memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
+
+ /* copy second part */
+ memcpy((char *)data_ptr + del_if_num * chunk_size,
+ orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size),
+ (max_if_num - del_if_num) * chunk_size);
+
+free_bcast_own:
+ kfree(orig_node->bat_iv.bcast_own);
+ orig_node->bat_iv.bcast_own = data_ptr;
+
+ if (max_if_num == 0)
+ goto free_own_sum;
+
+ data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+ if (!data_ptr) {
+ kfree(orig_node->bat_iv.bcast_own);
+ goto unlock;
+ }
+
+ memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+ del_if_num * sizeof(uint8_t));
+
+ if_offset = (del_if_num + 1) * sizeof(uint8_t);
+ memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
+ orig_node->bat_iv.bcast_own_sum + if_offset,
+ (max_if_num - del_if_num) * sizeof(uint8_t));
+
+free_own_sum:
+ kfree(orig_node->bat_iv.bcast_own_sum);
+ orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+ ret = 0;
+unlock:
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ return ret;
+}
+
+/**
+ * batadv_iv_ogm_orig_get - retrieve or create (if it does not exist) an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: mac address of the originator
+ *
+ * Returns the originator object corresponding to the passed mac address or NULL
+ * on failure.
+ * If the object does not exist, it is created and initialised.
+ */
+static struct batadv_orig_node *
+batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
+{
+ struct batadv_orig_node *orig_node;
+ int size, hash_added;
+
+ orig_node = batadv_orig_hash_find(bat_priv, addr);
+ if (orig_node)
+ return orig_node;
+
+ orig_node = batadv_orig_node_new(bat_priv, addr);
+ if (!orig_node)
+ return NULL;
+
+ spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock);
+
+ size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS;
+ orig_node->bat_iv.bcast_own = kzalloc(size, GFP_ATOMIC);
+ if (!orig_node->bat_iv.bcast_own)
+ goto free_orig_node;
+
+ size = bat_priv->num_ifaces * sizeof(uint8_t);
+ orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
+ if (!orig_node->bat_iv.bcast_own_sum)
+ goto free_bcast_own;
+
+ hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
+ batadv_choose_orig, orig_node,
+ &orig_node->hash_entry);
+ if (hash_added != 0)
+ goto free_bcast_own;
+
+ return orig_node;
+
+free_bcast_own:
+ kfree(orig_node->bat_iv.bcast_own);
+free_orig_node:
+ batadv_orig_node_free_ref(orig_node);
+
+ return NULL;
+}
+
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh)
{
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_neigh_node *neigh_node;
- neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr);
+ neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
if (!neigh_node)
goto out;
- INIT_LIST_HEAD(&neigh_node->bonding_list);
+ spin_lock_init(&neigh_node->bat_iv.lq_update_lock);
- neigh_node->orig_node = orig_neigh;
- neigh_node->if_incoming = hard_iface;
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+ neigh_addr, orig_node->orig, hard_iface->net_dev->name);
spin_lock_bh(&orig_node->neigh_list_lock);
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
@@ -661,20 +837,22 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
uint32_t i;
size_t word_index;
uint8_t *w;
+ int if_num;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- spin_lock_bh(&orig_node->ogm_cnt_lock);
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
word_index = hard_iface->if_num * BATADV_NUM_WORDS;
- word = &(orig_node->bcast_own[word_index]);
+ word = &(orig_node->bat_iv.bcast_own[word_index]);
batadv_bit_get_packet(bat_priv, word, 1, 0);
- w = &orig_node->bcast_own_sum[hard_iface->if_num];
+ if_num = hard_iface->if_num;
+ w = &orig_node->bat_iv.bcast_own_sum[if_num];
*w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
}
rcu_read_unlock();
}
@@ -755,18 +933,18 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
if (dup_status != BATADV_NO_DUP)
continue;
- spin_lock_bh(&tmp_neigh_node->lq_update_lock);
- batadv_ring_buffer_set(tmp_neigh_node->tq_recv,
- &tmp_neigh_node->tq_index, 0);
- tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->tq_recv);
- tmp_neigh_node->tq_avg = tq_avg;
- spin_unlock_bh(&tmp_neigh_node->lq_update_lock);
+ spin_lock_bh(&tmp_neigh_node->bat_iv.lq_update_lock);
+ batadv_ring_buffer_set(tmp_neigh_node->bat_iv.tq_recv,
+ &tmp_neigh_node->bat_iv.tq_index, 0);
+ tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->bat_iv.tq_recv);
+ tmp_neigh_node->bat_iv.tq_avg = tq_avg;
+ spin_unlock_bh(&tmp_neigh_node->bat_iv.lq_update_lock);
}
if (!neigh_node) {
struct batadv_orig_node *orig_tmp;
- orig_tmp = batadv_get_orig_node(bat_priv, ethhdr->h_source);
+ orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source);
if (!orig_tmp)
goto unlock;
@@ -785,19 +963,20 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_node->last_seen = jiffies;
- spin_lock_bh(&neigh_node->lq_update_lock);
- batadv_ring_buffer_set(neigh_node->tq_recv,
- &neigh_node->tq_index,
+ spin_lock_bh(&neigh_node->bat_iv.lq_update_lock);
+ batadv_ring_buffer_set(neigh_node->bat_iv.tq_recv,
+ &neigh_node->bat_iv.tq_index,
batadv_ogm_packet->tq);
- neigh_node->tq_avg = batadv_ring_buffer_avg(neigh_node->tq_recv);
- spin_unlock_bh(&neigh_node->lq_update_lock);
+ tq_avg = batadv_ring_buffer_avg(neigh_node->bat_iv.tq_recv);
+ neigh_node->bat_iv.tq_avg = tq_avg;
+ spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock);
if (dup_status == BATADV_NO_DUP) {
orig_node->last_ttl = batadv_ogm_packet->header.ttl;
neigh_node->last_ttl = batadv_ogm_packet->header.ttl;
}
- batadv_bonding_candidate_add(orig_node, neigh_node);
+ batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node);
/* if this neighbor already is our next hop there is nothing
* to change
@@ -807,24 +986,24 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
goto out;
/* if this neighbor does not offer a better TQ we won't consider it */
- if (router && (router->tq_avg > neigh_node->tq_avg))
+ if (router && (router->bat_iv.tq_avg > neigh_node->bat_iv.tq_avg))
goto out;
/* if the TQ is the same and the link not more symmetric we
* won't consider it either
*/
- if (router && (neigh_node->tq_avg == router->tq_avg)) {
+ if (router && (neigh_node->bat_iv.tq_avg == router->bat_iv.tq_avg)) {
orig_node_tmp = router->orig_node;
- spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+ spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
if_num = router->if_incoming->if_num;
- sum_orig = orig_node_tmp->bcast_own_sum[if_num];
- spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+ sum_orig = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
+ spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
orig_node_tmp = neigh_node->orig_node;
- spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+ spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
if_num = neigh_node->if_incoming->if_num;
- sum_neigh = orig_node_tmp->bcast_own_sum[if_num];
- spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+ sum_neigh = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
+ spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
if (sum_orig >= sum_neigh)
goto out;
@@ -852,7 +1031,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
uint8_t total_count;
uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- int tq_asym_penalty, inv_asym_penalty, ret = 0;
+ int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0;
unsigned int combined_tq;
/* find corresponding one hop neighbor */
@@ -890,10 +1069,11 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
- neigh_rq_count = neigh_node->real_packet_count;
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ if_num = if_incoming->if_num;
+ orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
+ neigh_rq_count = neigh_node->bat_iv.real_packet_count;
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
@@ -975,12 +1155,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
uint8_t *neigh_addr;
uint8_t packet_count;
+ unsigned long *bitmap;
- orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig);
+ orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig);
if (!orig_node)
return BATADV_NO_DUP;
- spin_lock_bh(&orig_node->ogm_cnt_lock);
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
seq_diff = seqno - orig_node->last_real_seqno;
/* signalize caller that the packet is to be dropped. */
@@ -995,7 +1176,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
hlist_for_each_entry_rcu(tmp_neigh_node,
&orig_node->neigh_list, list) {
neigh_addr = tmp_neigh_node->addr;
- is_dup = batadv_test_bit(tmp_neigh_node->real_bits,
+ is_dup = batadv_test_bit(tmp_neigh_node->bat_iv.real_bits,
orig_node->last_real_seqno,
seqno);
@@ -1011,13 +1192,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
}
/* if the window moved, set the update flag. */
- need_update |= batadv_bit_get_packet(bat_priv,
- tmp_neigh_node->real_bits,
+ bitmap = tmp_neigh_node->bat_iv.real_bits;
+ need_update |= batadv_bit_get_packet(bat_priv, bitmap,
seq_diff, set_mark);
- packet_count = bitmap_weight(tmp_neigh_node->real_bits,
+ packet_count = bitmap_weight(tmp_neigh_node->bat_iv.real_bits,
BATADV_TQ_LOCAL_WINDOW_SIZE);
- tmp_neigh_node->real_packet_count = packet_count;
+ tmp_neigh_node->bat_iv.real_packet_count = packet_count;
}
rcu_read_unlock();
@@ -1029,7 +1210,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
}
out:
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
batadv_orig_node_free_ref(orig_node);
return ret;
}
@@ -1041,7 +1222,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_hard_iface *hard_iface;
- struct batadv_orig_node *orig_neigh_node, *orig_node;
+ struct batadv_orig_node *orig_neigh_node, *orig_node, *orig_node_tmp;
struct batadv_neigh_node *router = NULL, *router_router = NULL;
struct batadv_neigh_node *orig_neigh_router = NULL;
int has_directlink_flag;
@@ -1125,8 +1306,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
int16_t if_num;
uint8_t *weight;
- orig_neigh_node = batadv_get_orig_node(bat_priv,
- ethhdr->h_source);
+ orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
+ ethhdr->h_source);
if (!orig_neigh_node)
return;
@@ -1140,15 +1321,15 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
if_num = if_incoming->if_num;
offset = if_num * BATADV_NUM_WORDS;
- spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
- word = &(orig_neigh_node->bcast_own[offset]);
+ spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
+ word = &(orig_neigh_node->bat_iv.bcast_own[offset]);
bit_pos = if_incoming_seqno - 2;
bit_pos -= ntohl(batadv_ogm_packet->seqno);
batadv_set_bit(word, bit_pos);
- weight = &orig_neigh_node->bcast_own_sum[if_num];
+ weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num];
*weight = bitmap_weight(word,
BATADV_TQ_LOCAL_WINDOW_SIZE);
- spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
+ spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
}
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1171,7 +1352,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
return;
}
- orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig);
+ orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig);
if (!orig_node)
return;
@@ -1192,10 +1373,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
}
router = batadv_orig_node_get_router(orig_node);
- if (router)
- router_router = batadv_orig_node_get_router(router->orig_node);
+ if (router) {
+ orig_node_tmp = router->orig_node;
+ router_router = batadv_orig_node_get_router(orig_node_tmp);
+ }
- if ((router && router->tq_avg != 0) &&
+ if ((router && router->bat_iv.tq_avg != 0) &&
(batadv_compare_eth(router->addr, ethhdr->h_source)))
is_from_best_next_hop = true;
@@ -1219,8 +1402,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
if (is_single_hop_neigh)
orig_neigh_node = orig_node;
else
- orig_neigh_node = batadv_get_orig_node(bat_priv,
- ethhdr->h_source);
+ orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
+ ethhdr->h_source);
if (!orig_neigh_node)
goto out;
@@ -1351,6 +1534,106 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
return NET_RX_SUCCESS;
}
+/**
+ * batadv_iv_ogm_orig_print - print the originator table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: debugfs table seq_file struct
+ */
+static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
+ struct seq_file *seq)
+{
+ struct batadv_neigh_node *neigh_node, *neigh_node_tmp;
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ int last_seen_msecs, last_seen_secs;
+ struct batadv_orig_node *orig_node;
+ unsigned long last_seen_jiffies;
+ struct hlist_head *head;
+ int batman_count = 0;
+ uint32_t i;
+
+ seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
+ "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
+ "Nexthop", "outgoingIF", "Potential nexthops");
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ neigh_node = batadv_orig_node_get_router(orig_node);
+ if (!neigh_node)
+ continue;
+
+ if (neigh_node->bat_iv.tq_avg == 0)
+ goto next;
+
+ last_seen_jiffies = jiffies - orig_node->last_seen;
+ last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
+ last_seen_secs = last_seen_msecs / 1000;
+ last_seen_msecs = last_seen_msecs % 1000;
+
+ seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:",
+ orig_node->orig, last_seen_secs,
+ last_seen_msecs, neigh_node->bat_iv.tq_avg,
+ neigh_node->addr,
+ neigh_node->if_incoming->net_dev->name);
+
+ hlist_for_each_entry_rcu(neigh_node_tmp,
+ &orig_node->neigh_list, list) {
+ seq_printf(seq, " %pM (%3i)",
+ neigh_node_tmp->addr,
+ neigh_node_tmp->bat_iv.tq_avg);
+ }
+
+ seq_puts(seq, "\n");
+ batman_count++;
+
+next:
+ batadv_neigh_node_free_ref(neigh_node);
+ }
+ rcu_read_unlock();
+ }
+
+ if (batman_count == 0)
+ seq_puts(seq, "No batman nodes in range ...\n");
+}
+
+/**
+ * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @neigh2: the second neighbor object of the comparison
+ *
+ * Returns a value less, equal to or greater than 0 if the metric via neigh1 is
+ * lower, the same as or higher than the metric via neigh2
+ */
+static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
+ struct batadv_neigh_node *neigh2)
+{
+ uint8_t tq1, tq2;
+
+ tq1 = neigh1->bat_iv.tq_avg;
+ tq2 = neigh2->bat_iv.tq_avg;
+
+ return tq1 - tq2;
+}
+
+/**
+ * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than
+ * neigh2 from the metric perspective
+ * @neigh1: the first neighbor object of the comparison
+ * @neigh2: the second neighbor object of the comparison
+ *
+ * Returns true if the metric via neigh1 is equally good or better than the
+ * metric via neigh2, false otherwise.
+ */
+static bool batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1,
+ struct batadv_neigh_node *neigh2)
+{
+ int diff = batadv_iv_ogm_neigh_cmp(neigh1, neigh2);
+
+ return diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
+}
+
static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.name = "BATMAN_IV",
.bat_iface_enable = batadv_iv_ogm_iface_enable,
@@ -1359,6 +1642,12 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
.bat_ogm_schedule = batadv_iv_ogm_schedule,
.bat_ogm_emit = batadv_iv_ogm_emit,
+ .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
+ .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob,
+ .bat_orig_print = batadv_iv_ogm_orig_print,
+ .bat_orig_free = batadv_iv_ogm_orig_free,
+ .bat_orig_add_if = batadv_iv_ogm_orig_add_if,
+ .bat_orig_del_if = batadv_iv_ogm_orig_del_if,
};
int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5bb58d7bdd56..28eb5e6d0a02 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -411,10 +411,10 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
return NULL;
}
- /* this is a gateway now, remove any tt entries */
+ /* this is a gateway now, remove any TT entry on this VLAN */
orig_node = batadv_orig_hash_find(bat_priv, orig);
if (orig_node) {
- batadv_tt_global_del_orig(bat_priv, orig_node,
+ batadv_tt_global_del_orig(bat_priv, orig_node, vid,
"became a backbone gateway");
batadv_orig_node_free_ref(orig_node);
}
@@ -858,27 +858,25 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct sk_buff *skb)
{
- struct ethhdr *ethhdr;
+ struct batadv_bla_claim_dst *bla_dst;
+ uint8_t *hw_src, *hw_dst;
struct vlan_ethhdr *vhdr;
+ struct ethhdr *ethhdr;
struct arphdr *arphdr;
- uint8_t *hw_src, *hw_dst;
- struct batadv_bla_claim_dst *bla_dst;
+ unsigned short vid;
__be16 proto;
int headlen;
- unsigned short vid = BATADV_NO_FLAGS;
int ret;
+ vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
- if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
+ proto = ethhdr->h_proto;
+ headlen = ETH_HLEN;
+ if (vid & BATADV_VLAN_HAS_TAG) {
vhdr = (struct vlan_ethhdr *)ethhdr;
- vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
- vid |= BATADV_VLAN_HAS_TAG;
proto = vhdr->h_vlan_encapsulated_proto;
- headlen = sizeof(*vhdr);
- } else {
- proto = ethhdr->h_proto;
- headlen = ETH_HLEN;
+ headlen += VLAN_HLEN;
}
if (proto != htons(ETH_P_ARP))
@@ -1317,12 +1315,14 @@ out:
/* @bat_priv: the bat priv with all the soft interface information
* @orig: originator mac address
+ * @vid: VLAN identifier
*
- * check if the originator is a gateway for any VLAN ID.
+ * Check if the originator is a gateway for the VLAN identified by vid.
*
- * returns 1 if it is found, 0 otherwise
+ * Returns true if orig is a backbone for this vid, false otherwise.
*/
-int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
+bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
struct hlist_head *head;
@@ -1330,25 +1330,26 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
int i;
if (!atomic_read(&bat_priv->bridge_loop_avoidance))
- return 0;
+ return false;
if (!hash)
- return 0;
+ return false;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
- if (batadv_compare_eth(backbone_gw->orig, orig)) {
+ if (batadv_compare_eth(backbone_gw->orig, orig) &&
+ backbone_gw->vid == vid) {
rcu_read_unlock();
- return 1;
+ return true;
}
}
rcu_read_unlock();
}
- return 0;
+ return false;
}
@@ -1365,10 +1366,8 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
int batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct batadv_orig_node *orig_node, int hdr_size)
{
- struct ethhdr *ethhdr;
- struct vlan_ethhdr *vhdr;
struct batadv_bla_backbone_gw *backbone_gw;
- unsigned short vid = BATADV_NO_FLAGS;
+ unsigned short vid;
if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
return 0;
@@ -1377,16 +1376,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
if (!pskb_may_pull(skb, hdr_size + ETH_HLEN))
return 0;
- ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size);
-
- if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
- if (!pskb_may_pull(skb, hdr_size + VLAN_ETH_HLEN))
- return 0;
-
- vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
- vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
- vid |= BATADV_VLAN_HAS_TAG;
- }
+ vid = batadv_get_vid(skb, hdr_size);
/* see if this originator is a backbone gw for this VLAN */
backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv,
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 4b102e71e5bd..da173e760e77 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -30,7 +30,8 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
void *offset);
-int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig);
+bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig,
+ unsigned short vid);
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
struct sk_buff *skb);
void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
@@ -74,10 +75,11 @@ static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
return 0;
}
-static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
- uint8_t *orig)
+static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
+ uint8_t *orig,
+ unsigned short vid)
{
- return 0;
+ return false;
}
static inline int
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 99da41290f82..6c8c3934bd7b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -19,6 +19,7 @@
#include <linux/if_ether.h>
#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
#include <net/arp.h>
#include "main.h"
@@ -205,15 +206,11 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
*/
static uint32_t batadv_hash_dat(const void *data, uint32_t size)
{
- const unsigned char *key = data;
uint32_t hash = 0;
- size_t i;
+ const struct batadv_dat_entry *dat = data;
- for (i = 0; i < 4; i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
+ hash = batadv_hash_bytes(hash, &dat->ip, sizeof(dat->ip));
+ hash = batadv_hash_bytes(hash, &dat->vid, sizeof(dat->vid));
hash += (hash << 3);
hash ^= (hash >> 11);
@@ -227,21 +224,26 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size)
* table
* @bat_priv: the bat priv with all the soft interface information
* @ip: search key
+ * @vid: VLAN identifier
*
* Returns the dat_entry if found, NULL otherwise.
*/
static struct batadv_dat_entry *
-batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
+batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
+ unsigned short vid)
{
struct hlist_head *head;
- struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL;
+ struct batadv_dat_entry to_find, *dat_entry, *dat_entry_tmp = NULL;
struct batadv_hashtable *hash = bat_priv->dat.hash;
uint32_t index;
if (!hash)
return NULL;
- index = batadv_hash_dat(&ip, hash->size);
+ to_find.ip = ip;
+ to_find.vid = vid;
+
+ index = batadv_hash_dat(&to_find, hash->size);
head = &hash->table[index];
rcu_read_lock();
@@ -265,22 +267,24 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
* @bat_priv: the bat priv with all the soft interface information
* @ip: ipv4 to add/edit
* @mac_addr: mac address to assign to the given ipv4
+ * @vid: VLAN identifier
*/
static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
- uint8_t *mac_addr)
+ uint8_t *mac_addr, unsigned short vid)
{
struct batadv_dat_entry *dat_entry;
int hash_added;
- dat_entry = batadv_dat_entry_hash_find(bat_priv, ip);
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip, vid);
/* if this entry is already known, just update it */
if (dat_entry) {
if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr))
memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
dat_entry->last_update = jiffies;
batadv_dbg(BATADV_DBG_DAT, bat_priv,
- "Entry updated: %pI4 %pM\n", &dat_entry->ip,
- dat_entry->mac_addr);
+ "Entry updated: %pI4 %pM (vid: %d)\n",
+ &dat_entry->ip, dat_entry->mac_addr,
+ BATADV_PRINT_VID(vid));
goto out;
}
@@ -289,12 +293,13 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
goto out;
dat_entry->ip = ip;
+ dat_entry->vid = vid;
memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
dat_entry->last_update = jiffies;
atomic_set(&dat_entry->refcount, 2);
hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
- batadv_hash_dat, &dat_entry->ip,
+ batadv_hash_dat, dat_entry,
&dat_entry->hash_entry);
if (unlikely(hash_added != 0)) {
@@ -303,8 +308,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
goto out;
}
- batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n",
- &dat_entry->ip, dat_entry->mac_addr);
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n",
+ &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid));
out:
if (dat_entry)
@@ -756,8 +761,8 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name);
- seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC",
- "last-seen");
+ seq_printf(seq, " %-7s %-9s %4s %11s\n", "IPv4",
+ "MAC", "VID", "last-seen");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -770,8 +775,9 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
last_seen_msecs = last_seen_msecs % 60000;
last_seen_secs = last_seen_msecs / 1000;
- seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n",
+ seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
+ BATADV_PRINT_VID(dat_entry->vid),
last_seen_mins, last_seen_secs);
}
rcu_read_unlock();
@@ -858,6 +864,31 @@ out:
}
/**
+ * batadv_dat_get_vid - extract the VLAN identifier from skb if any
+ * @skb: the buffer containing the packet to extract the VID from
+ * @hdr_size: the size of the batman-adv header encapsulating the packet
+ *
+ * If the packet embedded in the skb is vlan tagged this function returns the
+ * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned.
+ */
+static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
+{
+ unsigned short vid;
+
+ vid = batadv_get_vid(skb, *hdr_size);
+
+ /* ARP parsing functions jump forward by hdr_size + ETH_HLEN.
+ * If the header contained in the packet is a VLAN one (which is longer)
+ * hdr_size is updated so that the functions will still skip the
+ * correct amount of bytes.
+ */
+ if (vid & BATADV_VLAN_HAS_TAG)
+ *hdr_size += VLAN_HLEN;
+
+ return vid;
+}
+
+/**
* batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
* answer using DAT
* @bat_priv: the bat priv with all the soft interface information
@@ -876,26 +907,31 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
+ int hdr_size = 0;
+ unsigned short vid;
if (!atomic_read(&bat_priv->distributed_arp_table))
goto out;
- type = batadv_arp_get_type(bat_priv, skb, 0);
+ vid = batadv_dat_get_vid(skb, &hdr_size);
+
+ type = batadv_arp_get_type(bat_priv, skb, hdr_size);
/* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast
* message to the selected DHT candidates
*/
if (type != ARPOP_REQUEST)
goto out;
- batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST");
+ batadv_dbg_arp(bat_priv, skb, type, hdr_size,
+ "Parsing outgoing ARP REQUEST");
- ip_src = batadv_arp_ip_src(skb, 0);
- hw_src = batadv_arp_hw_src(skb, 0);
- ip_dst = batadv_arp_ip_dst(skb, 0);
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ hw_src = batadv_arp_hw_src(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
- batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
- dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid);
if (dat_entry) {
/* If the ARP request is destined for a local client the local
* client will answer itself. DAT would only generate a
@@ -905,7 +941,8 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
* additional DAT answer may trigger kernel warnings about
* a packet coming from the wrong port.
*/
- if (batadv_is_my_client(bat_priv, dat_entry->mac_addr)) {
+ if (batadv_is_my_client(bat_priv, dat_entry->mac_addr,
+ BATADV_NO_FLAGS)) {
ret = true;
goto out;
}
@@ -916,11 +953,15 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
if (!skb_new)
goto out;
+ if (vid & BATADV_VLAN_HAS_TAG)
+ skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
+ vid & VLAN_VID_MASK);
+
skb_reset_mac_header(skb_new);
skb_new->protocol = eth_type_trans(skb_new,
bat_priv->soft_iface);
bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
+ bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
bat_priv->soft_iface->last_rx = jiffies;
netif_rx(skb_new);
@@ -955,11 +996,14 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb_new;
struct batadv_dat_entry *dat_entry = NULL;
bool ret = false;
+ unsigned short vid;
int err;
if (!atomic_read(&bat_priv->distributed_arp_table))
goto out;
+ vid = batadv_dat_get_vid(skb, &hdr_size);
+
type = batadv_arp_get_type(bat_priv, skb, hdr_size);
if (type != ARPOP_REQUEST)
goto out;
@@ -971,9 +1015,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
batadv_dbg_arp(bat_priv, skb, type, hdr_size,
"Parsing incoming ARP REQUEST");
- batadv_dat_entry_add(bat_priv, ip_src, hw_src);
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
- dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid);
if (!dat_entry)
goto out;
@@ -984,17 +1028,22 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
if (!skb_new)
goto out;
+ if (vid & BATADV_VLAN_HAS_TAG)
+ skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
+ vid & VLAN_VID_MASK);
+
+ /* To preserve backwards compatibility, the node has to choose the outgoing
* format based on the incoming request packet type. The assumption is
* that a node not using the 4addr packet format doesn't support it.
*/
if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
- err = batadv_send_skb_unicast_4addr(bat_priv, skb_new,
- BATADV_P_DAT_CACHE_REPLY);
+ err = batadv_send_skb_via_tt_4addr(bat_priv, skb_new,
+ BATADV_P_DAT_CACHE_REPLY,
+ vid);
else
- err = batadv_send_skb_unicast(bat_priv, skb_new);
+ err = batadv_send_skb_via_tt(bat_priv, skb_new, vid);
- if (!err) {
+ if (err != NET_XMIT_DROP) {
batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX);
ret = true;
}
@@ -1017,23 +1066,28 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
uint16_t type;
__be32 ip_src, ip_dst;
uint8_t *hw_src, *hw_dst;
+ int hdr_size = 0;
+ unsigned short vid;
if (!atomic_read(&bat_priv->distributed_arp_table))
return;
- type = batadv_arp_get_type(bat_priv, skb, 0);
+ vid = batadv_dat_get_vid(skb, &hdr_size);
+
+ type = batadv_arp_get_type(bat_priv, skb, hdr_size);
if (type != ARPOP_REPLY)
return;
- batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY");
+ batadv_dbg_arp(bat_priv, skb, type, hdr_size,
+ "Parsing outgoing ARP REPLY");
- hw_src = batadv_arp_hw_src(skb, 0);
- ip_src = batadv_arp_ip_src(skb, 0);
- hw_dst = batadv_arp_hw_dst(skb, 0);
- ip_dst = batadv_arp_ip_dst(skb, 0);
+ hw_src = batadv_arp_hw_src(skb, hdr_size);
+ ip_src = batadv_arp_ip_src(skb, hdr_size);
+ hw_dst = batadv_arp_hw_dst(skb, hdr_size);
+ ip_dst = batadv_arp_ip_dst(skb, hdr_size);
- batadv_dat_entry_add(bat_priv, ip_src, hw_src);
- batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
+ batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
/* Send the ARP reply to the candidates for both the IP addresses that
* the node obtained from the ARP reply
@@ -1055,10 +1109,13 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
__be32 ip_src, ip_dst;
uint8_t *hw_src, *hw_dst;
bool ret = false;
+ unsigned short vid;
if (!atomic_read(&bat_priv->distributed_arp_table))
goto out;
+ vid = batadv_dat_get_vid(skb, &hdr_size);
+
type = batadv_arp_get_type(bat_priv, skb, hdr_size);
if (type != ARPOP_REPLY)
goto out;
@@ -1074,13 +1131,13 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
/* Update our internal cache with both the IP addresses the node got
* within the ARP reply
*/
- batadv_dat_entry_add(bat_priv, ip_src, hw_src);
- batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
+ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
+ batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
/* if this REPLY is directed to a client of mine, let's deliver the
* packet to the interface
*/
- ret = !batadv_is_my_client(bat_priv, hw_dst);
+ ret = !batadv_is_my_client(bat_priv, hw_dst, vid);
out:
if (ret)
kfree_skb(skb);
@@ -1103,7 +1160,8 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
__be32 ip_dst;
struct batadv_dat_entry *dat_entry = NULL;
bool ret = false;
- const size_t bcast_len = sizeof(struct batadv_bcast_packet);
+ int hdr_size = sizeof(struct batadv_bcast_packet);
+ unsigned short vid;
if (!atomic_read(&bat_priv->distributed_arp_table))
goto out;
@@ -1114,12 +1172,14 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
if (forw_packet->num_packets)
goto out;
- type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len);
+ vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size);
+
+ type = batadv_arp_get_type(bat_priv, forw_packet->skb, hdr_size);
if (type != ARPOP_REQUEST)
goto out;
- ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len);
- dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
+ ip_dst = batadv_arp_ip_dst(forw_packet->skb, hdr_size);
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst, vid);
/* check if the node already got this entry */
if (!dat_entry) {
batadv_dbg(BATADV_DBG_DAT, bat_priv,
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 053bb318c7a7..2449afaa7638 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -137,7 +137,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (!atomic_inc_not_zero(&gw_node->refcount))
goto next;
- tq_avg = router->tq_avg;
+ tq_avg = router->bat_iv.tq_avg;
switch (atomic_read(&bat_priv->gw_sel_class)) {
case 1: /* fast connection */
@@ -221,11 +221,6 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
struct batadv_neigh_node *router = NULL;
char gw_addr[18] = { '\0' };
- /* The batman daemon checks here if we already passed a full originator
- * cycle in order to make sure we don't choose the first gateway we
- * hear about. This check is based on the daemon's uptime which we
- * don't have.
- */
if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
goto out;
@@ -261,7 +256,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
next_gw->bandwidth_down / 10,
next_gw->bandwidth_down % 10,
next_gw->bandwidth_up / 10,
- next_gw->bandwidth_up % 10, router->tq_avg);
+ next_gw->bandwidth_up % 10, router->bat_iv.tq_avg);
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD,
gw_addr);
} else {
@@ -271,7 +266,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
next_gw->bandwidth_down / 10,
next_gw->bandwidth_down % 10,
next_gw->bandwidth_up / 10,
- next_gw->bandwidth_up % 10, router->tq_avg);
+ next_gw->bandwidth_up % 10, router->bat_iv.tq_avg);
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE,
gw_addr);
}
@@ -310,8 +305,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
if (!router_orig)
goto out;
- gw_tq_avg = router_gw->tq_avg;
- orig_tq_avg = router_orig->tq_avg;
+ gw_tq_avg = router_gw->bat_iv.tq_avg;
+ orig_tq_avg = router_orig->bat_iv.tq_avg;
/* the TQ value has to be better */
if (orig_tq_avg < gw_tq_avg)
@@ -533,7 +528,7 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
(curr_gw == gw_node ? "=>" : " "),
gw_node->orig_node->orig,
- router->tq_avg, router->addr,
+ router->bat_iv.tq_avg, router->addr,
router->if_incoming->net_dev->name,
gw_node->bandwidth_down / 10,
gw_node->bandwidth_down % 10,
@@ -726,7 +721,20 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
return true;
}
-/* this call might reallocate skb data */
+/**
+ * batadv_gw_out_of_range - check if the DHCP request destination is the best GW
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the outgoing packet
+ *
+ * Check if the skb is a DHCP request and if it is sent to the current best GW
+ * server. Due to topology changes it may be the case that the GW server
+ * previously selected is not the best one anymore.
+ *
+ * Returns true if the packet destination is unicast and it is not the best gw,
+ * false otherwise.
+ *
+ * This call might reallocate skb data.
+ */
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
@@ -737,6 +745,9 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
bool ret, out_of_range = false;
unsigned int header_len = 0;
uint8_t curr_tq_avg;
+ unsigned short vid;
+
+ vid = batadv_get_vid(skb, 0);
ret = batadv_gw_is_dhcp_target(skb, &header_len);
if (!ret)
@@ -744,7 +755,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
ethhdr = (struct ethhdr *)skb->data;
orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
- ethhdr->h_dest);
+ ethhdr->h_dest, vid);
if (!orig_dst_node)
goto out;
@@ -781,7 +792,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
if (!neigh_curr)
goto out;
- curr_tq_avg = neigh_curr->tq_avg;
+ curr_tq_avg = neigh_curr->bat_iv.tq_avg;
break;
case BATADV_GW_MODE_OFF:
default:
@@ -792,7 +803,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
if (!neigh_old)
goto out;
- if (curr_tq_avg - neigh_old->tq_avg > BATADV_GW_THRESHOLD)
+ if (curr_tq_avg - neigh_old->bat_iv.tq_avg > BATADV_GW_THRESHOLD)
out_of_range = true;
out:
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index d564af295db4..57c2a19dcb5c 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -28,6 +28,7 @@
#include "originator.h"
#include "hash.h"
#include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
#include <linux/if_arp.h>
#include <linux/if_ether.h>
@@ -124,8 +125,11 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
*
* Returns true if the net device is a 802.11 wireless device, false otherwise.
*/
-static bool batadv_is_wifi_netdev(struct net_device *net_device)
+bool batadv_is_wifi_netdev(struct net_device *net_device)
{
+ if (!net_device)
+ return false;
+
#ifdef CONFIG_WIRELESS_EXT
/* pre-cfg80211 drivers have to implement WEXT, so it is possible to
* check for wireless_handlers != NULL
@@ -141,34 +145,6 @@ static bool batadv_is_wifi_netdev(struct net_device *net_device)
return false;
}
-/**
- * batadv_is_wifi_iface - check if the given interface represented by ifindex
- * is a wifi interface
- * @ifindex: interface index to check
- *
- * Returns true if the interface represented by ifindex is a 802.11 wireless
- * device, false otherwise.
- */
-bool batadv_is_wifi_iface(int ifindex)
-{
- struct net_device *net_device = NULL;
- bool ret = false;
-
- if (ifindex == BATADV_NULL_IFINDEX)
- goto out;
-
- net_device = dev_get_by_index(&init_net, ifindex);
- if (!net_device)
- goto out;
-
- ret = batadv_is_wifi_netdev(net_device);
-
-out:
- if (net_device)
- dev_put(net_device);
- return ret;
-}
-
static struct batadv_hard_iface *
batadv_hardif_get_active(const struct net_device *soft_iface)
{
@@ -266,16 +242,9 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
int batadv_hardif_min_mtu(struct net_device *soft_iface)
{
- const struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
const struct batadv_hard_iface *hard_iface;
- /* allow big frames if all devices are capable to do so
- * (have MTU > 1500 + batadv_max_header_len())
- */
int min_mtu = ETH_DATA_LEN;
- int max_header_len = batadv_max_header_len();
-
- if (atomic_read(&bat_priv->fragmentation))
- goto out;
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -286,22 +255,40 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
if (hard_iface->soft_iface != soft_iface)
continue;
- min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len,
- min_mtu);
+ min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu);
}
rcu_read_unlock();
+
+ atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+ if (atomic_read(&bat_priv->fragmentation) == 0)
+ goto out;
+
+ /* with fragmentation enabled the maximum size of internally generated
+ * packets such as translation table exchanges or tvlv containers, etc
+ * has to be calculated
+ */
+ min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+ min_mtu -= sizeof(struct batadv_frag_packet);
+ min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
+ atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+ /* with fragmentation enabled we can fragment external packets easily */
+ min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
+
out:
- return min_mtu;
+ return min_mtu - batadv_max_header_len();
}
/* adjusts the MTU if a new interface with a smaller MTU appeared. */
void batadv_update_min_mtu(struct net_device *soft_iface)
{
- int min_mtu;
+ soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
- min_mtu = batadv_hardif_min_mtu(soft_iface);
- if (soft_iface->mtu != min_mtu)
- soft_iface->mtu = min_mtu;
+ /* Check if the local translation table should be cleaned up to match a
+ * new (and smaller) MTU.
+ */
+ batadv_tt_local_resize_to_mtu(soft_iface);
}
static void
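
To make the arithmetic in batadv_hardif_min_mtu() concrete: with fragmentation enabled, the routine first caps the hard-interface minimum at the maximum fragment size, subtracts the fragment header, and multiplies by the maximum fragment count to obtain packet_size_max; external traffic is then re-capped at ETH_DATA_LEN before the batman-adv header overhead is deducted. A stand-alone sketch with illustrative constants (the real values live in the kernel headers):

#include <stdio.h>

/* Illustrative stand-ins; the real constants live in the kernel headers. */
#define ETH_DATA_LEN		1500
#define FRAG_MAX_FRAG_SIZE	1400	/* hypothetical */
#define FRAG_MAX_FRAGMENTS	16	/* hypothetical */
#define FRAG_HDR_LEN		20	/* hypothetical frag packet header size */
#define MAX_HEADER_LEN		32	/* hypothetical batadv_max_header_len() */

static int min_int(int a, int b) { return a < b ? a : b; }

/* Mirrors the flow of batadv_hardif_min_mtu() above. */
static int hardif_min_mtu(int hard_if_min, int fragmentation)
{
	int min_mtu = min_int(hard_if_min, ETH_DATA_LEN);

	if (fragmentation) {
		/* largest internally generated packet that still fits into
		 * FRAG_MAX_FRAGMENTS fragments (packet_size_max)
		 */
		min_mtu = min_int(min_mtu, FRAG_MAX_FRAG_SIZE);
		min_mtu -= FRAG_HDR_LEN;
		min_mtu *= FRAG_MAX_FRAGMENTS;

		/* external packets can always be fragmented */
		min_mtu = min_int(min_mtu, ETH_DATA_LEN);
	}

	return min_mtu - MAX_HEADER_LEN; /* resulting soft-interface MTU */
}

int main(void)
{
	printf("%d\n", hardif_min_mtu(1300, 0)); /* 1268: limited by hard if */
	printf("%d\n", hardif_min_mtu(1300, 1)); /* 1468: fragmentation helps */
	return 0;
}

Note how, under these assumed constants, a 1300-byte hard interface still yields a full 1468-byte soft MTU once fragmentation is enabled.
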
@@ -524,8 +511,12 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
dev_put(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
- batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+ if (!bat_priv->num_ifaces) {
+ batadv_gw_check_client_stop(bat_priv);
+
+ if (autodel == BATADV_IF_CLEANUP_AUTO)
+ batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+ }
netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
@@ -643,6 +634,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
batadv_sysfs_add_meshif(net_dev);
+ bat_priv = netdev_priv(net_dev);
+ batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
return NOTIFY_DONE;
}
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 49892881a7c5..df4c8bd45c40 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -41,6 +41,7 @@ enum batadv_hard_if_cleanup {
extern struct notifier_block batadv_hard_if_notifier;
+bool batadv_is_wifi_netdev(struct net_device *net_device);
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
@@ -51,7 +52,6 @@ void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
void batadv_hardif_free_rcu(struct rcu_head *rcu);
-bool batadv_is_wifi_iface(int ifindex);
static inline void
batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 82ac6472fa6f..29ae4efe3543 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -29,7 +29,7 @@
static struct batadv_socket_client *batadv_socket_client_hash[256];
static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
- struct batadv_icmp_packet_rr *icmp_packet,
+ struct batadv_icmp_header *icmph,
size_t icmp_len);
void batadv_socket_init(void)
@@ -155,13 +155,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
struct batadv_priv *bat_priv = socket_client->bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct sk_buff *skb;
- struct batadv_icmp_packet_rr *icmp_packet;
-
+ struct batadv_icmp_packet_rr *icmp_packet_rr;
+ struct batadv_icmp_header *icmp_header;
struct batadv_orig_node *orig_node = NULL;
struct batadv_neigh_node *neigh_node = NULL;
size_t packet_len = sizeof(struct batadv_icmp_packet);
- if (len < sizeof(struct batadv_icmp_packet)) {
+ if (len < sizeof(struct batadv_icmp_header)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: invalid packet size\n");
return -EINVAL;
@@ -174,8 +174,10 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
goto out;
}
- if (len >= sizeof(struct batadv_icmp_packet_rr))
- packet_len = sizeof(struct batadv_icmp_packet_rr);
+ if (len >= BATADV_ICMP_MAX_PACKET_SIZE)
+ packet_len = BATADV_ICMP_MAX_PACKET_SIZE;
+ else
+ packet_len = len;
skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN);
if (!skb) {
@@ -185,67 +187,78 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
- icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
+ icmp_header = (struct batadv_icmp_header *)skb_put(skb, packet_len);
- if (copy_from_user(icmp_packet, buff, packet_len)) {
+ if (copy_from_user(icmp_header, buff, packet_len)) {
len = -EFAULT;
goto free_skb;
}
- if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) {
+ if (icmp_header->header.packet_type != BATADV_ICMP) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n");
len = -EINVAL;
goto free_skb;
}
- if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
+ switch (icmp_header->msg_type) {
+ case BATADV_ECHO_REQUEST:
+ if (len < sizeof(struct batadv_icmp_packet)) {
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Error - can't send packet from char device: invalid packet size\n");
+ len = -EINVAL;
+ goto free_skb;
+ }
+
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ goto dst_unreach;
+
+ orig_node = batadv_orig_hash_find(bat_priv, icmp_header->dst);
+ if (!orig_node)
+ goto dst_unreach;
+
+ neigh_node = batadv_orig_node_get_router(orig_node);
+ if (!neigh_node)
+ goto dst_unreach;
+
+ if (!neigh_node->if_incoming)
+ goto dst_unreach;
+
+ if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
+ goto dst_unreach;
+
+ icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header;
+ if (packet_len == sizeof(*icmp_packet_rr))
+ memcpy(icmp_packet_rr->rr,
+ neigh_node->if_incoming->net_dev->dev_addr,
+ ETH_ALEN);
+
+ break;
+ default:
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n");
+ "Error - can't send packet from char device: got unknown message type\n");
len = -EINVAL;
goto free_skb;
}
- icmp_packet->icmph.uid = socket_client->index;
+ icmp_header->uid = socket_client->index;
- if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) {
- icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM;
- icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION;
- batadv_socket_add_packet(socket_client, icmp_packet,
+ if (icmp_header->header.version != BATADV_COMPAT_VERSION) {
+ icmp_header->msg_type = BATADV_PARAMETER_PROBLEM;
+ icmp_header->header.version = BATADV_COMPAT_VERSION;
+ batadv_socket_add_packet(socket_client, icmp_header,
packet_len);
goto free_skb;
}
- if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
- goto dst_unreach;
-
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
- if (!orig_node)
- goto dst_unreach;
-
- neigh_node = batadv_orig_node_get_router(orig_node);
- if (!neigh_node)
- goto dst_unreach;
-
- if (!neigh_node->if_incoming)
- goto dst_unreach;
-
- if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
- goto dst_unreach;
-
- memcpy(icmp_packet->icmph.orig,
- primary_if->net_dev->dev_addr, ETH_ALEN);
-
- if (packet_len == sizeof(struct batadv_icmp_packet_rr))
- memcpy(icmp_packet->rr,
- neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
+ memcpy(icmp_header->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
goto out;
dst_unreach:
- icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE;
- batadv_socket_add_packet(socket_client, icmp_packet, packet_len);
+ icmp_header->msg_type = BATADV_DESTINATION_UNREACHABLE;
+ batadv_socket_add_packet(socket_client, icmp_header, packet_len);
free_skb:
kfree_skb(skb);
out:
@@ -298,27 +311,40 @@ err:
return -ENOMEM;
}
+/**
+ * batadv_socket_add_packet - schedule an icmp packet to be sent to userspace
+ * on an icmp socket.
+ * @socket_client: the socket this packet belongs to
+ * @icmph: pointer to the header of the icmp packet
+ * @icmp_len: total length of the icmp packet
+ */
static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
- struct batadv_icmp_packet_rr *icmp_packet,
+ struct batadv_icmp_header *icmph,
size_t icmp_len)
{
struct batadv_socket_packet *socket_packet;
+ size_t len;
socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC);
if (!socket_packet)
return;
+ len = icmp_len;
+ /* check the maximum length before filling the buffer */
+ if (len > sizeof(socket_packet->icmp_packet))
+ len = sizeof(socket_packet->icmp_packet);
+
INIT_LIST_HEAD(&socket_packet->list);
- memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len);
- socket_packet->icmp_len = icmp_len;
+ memcpy(&socket_packet->icmp_packet, icmph, len);
+ socket_packet->icmp_len = len;
spin_lock_bh(&socket_client->lock);
/* while waiting for the lock the socket_client could have been
* deleted
*/
- if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) {
+ if (!batadv_socket_client_hash[icmph->uid]) {
spin_unlock_bh(&socket_client->lock);
kfree(socket_packet);
return;
@@ -342,12 +368,18 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
wake_up(&socket_client->queue_wait);
}
-void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet,
+/**
+ * batadv_socket_receive_packet - schedule an icmp packet to be received
+ * locally and sent to userspace.
+ * @icmph: pointer to the header of the icmp packet
+ * @icmp_len: total length of the icmp packet
+ */
+void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
size_t icmp_len)
{
struct batadv_socket_client *hash;
- hash = batadv_socket_client_hash[icmp_packet->icmph.uid];
+ hash = batadv_socket_client_hash[icmph->uid];
if (hash)
- batadv_socket_add_packet(hash, icmp_packet, icmp_len);
+ batadv_socket_add_packet(hash, icmph, icmp_len);
}
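
A detail of the reworked batadv_socket_add_packet() above is the clamp of icmp_len to the queue entry's buffer size before the memcpy, so oversized packets are truncated rather than overflowing the buffer. A compressed sketch of that clamp-then-copy pattern; the struct layout and buffer size are illustrative assumptions.

#include <stdio.h>
#include <string.h>

struct queued_packet {
	unsigned char buf[128];	/* hypothetical fixed-size queue buffer */
	size_t len;
};

static void add_packet(struct queued_packet *qp, const void *data, size_t len)
{
	/* check the maximum length before filling the buffer */
	if (len > sizeof(qp->buf))
		len = sizeof(qp->buf);

	memcpy(qp->buf, data, len);
	qp->len = len;
}

int main(void)
{
	struct queued_packet qp;
	unsigned char pkt[200] = { 0 };

	add_packet(&qp, pkt, sizeof(pkt));
	printf("stored %zu of %zu bytes\n", qp.len, sizeof(pkt)); /* 128 of 200 */
	return 0;
}
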
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 1fcca37b6223..6665080dff79 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -24,7 +24,7 @@
void batadv_socket_init(void);
int batadv_socket_setup(struct batadv_priv *bat_priv);
-void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet,
+void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
size_t icmp_len);
#endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 7f3a5c426615..c51a5e568f0a 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -110,9 +110,11 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tt.req_list_lock);
spin_lock_init(&bat_priv->tt.roam_list_lock);
spin_lock_init(&bat_priv->tt.last_changeset_lock);
+ spin_lock_init(&bat_priv->tt.commit_lock);
spin_lock_init(&bat_priv->gw.list_lock);
spin_lock_init(&bat_priv->tvlv.container_list_lock);
spin_lock_init(&bat_priv->tvlv.handler_list_lock);
+ spin_lock_init(&bat_priv->softif_vlan_list_lock);
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
@@ -122,6 +124,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_LIST_HEAD(&bat_priv->tt.roam_list);
INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
+ INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
ret = batadv_originator_init(bat_priv);
if (ret < 0)
@@ -131,9 +134,6 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
- batadv_tt_local_add(soft_iface, soft_iface->dev_addr,
- BATADV_NULL_IFINDEX);
-
ret = batadv_bla_init(bat_priv);
if (ret < 0)
goto err;
@@ -501,7 +501,9 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
!bat_algo_ops->bat_iface_update_mac ||
!bat_algo_ops->bat_primary_iface_set ||
!bat_algo_ops->bat_ogm_schedule ||
- !bat_algo_ops->bat_ogm_emit) {
+ !bat_algo_ops->bat_ogm_emit ||
+ !bat_algo_ops->bat_neigh_cmp ||
+ !bat_algo_ops->bat_neigh_is_equiv_or_better) {
pr_info("Routing algo '%s' does not implement required ops\n",
bat_algo_ops->name);
ret = -EINVAL;
@@ -1144,6 +1146,33 @@ out:
batadv_orig_node_free_ref(orig_node);
}
+/**
+ * batadv_get_vid - extract the VLAN identifier from skb if any
+ * @skb: the buffer containing the packet
+ * @header_len: length of the batman header preceding the ethernet header
+ *
+ * If the packet embedded in the skb is vlan tagged this function returns the
+ * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned.
+ */
+unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
+{
+ struct ethhdr *ethhdr = (struct ethhdr *)(skb->data + header_len);
+ struct vlan_ethhdr *vhdr;
+ unsigned short vid;
+
+ if (ethhdr->h_proto != htons(ETH_P_8021Q))
+ return BATADV_NO_FLAGS;
+
+ if (!pskb_may_pull(skb, header_len + VLAN_ETH_HLEN))
+ return BATADV_NO_FLAGS;
+
+ vhdr = (struct vlan_ethhdr *)(skb->data + header_len);
+ vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
+
+ return vid;
+}
+
static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
{
struct batadv_algo_ops *bat_algo_ops;
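
batadv_get_vid() masks the 12-bit VID out of the 802.1Q TCI and sets BIT(15) (BATADV_VLAN_HAS_TAG) so that "tagged with VID 0" remains distinguishable from "untagged". The following user-space sketch performs the same extraction on a raw ethernet frame; it assumes the caller has validated the buffer length, and all names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define ETH_P_8021Q	0x8100
#define VLAN_VID_MASK	0x0fff
#define VLAN_HAS_TAG	(1u << 15)	/* mirrors BATADV_VLAN_HAS_TAG */
#define NO_FLAGS	0

/* frame points at the ethernet header; returns VID | VLAN_HAS_TAG for a
 * tagged frame and NO_FLAGS otherwise
 */
static unsigned short get_vid(const uint8_t *frame)
{
	uint16_t proto = (uint16_t)(frame[12] << 8 | frame[13]);
	uint16_t tci;

	if (proto != ETH_P_8021Q)
		return NO_FLAGS;

	tci = (uint16_t)(frame[14] << 8 | frame[15]);
	return (unsigned short)((tci & VLAN_VID_MASK) | VLAN_HAS_TAG);
}

int main(void)
{
	uint8_t tagged[16] = { [12] = 0x81, [13] = 0x00,
			       [14] = 0x00, [15] = 0x2a };

	printf("vid: 0x%04x\n", (unsigned)get_vid(tagged)); /* 0x802a: VID 42 */
	return 0;
}
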
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 54c13d51edbe..f94f287b8670 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -20,13 +20,13 @@
#ifndef _NET_BATMAN_ADV_MAIN_H_
#define _NET_BATMAN_ADV_MAIN_H_
-#define BATADV_DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \
- "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>"
+#define BATADV_DRIVER_AUTHOR "Marek Lindner <mareklindner@neomailbox.ch>, " \
+ "Simon Wunderlich <sw@simonwunderlich.de>"
#define BATADV_DRIVER_DESC "B.A.T.M.A.N. advanced"
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.4.0"
+#define BATADV_SOURCE_VERSION "2013.5.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -86,6 +86,12 @@
/* numbers of originator to contact for any PUT/GET DHT operation */
#define BATADV_DAT_CANDIDATES_NUM 3
+/**
+ * BATADV_TQ_SIMILARITY_THRESHOLD - maximum number of TQ points by which a
+ * secondary metric may differ from the primary one and still be considered
+ * acceptable
+ */
+#define BATADV_TQ_SIMILARITY_THRESHOLD 50
+
/* how much worse secondary interfaces may be to be considered as bonding
* candidates
*/
@@ -167,15 +173,9 @@ enum batadv_uev_type {
#include <net/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/seq_file.h>
-#include "types.h"
+#include <linux/if_vlan.h>
-/**
- * batadv_vlan_flags - flags for the four MSB of any vlan ID field
- * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
- */
-enum batadv_vlan_flags {
- BATADV_VLAN_HAS_TAG = BIT(15),
-};
+#include "types.h"
#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \
(int)(vid & VLAN_VID_MASK) : -1)
@@ -368,5 +368,6 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
uint8_t *dst, uint8_t type, uint8_t version,
void *tvlv_value, uint16_t tvlv_value_len);
+unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
#endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 23f611bedb0f..351e199bc0af 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1003,7 +1003,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
struct batadv_nc_packet *nc_packet,
struct batadv_neigh_node *neigh_node)
{
- uint8_t tq_weighted_neigh, tq_weighted_coding;
+ uint8_t tq_weighted_neigh, tq_weighted_coding, tq_tmp;
struct sk_buff *skb_dest, *skb_src;
struct batadv_unicast_packet *packet1;
struct batadv_unicast_packet *packet2;
@@ -1028,8 +1028,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
if (!router_coding)
goto out;
- tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg);
- tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg);
+ tq_tmp = batadv_nc_random_weight_tq(router_neigh->bat_iv.tq_avg);
+ tq_weighted_neigh = tq_tmp;
+ tq_tmp = batadv_nc_random_weight_tq(router_coding->bat_iv.tq_avg);
+ tq_weighted_coding = tq_tmp;
/* Select one destination for the MAC-header dst-field based on
* weighted TQ-values.
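
The hunk above feeds both candidates' TQ averages through batadv_nc_random_weight_tq() before comparing them, so the MAC-header destination is chosen probabilistically in proportion to link quality. A sketch of that weighted draw; the weighting helper is a simplified stand-in (the kernel draws randomness from get_random_bytes(), not rand()).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define TQ_MAX_VALUE 255

/* Simplified stand-in: randomize the estimated packet loss (max - tq),
 * then convert the randomized loss back into an estimated tq.
 */
static uint8_t random_weight_tq(uint8_t tq)
{
	unsigned int rand_loss = (unsigned int)(rand() % 256) *
				 (TQ_MAX_VALUE - tq);

	return (uint8_t)(TQ_MAX_VALUE - rand_loss / TQ_MAX_VALUE);
}

int main(void)
{
	uint8_t neigh, coding;

	srand((unsigned int)time(NULL));

	neigh = random_weight_tq(200);
	coding = random_weight_tq(180);

	/* the better link wins more often, but not always */
	printf("send towards %s\n", neigh >= coding ? "neigh" : "coding");
	return 0;
}
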
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index a591dc5c321e..8ab14340d10f 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -36,7 +36,7 @@ static struct lock_class_key batadv_orig_hash_lock_class_key;
static void batadv_purge_orig(struct work_struct *work);
/* returns 1 if they are the same originator */
-static int batadv_compare_orig(const struct hlist_node *node, const void *data2)
+int batadv_compare_orig(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_orig_node,
hash_entry);
@@ -44,6 +44,88 @@ static int batadv_compare_orig(const struct hlist_node *node, const void *data2)
return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
}
+/**
+ * batadv_orig_node_vlan_get - get an orig_node_vlan object
+ * @orig_node: the originator serving the VLAN
+ * @vid: the VLAN identifier
+ *
+ * Returns the vlan object identified by vid and belonging to orig_node or NULL
+ * if it does not exist.
+ */
+struct batadv_orig_node_vlan *
+batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
+ unsigned short vid)
+{
+ struct batadv_orig_node_vlan *vlan = NULL, *tmp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) {
+ if (tmp->vid != vid)
+ continue;
+
+ if (!atomic_inc_not_zero(&tmp->refcount))
+ continue;
+
+ vlan = tmp;
+
+ break;
+ }
+ rcu_read_unlock();
+
+ return vlan;
+}
+
+/**
+ * batadv_orig_node_vlan_new - search and possibly create an orig_node_vlan
+ * object
+ * @orig_node: the originator serving the VLAN
+ * @vid: the VLAN identifier
+ *
+ * Returns NULL in case of failure or the vlan object identified by vid and
+ * belonging to orig_node otherwise. The object is created and added to the list
+ * if it does not exist.
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+struct batadv_orig_node_vlan *
+batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
+ unsigned short vid)
+{
+ struct batadv_orig_node_vlan *vlan;
+
+ spin_lock_bh(&orig_node->vlan_list_lock);
+
+ /* first look if an object for this vid already exists */
+ vlan = batadv_orig_node_vlan_get(orig_node, vid);
+ if (vlan)
+ goto out;
+
+ vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC);
+ if (!vlan)
+ goto out;
+
+ atomic_set(&vlan->refcount, 2);
+ vlan->vid = vid;
+
+ list_add_rcu(&vlan->list, &orig_node->vlan_list);
+
+out:
+ spin_unlock_bh(&orig_node->vlan_list_lock);
+
+ return vlan;
+}
+
+/**
+ * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free
+ * the originator-vlan object
+ * @orig_vlan: the originator-vlan object to release
+ */
+void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan)
+{
+ if (atomic_dec_and_test(&orig_vlan->refcount))
+ kfree_rcu(orig_vlan, rcu);
+}
+
int batadv_originator_init(struct batadv_priv *bat_priv)
{
if (bat_priv->orig_hash)
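
batadv_orig_node_vlan_get() above follows the common RCU lookup idiom: traverse the list under rcu_read_lock() and hand out an element only once atomic_inc_not_zero() succeeds, so objects whose refcount already dropped to zero are skipped instead of being resurrected. A user-space sketch of just the refcounting half, with RCU elided and all types illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct vlan_entry {
	unsigned short vid;
	atomic_int refcount;
};

/* Take a reference only if the object is still live (refcount > 0),
 * mirroring atomic_inc_not_zero().
 */
static bool get_ref(struct vlan_entry *v)
{
	int old = atomic_load(&v->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&v->refcount, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* object is dying, skip it */
}

static void put_ref(struct vlan_entry *v)
{
	if (atomic_fetch_sub(&v->refcount, 1) == 1)
		printf("last reference dropped: free vid %u\n", v->vid);
}

int main(void)
{
	struct vlan_entry v = { .vid = 42, .refcount = 1 };

	if (get_ref(&v)) {
		/* ... use the entry ... */
		put_ref(&v);
	}
	put_ref(&v); /* drop the initial reference: triggers the free path */
	return 0;
}
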
@@ -90,11 +172,20 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node)
return router;
}
+/**
+ * batadv_neigh_node_new - create and init a new neigh_node object
+ * @hard_iface: the interface where the neighbour is connected to
+ * @neigh_addr: the mac address of the neighbour interface
+ * @orig_node: originator object representing the neighbour
+ *
+ * Allocates a new neigh_node object and initialises all the generic fields.
+ * Returns the new object or NULL on failure.
+ */
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr)
+ const uint8_t *neigh_addr,
+ struct batadv_orig_node *orig_node)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_neigh_node *neigh_node;
neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC);
@@ -104,15 +195,14 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
INIT_HLIST_NODE(&neigh_node->list);
memcpy(neigh_node->addr, neigh_addr, ETH_ALEN);
- spin_lock_init(&neigh_node->lq_update_lock);
+ neigh_node->if_incoming = hard_iface;
+ neigh_node->orig_node = orig_node;
+
+ INIT_LIST_HEAD(&neigh_node->bonding_list);
/* extra reference for return */
atomic_set(&neigh_node->refcount, 2);
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM on interface %s\n", neigh_addr,
- hard_iface->net_dev->name);
-
out:
return neigh_node;
}
@@ -148,12 +238,13 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
batadv_frag_purge_orig(orig_node, NULL);
- batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
+ batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1,
"originator timed out");
+ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
kfree(orig_node->tt_buff);
- kfree(orig_node->bcast_own);
- kfree(orig_node->bcast_own_sum);
kfree(orig_node);
}
@@ -211,20 +302,22 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(hash);
}
-/* this function finds or creates an originator entry for the given
- * address if it does not exits
+/**
+ * batadv_orig_node_new - creates a new orig_node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac address of the originator
+ *
+ * Creates a new originator object and initialises all the generic fields.
+ * The new object is not added to the originator list.
+ * Returns the newly created object or NULL on failure.
*/
-struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
+struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const uint8_t *addr)
{
struct batadv_orig_node *orig_node;
- int size, i;
- int hash_added;
+ struct batadv_orig_node_vlan *vlan;
unsigned long reset_time;
-
- orig_node = batadv_orig_hash_find(bat_priv, addr);
- if (orig_node)
- return orig_node;
+ int i;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Creating new originator: %pM\n", addr);
@@ -235,10 +328,12 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
INIT_HLIST_HEAD(&orig_node->neigh_list);
INIT_LIST_HEAD(&orig_node->bond_list);
- spin_lock_init(&orig_node->ogm_cnt_lock);
+ INIT_LIST_HEAD(&orig_node->vlan_list);
spin_lock_init(&orig_node->bcast_seqno_lock);
spin_lock_init(&orig_node->neigh_list_lock);
spin_lock_init(&orig_node->tt_buff_lock);
+ spin_lock_init(&orig_node->tt_lock);
+ spin_lock_init(&orig_node->vlan_list_lock);
batadv_nc_init_orig(orig_node);
@@ -250,25 +345,24 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
memcpy(orig_node->orig, addr, ETH_ALEN);
batadv_dat_init_orig_node_addr(orig_node);
orig_node->router = NULL;
- orig_node->tt_crc = 0;
atomic_set(&orig_node->last_ttvn, 0);
orig_node->tt_buff = NULL;
orig_node->tt_buff_len = 0;
- atomic_set(&orig_node->tt_size, 0);
reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS);
orig_node->bcast_seqno_reset = reset_time;
orig_node->batman_seqno_reset = reset_time;
atomic_set(&orig_node->bond_candidates, 0);
- size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS;
-
- orig_node->bcast_own = kzalloc(size, GFP_ATOMIC);
- if (!orig_node->bcast_own)
+ /* create a vlan object for the "untagged" LAN */
+ vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS);
+ if (!vlan)
goto free_orig_node;
-
- size = bat_priv->num_ifaces * sizeof(uint8_t);
- orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC);
+ /* batadv_orig_node_vlan_new() increases the refcounter.
+ * Immediately release vlan since it is not needed anymore in this
+ * context
+ */
+ batadv_orig_node_vlan_free_ref(vlan);
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
INIT_HLIST_HEAD(&orig_node->fragments[i].head);
@@ -276,20 +370,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
orig_node->fragments[i].size = 0;
}
- if (!orig_node->bcast_own_sum)
- goto free_bcast_own;
-
- hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
- batadv_choose_orig, orig_node,
- &orig_node->hash_entry);
- if (hash_added != 0)
- goto free_bcast_own_sum;
-
return orig_node;
-free_bcast_own_sum:
- kfree(orig_node->bcast_own_sum);
-free_bcast_own:
- kfree(orig_node->bcast_own);
free_orig_node:
kfree(orig_node);
return NULL;
@@ -298,15 +379,16 @@ free_orig_node:
static bool
batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- struct batadv_neigh_node **best_neigh_node)
+ struct batadv_neigh_node **best_neigh)
{
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
bool neigh_purged = false;
unsigned long last_seen;
struct batadv_hard_iface *if_incoming;
- *best_neigh_node = NULL;
+ *best_neigh = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
@@ -339,9 +421,12 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
batadv_bonding_candidate_del(orig_node, neigh_node);
batadv_neigh_node_free_ref(neigh_node);
} else {
- if ((!*best_neigh_node) ||
- (neigh_node->tq_avg > (*best_neigh_node)->tq_avg))
- *best_neigh_node = neigh_node;
+ /* store the best neighbor if this is the first
+ * iteration or if a better one has been found
+ */
+ if (!*best_neigh ||
+ bao->bat_neigh_cmp(neigh_node, *best_neigh) > 0)
+ *best_neigh = neigh_node;
}
}
@@ -430,100 +515,26 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
struct batadv_hard_iface *primary_if;
- struct batadv_orig_node *orig_node;
- struct batadv_neigh_node *neigh_node, *neigh_node_tmp;
- int batman_count = 0;
- int last_seen_secs;
- int last_seen_msecs;
- unsigned long last_seen_jiffies;
- uint32_t i;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
- goto out;
+ return 0;
- seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
+ seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
- primary_if->net_dev->dev_addr, net_dev->name);
- seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
- "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
- "Nexthop", "outgoingIF", "Potential nexthops");
+ primary_if->net_dev->dev_addr, net_dev->name,
+ bat_priv->bat_algo_ops->name);
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
+ batadv_hardif_free_ref(primary_if);
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- neigh_node = batadv_orig_node_get_router(orig_node);
- if (!neigh_node)
- continue;
-
- if (neigh_node->tq_avg == 0)
- goto next;
-
- last_seen_jiffies = jiffies - orig_node->last_seen;
- last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
- last_seen_secs = last_seen_msecs / 1000;
- last_seen_msecs = last_seen_msecs % 1000;
-
- seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:",
- orig_node->orig, last_seen_secs,
- last_seen_msecs, neigh_node->tq_avg,
- neigh_node->addr,
- neigh_node->if_incoming->net_dev->name);
-
- hlist_for_each_entry_rcu(neigh_node_tmp,
- &orig_node->neigh_list, list) {
- seq_printf(seq, " %pM (%3i)",
- neigh_node_tmp->addr,
- neigh_node_tmp->tq_avg);
- }
-
- seq_puts(seq, "\n");
- batman_count++;
-
-next:
- batadv_neigh_node_free_ref(neigh_node);
- }
- rcu_read_unlock();
+ if (!bat_priv->bat_algo_ops->bat_orig_print) {
+ seq_puts(seq,
+ "No printing function for this routing protocol\n");
+ return 0;
}
- if (batman_count == 0)
- seq_puts(seq, "No batman nodes in range ...\n");
-
-out:
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
- return 0;
-}
-
-static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node,
- int max_if_num)
-{
- void *data_ptr;
- size_t data_size, old_size;
-
- data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
- old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc(data_size, GFP_ATOMIC);
- if (!data_ptr)
- return -ENOMEM;
-
- memcpy(data_ptr, orig_node->bcast_own, old_size);
- kfree(orig_node->bcast_own);
- orig_node->bcast_own = data_ptr;
-
- data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
- if (!data_ptr)
- return -ENOMEM;
-
- memcpy(data_ptr, orig_node->bcast_own_sum,
- (max_if_num - 1) * sizeof(uint8_t));
- kfree(orig_node->bcast_own_sum);
- orig_node->bcast_own_sum = data_ptr;
+ bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq);
return 0;
}
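
batadv_orig_seq_print_text() now only prints the common banner and delegates the table body to the routing algorithm via bat_orig_print, falling back to a notice when the hook is unset. A sketch of that ops-table dispatch; the struct layout and the second algorithm name are hypothetical:

#include <stdio.h>

struct algo_ops {
	const char *name;
	void (*orig_print)(void);	/* may be NULL */
};

static void batman_iv_print(void)
{
	printf("Originator  last-seen  (#/255)  Nexthop  [outgoingIF]\n");
}

static void orig_seq_print(const struct algo_ops *ops)
{
	if (!ops->orig_print) {
		printf("No printing function for this routing protocol\n");
		return;
	}
	ops->orig_print();
}

int main(void)
{
	struct algo_ops batman_iv = { "BATMAN_IV", batman_iv_print };
	struct algo_ops new_algo  = { "NEW_ALGO", NULL };	/* hypothetical */

	orig_seq_print(&batman_iv);
	orig_seq_print(&new_algo);
	return 0;
}
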
@@ -532,6 +543,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
@@ -546,10 +558,10 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- ret = batadv_orig_node_add_if(orig_node, max_if_num);
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
-
+ ret = 0;
+ if (bao->bat_orig_add_if)
+ ret = bao->bat_orig_add_if(orig_node,
+ max_if_num);
if (ret == -ENOMEM)
goto err;
}
@@ -563,54 +575,6 @@ err:
return -ENOMEM;
}
-static int batadv_orig_node_del_if(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num)
-{
- void *data_ptr = NULL;
- int chunk_size;
-
- /* last interface was removed */
- if (max_if_num == 0)
- goto free_bcast_own;
-
- chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
- if (!data_ptr)
- return -ENOMEM;
-
- /* copy first part */
- memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size);
-
- /* copy second part */
- memcpy((char *)data_ptr + del_if_num * chunk_size,
- orig_node->bcast_own + ((del_if_num + 1) * chunk_size),
- (max_if_num - del_if_num) * chunk_size);
-
-free_bcast_own:
- kfree(orig_node->bcast_own);
- orig_node->bcast_own = data_ptr;
-
- if (max_if_num == 0)
- goto free_own_sum;
-
- data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
- if (!data_ptr)
- return -ENOMEM;
-
- memcpy(data_ptr, orig_node->bcast_own_sum,
- del_if_num * sizeof(uint8_t));
-
- memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
- orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)),
- (max_if_num - del_if_num) * sizeof(uint8_t));
-
-free_own_sum:
- kfree(orig_node->bcast_own_sum);
- orig_node->bcast_own_sum = data_ptr;
-
- return 0;
-}
-
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
@@ -619,6 +583,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
struct hlist_head *head;
struct batadv_hard_iface *hard_iface_tmp;
struct batadv_orig_node *orig_node;
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
uint32_t i;
int ret;
@@ -630,11 +595,11 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- ret = batadv_orig_node_del_if(orig_node, max_if_num,
- hard_iface->if_num);
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
-
+ ret = 0;
+ if (bao->bat_orig_del_if)
+ ret = bao->bat_orig_del_if(orig_node,
+ max_if_num,
+ hard_iface->if_num);
if (ret == -ENOMEM)
goto err;
}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 7887b84a9af4..6f77d808a916 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -22,16 +22,18 @@
#include "hash.h"
+int batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
-struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
+struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr);
+ const uint8_t *neigh_addr,
+ struct batadv_orig_node *orig_node);
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_node_get_router(struct batadv_orig_node *orig_node);
@@ -40,6 +42,13 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num);
int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
int max_if_num);
+struct batadv_orig_node_vlan *
+batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
+ unsigned short vid);
+struct batadv_orig_node_vlan *
+batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
+ unsigned short vid);
+void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan);
/* hashfunction to choose an entry in a hash table of given size
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 65e723ed030b..207459b62966 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -110,18 +110,27 @@ enum batadv_tt_data_flags {
/* BATADV_TT_CLIENT flags.
* Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to
- * BIT(15) are used for local computation only
+ * BIT(15) are used for local computation only.
+ * Flags from BIT(4) to BIT(7) are kept in sync with the rest of the network.
*/
enum batadv_tt_client_flags {
BATADV_TT_CLIENT_DEL = BIT(0),
BATADV_TT_CLIENT_ROAM = BIT(1),
- BATADV_TT_CLIENT_WIFI = BIT(2),
+ BATADV_TT_CLIENT_WIFI = BIT(4),
BATADV_TT_CLIENT_NOPURGE = BIT(8),
BATADV_TT_CLIENT_NEW = BIT(9),
BATADV_TT_CLIENT_PENDING = BIT(10),
BATADV_TT_CLIENT_TEMP = BIT(11),
};
+/**
+ * batadv_vlan_flags - flags for the four MSB of any vlan ID field
+ * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
+ */
+enum batadv_vlan_flags {
+ BATADV_VLAN_HAS_TAG = BIT(15),
+};
+
/* claim frame types for the bridge loop avoidance */
enum batadv_bla_claimframe {
BATADV_CLAIM_TYPE_CLAIM = 0x00,
@@ -230,6 +239,8 @@ struct batadv_icmp_packet_rr {
uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
};
+#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr)
+
/* All packet headers in front of an ethernet header have to be completely
* divisible by 2 but not by 4 to make the payload after the ethernet
* header again 4 bytes boundary aligned.
@@ -383,14 +394,26 @@ struct batadv_tvlv_gateway_data {
* struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
* @flags: translation table flags (see batadv_tt_data_flags)
* @ttvn: translation table version number
- * @reserved: field reserved for future use
- * @crc: crc32 checksum of the local translation table
+ * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
+ * one batadv_tvlv_tt_vlan_data object per announced vlan
*/
struct batadv_tvlv_tt_data {
uint8_t flags;
uint8_t ttvn;
+ __be16 num_vlan;
+};
+
+/**
+ * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through
+ * the tt tvlv container
+ * @crc: crc32 checksum of the entries belonging to this vlan
+ * @vid: vlan identifier
+ * @reserved: unused, useful for alignment purposes
+ */
+struct batadv_tvlv_tt_vlan_data {
+ __be32 crc;
+ __be16 vid;
uint16_t reserved;
- __be32 crc;
};
/**
@@ -399,21 +422,23 @@ struct batadv_tvlv_tt_data {
* batadv_tt_client_flags)
* @reserved: reserved field
* @addr: mac address of non-mesh client that triggered this tt change
+ * @vid: VLAN identifier
*/
struct batadv_tvlv_tt_change {
uint8_t flags;
uint8_t reserved;
uint8_t addr[ETH_ALEN];
+ __be16 vid;
};
/**
* struct batadv_tvlv_roam_adv - roaming advertisement
* @client: mac address of roaming client
- * @reserved: field reserved for future use
+ * @vid: VLAN identifier
*/
struct batadv_tvlv_roam_adv {
uint8_t client[ETH_ALEN];
- uint16_t reserved;
+ __be16 vid;
};
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
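
To make the new TT TVLV wire format concrete: a batadv_tvlv_tt_data header (flags, ttvn, num_vlan) is followed by num_vlan batadv_tvlv_tt_vlan_data records, with all multi-byte fields big-endian on the wire. A stand-alone parser sketch under those assumptions; the sample bytes are fabricated for illustration:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t be16(const uint8_t *p) { return (uint16_t)(p[0] << 8 | p[1]); }
static uint32_t be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

/* Walk a TT TVLV: 4-byte fixed header (flags, ttvn, num_vlan), then one
 * 8-byte record (crc, vid, reserved) per announced VLAN.
 */
static void parse_tt_tvlv(const uint8_t *buf, size_t len)
{
	uint16_t num_vlan, i;

	if (len < 4)
		return;

	num_vlan = be16(buf + 2);
	printf("flags 0x%02x ttvn %u, %u vlan(s)\n",
	       (unsigned)buf[0], (unsigned)buf[1], (unsigned)num_vlan);

	buf += 4;
	len -= 4;
	for (i = 0; i < num_vlan && len >= 8; i++, buf += 8, len -= 8)
		printf("  vid 0x%04x crc 0x%08x\n",
		       (unsigned)be16(buf + 4), (unsigned)be32(buf));
}

int main(void)
{
	/* flags=0x00, ttvn=7, num_vlan=1; crc=0xdeadbeef, vid=0x802a */
	const uint8_t pkt[] = { 0x00, 0x07, 0x00, 0x01,
				0xde, 0xad, 0xbe, 0xef, 0x80, 0x2a, 0x00, 0x00 };

	parse_tt_tvlv(pkt, sizeof(pkt));
	return 0;
}
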
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3281a504c20a..d4114d775ad6 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -30,6 +30,8 @@
#include "network-coding.h"
#include "fragmentation.h"
+#include <linux/if_vlan.h>
+
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -45,7 +47,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
if ((curr_router) && (!neigh_node)) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Deleting route towards: %pM\n", orig_node->orig);
- batadv_tt_global_del_orig(bat_priv, orig_node,
+ batadv_tt_global_del_orig(bat_priv, orig_node, -1,
"Deleted route towards originator");
/* route added */
@@ -113,9 +115,19 @@ out:
return;
}
-void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
+/**
+ * batadv_bonding_candidate_add - consider a new link for bonding mode towards
+ * the given originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the target node
+ * @neigh_node: the neighbor representing the new link to consider for bonding
+ * mode
+ */
+void batadv_bonding_candidate_add(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node)
{
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
struct batadv_neigh_node *tmp_neigh_node, *router = NULL;
uint8_t interference_candidate = 0;
@@ -130,8 +142,9 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
if (!router)
goto candidate_del;
+
/* ... and is good enough to be considered */
- if (neigh_node->tq_avg < router->tq_avg - BATADV_BONDING_TQ_THRESHOLD)
+ if (!bao->bat_neigh_is_equiv_or_better(neigh_node, router))
goto candidate_del;
/* check if we have another candidate with the same mac address or
@@ -247,47 +260,65 @@ bool batadv_check_management_packet(struct sk_buff *skb,
return true;
}
+/**
+ * batadv_recv_my_icmp_packet - receive an icmp packet locally
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: icmp packet to process
+ *
+ * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * otherwise.
+ */
static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
- struct sk_buff *skb, size_t icmp_len)
+ struct sk_buff *skb)
{
struct batadv_hard_iface *primary_if = NULL;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_icmp_packet_rr *icmp_packet;
- int ret = NET_RX_DROP;
+ struct batadv_icmp_header *icmph;
+ int res, ret = NET_RX_DROP;
- icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+ icmph = (struct batadv_icmp_header *)skb->data;
- /* add data to device queue */
- if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
- batadv_socket_receive_packet(icmp_packet, icmp_len);
- goto out;
- }
+ switch (icmph->msg_type) {
+ case BATADV_ECHO_REPLY:
+ case BATADV_DESTINATION_UNREACHABLE:
+ case BATADV_TTL_EXCEEDED:
+ /* receive the packet */
+ if (skb_linearize(skb) < 0)
+ break;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
+ batadv_socket_receive_packet(icmph, skb->len);
+ break;
+ case BATADV_ECHO_REQUEST:
+ /* answer echo request (ping) */
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
- /* answer echo request (ping) */
- /* get routing information */
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
- if (!orig_node)
- goto out;
+ /* get routing information */
+ orig_node = batadv_orig_hash_find(bat_priv, icmph->orig);
+ if (!orig_node)
+ goto out;
- /* create a copy of the skb, if needed, to modify it. */
- if (skb_cow(skb, ETH_HLEN) < 0)
- goto out;
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto out;
- icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+ icmph = (struct batadv_icmp_header *)skb->data;
- memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
- memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
- ETH_ALEN);
- icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY;
- icmp_packet->icmph.header.ttl = BATADV_TTL;
+ memcpy(icmph->dst, icmph->orig, ETH_ALEN);
+ memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
+ icmph->msg_type = BATADV_ECHO_REPLY;
+ icmph->header.ttl = BATADV_TTL;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = NET_RX_SUCCESS;
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res != NET_XMIT_DROP)
+ ret = NET_RX_SUCCESS;
+ break;
+ default:
+ /* drop unknown type */
+ goto out;
+ }
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
@@ -350,16 +381,13 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_icmp_packet_rr *icmp_packet;
+ struct batadv_icmp_header *icmph;
+ struct batadv_icmp_packet_rr *icmp_packet_rr;
struct ethhdr *ethhdr;
struct batadv_orig_node *orig_node = NULL;
- int hdr_size = sizeof(struct batadv_icmp_packet);
+ int hdr_size = sizeof(struct batadv_icmp_header);
int ret = NET_RX_DROP;
- /* we truncate all incoming icmp packets if they don't match our size */
- if (skb->len >= sizeof(struct batadv_icmp_packet_rr))
- hdr_size = sizeof(struct batadv_icmp_packet_rr);
-
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
goto out;
@@ -378,28 +406,39 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
goto out;
- icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+ icmph = (struct batadv_icmp_header *)skb->data;
/* add record route information if not full */
- if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY ||
- icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) &&
- (hdr_size == sizeof(struct batadv_icmp_packet_rr)) &&
- (icmp_packet->rr_cur < BATADV_RR_LEN)) {
- memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]),
+ if ((icmph->msg_type == BATADV_ECHO_REPLY ||
+ icmph->msg_type == BATADV_ECHO_REQUEST) &&
+ (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+ if (skb_linearize(skb) < 0)
+ goto out;
+
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto out;
+
+ icmph = (struct batadv_icmp_header *)skb->data;
+ icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
+ if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
+ goto out;
+
+ memcpy(&(icmp_packet_rr->rr[icmp_packet_rr->rr_cur]),
ethhdr->h_dest, ETH_ALEN);
- icmp_packet->rr_cur++;
+ icmp_packet_rr->rr_cur++;
}
/* packet for me */
- if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst))
- return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
+ if (batadv_is_my_mac(bat_priv, icmph->dst))
+ return batadv_recv_my_icmp_packet(bat_priv, skb);
/* TTL exceeded */
- if (icmp_packet->icmph.header.ttl < 2)
+ if (icmph->header.ttl < 2)
return batadv_recv_icmp_ttl_exceeded(bat_priv, skb);
/* get routing information */
- orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
+ orig_node = batadv_orig_hash_find(bat_priv, icmph->dst);
if (!orig_node)
goto out;
@@ -407,10 +446,10 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (skb_cow(skb, ETH_HLEN) < 0)
goto out;
- icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+ icmph = (struct batadv_icmp_header *)skb->data;
/* decrement ttl */
- icmp_packet->icmph.header.ttl--;
+ icmph->header.ttl--;
/* route it */
if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
@@ -477,18 +516,25 @@ out:
return router;
}
-/* Interface Alternating: Use the best of the
- * remaining candidates which are not using
- * this interface.
+/**
+ * batadv_find_ifalter_router - find the best of the remaining candidates which
+ * are not using this interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_orig: the destination
+ * @recv_if: the interface that the router returned by this function must not
+ * use
*
- * Increases the returned router's refcount
+ * Returns the best candidate towards primary_orig that is not using recv_if.
+ * Increases the returned neighbor's refcount
*/
static struct batadv_neigh_node *
-batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
+batadv_find_ifalter_router(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *primary_orig,
const struct batadv_hard_iface *recv_if)
{
- struct batadv_neigh_node *tmp_neigh_node;
struct batadv_neigh_node *router = NULL, *first_candidate = NULL;
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_neigh_node *tmp_neigh_node;
rcu_read_lock();
list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list,
@@ -500,7 +546,7 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
if (tmp_neigh_node->if_incoming == recv_if)
continue;
- if (router && tmp_neigh_node->tq_avg <= router->tq_avg)
+ if (router && bao->bat_neigh_cmp(tmp_neigh_node, router) <= 0)
continue;
if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
@@ -634,7 +680,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
if (bonding_enabled)
router = batadv_find_bond_router(primary_orig_node, recv_if);
else
- router = batadv_find_ifalter_router(primary_orig_node, recv_if);
+ router = batadv_find_ifalter_router(bat_priv, primary_orig_node,
+ recv_if);
return_router:
if (router && router->if_incoming->if_status != BATADV_IF_ACTIVE)
@@ -724,6 +771,7 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @unicast_packet: the unicast header to be updated
* @dst_addr: the payload destination
+ * @vid: VLAN identifier
*
* Search the translation table for dst_addr and update the unicast header with
* the new corresponding information (originator address where the destination
@@ -734,21 +782,22 @@ out:
static bool
batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
struct batadv_unicast_packet *unicast_packet,
- uint8_t *dst_addr)
+ uint8_t *dst_addr, unsigned short vid)
{
struct batadv_orig_node *orig_node = NULL;
struct batadv_hard_iface *primary_if = NULL;
bool ret = false;
uint8_t *orig_addr, orig_ttvn;
- if (batadv_is_my_client(bat_priv, dst_addr)) {
+ if (batadv_is_my_client(bat_priv, dst_addr, vid)) {
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
orig_addr = primary_if->net_dev->dev_addr;
orig_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
} else {
- orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr);
+ orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr,
+ vid);
if (!orig_node)
goto out;
@@ -775,11 +824,12 @@ out:
static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_len) {
- uint8_t curr_ttvn, old_ttvn;
+ struct batadv_unicast_packet *unicast_packet;
+ struct batadv_hard_iface *primary_if;
struct batadv_orig_node *orig_node;
+ uint8_t curr_ttvn, old_ttvn;
struct ethhdr *ethhdr;
- struct batadv_hard_iface *primary_if;
- struct batadv_unicast_packet *unicast_packet;
+ unsigned short vid;
int is_old_ttvn;
/* check if there is enough data before accessing it */
@@ -791,6 +841,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
return 0;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ vid = batadv_get_vid(skb, hdr_len);
ethhdr = (struct ethhdr *)(skb->data + hdr_len);
/* check if the destination client was served by this node and it is now
@@ -798,9 +849,9 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* message and that it knows the new destination in the mesh to re-route
* the packet to
*/
- if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest)) {
+ if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
- ethhdr->h_dest))
+ ethhdr->h_dest, vid))
net_ratelimited_function(batadv_dbg, BATADV_DBG_TT,
bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n",
@@ -846,7 +897,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* target host
*/
if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
- ethhdr->h_dest)) {
+ ethhdr->h_dest, vid)) {
net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv,
"Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
unicast_packet->dest, ethhdr->h_dest,
@@ -858,7 +909,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* currently served by this node or there is no destination at all and
* it is possible to drop the packet
*/
- if (!batadv_is_my_client(bat_priv, ethhdr->h_dest))
+ if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid))
return 0;
/* update the header in order to let the packet be delivered to this
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 55d637a90621..19544ddb81b5 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -48,7 +48,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *recv_if);
void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node);
-void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
+void batadv_bonding_candidate_add(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node);
void batadv_bonding_save_primary(const struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh_node,
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 82588e425641..c83be5ebaa28 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -234,44 +234,45 @@ out:
}
/**
- * batadv_send_generic_unicast_skb - send an skb as unicast
+ * batadv_send_skb_unicast - encapsulate and send an skb via unicast
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @packet_type: the batman unicast packet type to use
* @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast
* 4addr packets)
+ * @orig_node: the originator to send the packet to
+ * @vid: the vid to be used to search the translation table
*
- * Returns 1 in case of error or 0 otherwise.
+ * Wrap the given skb into a batman-adv unicast or unicast-4addr header
+ * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied
+ * as packet_type. Then send this frame to the given orig_node and release a
+ * reference to this orig_node.
+ *
+ * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int packet_type,
- int packet_subtype)
+static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype,
+ struct batadv_orig_node *orig_node,
+ unsigned short vid)
{
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
struct batadv_unicast_packet *unicast_packet;
- struct batadv_orig_node *orig_node;
- int ret = NET_RX_DROP;
-
- /* get routing information */
- if (is_multicast_ether_addr(ethhdr->h_dest))
- orig_node = batadv_gw_get_selected_orig(bat_priv);
- else
- /* check for tt host - increases orig_node refcount.
- * returns NULL in case of AP isolation
- */
- orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
- ethhdr->h_dest);
+ int ret = NET_XMIT_DROP;
if (!orig_node)
goto out;
switch (packet_type) {
case BATADV_UNICAST:
- batadv_send_skb_prepare_unicast(skb, orig_node);
+ if (!batadv_send_skb_prepare_unicast(skb, orig_node))
+ goto out;
break;
case BATADV_UNICAST_4ADDR:
- batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node,
- packet_subtype);
+ if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
+ orig_node,
+ packet_subtype))
+ goto out;
break;
default:
/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -287,20 +288,71 @@ int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
* try to reroute it because the ttvn contained in the header is less
* than the current one
*/
- if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest))
+ if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
unicast_packet->ttvn = unicast_packet->ttvn - 1;
if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = 0;
+ ret = NET_XMIT_SUCCESS;
out:
if (orig_node)
batadv_orig_node_free_ref(orig_node);
- if (ret == NET_RX_DROP)
+ if (ret == NET_XMIT_DROP)
kfree_skb(skb);
return ret;
}
+/**
+ * batadv_send_skb_via_tt_generic - send an skb via TT lookup
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: payload to send
+ * @packet_type: the batman unicast packet type to use
+ * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast
+ * 4addr packets)
+ * @vid: the vid to be used to search the translation table
+ *
+ * Look up the recipient node for the destination address in the ethernet
+ * header via the translation table. Wrap the given skb into a batman-adv
+ * unicast or unicast-4addr header depending on whether BATADV_UNICAST or
+ * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame
+ * to the corresponding destination node.
+ *
+ * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype, unsigned short vid)
+{
+ struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest, vid);
+ return batadv_send_skb_unicast(bat_priv, skb, packet_type,
+ packet_subtype, orig_node, vid);
+}
+
+/**
+ * batadv_send_skb_via_gw - send an skb via gateway lookup
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: payload to send
+ * @vid: the vid to be used to search the translation table
+ *
+ * Look up the currently selected gateway. Wrap the given skb into a batman-adv
+ * unicast header and send this frame to this gateway node.
+ *
+ * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid)
+{
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_gw_get_selected_orig(bat_priv);
+ return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
+ orig_node, vid);
+}
+
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
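
Worth noting about batadv_send_skb_unicast() above: it consumes the skb on failure (kfree_skb() on the NET_XMIT_DROP path), so the via_tt and via_gw wrappers never free the buffer themselves. A compressed user-space sketch of that ownership contract; the buffer type and helper names are illustrative:

#include <stdio.h>
#include <stdlib.h>

#define NET_XMIT_SUCCESS 0
#define NET_XMIT_DROP    1

struct fake_skb { unsigned char *data; };

/* On NET_XMIT_DROP the buffer is freed here, mirroring the kfree_skb()
 * call above; the caller must not touch it afterwards in either case.
 */
static int send_unicast(struct fake_skb *skb, int have_orig_node)
{
	int ret = NET_XMIT_DROP;

	if (have_orig_node)
		ret = NET_XMIT_SUCCESS;	/* handed to the lower layer */

	if (ret == NET_XMIT_DROP) {
		free(skb->data);
		free(skb);
	}
	return ret;
}

int main(void)
{
	struct fake_skb *skb = malloc(sizeof(*skb));

	if (!skb)
		return 1;
	skb->data = malloc(64);

	/* no route found: send_unicast() frees skb, we only read ret */
	printf("ret=%d\n", send_unicast(skb, 0));
	return 0;
}
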
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index ad63184a4dd9..aa2e2537a739 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -38,41 +38,54 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
struct sk_buff *skb,
struct batadv_orig_node *orig_node,
int packet_subtype);
-int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int packet_type,
- int packet_subtype);
-
+int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype, unsigned short vid);
+int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid);
/**
- * batadv_send_unicast_skb - send the skb encapsulated in a unicast packet
+ * batadv_send_skb_via_tt - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
+ * @vid: the vid to be used to search the translation table
+ *
+ * Look up the recipient node for the destination address in the ethernet
+ * header via the translation table. Wrap the given skb into a batman-adv
+ * unicast header. Then send this frame to the corresponding destination node.
*
- * Returns 1 in case of error or 0 otherwise.
+ * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-static inline int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
- struct sk_buff *skb)
+static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid)
{
- return batadv_send_skb_generic_unicast(bat_priv, skb, BATADV_UNICAST,
- 0);
+ return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0,
+ vid);
}
/**
- * batadv_send_4addr_unicast_skb - send the skb encapsulated in a unicast 4addr
- * packet
+ * batadv_send_skb_via_tt_4addr - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
* @packet_subtype: the unicast 4addr packet subtype to use
+ * @vid: the vid to be used to search the translation table
+ *
+ * Look up the recipient node for the destination address in the ethernet
+ * header via the translation table. Wrap the given skb into a batman-adv
+ * unicast-4addr header. Then send this frame to that destination
+ * node.
*
- * Returns 1 in case of error or 0 otherwise.
+ * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-static inline int batadv_send_skb_unicast_4addr(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- int packet_subtype)
+static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ int packet_subtype,
+ unsigned short vid)
{
- return batadv_send_skb_generic_unicast(bat_priv, skb,
- BATADV_UNICAST_4ADDR,
- packet_subtype);
+ return batadv_send_skb_via_tt_generic(bat_priv, skb,
+ BATADV_UNICAST_4ADDR,
+ packet_subtype, vid);
}
#endif /* _NET_BATMAN_ADV_SEND_H_ */
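A short usage sketch of the renamed inline wrappers; BATADV_P_DATA is assumed here to be the plain-data 4addr subtype from packet.h, and example_send is a hypothetical helper:

	static int example_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
				bool four_addr, unsigned short vid)
	{
		/* the 4addr variant carries the sender originator and a packet
		 * subtype in addition to the plain unicast header
		 */
		if (four_addr)
			return batadv_send_skb_via_tt_4addr(bat_priv, skb,
							    BATADV_P_DATA, vid);

		return batadv_send_skb_via_tt(bat_priv, skb, vid);
	}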
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e8a2bd699d40..36f050876f82 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -118,9 +118,10 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
/* only modify transtable if it has been initialized before */
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) {
- batadv_tt_local_remove(bat_priv, old_addr,
+ batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS,
"mac address changed", false);
- batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX);
+ batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS,
+ BATADV_NULL_IFINDEX);
}
return 0;
@@ -152,33 +153,33 @@ static void batadv_interface_set_rx_mode(struct net_device *dev)
static int batadv_interface_tx(struct sk_buff *skb,
struct net_device *soft_iface)
{
- struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+ struct ethhdr *ethhdr;
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_hard_iface *primary_if = NULL;
struct batadv_bcast_packet *bcast_packet;
- struct vlan_ethhdr *vhdr;
__be16 ethertype = htons(ETH_P_BATMAN);
static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
0x00, 0x00};
static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
0x00, 0x00};
+ struct vlan_ethhdr *vhdr;
unsigned int header_len = 0;
int data_len = skb->len, ret;
- unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
- bool do_bcast = false;
- uint32_t seqno;
unsigned long brd_delay = 1;
+ bool do_bcast = false, client_added;
+ unsigned short vid;
+ uint32_t seqno;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
soft_iface->trans_start = jiffies;
+ vid = batadv_get_vid(skb, 0);
+ ethhdr = (struct ethhdr *)skb->data;
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
- vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
- vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
@@ -195,8 +196,12 @@ static int batadv_interface_tx(struct sk_buff *skb,
ethhdr = (struct ethhdr *)skb->data;
/* Register the client MAC in the transtable */
- if (!is_multicast_ether_addr(ethhdr->h_source))
- batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
+ if (!is_multicast_ether_addr(ethhdr->h_source)) {
+ client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source,
+ vid, skb->skb_iif);
+ if (!client_added)
+ goto dropped;
+ }
/* don't accept stp packets. STP does not help in meshes.
* better use the bridge loop avoidance ...
@@ -296,8 +301,12 @@ static int batadv_interface_tx(struct sk_buff *skb,
batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);
- ret = batadv_send_skb_unicast(bat_priv, skb);
- if (ret != 0)
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
+ else
+ ret = batadv_send_skb_via_tt(bat_priv, skb, vid);
+
+ if (ret == NET_XMIT_DROP)
goto dropped_freed;
}
@@ -319,12 +328,12 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, struct batadv_hard_iface *recv_if,
int hdr_size, struct batadv_orig_node *orig_node)
{
- struct batadv_priv *bat_priv = netdev_priv(soft_iface);
- struct ethhdr *ethhdr;
- struct vlan_ethhdr *vhdr;
struct batadv_header *batadv_header = (struct batadv_header *)skb->data;
- unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
__be16 ethertype = htons(ETH_P_BATMAN);
+ struct vlan_ethhdr *vhdr;
+ struct ethhdr *ethhdr;
+ unsigned short vid;
bool is_bcast;
is_bcast = (batadv_header->packet_type == BATADV_BCAST);
@@ -336,13 +345,12 @@ void batadv_interface_rx(struct net_device *soft_iface,
skb_pull_rcsum(skb, hdr_size);
skb_reset_mac_header(skb);
+ vid = batadv_get_vid(skb, hdr_size);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
- vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
- vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
@@ -378,9 +386,10 @@ void batadv_interface_rx(struct net_device *soft_iface,
if (orig_node)
batadv_tt_add_temporary_global_entry(bat_priv, orig_node,
- ethhdr->h_source);
+ ethhdr->h_source, vid);
- if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
+ if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest,
+ vid))
goto dropped;
netif_rx(skb);
@@ -392,6 +401,177 @@ out:
return;
}
+/**
+ * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and
+ * possibly free it
+ * @softif_vlan: the vlan object to release
+ */
+void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan)
+{
+ if (atomic_dec_and_test(&softif_vlan->refcount))
+ kfree_rcu(softif_vlan, rcu);
+}
+
+/**
+ * batadv_softif_vlan_get - get the vlan object for a specific vid
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vid: the identifier of the vlan object to retrieve
+ *
+ * Returns the private data of the vlan matching the vid passed as argument or
+ * NULL otherwise. The refcounter of the returned object is incremented by 1.
+ */
+struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
+ unsigned short vid)
+{
+ struct batadv_softif_vlan *vlan_tmp, *vlan = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) {
+ if (vlan_tmp->vid != vid)
+ continue;
+
+ if (!atomic_inc_not_zero(&vlan_tmp->refcount))
+ continue;
+
+ vlan = vlan_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return vlan;
+}
+
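The helper pair follows the usual batman-adv reference counting scheme; a minimal reader sketch (hypothetical function, reading the per-VLAN ap_isolation flag introduced further below):

	static bool example_vlan_ap_isolation(struct batadv_priv *bat_priv,
					      unsigned short vid)
	{
		struct batadv_softif_vlan *vlan;
		bool enabled;

		vlan = batadv_softif_vlan_get(bat_priv, vid);
		if (!vlan)
			return false; /* no such VLAN on this soft interface */

		enabled = atomic_read(&vlan->ap_isolation) != 0;

		/* drop the reference taken by batadv_softif_vlan_get() */
		batadv_softif_vlan_free_ref(vlan);

		return enabled;
	}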
+/**
+ * batadv_softif_create_vlan - allocate the needed resources for a new vlan
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vid: the VLAN identifier
+ *
+ * Returns 0 on success, a negative error otherwise.
+ */
+int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
+{
+ struct batadv_softif_vlan *vlan;
+ int err;
+
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (vlan) {
+ batadv_softif_vlan_free_ref(vlan);
+ return -EEXIST;
+ }
+
+ vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC);
+ if (!vlan)
+ return -ENOMEM;
+
+ vlan->vid = vid;
+ atomic_set(&vlan->refcount, 1);
+
+ atomic_set(&vlan->ap_isolation, 0);
+
+ err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
+ if (err) {
+ kfree(vlan);
+ return err;
+ }
+
+ /* add a new TT local entry. This one will be marked with the NOPURGE
+ * flag
+ */
+ batadv_tt_local_add(bat_priv->soft_iface,
+ bat_priv->soft_iface->dev_addr, vid,
+ BATADV_NULL_IFINDEX);
+
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
+ return 0;
+}
+
+/**
+ * batadv_softif_destroy_vlan - remove and destroy a softif_vlan object
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: the object to remove
+ */
+static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
+{
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ hlist_del_rcu(&vlan->list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
+ batadv_sysfs_del_vlan(bat_priv, vlan);
+
+ /* explicitly remove the associated TT local entry because it is marked
+ * with the NOPURGE flag
+ */
+ batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr,
+ vlan->vid, "vlan interface destroyed", false);
+
+ batadv_softif_vlan_free_ref(vlan);
+}
+
+/**
+ * batadv_interface_add_vid - ndo_add_vid API implementation
+ * @dev: the netdev of the mesh interface
+ * @vid: identifier of the new vlan
+ *
+ * Set up all the internal structures for handling the new vlan on top of the
+ * mesh interface.
+ *
+ * Returns 0 on success or a negative error code in case of failure.
+ */
+static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
+ unsigned short vid)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+
+ /* only 802.1Q vlans are supported.
+ * batman-adv does not know how to handle other types
+ */
+ if (proto != htons(ETH_P_8021Q))
+ return -EINVAL;
+
+ vid |= BATADV_VLAN_HAS_TAG;
+
+ return batadv_softif_create_vlan(bat_priv, vid);
+}
+
+/**
+ * batadv_interface_kill_vid - ndo_kill_vid API implementation
+ * @dev: the netdev of the mesh interface
+ * @vid: identifier of the deleted vlan
+ *
+ * Destroy all the internal structures used to handle the vlan identified by
+ * vid on top of the mesh interface.
+ *
+ * Returns 0 on success, -EINVAL if the specified protocol is not ETH_P_8021Q
+ * or -ENOENT if the specified vlan id wasn't registered.
+ */
+static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
+ unsigned short vid)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_softif_vlan *vlan;
+
+ /* only 802.1Q vlans are supported. batman-adv does not know how to
+ * handle other types
+ */
+ if (proto != htons(ETH_P_8021Q))
+ return -EINVAL;
+
+ vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+ if (!vlan)
+ return -ENOENT;
+
+ batadv_softif_destroy_vlan(bat_priv, vlan);
+
+ /* finally free the vlan object */
+ batadv_softif_vlan_free_ref(vlan);
+
+ return 0;
+}
+
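Both ndo handlers fold BATADV_VLAN_HAS_TAG into the vid before using it internally, so the untagged VLAN (BATADV_NO_FLAGS) can never collide with 802.1Q VLAN 0. An illustrative fragment (values are arbitrary):

	unsigned short untagged = BATADV_NO_FLAGS;		/* no 802.1Q tag */
	unsigned short tagged = 42 | BATADV_VLAN_HAS_TAG;	/* 802.1Q vid 42 */

	/* the raw 802.1Q id can be recovered with VLAN_VID_MASK */
	unsigned short raw = tagged & VLAN_VID_MASK;		/* == 42 */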
/* batman-adv network devices have devices nesting below it and are a special
* "super class" of normal network devices; split their locks off into a
* separate class since they always nest.
@@ -431,6 +611,7 @@ static void batadv_set_lockdep_class(struct net_device *dev)
*/
static void batadv_softif_destroy_finish(struct work_struct *work)
{
+ struct batadv_softif_vlan *vlan;
struct batadv_priv *bat_priv;
struct net_device *soft_iface;
@@ -438,6 +619,13 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
cleanup_work);
soft_iface = bat_priv->soft_iface;
+ /* destroy the "untagged" VLAN */
+ vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+ if (vlan) {
+ batadv_softif_destroy_vlan(bat_priv, vlan);
+ batadv_softif_vlan_free_ref(vlan);
+ }
+
batadv_sysfs_del_meshif(soft_iface);
rtnl_lock();
@@ -479,7 +667,6 @@ static int batadv_softif_init_late(struct net_device *dev)
#ifdef CONFIG_BATMAN_ADV_DAT
atomic_set(&bat_priv->distributed_arp_table, 1);
#endif
- atomic_set(&bat_priv->ap_isolation, 0);
atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw_sel_class, 20);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -490,6 +677,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->log_level, 0);
#endif
atomic_set(&bat_priv->fragmentation, 1);
+ atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN);
atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN);
atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN);
@@ -593,6 +781,8 @@ static const struct net_device_ops batadv_netdev_ops = {
.ndo_open = batadv_interface_open,
.ndo_stop = batadv_interface_release,
.ndo_get_stats = batadv_interface_stats,
+ .ndo_vlan_rx_add_vid = batadv_interface_add_vid,
+ .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid,
.ndo_set_mac_address = batadv_interface_set_mac_addr,
.ndo_change_mtu = batadv_interface_change_mtu,
.ndo_set_rx_mode = batadv_interface_set_rx_mode,
@@ -632,6 +822,7 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->destructor = batadv_softif_free;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
dev->tx_queue_len = 0;
/* can't call min_mtu, because the needed variables
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 2f2472c2ea0d..06fc91ff5a02 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -28,5 +28,9 @@ struct net_device *batadv_softif_create(const char *name);
void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
+int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
+void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan);
+struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
+ unsigned short vid);
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 869eb46329cb..6335433310af 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -24,6 +24,7 @@
#include "network-coding.h"
#include "originator.h"
#include "hard-interface.h"
+#include "soft-interface.h"
#include "gateway_common.h"
#include "gateway_client.h"
@@ -39,6 +40,53 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
return netdev_priv(net_dev);
}
+/**
+ * batadv_vlan_kobj_to_batpriv - convert a vlan kobj to the associated batpriv
+ * @obj: kobject to convert
+ *
+ * Returns the associated batadv_priv struct.
+ */
+static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
+{
+ /* VLAN specific attributes are located in the root sysfs folder if they
+ * refer to the untagged VLAN..
+ */
+ if (!strcmp(BATADV_SYSFS_IF_MESH_SUBDIR, obj->name))
+ return batadv_kobj_to_batpriv(obj);
+
+ /* ..while the attributes for the tagged vlans are located in
+ * the corresponding "vlan%VID" subfolder
+ */
+ return batadv_kobj_to_batpriv(obj->parent);
+}
+
+/**
+ * batadv_kobj_to_vlan - convert a kobj to the associated softif_vlan struct
+ * @obj: kobject to convert
+ *
+ * Returns the associated softif_vlan struct if found, NULL otherwise.
+ */
+static struct batadv_softif_vlan *
+batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
+{
+ struct batadv_softif_vlan *vlan_tmp, *vlan = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list, list) {
+ if (vlan_tmp->kobj != obj)
+ continue;
+
+ if (!atomic_inc_not_zero(&vlan_tmp->refcount))
+ continue;
+
+ vlan = vlan_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return vlan;
+}
+
#define BATADV_UEV_TYPE_VAR "BATTYPE="
#define BATADV_UEV_ACTION_VAR "BATACTION="
#define BATADV_UEV_DATA_VAR "BATDATA="
@@ -53,6 +101,15 @@ static char *batadv_uev_type_str[] = {
"gw"
};
+/* Use this, if you have customized show and store functions for vlan attrs */
+#define BATADV_ATTR_VLAN(_name, _mode, _show, _store) \
+struct batadv_attribute batadv_attr_vlan_##_name = { \
+ .attr = {.name = __stringify(_name), \
+ .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+};
+
/* Use this, if you have customized show and store functions */
#define BATADV_ATTR(_name, _mode, _show, _store) \
struct batadv_attribute batadv_attr_##_name = { \
@@ -122,6 +179,41 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
batadv_store_##_name)
+#define BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \
+ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
+ struct attribute *attr, char *buff, \
+ size_t count) \
+{ \
+ struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\
+ struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \
+ kobj); \
+ size_t res = __batadv_store_bool_attr(buff, count, _post_func, \
+ attr, &vlan->_name, \
+ bat_priv->soft_iface); \
+ batadv_softif_vlan_free_ref(vlan); \
+ return res; \
+}
+
+#define BATADV_ATTR_VLAN_SHOW_BOOL(_name) \
+ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
+ struct attribute *attr, char *buff) \
+{ \
+ struct batadv_priv *bat_priv = batadv_vlan_kobj_to_batpriv(kobj);\
+ struct batadv_softif_vlan *vlan = batadv_kobj_to_vlan(bat_priv, \
+ kobj); \
+ size_t res = sprintf(buff, "%s\n", \
+ atomic_read(&vlan->_name) == 0 ? \
+ "disabled" : "enabled"); \
+ batadv_softif_vlan_free_ref(vlan); \
+ return res; \
+}
+
+/* Use this, if you are going to turn a [name] in the vlan struct on or off */
+#define BATADV_ATTR_VLAN_BOOL(_name, _mode, _post_func) \
+ static BATADV_ATTR_VLAN_STORE_BOOL(_name, _post_func) \
+ static BATADV_ATTR_VLAN_SHOW_BOOL(_name) \
+ static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \
+ batadv_store_vlan_##_name)
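A single invocation, as done for ap_isolation further below, generates the complete attribute triple; roughly (expansion sketched, not literal preprocessor output):

	BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);

	/* expands (roughly) to: */
	static ssize_t batadv_store_vlan_ap_isolation(struct kobject *kobj,
						      struct attribute *attr,
						      char *buff, size_t count);
	static ssize_t batadv_show_vlan_ap_isolation(struct kobject *kobj,
						     struct attribute *attr,
						     char *buff);
	static struct batadv_attribute batadv_attr_vlan_ap_isolation;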
static int batadv_store_bool_attr(char *buff, size_t count,
struct net_device *net_dev,
@@ -361,7 +453,6 @@ BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR,
batadv_dat_status_update);
#endif
BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
-BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
batadv_store_gw_mode);
@@ -391,7 +482,6 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
&batadv_attr_distributed_arp_table,
#endif
&batadv_attr_fragmentation,
- &batadv_attr_ap_isolation,
&batadv_attr_routing_algo,
&batadv_attr_gw_mode,
&batadv_attr_orig_interval,
@@ -407,6 +497,16 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
NULL,
};
+BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
+
+/**
+ * batadv_vlan_attrs - array of vlan specific sysfs attributes
+ */
+static struct batadv_attribute *batadv_vlan_attrs[] = {
+ &batadv_attr_vlan_ap_isolation,
+ NULL,
+};
+
int batadv_sysfs_add_meshif(struct net_device *dev)
{
struct kobject *batif_kobject = &dev->dev.kobj;
@@ -457,6 +557,80 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
bat_priv->mesh_obj = NULL;
}
+/**
+ * batadv_sysfs_add_vlan - add all the needed sysfs objects for the new vlan
+ * @dev: netdev of the mesh interface
+ * @vlan: private data of the newly added VLAN interface
+ *
+ * Returns 0 on success and -ENOMEM if any of the structure allocations fails.
+ */
+int batadv_sysfs_add_vlan(struct net_device *dev,
+ struct batadv_softif_vlan *vlan)
+{
+ char vlan_subdir[sizeof(BATADV_SYSFS_VLAN_SUBDIR_PREFIX) + 5];
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_attribute **bat_attr;
+ int err;
+
+ if (vlan->vid & BATADV_VLAN_HAS_TAG) {
+ sprintf(vlan_subdir, BATADV_SYSFS_VLAN_SUBDIR_PREFIX "%hu",
+ vlan->vid & VLAN_VID_MASK);
+
+ vlan->kobj = kobject_create_and_add(vlan_subdir,
+ bat_priv->mesh_obj);
+ if (!vlan->kobj) {
+ batadv_err(dev, "Can't add sysfs directory: %s/%s\n",
+ dev->name, vlan_subdir);
+ goto out;
+ }
+ } else {
+ /* the untagged VLAN uses the root folder to store its "VLAN
+ * specific attributes"
+ */
+ vlan->kobj = bat_priv->mesh_obj;
+ kobject_get(bat_priv->mesh_obj);
+ }
+
+ for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) {
+ err = sysfs_create_file(vlan->kobj,
+ &((*bat_attr)->attr));
+ if (err) {
+ batadv_err(dev, "Can't add sysfs file: %s/%s/%s\n",
+ dev->name, vlan_subdir,
+ ((*bat_attr)->attr).name);
+ goto rem_attr;
+ }
+ }
+
+ return 0;
+
+rem_attr:
+ for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
+ sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+
+ kobject_put(vlan->kobj);
+ vlan->kobj = NULL;
+out:
+ return -ENOMEM;
+}
+
+/**
+ * batadv_sysfs_del_vlan - remove all the sysfs objects for a given VLAN
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: the private data of the VLAN to destroy
+ */
+void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
+{
+ struct batadv_attribute **bat_attr;
+
+ for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
+ sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
+
+ kobject_put(vlan->kobj);
+ vlan->kobj = NULL;
+}
+
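Combined with the "vlan" prefix defined in sysfs.h, the attribute files end up at paths like these (mesh0 is a hypothetical interface name):

	/sys/class/net/mesh0/mesh/ap_isolation		<- untagged VLAN
	/sys/class/net/mesh0/mesh/vlan42/ap_isolation	<- 802.1Q VLAN 42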
static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
struct attribute *attr, char *buff)
{
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 479acf4c16f4..c7d725de50ad 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -22,6 +22,12 @@
#define BATADV_SYSFS_IF_MESH_SUBDIR "mesh"
#define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv"
+/**
+ * BATADV_SYSFS_VLAN_SUBDIR_PREFIX - prefix of the subfolder that will be
+ * created in the sysfs hierarchy for each VLAN interface. The subfolder will
+ * be named "BATADV_SYSFS_VLAN_SUBDIR_PREFIX%vid".
+ */
+#define BATADV_SYSFS_VLAN_SUBDIR_PREFIX "vlan"
struct batadv_attribute {
struct attribute attr;
@@ -36,6 +42,10 @@ void batadv_sysfs_del_meshif(struct net_device *dev);
int batadv_sysfs_add_hardif(struct kobject **hardif_obj,
struct net_device *dev);
void batadv_sysfs_del_hardif(struct kobject **hardif_obj);
+int batadv_sysfs_add_vlan(struct net_device *dev,
+ struct batadv_softif_vlan *vlan);
+void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan);
int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
enum batadv_uev_action action, const char *data);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b521afb186d4..4add57d4857f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -34,6 +34,7 @@ static struct lock_class_key batadv_tt_local_hash_lock_class_key;
static struct lock_class_key batadv_tt_global_hash_lock_class_key;
static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
+ unsigned short vid,
struct batadv_orig_node *orig_node);
static void batadv_tt_purge(struct work_struct *work);
static void
@@ -41,7 +42,8 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry);
static void batadv_tt_global_del(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *addr,
- const char *message, bool roaming);
+ unsigned short vid, const char *message,
+ bool roaming);
/* returns 1 if they are the same mac addr */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
@@ -52,43 +54,93 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
}
+/**
+ * batadv_choose_tt - return the index of the tt entry in the hash table
+ * @data: pointer to the tt_common_entry object to map
+ * @size: the size of the hash table
+ *
+ * Returns the hash index where the object represented by 'data' should be
+ * stored.
+ */
+static inline uint32_t batadv_choose_tt(const void *data, uint32_t size)
+{
+ struct batadv_tt_common_entry *tt;
+ uint32_t hash = 0;
+
+ tt = (struct batadv_tt_common_entry *)data;
+ hash = batadv_hash_bytes(hash, &tt->addr, ETH_ALEN);
+ hash = batadv_hash_bytes(hash, &tt->vid, sizeof(tt->vid));
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash % size;
+}
+
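Unlike the previously used batadv_choose_orig(), the vid now contributes to the bucket index, so the same MAC on two VLANs generally lands in different buckets. A self-contained userspace sketch, assuming batadv_hash_bytes() is the Jenkins one-at-a-time accumulation step from hash.h:

	#include <stdint.h>
	#include <stddef.h>

	/* per-byte accumulation as in batadv_hash_bytes() */
	static uint32_t hash_bytes(uint32_t hash, const void *data, size_t len)
	{
		const uint8_t *p = data;
		size_t i;

		for (i = 0; i < len; i++) {
			hash += p[i];
			hash += (hash << 10);
			hash ^= (hash >> 6);
		}
		return hash;
	}

	static uint32_t choose_tt(const uint8_t addr[6], uint16_t vid,
				  uint32_t size)
	{
		uint32_t hash = hash_bytes(0, addr, 6);

		hash = hash_bytes(hash, &vid, sizeof(vid));

		/* final avalanche, identical to batadv_choose_tt() */
		hash += (hash << 3);
		hash ^= (hash >> 11);
		hash += (hash << 15);

		return hash % size;
	}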
+/**
+ * batadv_tt_hash_find - look for a client in the given hash table
+ * @hash: the hash table to search
+ * @addr: the mac address of the client to look for
+ * @vid: VLAN identifier
+ *
+ * Returns a pointer to the tt_common struct belonging to the searched client
+ * if found, NULL otherwise.
+ */
static struct batadv_tt_common_entry *
-batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data)
+batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr,
+ unsigned short vid)
{
struct hlist_head *head;
- struct batadv_tt_common_entry *tt_common_entry;
- struct batadv_tt_common_entry *tt_common_entry_tmp = NULL;
+ struct batadv_tt_common_entry to_search, *tt, *tt_tmp = NULL;
uint32_t index;
if (!hash)
return NULL;
- index = batadv_choose_orig(data, hash->size);
+ memcpy(to_search.addr, addr, ETH_ALEN);
+ to_search.vid = vid;
+
+ index = batadv_choose_tt(&to_search, hash->size);
head = &hash->table[index];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) {
- if (!batadv_compare_eth(tt_common_entry, data))
+ hlist_for_each_entry_rcu(tt, head, hash_entry) {
+ if (!batadv_compare_eth(tt, addr))
+ continue;
+
+ if (tt->vid != vid)
continue;
- if (!atomic_inc_not_zero(&tt_common_entry->refcount))
+ if (!atomic_inc_not_zero(&tt->refcount))
continue;
- tt_common_entry_tmp = tt_common_entry;
+ tt_tmp = tt;
break;
}
rcu_read_unlock();
- return tt_common_entry_tmp;
+ return tt_tmp;
}
+/**
+ * batadv_tt_local_hash_find - search the local table for a given client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac address of the client to look for
+ * @vid: VLAN identifier
+ *
+ * Returns a pointer to the corresponding tt_local_entry struct if the client
+ * is found, NULL otherwise.
+ */
static struct batadv_tt_local_entry *
-batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data)
+batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr,
+ unsigned short vid)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local_entry = NULL;
- tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, data);
+ tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, addr,
+ vid);
if (tt_common_entry)
tt_local_entry = container_of(tt_common_entry,
struct batadv_tt_local_entry,
@@ -96,13 +148,24 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data)
return tt_local_entry;
}
+/**
+ * batadv_tt_global_hash_find - search the global table for a given client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac address of the client to look for
+ * @vid: VLAN identifier
+ *
+ * Returns a pointer to the corresponding tt_global_entry struct if the client
+ * is found, NULL otherwise.
+ */
static struct batadv_tt_global_entry *
-batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data)
+batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr,
+ unsigned short vid)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_global_entry *tt_global_entry = NULL;
- tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, data);
+ tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, addr,
+ vid);
if (tt_common_entry)
tt_global_entry = container_of(tt_common_entry,
struct batadv_tt_global_entry,
@@ -145,13 +208,107 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
kfree(orig_entry);
}
+/**
+ * batadv_tt_local_size_mod - change the size by v of the local table identified
+ * by vid
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vid: the VLAN identifier of the sub-table to change
+ * @v: the amount to sum to the local table size
+ */
+static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
+ unsigned short vid, int v)
+{
+ struct batadv_softif_vlan *vlan;
+
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan)
+ return;
+
+ atomic_add(v, &vlan->tt.num_entries);
+
+ batadv_softif_vlan_free_ref(vlan);
+}
+
+/**
+ * batadv_tt_local_size_inc - increase by one the local table size for the given
+ * vid
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vid: the VLAN identifier
+ */
+static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv,
+ unsigned short vid)
+{
+ batadv_tt_local_size_mod(bat_priv, vid, 1);
+}
+
+/**
+ * batadv_tt_local_size_dec - decrease by one the local table size for the given
+ * vid
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vid: the VLAN identifier
+ */
+static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
+ unsigned short vid)
+{
+ batadv_tt_local_size_mod(bat_priv, vid, -1);
+}
+
+/**
+ * batadv_tt_global_size_mod - change the size by v of the global table
+ * identified by vid
+ * @orig_node: the originator announcing the global entries
+ * @vid: the VLAN identifier
+ * @v: the value to add to the global table size
+ */
+static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
+ unsigned short vid, int v)
+{
+ struct batadv_orig_node_vlan *vlan;
+
+ vlan = batadv_orig_node_vlan_new(orig_node, vid);
+ if (!vlan)
+ return;
+
+ if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
+ spin_lock_bh(&orig_node->vlan_list_lock);
+ list_del_rcu(&vlan->list);
+ spin_unlock_bh(&orig_node->vlan_list_lock);
+ batadv_orig_node_vlan_free_ref(vlan);
+ }
+
+ batadv_orig_node_vlan_free_ref(vlan);
+}
+
+/**
+ * batadv_tt_global_size_inc - increase by one the global table size for the
+ * given vid
+ * @orig_node: the originator whose global table size has to be increased
+ * @vid: the vlan identifier
+ */
+static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node,
+ unsigned short vid)
+{
+ batadv_tt_global_size_mod(orig_node, vid, 1);
+}
+
+/**
+ * batadv_tt_global_size_dec - decrease by one the global table size for the
+ * given vid
+ * @orig_node: the originator whose global table size has to be decreased
+ * @vid: the vlan identifier
+ */
+static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
+ unsigned short vid)
+{
+ batadv_tt_global_size_mod(orig_node, vid, -1);
+}
+
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
if (!atomic_dec_and_test(&orig_entry->refcount))
return;
- /* to avoid race conditions, immediately decrease the tt counter */
- atomic_dec(&orig_entry->orig_node->tt_size);
+
call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
}
@@ -178,6 +335,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
tt_change_node->change.flags = flags;
tt_change_node->change.reserved = 0;
memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN);
+ tt_change_node->change.vid = htons(common->vid);
del_op_requested = flags & BATADV_TT_CLIENT_DEL;
@@ -200,6 +358,13 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
goto del;
if (del_op_requested && !del_op_entry)
goto del;
+
+ /* this is a second add in the same originator interval. It
+ * means that flags have been changed: update them!
+ */
+ if (!del_op_requested && !del_op_entry)
+ entry->change.flags = flags;
+
continue;
del:
list_del(&entry->list);
@@ -243,6 +408,35 @@ static uint16_t batadv_tt_entries(uint16_t tt_len)
return tt_len / batadv_tt_len(1);
}
+/**
+ * batadv_tt_local_table_transmit_size - calculates the local translation table
+ * size when transmitted over the air
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Returns local translation table size in bytes.
+ */
+static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
+{
+ uint16_t num_vlan = 0, tt_local_entries = 0;
+ struct batadv_softif_vlan *vlan;
+ int hdr_size;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ num_vlan++;
+ tt_local_entries += atomic_read(&vlan->tt.num_entries);
+ }
+ rcu_read_unlock();
+
+ /* header size of tvlv encapsulated tt response payload */
+ hdr_size = sizeof(struct batadv_unicast_tvlv_packet);
+ hdr_size += sizeof(struct batadv_tvlv_hdr);
+ hdr_size += sizeof(struct batadv_tvlv_tt_data);
+ hdr_size += num_vlan * sizeof(struct batadv_tvlv_tt_vlan_data);
+
+ return hdr_size + batadv_tt_len(tt_local_entries);
+}
+
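batadv_tt_local_add() below uses this value to decide whether announcing one more client would still fit into a full table response; schematically (a fragment mirroring the check added further down, not literal code):

	table_size = batadv_tt_local_table_transmit_size(bat_priv);
	if (table_size + batadv_tt_len(1) >
	    atomic_read(&bat_priv->packet_size_max))
		goto reject;	/* one more entry would not fit anymore */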
static int batadv_tt_local_init(struct batadv_priv *bat_priv)
{
if (bat_priv->tt.local_hash)
@@ -264,33 +458,51 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
const char *message)
{
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting global tt entry %pM: %s\n",
- tt_global->common.addr, message);
+ "Deleting global tt entry %pM (vid: %d): %s\n",
+ tt_global->common.addr,
+ BATADV_PRINT_VID(tt_global->common.vid), message);
batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
- batadv_choose_orig, tt_global->common.addr);
+ batadv_choose_tt, &tt_global->common);
batadv_tt_global_entry_free_ref(tt_global);
}
-void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
- int ifindex)
+/**
+ * batadv_tt_local_add - add a new client to the local table or update an
+ * existing client
+ * @soft_iface: netdev struct of the mesh interface
+ * @addr: the mac address of the client to add
+ * @vid: VLAN identifier
+ * @ifindex: index of the interface the client is connected to (useful to
+ * identify wireless clients)
+ *
+ * Returns true if the client was successfully added, false otherwise.
+ */
+bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+ unsigned short vid, int ifindex)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
struct batadv_tt_global_entry *tt_global;
+ struct net_device *in_dev = NULL;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
- int hash_added;
- bool roamed_back = false;
+ int hash_added, table_size, packet_size_max;
+ bool ret = false, roamed_back = false;
+ uint8_t remote_flags;
+
+ if (ifindex != BATADV_NULL_IFINDEX)
+ in_dev = dev_get_by_index(&init_net, ifindex);
- tt_local = batadv_tt_local_hash_find(bat_priv, addr);
- tt_global = batadv_tt_global_hash_find(bat_priv, addr);
+ tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
if (tt_local) {
tt_local->last_seen = jiffies;
if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Re-adding pending client %pM\n", addr);
+ "Re-adding pending client %pM (vid: %d)\n",
+ addr, BATADV_PRINT_VID(vid));
/* whatever the reason why the PENDING flag was set,
* this is a client which was enqueued to be removed in
* this orig_interval. Since it popped up again, the
@@ -302,8 +514,8 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Roaming client %pM came back to its original location\n",
- addr);
+ "Roaming client %pM (vid: %d) came back to its original location\n",
+ addr, BATADV_PRINT_VID(vid));
/* the ROAM flag is set because this client roamed away
* and the node got a roaming_advertisement message. Now
* that the client popped up again at its original
@@ -315,12 +527,24 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
goto check_roaming;
}
+ /* Ignore the client if we cannot send it in a full table response. */
+ table_size = batadv_tt_local_table_transmit_size(bat_priv);
+ table_size += batadv_tt_len(1);
+ packet_size_max = atomic_read(&bat_priv->packet_size_max);
+ if (table_size > packet_size_max) {
+ net_ratelimited_function(batadv_info, soft_iface,
+ "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n",
+ table_size, packet_size_max, addr);
+ goto out;
+ }
+
tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
if (!tt_local)
goto out;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Creating new local tt entry: %pM (ttvn: %d)\n", addr,
+ "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
+ addr, BATADV_PRINT_VID(vid),
(uint8_t)atomic_read(&bat_priv->tt.vn));
memcpy(tt_local->common.addr, addr, ETH_ALEN);
@@ -329,7 +553,8 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
* (consistency check)
*/
tt_local->common.flags = BATADV_TT_CLIENT_NEW;
- if (batadv_is_wifi_iface(ifindex))
+ tt_local->common.vid = vid;
+ if (batadv_is_wifi_netdev(in_dev))
tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
atomic_set(&tt_local->common.refcount, 2);
tt_local->last_seen = jiffies;
@@ -340,7 +565,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
- batadv_choose_orig, &tt_local->common,
+ batadv_choose_tt, &tt_local->common,
&tt_local->common.hash_entry);
if (unlikely(hash_added != 0)) {
@@ -362,6 +587,7 @@ check_roaming:
rcu_read_lock();
hlist_for_each_entry_rcu(orig_entry, head, list) {
batadv_send_roam_adv(bat_priv, tt_global->common.addr,
+ tt_global->common.vid,
orig_entry->orig_node);
}
rcu_read_unlock();
@@ -378,11 +604,174 @@ check_roaming:
}
}
+ /* store the current remote flags before altering them. This helps
+ * to understand whether the flags are changing or not
+ */
+ remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK;
+
+ if (batadv_is_wifi_netdev(in_dev))
+ tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
+ else
+ tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI;
+
+ /* if any "dynamic" flag has been modified, resend an ADD event for this
+ * entry so that all the nodes can get the new flags
+ */
+ if (remote_flags ^ (tt_local->common.flags & BATADV_TT_REMOTE_MASK))
+ batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS);
+
+ ret = true;
out:
+ if (in_dev)
+ dev_put(in_dev);
if (tt_local)
batadv_tt_local_entry_free_ref(tt_local);
if (tt_global)
batadv_tt_global_entry_free_ref(tt_global);
+ return ret;
+}
+
+/**
+ * batadv_tt_prepare_tvlv_global_data - prepare the TVLV TT header to send
+ * within a TT Response directed to another node
+ * @orig_node: originator for which the TT data has to be prepared
+ * @tt_data: uninitialised pointer to the address of the TVLV buffer
+ * @tt_change: uninitialised pointer to the address of the area where the TT
+ * changes can be stored
+ * @tt_len: pointer to the length to reserve for the tt_change. If -1, this
+ * function reserves the amount of space needed to send the entire global TT
+ * table. In case of success the value is updated with the real amount of
+ * reserved bytes.
+ *
+ * Allocate the needed amount of memory for the entire TT TVLV and write its
+ * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data
+ * objects, one per active VLAN served by the originator node.
+ *
+ * Returns the size of the allocated buffer or 0 in case of failure.
+ */
+static uint16_t
+batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
+ struct batadv_tvlv_tt_data **tt_data,
+ struct batadv_tvlv_tt_change **tt_change,
+ int32_t *tt_len)
+{
+ uint16_t num_vlan = 0, num_entries = 0, change_offset, tvlv_len;
+ struct batadv_tvlv_tt_vlan_data *tt_vlan;
+ struct batadv_orig_node_vlan *vlan;
+ uint8_t *tt_change_ptr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ num_vlan++;
+ num_entries += atomic_read(&vlan->tt.num_entries);
+ }
+
+ change_offset = sizeof(**tt_data);
+ change_offset += num_vlan * sizeof(*tt_vlan);
+
+ /* if tt_len is negative, allocate the space needed by the full table */
+ if (*tt_len < 0)
+ *tt_len = batadv_tt_len(num_entries);
+
+ tvlv_len = *tt_len;
+ tvlv_len += change_offset;
+
+ *tt_data = kmalloc(tvlv_len, GFP_ATOMIC);
+ if (!*tt_data) {
+ *tt_len = 0;
+ goto out;
+ }
+
+ (*tt_data)->flags = BATADV_NO_FLAGS;
+ (*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn);
+ (*tt_data)->num_vlan = htons(num_vlan);
+
+ tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ tt_vlan->vid = htons(vlan->vid);
+ tt_vlan->crc = htonl(vlan->tt.crc);
+
+ tt_vlan++;
+ }
+
+ tt_change_ptr = (uint8_t *)*tt_data + change_offset;
+ *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
+
+out:
+ rcu_read_unlock();
+ return tvlv_len;
+}
+
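The buffer produced by this function (and by its local twin below) is laid out as follows; change_offset marks where the tt_change area starts:

	                 +---------------------------+  <- *tt_data
	                 | batadv_tvlv_tt_data       |  flags, ttvn, num_vlan
	                 +---------------------------+
	                 | batadv_tvlv_tt_vlan_data  |  vid, crc
	                 | ... (num_vlan times)      |
	change_offset -> +---------------------------+  <- *tt_change
	                 | batadv_tvlv_tt_change     |
	                 | ... (*tt_len bytes)       |
	                 +---------------------------+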
+/**
+ * batadv_tt_prepare_tvlv_local_data - allocate and prepare the TT TVLV for this
+ * node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_data: uninitialised pointer to the address of the TVLV buffer
+ * @tt_change: uninitialised pointer to the address of the area where the TT
+ * changes can be stored
+ * @tt_len: pointer to the length to reserve for the tt_change. If -1, this
+ * function reserves the amount of space needed to send the entire local TT
+ * table. In case of success the value is updated with the real amount of
+ * reserved bytes.
+ *
+ * Allocate the needed amount of memory for the entire TT TVLV and write its
+ * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data
+ * objects, one per active VLAN.
+ *
+ * Returns the size of the allocated buffer or 0 in case of failure.
+ */
+static uint16_t
+batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_tt_data **tt_data,
+ struct batadv_tvlv_tt_change **tt_change,
+ int32_t *tt_len)
+{
+ struct batadv_tvlv_tt_vlan_data *tt_vlan;
+ struct batadv_softif_vlan *vlan;
+ uint16_t num_vlan = 0, num_entries = 0, tvlv_len;
+ uint8_t *tt_change_ptr;
+ int change_offset;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ num_vlan++;
+ num_entries += atomic_read(&vlan->tt.num_entries);
+ }
+
+ change_offset = sizeof(**tt_data);
+ change_offset += num_vlan * sizeof(*tt_vlan);
+
+ /* if tt_len is negative, allocate the space needed by the full table */
+ if (*tt_len < 0)
+ *tt_len = batadv_tt_len(num_entries);
+
+ tvlv_len = *tt_len;
+ tvlv_len += change_offset;
+
+ *tt_data = kmalloc(tvlv_len, GFP_ATOMIC);
+ if (!*tt_data) {
+ tvlv_len = 0;
+ goto out;
+ }
+
+ (*tt_data)->flags = BATADV_NO_FLAGS;
+ (*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn);
+ (*tt_data)->num_vlan = htons(num_vlan);
+
+ tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ tt_vlan->vid = htons(vlan->vid);
+ tt_vlan->crc = htonl(vlan->tt.crc);
+
+ tt_vlan++;
+ }
+
+ tt_change_ptr = (uint8_t *)*tt_data + change_offset;
+ *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
+
+out:
+ rcu_read_unlock();
+ return tvlv_len;
}
/**
@@ -395,10 +784,12 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
struct batadv_tt_change_node *entry, *safe;
struct batadv_tvlv_tt_data *tt_data;
struct batadv_tvlv_tt_change *tt_change;
- int tt_diff_len = 0, tt_change_len = 0;
+ int tt_diff_len, tt_change_len = 0;
int tt_diff_entries_num = 0, tt_diff_entries_count = 0;
+ uint16_t tvlv_len;
- tt_diff_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
+ tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes);
+ tt_diff_len = batadv_tt_len(tt_diff_entries_num);
/* if we have too many changes for one packet don't send any
* and wait for the tt table request which will be fragmented
@@ -406,24 +797,19 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
if (tt_diff_len > bat_priv->soft_iface->mtu)
tt_diff_len = 0;
- tt_data = kzalloc(sizeof(*tt_data) + tt_diff_len, GFP_ATOMIC);
- if (!tt_data)
+ tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data,
+ &tt_change, &tt_diff_len);
+ if (!tvlv_len)
return;
tt_data->flags = BATADV_TT_OGM_DIFF;
- tt_data->ttvn = atomic_read(&bat_priv->tt.vn);
- tt_data->crc = htonl(bat_priv->tt.local_crc);
if (tt_diff_len == 0)
goto container_register;
- tt_diff_entries_num = batadv_tt_entries(tt_diff_len);
-
spin_lock_bh(&bat_priv->tt.changes_list_lock);
atomic_set(&bat_priv->tt.local_changes, 0);
- tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1);
-
list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
if (tt_diff_entries_count < tt_diff_entries_num) {
@@ -459,7 +845,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
container_register:
batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data,
- sizeof(*tt_data) + tt_change_len);
+ tvlv_len);
kfree(tt_data);
}
@@ -471,7 +857,9 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
struct batadv_hard_iface *primary_if;
+ struct batadv_softif_vlan *vlan;
struct hlist_head *head;
+ unsigned short vid;
uint32_t i;
int last_seen_secs;
int last_seen_msecs;
@@ -484,11 +872,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq,
- "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.8x):\n",
- net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn),
- bat_priv->tt.local_crc);
- seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags",
- "Last seen");
+ "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
+ net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn));
+ seq_printf(seq, " %-13s %s %-7s %-9s (%-10s)\n", "Client", "VID",
+ "Flags", "Last seen", "CRC");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -499,6 +886,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
tt_local = container_of(tt_common_entry,
struct batadv_tt_local_entry,
common);
+ vid = tt_common_entry->vid;
last_seen_jiffies = jiffies - tt_local->last_seen;
last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
last_seen_secs = last_seen_msecs / 1000;
@@ -506,8 +894,17 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
no_purge = tt_common_entry->flags & np_flag;
- seq_printf(seq, " * %pM [%c%c%c%c%c] %3u.%03u\n",
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan) {
+ seq_printf(seq, "Cannot retrieve VLAN %d\n",
+ BATADV_PRINT_VID(vid));
+ continue;
+ }
+
+ seq_printf(seq,
+ " * %pM %4i [%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
+ BATADV_PRINT_VID(tt_common_entry->vid),
(tt_common_entry->flags &
BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
no_purge ? 'P' : '.',
@@ -518,7 +915,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
(tt_common_entry->flags &
BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
no_purge ? 0 : last_seen_secs,
- no_purge ? 0 : last_seen_msecs);
+ no_purge ? 0 : last_seen_msecs,
+ vlan->tt.crc);
+
+ batadv_softif_vlan_free_ref(vlan);
}
rcu_read_unlock();
}
@@ -542,27 +942,29 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
tt_local_entry->common.flags |= BATADV_TT_CLIENT_PENDING;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Local tt entry (%pM) pending to be removed: %s\n",
- tt_local_entry->common.addr, message);
+ "Local tt entry (%pM, vid: %d) pending to be removed: %s\n",
+ tt_local_entry->common.addr,
+ BATADV_PRINT_VID(tt_local_entry->common.vid), message);
}
/**
* batadv_tt_local_remove - logically remove an entry from the local table
* @bat_priv: the bat priv with all the soft interface information
* @addr: the MAC address of the client to remove
+ * @vid: VLAN identifier
* @message: message to append to the log on deletion
* @roaming: true if the deletion is due to a roaming event
*
* Returns the flags assigned to the local entry before being deleted
*/
uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
- const uint8_t *addr, const char *message,
- bool roaming)
+ const uint8_t *addr, unsigned short vid,
+ const char *message, bool roaming)
{
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
goto out;
@@ -598,8 +1000,16 @@ out:
return curr_flags;
}
+/**
+ * batadv_tt_local_purge_list - purge inactive tt local entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @head: pointer to the list containing the local tt entries
+ * @timeout: parameter deciding whether a given tt local entry is considered
+ * inactive or not
+ */
static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
- struct hlist_head *head)
+ struct hlist_head *head,
+ int timeout)
{
struct batadv_tt_local_entry *tt_local_entry;
struct batadv_tt_common_entry *tt_common_entry;
@@ -617,8 +1027,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
if (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)
continue;
- if (!batadv_has_timed_out(tt_local_entry->last_seen,
- BATADV_TT_LOCAL_TIMEOUT))
+ if (!batadv_has_timed_out(tt_local_entry->last_seen, timeout))
continue;
batadv_tt_local_set_pending(bat_priv, tt_local_entry,
@@ -626,7 +1035,14 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
}
}
-static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
+/**
+ * batadv_tt_local_purge - purge inactive tt local entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @timeout: parameter deciding whether a given tt local entry is considered
+ * inactive or not
+ */
+static void batadv_tt_local_purge(struct batadv_priv *bat_priv,
+ int timeout)
{
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct hlist_head *head;
@@ -638,7 +1054,7 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- batadv_tt_local_purge_list(bat_priv, head);
+ batadv_tt_local_purge_list(bat_priv, head, timeout);
spin_unlock_bh(list_lock);
}
}
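Existing callers keep the previous behaviour by passing the constant that used to be hard-coded, e.g.:

	batadv_tt_local_purge(bat_priv, BATADV_TT_LOCAL_TIMEOUT);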
@@ -779,7 +1195,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
INIT_HLIST_NODE(&orig_entry->list);
atomic_inc(&orig_node->refcount);
- atomic_inc(&orig_node->tt_size);
+ batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
atomic_set(&orig_entry->refcount, 2);
@@ -798,6 +1214,7 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: the originator announcing the client
* @tt_addr: the mac address of the non-mesh client
+ * @vid: VLAN identifier
* @flags: TT flags that have to be set for this non-mesh client
* @ttvn: the tt version number ever announcing this non-mesh client
*
@@ -813,7 +1230,8 @@ out:
*/
static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *tt_addr, uint16_t flags,
+ const unsigned char *tt_addr,
+ unsigned short vid, uint16_t flags,
uint8_t ttvn)
{
struct batadv_tt_global_entry *tt_global_entry;
@@ -823,8 +1241,12 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_tt_common_entry *common;
uint16_t local_flags;
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr);
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr);
+ /* ignore global entries from backbone nodes */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
+ return true;
+
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr, vid);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr, vid);
/* if the node already has a local client for this entry, it has to wait
* for a roaming advertisement instead of manually messing up the global
@@ -841,6 +1263,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
common = &tt_global_entry->common;
memcpy(common->addr, tt_addr, ETH_ALEN);
+ common->vid = vid;
common->flags = flags;
tt_global_entry->roam_at = 0;
@@ -858,7 +1281,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
hash_added = batadv_hash_add(bat_priv->tt.global_hash,
batadv_compare_tt,
- batadv_choose_orig, common,
+ batadv_choose_tt, common,
&common->hash_entry);
if (unlikely(hash_added != 0)) {
@@ -917,14 +1340,15 @@ add_orig_entry:
batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Creating new global tt entry: %pM (via %pM)\n",
- common->addr, orig_node->orig);
+ "Creating new global tt entry: %pM (vid: %d, via %pM)\n",
+ common->addr, BATADV_PRINT_VID(common->vid),
+ orig_node->orig);
ret = true;
out_remove:
/* remove address from local hash if present */
- local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
+ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, vid,
"global tt received",
flags & BATADV_TT_CLIENT_ROAM);
tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
@@ -944,18 +1368,20 @@ out:
}
/* batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be analyzed
*
 * This function assumes the caller holds rcu_read_lock().
* Returns best originator list entry or NULL on errors.
*/
static struct batadv_tt_orig_list_entry *
-batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
+batadv_transtable_best_orig(struct batadv_priv *bat_priv,
+ struct batadv_tt_global_entry *tt_global_entry)
{
- struct batadv_neigh_node *router = NULL;
+ struct batadv_neigh_node *router, *best_router = NULL;
+ struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
- int best_tq = 0;
head = &tt_global_entry->orig_list;
hlist_for_each_entry_rcu(orig_entry, head, list) {
@@ -963,64 +1389,104 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
if (!router)
continue;
- if (router->tq_avg > best_tq) {
- best_entry = orig_entry;
- best_tq = router->tq_avg;
+ if (best_router &&
+ bao->bat_neigh_cmp(router, best_router) <= 0) {
+ batadv_neigh_node_free_ref(router);
+ continue;
}
- batadv_neigh_node_free_ref(router);
+ /* release the refcount for the "old" best */
+ if (best_router)
+ batadv_neigh_node_free_ref(best_router);
+
+ best_entry = orig_entry;
+ best_router = router;
}
+ if (best_router)
+ batadv_neigh_node_free_ref(best_router);
+
return best_entry;
}
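The hard-coded TQ comparison is replaced by the routing algorithm's bat_neigh_cmp hook, whose contract here is to return a value greater than zero when the first neighbour is the better one. A comparator equivalent to the removed logic might look like this (a sketch; tq_avg as in the deleted lines above):

	static int example_neigh_cmp(struct batadv_neigh_node *neigh1,
				     struct batadv_neigh_node *neigh2)
	{
		/* positive: neigh1 is better, negative: neigh2, zero: tie */
		return (int)neigh1->tq_avg - (int)neigh2->tq_avg;
	}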
/* batadv_tt_global_print_entry - print all orig nodes who announce the address
* for this global entry
+ * @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be printed
* @seq: debugfs table seq_file struct
*
 * This function assumes the caller holds rcu_read_lock().
*/
static void
-batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
+batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
+ struct batadv_tt_global_entry *tt_global_entry,
struct seq_file *seq)
{
- struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
struct batadv_tt_common_entry *tt_common_entry;
- uint16_t flags;
+ struct batadv_orig_node_vlan *vlan;
+ struct hlist_head *head;
uint8_t last_ttvn;
+ uint16_t flags;
tt_common_entry = &tt_global_entry->common;
flags = tt_common_entry->flags;
- best_entry = batadv_transtable_best_orig(tt_global_entry);
+ best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry);
if (best_entry) {
+ vlan = batadv_orig_node_vlan_get(best_entry->orig_node,
+ tt_common_entry->vid);
+ if (!vlan) {
+ seq_printf(seq,
+ " * Cannot retrieve VLAN %d for originator %pM\n",
+ BATADV_PRINT_VID(tt_common_entry->vid),
+ best_entry->orig_node->orig);
+ goto print_list;
+ }
+
last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn);
seq_printf(seq,
- " %c %pM (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n",
+ " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n",
'*', tt_global_entry->common.addr,
+ BATADV_PRINT_VID(tt_global_entry->common.vid),
best_entry->ttvn, best_entry->orig_node->orig,
- last_ttvn, best_entry->orig_node->tt_crc,
+ last_ttvn, vlan->tt.crc,
(flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
(flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
(flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+
+ batadv_orig_node_vlan_free_ref(vlan);
}
+print_list:
head = &tt_global_entry->orig_list;
hlist_for_each_entry_rcu(orig_entry, head, list) {
if (best_entry == orig_entry)
continue;
+ vlan = batadv_orig_node_vlan_get(orig_entry->orig_node,
+ tt_common_entry->vid);
+ if (!vlan) {
+ seq_printf(seq,
+ " + Cannot retrieve VLAN %d for originator %pM\n",
+ BATADV_PRINT_VID(tt_common_entry->vid),
+ orig_entry->orig_node->orig);
+ continue;
+ }
+
last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn);
- seq_printf(seq, " %c %pM (%3u) via %pM (%3u) [%c%c%c]\n",
+ seq_printf(seq,
+ " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n",
'+', tt_global_entry->common.addr,
+ BATADV_PRINT_VID(tt_global_entry->common.vid),
orig_entry->ttvn, orig_entry->orig_node->orig,
- last_ttvn,
+ last_ttvn, vlan->tt.crc,
(flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
(flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
(flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
+
+ batadv_orig_node_vlan_free_ref(vlan);
}
}
@@ -1042,9 +1508,9 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Globally announced TT entries received via the mesh %s\n",
net_dev->name);
- seq_printf(seq, " %-13s %s %-15s %s (%-10s) %s\n",
- "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC",
- "Flags");
+ seq_printf(seq, " %-13s %s %s %-15s %s (%-10s) %s\n",
+ "Client", "VID", "(TTVN)", "Originator", "(Curr TTVN)",
+ "CRC", "Flags");
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1055,7 +1521,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
tt_global = container_of(tt_common_entry,
struct batadv_tt_global_entry,
common);
- batadv_tt_global_print_entry(tt_global, seq);
+ batadv_tt_global_print_entry(bat_priv, tt_global, seq);
}
rcu_read_unlock();
}
@@ -1077,6 +1543,8 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
head = &tt_global_entry->orig_list;
hlist_for_each_entry_safe(orig_entry, safe, head, list) {
hlist_del_rcu(&orig_entry->list);
+ batadv_tt_global_size_dec(orig_entry->orig_node,
+ tt_global_entry->common.vid);
batadv_tt_orig_list_entry_free_ref(orig_entry);
}
spin_unlock_bh(&tt_global_entry->list_lock);
@@ -1091,16 +1559,21 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv,
struct hlist_head *head;
struct hlist_node *safe;
struct batadv_tt_orig_list_entry *orig_entry;
+ unsigned short vid;
spin_lock_bh(&tt_global_entry->list_lock);
head = &tt_global_entry->orig_list;
hlist_for_each_entry_safe(orig_entry, safe, head, list) {
if (orig_entry->orig_node == orig_node) {
+ vid = tt_global_entry->common.vid;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting %pM from global tt entry %pM: %s\n",
+ "Deleting %pM from global tt entry %pM (vid: %d): %s\n",
orig_node->orig,
- tt_global_entry->common.addr, message);
+ tt_global_entry->common.addr,
+ BATADV_PRINT_VID(vid), message);
hlist_del_rcu(&orig_entry->list);
+ batadv_tt_global_size_dec(orig_node,
+ tt_global_entry->common.vid);
batadv_tt_orig_list_entry_free_ref(orig_entry);
}
}
@@ -1147,17 +1620,25 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
orig_node, message);
}
-
-
+/**
+ * batadv_tt_global_del - remove a client from the global table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: an originator serving this client
+ * @addr: the mac address of the client
+ * @vid: VLAN identifier
+ * @message: a message explaining the reason for deleting the client to print
+ * for debugging purpose
+ * @roaming: true if the deletion has been triggered by a roaming event
+ */
static void batadv_tt_global_del(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *addr,
+ const unsigned char *addr, unsigned short vid,
const char *message, bool roaming)
{
struct batadv_tt_global_entry *tt_global_entry;
struct batadv_tt_local_entry *local_entry = NULL;
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid);
if (!tt_global_entry)
goto out;
@@ -1186,7 +1667,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
* the global entry, since it is useless now.
*/
local_entry = batadv_tt_local_hash_find(bat_priv,
- tt_global_entry->common.addr);
+ tt_global_entry->common.addr,
+ vid);
if (local_entry) {
/* local entry exists, case 2: client roamed to us. */
batadv_tt_global_del_orig_list(tt_global_entry);
@@ -1204,8 +1686,18 @@ out:
batadv_tt_local_entry_free_ref(local_entry);
}
+/**
+ * batadv_tt_global_del_orig - remove all the TT global entries belonging to the
+ * given originator and matching the provided vid
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the originator owning the entries to remove
+ * @match_vid: the VLAN identifier to match. If negative, all the entries will
+ * be removed
+ * @message: debug message to print as "reason"
+ */
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
+ int32_t match_vid,
const char *message)
{
struct batadv_tt_global_entry *tt_global;
@@ -1215,6 +1707,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct hlist_node *safe;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
+ unsigned short vid;
if (!hash)
return;
@@ -1226,6 +1719,10 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
spin_lock_bh(list_lock);
hlist_for_each_entry_safe(tt_common_entry, safe,
head, hash_entry) {
+ /* remove only matching entries */
+ if (match_vid >= 0 && tt_common_entry->vid != match_vid)
+ continue;
+
tt_global = container_of(tt_common_entry,
struct batadv_tt_global_entry,
common);
@@ -1234,9 +1731,11 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
orig_node, message);
if (hlist_empty(&tt_global->orig_list)) {
+ vid = tt_global->common.vid;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting global tt entry %pM: %s\n",
- tt_global->common.addr, message);
+ "Deleting global tt entry %pM (vid: %d): %s\n",
+ tt_global->common.addr,
+ BATADV_PRINT_VID(vid), message);
hlist_del_rcu(&tt_common_entry->hash_entry);
batadv_tt_global_entry_free_ref(tt_global);
}
@@ -1294,8 +1793,10 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
continue;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting global tt entry (%pM): %s\n",
- tt_global->common.addr, msg);
+ "Deleting global tt entry %pM (vid: %d): %s\n",
+ tt_global->common.addr,
+ BATADV_PRINT_VID(tt_global->common.vid),
+ msg);
hlist_del_rcu(&tt_common->hash_entry);
@@ -1354,23 +1855,49 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
return ret;
}
+/**
+ * batadv_transtable_search - get the mesh destination for a given client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: mac address of the source client
+ * @addr: mac address of the destination client
+ * @vid: VLAN identifier
+ *
+ * Returns a pointer to the originator that was selected as destination in the
+ * mesh for contacting the client 'addr', or NULL otherwise.
+ * In case of multiple originators serving the same client, the function returns
+ * the best one (best in terms of metric towards the destination node).
+ *
+ * If the two clients are AP isolated the function returns NULL.
+ */
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
const uint8_t *src,
- const uint8_t *addr)
+ const uint8_t *addr,
+ unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry = NULL;
struct batadv_tt_global_entry *tt_global_entry = NULL;
struct batadv_orig_node *orig_node = NULL;
struct batadv_tt_orig_list_entry *best_entry;
+ bool ap_isolation_enabled = false;
+ struct batadv_softif_vlan *vlan;
+
+ /* if the AP isolation is requested on a VLAN, then check for its
+ * setting in the proper VLAN private data structure
+ */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (vlan) {
+ ap_isolation_enabled = atomic_read(&vlan->ap_isolation);
+ batadv_softif_vlan_free_ref(vlan);
+ }
- if (src && atomic_read(&bat_priv->ap_isolation)) {
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, src);
+ if (src && ap_isolation_enabled) {
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, src, vid);
if (!tt_local_entry ||
(tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING))
goto out;
}
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid);
if (!tt_global_entry)
goto out;
@@ -1382,7 +1909,7 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
goto out;
rcu_read_lock();
- best_entry = batadv_transtable_best_orig(tt_global_entry);
+ best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry);
/* found anything? */
if (best_entry)
orig_node = best_entry->orig_node;
@@ -1403,15 +1930,36 @@ out:
* batadv_tt_global_crc - calculates the checksum of the global table belonging
* to the given orig_node
* @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: originator for which the CRC should be computed
+ * @vid: VLAN identifier for which the CRC32 has to be computed
+ *
+ * This function computes the checksum for the global table corresponding to a
+ * specific originator. In particular, the checksum is computed as follows: For
+ * each client connected to the originator the CRC32C of the MAC address and the
+ * VID is computed and then all the CRC32Cs of the various clients are xor'ed
+ * together.
+ *
+ * The idea behind this is that CRC32C should be used as much as possible in
+ * order to produce a unique hash of the table, but since the order in which
+ * the entries are fed to the CRC32C function affects the result and since
+ * every node in the network probably sorts the clients differently, the hash
+ * cannot be computed directly over the entire table. Hence the CRC32C is
+ * computed over each single client entry only, while all the per-entry
+ * results are then xor'ed together, because the XOR operation combines them
+ * in an order-independent way while keeping the noise as low as possible.
+ *
+ * Returns the checksum of the global table of a given originator.
*/
static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node)
+ struct batadv_orig_node *orig_node,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
- uint32_t i, crc = 0;
+ uint32_t i, crc_tmp, crc = 0;
+ uint8_t flags;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1421,6 +1969,12 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
tt_global = container_of(tt_common,
struct batadv_tt_global_entry,
common);
+ /* compute the CRC only for entries belonging to the
+ * VLAN identified by the vid passed as parameter
+ */
+ if (tt_common->vid != vid)
+ continue;
+
/* Roaming clients are in the global table for
* consistency only. They must not be
* taken into account while computing the
@@ -1442,7 +1996,16 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
orig_node))
continue;
- crc ^= crc32c(0, tt_common->addr, ETH_ALEN);
+ crc_tmp = crc32c(0, &tt_common->vid,
+ sizeof(tt_common->vid));
+
+ /* compute the CRC on flags that have to be kept in sync
+ * among nodes
+ */
+ flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+ crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
+
+ crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
}
rcu_read_unlock();
}
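The order-independence property described above is easy to demonstrate outside the kernel: hash (vid, synced flags, MAC) per client with CRC32C and XOR the per-client results, and any permutation of the client list yields the same table checksum. The sketch below is a hedged illustration: the bitwise crc32c() is a userspace stand-in whose seeding is not bit-for-bit identical to the kernel's crc32c(), and the client layout is simplified, but the chaining and XOR combination follow the code above.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* bitwise CRC32C (Castagnoli, reflected poly 0x82F63B78); a stand-in for
 * the kernel's crc32c(); the seeding differs, the chaining scheme does not
 */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int i;

	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return ~crc;
}

/* simplified client entry: vid + already-masked sync flags + MAC */
struct client {
	uint16_t vid;
	uint8_t flags;
	uint8_t mac[6];
};

/* per-entry CRC32C over (vid, flags, mac), xor'ed together so that the
 * iteration order of the table does not change the result
 */
static uint32_t table_crc(const struct client *c, size_t n)
{
	uint32_t crc = 0, tmp;
	size_t i;

	for (i = 0; i < n; i++) {
		tmp = crc32c(0, &c[i].vid, sizeof(c[i].vid));
		tmp = crc32c(tmp, &c[i].flags, sizeof(c[i].flags));
		crc ^= crc32c(tmp, c[i].mac, sizeof(c[i].mac));
	}
	return crc;
}

int main(void)
{
	struct client a[2] = {
		{ 1, 0x10, { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x01 } },
		{ 1, 0x10, { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x02 } },
	};
	struct client b[2] = { a[1], a[0] };	/* same set, other order */

	/* both orderings produce the same table checksum */
	printf("%#.8x == %#.8x\n", table_crc(a, 2), table_crc(b, 2));
	return 0;
}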
@@ -1453,26 +2016,49 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
/**
* batadv_tt_local_crc - calculates the checksum of the local table
* @bat_priv: the bat priv with all the soft interface information
+ * @vid: VLAN identifier for which the CRC32 has to be computed
+ *
+ * For details about the computation, please refer to the documentation for
+ * batadv_tt_global_crc().
+ *
+ * Returns the checksum of the local table
*/
-static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
+static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct hlist_head *head;
- uint32_t i, crc = 0;
+ uint32_t i, crc_tmp, crc = 0;
+ uint8_t flags;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
hlist_for_each_entry_rcu(tt_common, head, hash_entry) {
+ /* compute the CRC only for entries belonging to the
+ * VLAN identified by vid
+ */
+ if (tt_common->vid != vid)
+ continue;
+
/* not yet committed clients must not be taken into
* account while computing the CRC
*/
if (tt_common->flags & BATADV_TT_CLIENT_NEW)
continue;
- crc ^= crc32c(0, tt_common->addr, ETH_ALEN);
+ crc_tmp = crc32c(0, &tt_common->vid,
+ sizeof(tt_common->vid));
+
+ /* compute the CRC on flags that have to be kept in sync
+ * among nodes
+ */
+ flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+ crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
+
+ crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
}
rcu_read_unlock();
}
@@ -1595,44 +2181,29 @@ static int batadv_tt_global_valid(const void *entry_ptr,
}
/**
- * batadv_tt_tvlv_generate - creates tvlv tt data buffer to fill it with the
- * tt entries from the specified tt hash
+ * batadv_tt_tvlv_generate - fill the tvlv buffer with the tt entries from the
+ * specified tt hash
* @bat_priv: the bat priv with all the soft interface information
* @hash: hash table containing the tt entries
* @tt_len: expected tvlv tt data buffer length in number of bytes
+ * @tvlv_buff: pointer to the buffer to fill with the TT data
* @valid_cb: function to filter tt change entries
* @cb_data: data passed to the filter function as argument
- *
- * Returns pointer to allocated tvlv tt data buffer if operation was
- * successful or NULL otherwise.
*/
-static struct batadv_tvlv_tt_data *
-batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
- struct batadv_hashtable *hash, uint16_t tt_len,
- int (*valid_cb)(const void *, const void *),
- void *cb_data)
+static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
+ struct batadv_hashtable *hash,
+ void *tvlv_buff, uint16_t tt_len,
+ int (*valid_cb)(const void *, const void *),
+ void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
- struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tvlv_tt_change *tt_change;
struct hlist_head *head;
uint16_t tt_tot, tt_num_entries = 0;
- ssize_t tvlv_tt_size = sizeof(struct batadv_tvlv_tt_data);
uint32_t i;
- if (tvlv_tt_size + tt_len > bat_priv->soft_iface->mtu) {
- tt_len = bat_priv->soft_iface->mtu - tvlv_tt_size;
- tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change);
- }
-
tt_tot = batadv_tt_entries(tt_len);
-
- tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
- GFP_ATOMIC);
- if (!tvlv_tt_data)
- goto out;
-
- tt_change = (struct batadv_tvlv_tt_change *)(tvlv_tt_data + 1);
+ tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
rcu_read_lock();
for (i = 0; i < hash->size; i++) {
@@ -1649,6 +2220,7 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
memcpy(tt_change->addr, tt_common_entry->addr,
ETH_ALEN);
tt_change->flags = tt_common_entry->flags;
+ tt_change->vid = htons(tt_common_entry->vid);
tt_change->reserved = 0;
tt_num_entries++;
@@ -1656,9 +2228,91 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
}
}
rcu_read_unlock();
+}
-out:
- return tvlv_tt_data;
+/**
+ * batadv_tt_global_check_crc - check if all the CRCs are correct
+ * @orig_node: originator for which the CRCs have to be checked
+ * @tt_vlan: pointer to the first tvlv VLAN entry
+ * @num_vlan: number of tvlv VLAN entries
+ *
+ * Returns true if all the received CRCs match the locally stored ones, false
+ * otherwise
+ */
+static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
+ struct batadv_tvlv_tt_vlan_data *tt_vlan,
+ uint16_t num_vlan)
+{
+ struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
+ struct batadv_orig_node_vlan *vlan;
+ int i;
+
+ /* check if each received CRC matches the locally stored one */
+ for (i = 0; i < num_vlan; i++) {
+ tt_vlan_tmp = tt_vlan + i;
+
+ /* if orig_node is a backbone node for this VLAN, don't check
+ * the CRC as we ignore all the global entries over it
+ */
+ if (batadv_bla_is_backbone_gw_orig(orig_node->bat_priv,
+ orig_node->orig,
+ ntohs(tt_vlan_tmp->vid)))
+ continue;
+
+ vlan = batadv_orig_node_vlan_get(orig_node,
+ ntohs(tt_vlan_tmp->vid));
+ if (!vlan)
+ return false;
+
+ if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * batadv_tt_local_update_crc - update all the local CRCs
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv)
+{
+ struct batadv_softif_vlan *vlan;
+
+ /* recompute the local CRC for each VLAN */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+ vlan->tt.crc = batadv_tt_local_crc(bat_priv, vlan->vid);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * batadv_tt_global_update_crc - update all the global CRCs for this orig_node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the orig_node for which the CRCs have to be updated
+ */
+static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node)
+{
+ struct batadv_orig_node_vlan *vlan;
+ uint32_t crc;
+
+ /* recompute the global CRC for each VLAN */
+ rcu_read_lock();
+ list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ /* if orig_node is a backbone node for this VLAN, don't compute
+ * the CRC as we ignore all the global entries over it
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig,
+ vlan->vid))
+ continue;
+
+ crc = batadv_tt_global_crc(bat_priv, orig_node, vlan->vid);
+ vlan->tt.crc = crc;
+ }
+ rcu_read_unlock();
}
/**
@@ -1666,19 +2320,23 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @dst_orig_node: the destination of the message
* @ttvn: the version number that the source of the message is looking for
- * @tt_crc: the CRC associated with the version number
+ * @tt_vlan: pointer to the first tvlv VLAN object to request
+ * @num_vlan: number of tvlv VLAN entries
* @full_table: if true, ask for the entire translation table, otherwise only
* for the last TT diff
*/
static int batadv_send_tt_request(struct batadv_priv *bat_priv,
struct batadv_orig_node *dst_orig_node,
- uint8_t ttvn, uint32_t tt_crc,
- bool full_table)
+ uint8_t ttvn,
+ struct batadv_tvlv_tt_vlan_data *tt_vlan,
+ uint16_t num_vlan, bool full_table)
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
- struct batadv_hard_iface *primary_if;
struct batadv_tt_req_node *tt_req_node = NULL;
+ struct batadv_tvlv_tt_vlan_data *tt_vlan_req;
+ struct batadv_hard_iface *primary_if;
bool ret = false;
+ int i, size;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
@@ -1691,13 +2349,26 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data), GFP_ATOMIC);
+ size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan;
+ tvlv_tt_data = kzalloc(size, GFP_ATOMIC);
if (!tvlv_tt_data)
goto out;
tvlv_tt_data->flags = BATADV_TT_REQUEST;
tvlv_tt_data->ttvn = ttvn;
- tvlv_tt_data->crc = htonl(tt_crc);
+ tvlv_tt_data->num_vlan = htons(num_vlan);
+
+ /* send all the CRCs within the request. This is needed by intermediate
+ * nodes to ensure they have the correct table before replying
+ */
+ tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1);
+ for (i = 0; i < num_vlan; i++) {
+ tt_vlan_req->vid = tt_vlan->vid;
+ tt_vlan_req->crc = tt_vlan->crc;
+
+ tt_vlan_req++;
+ tt_vlan++;
+ }
if (full_table)
tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
@@ -1708,7 +2379,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX);
batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
dst_orig_node->orig, BATADV_TVLV_TT, 1,
- tvlv_tt_data, sizeof(*tvlv_tt_data));
+ tvlv_tt_data, size);
ret = true;
out:
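For reference, the request built above is nothing more than the fixed tt_data header followed by num_vlan (crc, vid) records. A hedged userspace sketch of that packing follows; the two structs only loosely mirror the batman-adv wire format and are illustrative, not copied from packet.h.

#include <arpa/inet.h>	/* htons(), htonl() */
#include <stdint.h>
#include <stdlib.h>

/* loose, illustrative layouts; the real ones live in packet.h */
struct tt_data_hdr {
	uint8_t flags;
	uint8_t ttvn;
	uint16_t num_vlan;	/* network byte order */
} __attribute__((packed));

struct tt_vlan_rec {
	uint32_t crc;		/* network byte order */
	uint16_t vid;		/* network byte order */
	uint16_t reserved;
} __attribute__((packed));

/* build a TT request: fixed header plus one CRC record per VLAN, so that
 * intermediate nodes can verify their table before answering
 */
static void *build_tt_request(uint8_t ttvn, const struct tt_vlan_rec *vlans,
			      uint16_t num_vlan, size_t *len)
{
	struct tt_data_hdr *hdr;
	struct tt_vlan_rec *rec;
	uint16_t i;

	*len = sizeof(*hdr) + sizeof(*rec) * num_vlan;
	hdr = calloc(1, *len);
	if (!hdr)
		return NULL;

	hdr->ttvn = ttvn;
	hdr->num_vlan = htons(num_vlan);

	/* the VLAN records start right after the header, as in the
	 * (tvlv_tt_data + 1) pointer arithmetic above
	 */
	rec = (struct tt_vlan_rec *)(hdr + 1);
	for (i = 0; i < num_vlan; i++)
		rec[i] = vlans[i];	/* already in network byte order */

	return hdr;
}

int main(void)
{
	struct tt_vlan_rec v = { htonl(0xdeadbeef), htons(1), 0 };
	size_t len;
	void *req = build_tt_request(5, &v, 1, &len);
	int ok = req != NULL;

	free(req);
	return ok ? 0 : 1;
}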
@@ -1740,10 +2411,13 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
{
struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
+ struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
- uint8_t orig_ttvn, req_ttvn;
- uint16_t tt_len;
+ struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool ret = false, full_table;
+ uint8_t orig_ttvn, req_ttvn;
+ uint16_t tvlv_len;
+ int32_t tt_len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n",
@@ -1762,9 +2436,11 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_data->ttvn;
+ tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
/* this node doesn't have the requested data */
if (orig_ttvn != req_ttvn ||
- tt_data->crc != htonl(req_dst_orig_node->tt_crc))
+ !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan,
+ ntohs(tt_data->num_vlan)))
goto out;
/* If the full table has been explicitly requested */
@@ -1781,26 +2457,43 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
spin_lock_bh(&req_dst_orig_node->tt_buff_lock);
tt_len = req_dst_orig_node->tt_buff_len;
- tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
- GFP_ATOMIC);
- if (!tvlv_tt_data)
+ tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node,
+ &tvlv_tt_data,
+ &tt_change,
+ &tt_len);
+ if (!tt_len)
goto unlock;
/* Copy the last orig_node's OGM buffer */
- memcpy(tvlv_tt_data + 1, req_dst_orig_node->tt_buff,
+ memcpy(tt_change, req_dst_orig_node->tt_buff,
req_dst_orig_node->tt_buff_len);
spin_unlock_bh(&req_dst_orig_node->tt_buff_lock);
} else {
- tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size);
- tt_len = batadv_tt_len(tt_len);
-
- tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv,
- bat_priv->tt.global_hash,
- tt_len,
- batadv_tt_global_valid,
- req_dst_orig_node);
- if (!tvlv_tt_data)
+ /* allocate the tvlv, put the tt_data and all the tt_vlan_data
+ * in the initial part
+ */
+ tt_len = -1;
+ tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node,
+ &tvlv_tt_data,
+ &tt_change,
+ &tt_len);
+ if (!tt_len)
goto out;
+
+ /* fill the rest of the tvlv with the real TT entries */
+ batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash,
+ tt_change, tt_len,
+ batadv_tt_global_valid,
+ req_dst_orig_node);
+ }
+
+ /* Don't send the response if it exceeds the maximum fragmented packet size. */
+ tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len;
+ if (tt_len > atomic_read(&bat_priv->packet_size_max)) {
+ net_ratelimited_function(batadv_info, bat_priv->soft_iface,
+ "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n",
+ res_dst_orig_node->orig);
+ goto out;
}
tvlv_tt_data->flags = BATADV_TT_RESPONSE;
@@ -1817,8 +2510,8 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
batadv_tvlv_unicast_send(bat_priv, req_dst_orig_node->orig,
- req_src, BATADV_TVLV_TT, 1,
- tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len);
+ req_src, BATADV_TVLV_TT, 1, tvlv_tt_data,
+ tvlv_len);
ret = true;
goto out;
@@ -1849,17 +2542,20 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
uint8_t *req_src)
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
- struct batadv_orig_node *orig_node;
struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_tvlv_tt_change *tt_change;
+ struct batadv_orig_node *orig_node;
uint8_t my_ttvn, req_ttvn;
+ uint16_t tvlv_len;
bool full_table;
- uint16_t tt_len;
+ int32_t tt_len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n",
req_src, tt_data->ttvn,
(tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ spin_lock_bh(&bat_priv->tt.commit_lock);
my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
req_ttvn = tt_data->ttvn;
@@ -1886,29 +2582,37 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
*/
if (!full_table) {
spin_lock_bh(&bat_priv->tt.last_changeset_lock);
- tt_len = bat_priv->tt.last_changeset_len;
- tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
- GFP_ATOMIC);
- if (!tvlv_tt_data)
+ tt_len = bat_priv->tt.last_changeset_len;
+ tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv,
+ &tvlv_tt_data,
+ &tt_change,
+ &tt_len);
+ if (!tt_len)
goto unlock;
/* Copy the last orig_node's OGM buffer */
- memcpy(tvlv_tt_data + 1, bat_priv->tt.last_changeset,
+ memcpy(tt_change, bat_priv->tt.last_changeset,
bat_priv->tt.last_changeset_len);
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
} else {
- tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num);
- tt_len = batadv_tt_len(tt_len);
req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
- tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv,
- bat_priv->tt.local_hash,
- tt_len,
- batadv_tt_local_valid,
- NULL);
- if (!tvlv_tt_data)
+ /* allocate the tvlv, put the tt_data and all the tt_vlan_data
+ * in the initial part
+ */
+ tt_len = -1;
+ tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv,
+ &tvlv_tt_data,
+ &tt_change,
+ &tt_len);
+ if (!tt_len)
goto out;
+
+ /* fill the rest of the tvlv with the real TT entries */
+ batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash,
+ tt_change, tt_len,
+ batadv_tt_local_valid, NULL);
}
tvlv_tt_data->flags = BATADV_TT_RESPONSE;
@@ -1924,14 +2628,15 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
- req_src, BATADV_TVLV_TT, 1,
- tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len);
+ req_src, BATADV_TVLV_TT, 1, tvlv_tt_data,
+ tvlv_len);
goto out;
unlock:
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
out:
+ spin_unlock_bh(&bat_priv->tt.commit_lock);
if (orig_node)
batadv_orig_node_free_ref(orig_node);
if (primary_if)
@@ -1954,16 +2659,11 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
uint8_t *req_src, uint8_t *req_dst)
{
- if (batadv_is_my_mac(bat_priv, req_dst)) {
- /* don't answer backbone gws! */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, req_src))
- return true;
-
+ if (batadv_is_my_mac(bat_priv, req_dst))
return batadv_send_my_tt_response(bat_priv, tt_data, req_src);
- } else {
+ else
return batadv_send_other_tt_response(bat_priv, tt_data,
req_src, req_dst);
- }
}
static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -1979,11 +2679,13 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
roams = (tt_change + i)->flags & BATADV_TT_CLIENT_ROAM;
batadv_tt_global_del(bat_priv, orig_node,
(tt_change + i)->addr,
+ ntohs((tt_change + i)->vid),
"tt removed by changes",
roams);
} else {
if (!batadv_tt_global_add(bat_priv, orig_node,
(tt_change + i)->addr,
+ ntohs((tt_change + i)->vid),
(tt_change + i)->flags, ttvn))
/* In case of problem while storing a
* global_entry, we stop the updating
@@ -1998,8 +2700,9 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
}
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
- struct batadv_tvlv_tt_data *tt_data,
- uint8_t *resp_src, uint16_t num_entries)
+ struct batadv_tvlv_tt_change *tt_change,
+ uint8_t ttvn, uint8_t *resp_src,
+ uint16_t num_entries)
{
struct batadv_orig_node *orig_node;
@@ -2008,11 +2711,11 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
goto out;
/* Purge the old table first.. */
- batadv_tt_global_del_orig(bat_priv, orig_node, "Received full table");
+ batadv_tt_global_del_orig(bat_priv, orig_node, -1,
+ "Received full table");
- _batadv_tt_update_changes(bat_priv, orig_node,
- (struct batadv_tvlv_tt_change *)(tt_data + 1),
- num_entries, tt_data->ttvn);
+ _batadv_tt_update_changes(bat_priv, orig_node, tt_change, num_entries,
+ ttvn);
spin_lock_bh(&orig_node->tt_buff_lock);
kfree(orig_node->tt_buff);
@@ -2020,7 +2723,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
orig_node->tt_buff = NULL;
spin_unlock_bh(&orig_node->tt_buff_lock);
- atomic_set(&orig_node->last_ttvn, tt_data->ttvn);
+ atomic_set(&orig_node->last_ttvn, ttvn);
out:
if (orig_node)
@@ -2040,12 +2743,21 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
atomic_set(&orig_node->last_ttvn, ttvn);
}
-bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr)
+/**
+ * batadv_is_my_client - check if a client is served by the local node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac adress of the client to check
+ * @vid: VLAN identifier
+ *
+ * Returns true if the client is served by this node, false otherwise.
+ */
+bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr,
+ unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry;
bool ret = false;
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
goto out;
/* Check if the client has been logically deleted (but is kept for
@@ -2075,28 +2787,39 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
struct batadv_tt_req_node *node, *safe;
struct batadv_orig_node *orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
+ uint8_t *tvlv_ptr = (uint8_t *)tt_data;
+ uint16_t change_offset;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
resp_src, tt_data->ttvn, num_entries,
(tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
- /* we should have never asked a backbone gw */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, resp_src))
- goto out;
-
orig_node = batadv_orig_hash_find(bat_priv, resp_src);
if (!orig_node)
goto out;
+ spin_lock_bh(&orig_node->tt_lock);
+
+ change_offset = sizeof(struct batadv_tvlv_tt_vlan_data);
+ change_offset *= ntohs(tt_data->num_vlan);
+ change_offset += sizeof(*tt_data);
+ tvlv_ptr += change_offset;
+
+ tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr;
if (tt_data->flags & BATADV_TT_FULL_TABLE) {
- batadv_tt_fill_gtable(bat_priv, tt_data, resp_src, num_entries);
+ batadv_tt_fill_gtable(bat_priv, tt_change, tt_data->ttvn,
+ resp_src, num_entries);
} else {
- tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1);
batadv_tt_update_changes(bat_priv, orig_node, num_entries,
tt_data->ttvn, tt_change);
}
+ /* Recalculate the CRC for this orig_node and store it */
+ batadv_tt_global_update_crc(bat_priv, orig_node);
+
+ spin_unlock_bh(&orig_node->tt_lock);
+
/* Delete the tt_req_node from pending tt_requests list */
spin_lock_bh(&bat_priv->tt.req_list_lock);
list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
@@ -2105,10 +2828,8 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
list_del(&node->list);
kfree(node);
}
- spin_unlock_bh(&bat_priv->tt.req_list_lock);
- /* Recalculate the CRC for this orig_node and store it */
- orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
out:
if (orig_node)
batadv_orig_node_free_ref(orig_node);
@@ -2194,7 +2915,20 @@ unlock:
return ret;
}
+/**
+ * batadv_send_roam_adv - send a roaming advertisement message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @client: mac address of the roaming client
+ * @vid: VLAN identifier
+ * @orig_node: message destination
+ *
+ * Send a ROAMING_ADV message to the node which was previously serving this
+ * client. This is done to inform the node that from now on all traffic destined
+ * for this particular roamed client has to be forwarded to the sender of the
+ * roaming message.
+ */
static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
+ unsigned short vid,
struct batadv_orig_node *orig_node)
{
struct batadv_hard_iface *primary_if;
@@ -2211,13 +2945,13 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
goto out;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending ROAMING_ADV to %pM (client %pM)\n",
- orig_node->orig, client);
+ "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n",
+ orig_node->orig, client, BATADV_PRINT_VID(vid));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
memcpy(tvlv_roam.client, client, sizeof(tvlv_roam.client));
- tvlv_roam.reserved = 0;
+ tvlv_roam.vid = htons(vid);
batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
orig_node->orig, BATADV_TVLV_ROAM, 1,
@@ -2238,7 +2972,7 @@ static void batadv_tt_purge(struct work_struct *work)
priv_tt = container_of(delayed_work, struct batadv_priv_tt, work);
bat_priv = container_of(priv_tt, struct batadv_priv, tt);
- batadv_tt_local_purge(bat_priv);
+ batadv_tt_local_purge(bat_priv, BATADV_TT_LOCAL_TIMEOUT);
batadv_tt_global_purge(bat_priv);
batadv_tt_req_purge(bat_priv);
batadv_tt_roam_purge(bat_priv);
@@ -2263,19 +2997,25 @@ void batadv_tt_free(struct batadv_priv *bat_priv)
kfree(bat_priv->tt.last_changeset);
}
-/* This function will enable or disable the specified flags for all the entries
- * in the given hash table and returns the number of modified entries
+/**
+ * batadv_tt_local_set_flags - set or unset the specified flags on the local
+ * table and possibly count them in the TT size
+ * @bat_priv: the bat priv with all the soft interface information
+ * @flags: the flags to set or unset
+ * @enable: whether to set or unset the flags
+ * @count: whether to increase the TT size by the number of changed entries
*/
-static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash,
- uint16_t flags, bool enable)
+static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv,
+ uint16_t flags, bool enable, bool count)
{
- uint32_t i;
+ struct batadv_hashtable *hash = bat_priv->tt.local_hash;
+ struct batadv_tt_common_entry *tt_common_entry;
uint16_t changed_num = 0;
struct hlist_head *head;
- struct batadv_tt_common_entry *tt_common_entry;
+ uint32_t i;
if (!hash)
- goto out;
+ return;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2293,11 +3033,15 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash,
tt_common_entry->flags &= ~flags;
}
changed_num++;
+
+ if (!count)
+ continue;
+
+ batadv_tt_local_size_inc(bat_priv,
+ tt_common_entry->vid);
}
rcu_read_unlock();
}
-out:
- return changed_num;
}
/* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */
@@ -2325,10 +3069,11 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
continue;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting local tt entry (%pM): pending\n",
- tt_common->addr);
+ "Deleting local tt entry (%pM, vid: %d): pending\n",
+ tt_common->addr,
+ BATADV_PRINT_VID(tt_common->vid));
- atomic_dec(&bat_priv->tt.local_entry_num);
+ batadv_tt_local_size_dec(bat_priv, tt_common->vid);
hlist_del_rcu(&tt_common->hash_entry);
tt_local = container_of(tt_common,
struct batadv_tt_local_entry,
@@ -2340,27 +3085,24 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
}
/**
- * batadv_tt_local_commit_changes - commit all pending local tt changes which
- * have been queued in the time since the last commit
+ * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes
+ * which have been queued in the time since the last commit
* @bat_priv: the bat priv with all the soft interface information
+ *
+ * Caller must hold tt->commit_lock.
*/
-void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
+static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
{
- uint16_t changed_num = 0;
-
if (atomic_read(&bat_priv->tt.local_changes) < 1) {
if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
batadv_tt_tvlv_container_update(bat_priv);
return;
}
- changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash,
- BATADV_TT_CLIENT_NEW, false);
+ batadv_tt_local_set_flags(bat_priv, BATADV_TT_CLIENT_NEW, false, true);
- /* all reset entries have to be counted as local entries */
- atomic_add(changed_num, &bat_priv->tt.local_entry_num);
batadv_tt_local_purge_pending_clients(bat_priv);
- bat_priv->tt.local_crc = batadv_tt_local_crc(bat_priv);
+ batadv_tt_local_update_crc(bat_priv);
/* Increment the TTVN only once per OGM interval */
atomic_inc(&bat_priv->tt.vn);
@@ -2373,21 +3115,35 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
batadv_tt_tvlv_container_update(bat_priv);
}
+/**
+ * batadv_tt_local_commit_changes - commit all pending local tt changes which
+ * have been queued in the time since the last commit
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
+{
+ spin_lock_bh(&bat_priv->tt.commit_lock);
+ batadv_tt_local_commit_changes_nolock(bat_priv);
+ spin_unlock_bh(&bat_priv->tt.commit_lock);
+}
+
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst)
+ uint8_t *dst, unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry = NULL;
struct batadv_tt_global_entry *tt_global_entry = NULL;
+ struct batadv_softif_vlan *vlan;
bool ret = false;
- if (!atomic_read(&bat_priv->ap_isolation))
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan || !atomic_read(&vlan->ap_isolation))
goto out;
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid);
if (!tt_local_entry)
goto out;
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, src);
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid);
if (!tt_global_entry)
goto out;
@@ -2397,6 +3153,8 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
ret = true;
out:
+ if (vlan)
+ batadv_softif_vlan_free_ref(vlan);
if (tt_global_entry)
batadv_tt_global_entry_free_ref(tt_global_entry);
if (tt_local_entry)
@@ -2409,25 +3167,24 @@ out:
* information received via ogms
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
- * @tt_buff: buffer holding the tt information
+ * @tt_buff: pointer to the first tvlv VLAN entry (start of the TT buffer)
+ * @tt_num_vlan: number of tvlv VLAN entries
+ * @tt_change: pointer to the first entry in the TT buffer
* @tt_num_changes: number of tt changes inside the tt buffer
* @ttvn: translation table version number of this changeset
*/
static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *tt_buff,
- uint16_t tt_num_changes, uint8_t ttvn,
- uint32_t tt_crc)
+ const void *tt_buff, uint16_t tt_num_vlan,
+ struct batadv_tvlv_tt_change *tt_change,
+ uint16_t tt_num_changes, uint8_t ttvn)
{
uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool full_table = true;
- struct batadv_tvlv_tt_change *tt_change;
-
- /* don't care about a backbone gateways updates. */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig))
- return;
+ tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff;
/* orig table not initialised AND first diff is in the OGM OR the ttvn
* increased by one -> we can apply the attached changes
*/
@@ -2443,6 +3200,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
goto request_table;
}
+ spin_lock_bh(&orig_node->tt_lock);
+
tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
ttvn, tt_change);
@@ -2451,7 +3210,9 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
* prefer to recompute it to spot any possible inconsistency
* in the global table
*/
- orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
+ batadv_tt_global_update_crc(bat_priv, orig_node);
+
+ spin_unlock_bh(&orig_node->tt_lock);
/* The ttvn alone is not enough to guarantee consistency
* because a single value could represent different states
@@ -2462,37 +3223,46 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
* checking the CRC value is mandatory to detect the
* inconsistency
*/
- if (orig_node->tt_crc != tt_crc)
+ if (!batadv_tt_global_check_crc(orig_node, tt_vlan,
+ tt_num_vlan))
goto request_table;
} else {
/* if we missed more than one change or our tables are not
* in sync anymore -> request fresh tt data
*/
if (!orig_node->tt_initialised || ttvn != orig_ttvn ||
- orig_node->tt_crc != tt_crc) {
+ !batadv_tt_global_check_crc(orig_node, tt_vlan,
+ tt_num_vlan)) {
request_table:
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.8x last_crc: %#.8x num_changes: %u)\n",
- orig_node->orig, ttvn, orig_ttvn, tt_crc,
- orig_node->tt_crc, tt_num_changes);
+ "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u num_changes: %u)\n",
+ orig_node->orig, ttvn, orig_ttvn,
+ tt_num_changes);
batadv_send_tt_request(bat_priv, orig_node, ttvn,
- tt_crc, full_table);
+ tt_vlan, tt_num_vlan,
+ full_table);
return;
}
}
}
-/* returns true whether we know that the client has moved from its old
- * originator to another one. This entry is kept is still kept for consistency
- * purposes
+/**
+ * batadv_tt_global_client_is_roaming - check if a client is marked as roaming
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac address of the client to check
+ * @vid: VLAN identifier
+ *
+ * Returns true if we know that the client has moved from its old originator
+ * to another one. This entry is still kept for consistency purposes and will be
+ * deleted later by a DEL event or because of a timeout
*/
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr)
+ uint8_t *addr, unsigned short vid)
{
struct batadv_tt_global_entry *tt_global_entry;
bool ret = false;
- tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid);
if (!tt_global_entry)
goto out;
@@ -2505,19 +3275,20 @@ out:
/**
* batadv_tt_local_client_is_roaming - tells whether the client is roaming
* @bat_priv: the bat priv with all the soft interface information
- * @addr: the MAC address of the local client to query
+ * @addr: the mac address of the local client to query
+ * @vid: VLAN identifier
*
* Returns true if the local client is known to be roaming (it is no longer
* served by this node). In that case, the client is still present in the
* table to keep the latter consistent with the node TTVN
*/
bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr)
+ uint8_t *addr, unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry;
bool ret = false;
- tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
+ tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
goto out;
@@ -2529,31 +3300,66 @@ out:
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *addr)
+ const unsigned char *addr,
+ unsigned short vid)
{
bool ret = false;
- /* if the originator is a backbone node (meaning it belongs to the same
- * LAN of this node) the temporary client must not be added because to
- * reach such destination the node must use the LAN instead of the mesh
- */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig))
- goto out;
-
- if (!batadv_tt_global_add(bat_priv, orig_node, addr,
+ if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid,
BATADV_TT_CLIENT_TEMP,
atomic_read(&orig_node->last_ttvn)))
goto out;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Added temporary global client (addr: %pM orig: %pM)\n",
- addr, orig_node->orig);
+ "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
+ addr, BATADV_PRINT_VID(vid), orig_node->orig);
ret = true;
out:
return ret;
}
/**
+ * batadv_tt_local_resize_to_mtu - resize the local translation table to fit
+ * the maximum packet size that can be transported through the mesh
+ * @soft_iface: netdev struct of the mesh interface
+ *
+ * Remove entries older than 'timeout' and halve the timeout as long as more
+ * entries need to be removed.
+ */
+void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
+{
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ int packet_size_max = atomic_read(&bat_priv->packet_size_max);
+ int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2;
+ bool reduced = false;
+
+ spin_lock_bh(&bat_priv->tt.commit_lock);
+
+ while (true) {
+ table_size = batadv_tt_local_table_transmit_size(bat_priv);
+ if (packet_size_max >= table_size)
+ break;
+
+ batadv_tt_local_purge(bat_priv, timeout);
+ batadv_tt_local_purge_pending_clients(bat_priv);
+
+ timeout /= 2;
+ reduced = true;
+ net_ratelimited_function(batadv_info, soft_iface,
+ "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n",
+ packet_size_max);
+ }
+
+ /* commit these changes immediately, to avoid synchronization problems
+ * with the TTVN
+ */
+ if (reduced)
+ batadv_tt_local_commit_changes_nolock(bat_priv);
+
+ spin_unlock_bh(&bat_priv->tt.commit_lock);
+}
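The loop above is a simple back-off: purge with a halved timeout, re-measure the serialized table size, repeat until it fits. A toy model of that control loop, where all sizes, per-entry costs and helper functions are made up for illustration:

#include <stdio.h>

/* made-up serialization cost: header plus a per-entry cost */
static int table_transmit_size(int entries)
{
	return 20 + entries * 12;
}

/* toy purge: pretend roughly half of the entries exceed the timeout */
static int purge_older_than(int entries, int timeout_ms)
{
	return timeout_ms > 0 ? entries / 2 : 0;
}

int main(void)
{
	int entries = 2000;
	int timeout = 600000 / 2;	/* start at half the usual timeout */
	int max_packet = 1400;		/* made-up packet_size_max */

	/* purge with a progressively halved timeout until the serialized
	 * table fits into the maximum packet size
	 */
	while (table_transmit_size(entries) > max_packet) {
		entries = purge_older_than(entries, timeout);
		timeout /= 2;
		printf("purged down to %d entries (next timeout %d ms)\n",
		       entries, timeout);
	}
	return 0;
}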
+
+/**
* batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
@@ -2563,12 +3369,13 @@ out:
*/
static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
- void *tvlv_value,
+ uint8_t flags, void *tvlv_value,
uint16_t tvlv_value_len)
{
+ struct batadv_tvlv_tt_vlan_data *tt_vlan;
+ struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tt_data;
- uint16_t num_entries;
+ uint16_t num_entries, num_vlan;
if (tvlv_value_len < sizeof(*tt_data))
return;
@@ -2576,11 +3383,19 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
+ num_vlan = ntohs(tt_data->num_vlan);
+
+ if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan)
+ return;
+
+ tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
+ tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan);
+ tvlv_value_len -= sizeof(*tt_vlan) * num_vlan;
+
num_entries = batadv_tt_entries(tvlv_value_len);
- batadv_tt_update_orig(bat_priv, orig,
- (unsigned char *)(tt_data + 1),
- num_entries, tt_data->ttvn, ntohl(tt_data->crc));
+ batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change,
+ num_entries, tt_data->ttvn);
}
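Decoding mirrors the encoding: a fixed header, then num_vlan VLAN records, and whatever remains is the array of change entries. This is the same change_offset arithmetic used in batadv_handle_tt_response() above. A sketch of that walk, again with illustrative structs rather than the packet.h definitions:

#include <arpa/inet.h>	/* ntohs(), ntohl(), htons() */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* same illustrative layouts as before, plus a simplified change record */
struct tt_data_hdr {
	uint8_t flags;
	uint8_t ttvn;
	uint16_t num_vlan;	/* network byte order */
} __attribute__((packed));

struct tt_vlan_rec {
	uint32_t crc;
	uint16_t vid;
	uint16_t reserved;
} __attribute__((packed));

struct tt_change_rec {
	uint8_t flags;
	uint8_t reserved;
	uint8_t addr[6];
	uint16_t vid;		/* network byte order */
} __attribute__((packed));

/* walk the container: fixed header, then num_vlan VLAN records, then
 * however many change entries fit in the remaining length
 */
static int parse_tt_container(const uint8_t *buf, size_t len)
{
	const struct tt_data_hdr *hdr = (const void *)buf;
	const struct tt_vlan_rec *vlans;
	const struct tt_change_rec *changes;
	size_t num_vlan, num_changes, off;

	if (len < sizeof(*hdr))
		return -1;	/* truncated header */

	num_vlan = ntohs(hdr->num_vlan);
	off = sizeof(*hdr) + num_vlan * sizeof(*vlans);
	if (len < off)
		return -1;	/* truncated VLAN record list */

	vlans = (const void *)(buf + sizeof(*hdr));
	changes = (const void *)(buf + off);
	num_changes = (len - off) / sizeof(*changes);

	printf("ttvn %u, %zu vlan(s) (first crc %#.8x), %zu change(s)\n",
	       hdr->ttvn, num_vlan,
	       num_vlan ? ntohl(vlans[0].crc) : 0, num_changes);
	(void)changes;
	return 0;
}

int main(void)
{
	uint8_t buf[sizeof(struct tt_data_hdr) +
		    sizeof(struct tt_vlan_rec)] = { 0 };
	struct tt_data_hdr *hdr = (void *)buf;

	hdr->ttvn = 3;
	hdr->num_vlan = htons(1);
	return parse_tt_container(buf, sizeof(buf));
}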
/**
@@ -2601,7 +3416,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
uint16_t tvlv_value_len)
{
struct batadv_tvlv_tt_data *tt_data;
- uint16_t num_entries;
+ uint16_t tt_vlan_len, tt_num_entries;
char tt_flag;
bool ret;
@@ -2611,7 +3426,14 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
- num_entries = batadv_tt_entries(tvlv_value_len);
+ tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
+ tt_vlan_len *= ntohs(tt_data->num_vlan);
+
+ if (tvlv_value_len < tt_vlan_len)
+ return NET_RX_SUCCESS;
+
+ tvlv_value_len -= tt_vlan_len;
+ tt_num_entries = batadv_tt_entries(tvlv_value_len);
switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) {
case BATADV_TT_REQUEST:
@@ -2639,7 +3461,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
if (batadv_is_my_mac(bat_priv, dst)) {
batadv_handle_tt_response(bat_priv, tt_data,
- src, num_entries);
+ src, tt_num_entries);
return NET_RX_SUCCESS;
}
@@ -2684,13 +3506,6 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
if (!batadv_is_my_mac(bat_priv, dst))
return NET_RX_DROP;
- /* check if it is a backbone gateway. we don't accept
- * roaming advertisement from it, as it has the same
- * entries as we have.
- */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, src))
- goto out;
-
if (tvlv_value_len < sizeof(*roaming_adv))
goto out;
@@ -2706,7 +3521,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
src, roaming_adv->client);
batadv_tt_global_add(bat_priv, orig_node, roaming_adv->client,
- BATADV_TT_CLIENT_ROAM,
+ ntohs(roaming_adv->vid), BATADV_TT_CLIENT_ROAM,
atomic_read(&orig_node->last_ttvn) + 1);
out:
@@ -2725,6 +3540,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
{
int ret;
+ /* synchronized flags must also be sent over the wire (remote) */
+ BUILD_BUG_ON(!(BATADV_TT_SYNC_MASK & BATADV_TT_REMOTE_MASK));
+
ret = batadv_tt_local_init(bat_priv);
if (ret < 0)
return ret;
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 015d8b9e63b9..026b1ffa6746 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -21,30 +21,34 @@
#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
int batadv_tt_init(struct batadv_priv *bat_priv);
-void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
- int ifindex);
+bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+ unsigned short vid, int ifindex);
uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
- const uint8_t *addr, const char *message,
- bool roaming);
+ const uint8_t *addr, unsigned short vid,
+ const char *message, bool roaming);
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const char *message);
+ int32_t match_vid, const char *message);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
const uint8_t *src,
- const uint8_t *addr);
+ const uint8_t *addr,
+ unsigned short vid);
void batadv_tt_free(struct batadv_priv *bat_priv);
-bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr);
+bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr,
+ unsigned short vid);
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst);
+ uint8_t *dst, unsigned short vid);
void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv);
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr);
+ uint8_t *addr, unsigned short vid);
bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr);
+ uint8_t *addr, unsigned short vid);
+void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface);
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *addr);
+ const unsigned char *addr,
+ unsigned short vid);
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 5cbb0d09a9b5..91dd369b0ff2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -36,6 +36,18 @@
#endif /* CONFIG_BATMAN_ADV_DAT */
/**
+ * BATADV_TT_REMOTE_MASK - bitmask selecting the flags that are sent over the
+ * wire only
+ */
+#define BATADV_TT_REMOTE_MASK 0x00FF
+
+/**
+ * BATADV_TT_SYNC_MASK - bitmask of the flags that need to be kept in sync
+ * among the nodes. These flags are used to compute the global/local CRC
+ */
+#define BATADV_TT_SYNC_MASK 0x00F0
+
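A compact way to see what the two masks buy: the sync mask selects the CRC-relevant subset of the flags, and the BUILD_BUG_ON added in batadv_tt_init() encodes the invariant that this subset overlaps the flags sent over the wire (otherwise remote nodes could never reproduce the CRC input). A userspace sketch follows; the WIFI bit matches the packet.h flag layout of this era, while the second bit is only modeled after the local-only flags:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TT_REMOTE_MASK 0x00FF	/* flags sent over the wire */
#define TT_SYNC_MASK   0x00F0	/* flags that feed the table CRC */

#define TT_CLIENT_WIFI (1 << 4)	/* wire flag inside the sync mask */
#define TT_CLIENT_NEW  (1 << 9)	/* local-only bit, for contrast */

int main(void)
{
	uint16_t flags = TT_CLIENT_WIFI | TT_CLIENT_NEW;

	/* mirrors the BUILD_BUG_ON above: the sync mask must intersect
	 * the remote mask (for these values it is in fact a full subset)
	 */
	assert(TT_SYNC_MASK & TT_REMOTE_MASK);
	assert((TT_SYNC_MASK & ~TT_REMOTE_MASK) == 0);

	/* only the synced subset is fed into the CRC computation */
	printf("crc input flags: %#x\n", flags & TT_SYNC_MASK);
	/* only the remote subset goes into a tt_change on the wire */
	printf("wire flags:      %#x\n", flags & TT_REMOTE_MASK);
	return 0;
}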
+/**
* struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data
* @ogm_buff: buffer holding the OGM packet
* @ogm_buff_len: length of the OGM packet buffer
@@ -107,27 +119,69 @@ struct batadv_frag_list_entry {
};
/**
+ * struct batadv_vlan_tt - VLAN specific TT attributes
+ * @crc: CRC32 checksum of the entries belonging to this vlan
+ * @num_entries: number of TT entries for this VLAN
+ */
+struct batadv_vlan_tt {
+ uint32_t crc;
+ atomic_t num_entries;
+};
+
+/**
+ * struct batadv_orig_node_vlan - VLAN specific data per orig_node
+ * @vid: the VLAN identifier
+ * @tt: VLAN specific TT attributes
+ * @list: list node for orig_node::vlan_list
+ * @refcount: number of contexts where this object is currently in use
+ * @rcu: struct used for freeing in a RCU-safe manner
+ */
+struct batadv_orig_node_vlan {
+ unsigned short vid;
+ struct batadv_vlan_tt tt;
+ struct list_head list;
+ atomic_t refcount;
+ struct rcu_head rcu;
+};
+
+/**
+ * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
+ * @bcast_own: bitfield containing the number of our OGMs this orig_node
+ * rebroadcasted "back" to us (relative to last_real_seqno)
+ * @bcast_own_sum: counted result of bcast_own
+ * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
+ * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
+ */
+struct batadv_orig_bat_iv {
+ unsigned long *bcast_own;
+ uint8_t *bcast_own_sum;
+ /* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
+ * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
+ */
+ spinlock_t ogm_cnt_lock;
+};
+
+/**
* struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
* @orig: originator ethernet address
* @primary_addr: hosts primary interface address
* @router: router that should be used to reach this originator
* @batadv_dat_addr_t: address of the orig node in the distributed hash
- * @bcast_own: bitfield containing the number of our OGMs this orig_node
- * rebroadcasted "back" to us (relative to last_real_seqno)
- * @bcast_own_sum: counted result of bcast_own
* @last_seen: time when last packet from this node was received
* @bcast_seqno_reset: time when the broadcast seqno window was reset
* @batman_seqno_reset: time when the batman seqno window was reset
* @capabilities: announced capabilities of this originator
* @last_ttvn: last seen translation table version number
- * @tt_crc: CRC of the translation table
* @tt_buff: last tt changeset this node received from the orig node
* @tt_buff_len: length of the last tt changeset this node received from the
* orig node
* @tt_buff_lock: lock that protects tt_buff and tt_buff_len
- * @tt_size: number of global TT entries announced by the orig node
* @tt_initialised: bool keeping track of whether or not this node have received
* any translation table information from the orig node yet
+ * @tt_lock: prevents the table from being updated while it is read. A table
+ * update is made up of two operations (data structure update and metadata
+ * -CRC/TTVN- recalculation) which have to be executed atomically in order to
+ * prevent another thread from reading the table/metadata in between.
* @last_real_seqno: last and best known sequence number
* @last_ttl: ttl of last received packet
* @bcast_bits: bitfield containing the info which payload broadcast originated
@@ -138,8 +192,6 @@ struct batadv_frag_list_entry {
* @neigh_list_lock: lock protecting neigh_list, router and bonding_list
* @hash_entry: hlist node for batadv_priv::orig_hash
* @bat_priv: pointer to soft_iface this orig node belongs to
- * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
- * neigh_node->real_bits & neigh_node->real_packet_count
* @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno
* @bond_candidates: how many candidates are available
* @bond_list: list of bonding candidates
@@ -150,6 +202,10 @@ struct batadv_frag_list_entry {
* @in_coding_list_lock: protects in_coding_list
* @out_coding_list_lock: protects out_coding_list
* @fragments: array with heads for fragment chains
+ * @vlan_list: a list of orig_node_vlan structs, one per VLAN served by the
+ * originator represented by this object
+ * @vlan_list_lock: lock protecting vlan_list
+ * @bat_iv: B.A.T.M.A.N. IV private structure
*/
struct batadv_orig_node {
uint8_t orig[ETH_ALEN];
@@ -158,19 +214,17 @@ struct batadv_orig_node {
#ifdef CONFIG_BATMAN_ADV_DAT
batadv_dat_addr_t dat_addr;
#endif
- unsigned long *bcast_own;
- uint8_t *bcast_own_sum;
unsigned long last_seen;
unsigned long bcast_seqno_reset;
unsigned long batman_seqno_reset;
uint8_t capabilities;
atomic_t last_ttvn;
- uint32_t tt_crc;
unsigned char *tt_buff;
int16_t tt_buff_len;
spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
- atomic_t tt_size;
bool tt_initialised;
+ /* prevents from changing the table while reading it */
+ spinlock_t tt_lock;
uint32_t last_real_seqno;
uint8_t last_ttl;
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
@@ -180,10 +234,6 @@ struct batadv_orig_node {
spinlock_t neigh_list_lock;
struct hlist_node hash_entry;
struct batadv_priv *bat_priv;
- /* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
- * neigh_node->real_bits & neigh_node->real_packet_count
- */
- spinlock_t ogm_cnt_lock;
/* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
spinlock_t bcast_seqno_lock;
atomic_t bond_candidates;
@@ -197,6 +247,9 @@ struct batadv_orig_node {
spinlock_t out_coding_list_lock; /* Protects out_coding_list */
#endif
struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
+ struct list_head vlan_list;
+ spinlock_t vlan_list_lock; /* protects vlan_list */
+ struct batadv_orig_bat_iv bat_iv;
};
/**
@@ -230,40 +283,49 @@ struct batadv_gw_node {
};
/**
- * struct batadv_neigh_node - structure for single hop neighbors
- * @list: list node for batadv_orig_node::neigh_list
- * @addr: mac address of neigh node
+ * struct batadv_neigh_bat_iv - B.A.T.M.A.N. IV specific structure for single
+ * hop neighbors
* @tq_recv: ring buffer of received TQ values from this neigh node
* @tq_index: ring buffer index
* @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
- * @last_ttl: last received ttl from this neigh node
- * @bonding_list: list node for batadv_orig_node::bond_list
- * @last_seen: when last packet via this neighbor was received
* @real_bits: bitfield containing the number of OGMs received from this neigh
* node (relative to orig_node->last_real_seqno)
* @real_packet_count: counted result of real_bits
+ * @lq_update_lock: lock protecting tq_recv & tq_index
+ */
+struct batadv_neigh_bat_iv {
+ uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
+ uint8_t tq_index;
+ uint8_t tq_avg;
+ DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
+ uint8_t real_packet_count;
+ spinlock_t lq_update_lock; /* protects tq_recv & tq_index */
+};
+
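To make the relocated ring-buffer fields concrete, here is a hedged sketch of folding one received TQ sample into this structure; the helper name and the plain-average policy are illustrative assumptions, not taken from this patch:

static void batadv_iv_tq_sample_sketch(struct batadv_neigh_bat_iv *bat_iv,
				       uint8_t tq)
{
	uint32_t sum = 0;
	int i;

	spin_lock_bh(&bat_iv->lq_update_lock);

	/* overwrite the oldest slot, then advance the ring index */
	bat_iv->tq_recv[bat_iv->tq_index] = tq;
	bat_iv->tq_index = (bat_iv->tq_index + 1) %
			   BATADV_TQ_GLOBAL_WINDOW_SIZE;

	/* cache the average over the whole window in tq_avg */
	for (i = 0; i < BATADV_TQ_GLOBAL_WINDOW_SIZE; i++)
		sum += bat_iv->tq_recv[i];
	bat_iv->tq_avg = sum / BATADV_TQ_GLOBAL_WINDOW_SIZE;

	spin_unlock_bh(&bat_iv->lq_update_lock);
}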
+/**
+ * struct batadv_neigh_node - structure for single-hop neighbors
+ * @list: list node for batadv_orig_node::neigh_list
* @orig_node: pointer to corresponding orig_node
+ * @addr: the MAC address of the neighboring interface
* @if_incoming: pointer to incoming hard interface
- * @lq_update_lock: lock protecting tq_recv & tq_index
+ * @last_seen: when last packet via this neighbor was received
+ * @last_ttl: last received ttl from this neigh node
+ * @bonding_list: list node for batadv_orig_node::bond_list
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
+ * @bat_iv: B.A.T.M.A.N. IV private structure
*/
struct batadv_neigh_node {
struct hlist_node list;
+ struct batadv_orig_node *orig_node;
uint8_t addr[ETH_ALEN];
- uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
- uint8_t tq_index;
- uint8_t tq_avg;
+ struct batadv_hard_iface *if_incoming;
+ unsigned long last_seen;
uint8_t last_ttl;
struct list_head bonding_list;
- unsigned long last_seen;
- DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
- uint8_t real_packet_count;
- struct batadv_orig_node *orig_node;
- struct batadv_hard_iface *if_incoming;
- spinlock_t lq_update_lock; /* protects tq_recv & tq_index */
atomic_t refcount;
struct rcu_head rcu;
+ struct batadv_neigh_bat_iv bat_iv;
};
/**
@@ -383,11 +445,14 @@ enum batadv_counters {
* @changes_list_lock: lock protecting changes_list
* @req_list_lock: lock protecting req_list
* @roam_list_lock: lock protecting roam_list
- * @local_entry_num: number of entries in the local hash table
- * @local_crc: Checksum of the local table, recomputed before sending a new OGM
* @last_changeset: last tt changeset this host has generated
* @last_changeset_len: length of last tt changeset this host has generated
* @last_changeset_lock: lock protecting last_changeset & last_changeset_len
+ * @commit_lock: prevents a local TT commit from running while the local
+ * table is read. The local TT commit is made up of two operations (data
+ * structure update and metadata -CRC/TTVN- recalculation) which have to be
+ * executed atomically so that no other thread can read the table/metadata in
+ * between
* @work: work queue callback item for translation table purging
*/
struct batadv_priv_tt {
@@ -402,12 +467,12 @@ struct batadv_priv_tt {
spinlock_t changes_list_lock; /* protects changes */
spinlock_t req_list_lock; /* protects req_list */
spinlock_t roam_list_lock; /* protects roam_list */
- atomic_t local_entry_num;
- uint32_t local_crc;
unsigned char *last_changeset;
int16_t last_changeset_len;
/* protects last_changeset & last_changeset_len */
spinlock_t last_changeset_lock;
+ /* prevents a commit from running while the table is read */
+ spinlock_t commit_lock;
struct delayed_work work;
};
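The reader side of @commit_lock, as a short sketch (it assumes batadv_priv embeds this struct as bat_priv->tt, per the existing layout):

/* a TT request handler must observe the local table and its
 * CRC/TTVN in sync, so it holds commit_lock across both reads
 */
spin_lock_bh(&bat_priv->tt.commit_lock);
/* ... walk the local table and read the matching CRC/TTVN ... */
spin_unlock_bh(&bat_priv->tt.commit_lock);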
@@ -531,6 +596,26 @@ struct batadv_priv_nc {
};
/**
+ * struct batadv_softif_vlan - per VLAN attributes set
+ * @vid: VLAN identifier
+ * @kobj: kobject for sysfs vlan subdirectory
+ * @ap_isolation: AP isolation state
+ * @tt: TT private attributes (VLAN specific)
+ * @list: list node for bat_priv::softif_vlan_list
+ * @refcount: number of contexts where this object is currently in use
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_softif_vlan {
+ unsigned short vid;
+ struct kobject *kobj;
+ atomic_t ap_isolation; /* boolean */
+ struct batadv_vlan_tt tt;
+ struct hlist_node list;
+ atomic_t refcount;
+ struct rcu_head rcu;
+};
+
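A hedged sketch of how such a refcounted, RCU-listed object would typically be looked up; the helper name is hypothetical, and it relies on the softif_vlan_list added to batadv_priv further down in this patch:

static struct batadv_softif_vlan *
batadv_softif_vlan_get_sketch(struct batadv_priv *bat_priv,
			      unsigned short vid)
{
	struct batadv_softif_vlan *vlan_tmp, *vlan = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(vlan_tmp, &bat_priv->softif_vlan_list,
				 list) {
		if (vlan_tmp->vid != vid)
			continue;

		/* skip objects already on their way to being freed */
		if (!atomic_inc_not_zero(&vlan_tmp->refcount))
			continue;

		vlan = vlan_tmp;
		break;
	}
	rcu_read_unlock();

	return vlan;
}

The matching put would decrement refcount and, on reaching zero, free the object through its rcu head.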
+/**
* struct batadv_priv - per mesh interface data
* @mesh_state: current status of the mesh (inactive/active/deactivating)
* @soft_iface: net device which holds this struct as private data
@@ -539,8 +624,9 @@ struct batadv_priv_nc {
* @aggregated_ogms: bool indicating whether OGM aggregation is enabled
* @bonding: bool indicating whether traffic bonding is enabled
* @fragmentation: bool indicating whether traffic fragmentation is enabled
+ * @packet_size_max: max packet size that can be transmitted via
+ * multiple fragmented skbs or a single frame if fragmentation is disabled
* @frag_seqno: incremental counter to identify chains of egress fragments
- * @ap_isolation: bool indicating whether ap isolation is enabled
* @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
* enabled
* @distributed_arp_table: bool indicating whether distributed ARP table is
@@ -566,6 +652,9 @@ struct batadv_priv_nc {
* @primary_if: one of the hard interfaces assigned to this mesh interface
* becomes the primary interface
* @bat_algo_ops: routing algorithm used by this mesh interface
+ * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
+ * of the mesh interface represented by this object
+ * @softif_vlan_list_lock: lock protecting softif_vlan_list
 * @bla: bridge loop avoidance data
* @debug_log: holding debug logging relevant data
* @gw: gateway data
@@ -583,8 +672,8 @@ struct batadv_priv {
atomic_t aggregated_ogms;
atomic_t bonding;
atomic_t fragmentation;
+ atomic_t packet_size_max;
atomic_t frag_seqno;
- atomic_t ap_isolation;
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_t bridge_loop_avoidance;
#endif
@@ -613,6 +702,8 @@ struct batadv_priv {
struct work_struct cleanup_work;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
struct batadv_algo_ops *bat_algo_ops;
+ struct hlist_head softif_vlan_list;
+ spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */
#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_priv_bla bla;
#endif
@@ -658,7 +749,7 @@ struct batadv_socket_client {
struct batadv_socket_packet {
struct list_head list;
size_t icmp_len;
- struct batadv_icmp_packet_rr icmp_packet;
+ uint8_t icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
};
/**
@@ -715,6 +806,7 @@ struct batadv_bla_claim {
/**
* struct batadv_tt_common_entry - tt local & tt global common data
* @addr: mac address of non-mesh client
+ * @vid: VLAN identifier
* @hash_entry: hlist node for batadv_priv_tt::local_hash or for
* batadv_priv_tt::global_hash
* @flags: various state handling flags (see batadv_tt_client_flags)
@@ -724,6 +816,7 @@ struct batadv_bla_claim {
*/
struct batadv_tt_common_entry {
uint8_t addr[ETH_ALEN];
+ unsigned short vid;
struct hlist_node hash_entry;
uint16_t flags;
unsigned long added_at;
@@ -914,6 +1007,16 @@ struct batadv_forw_packet {
* @bat_primary_iface_set: called when primary interface is selected / changed
* @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
* @bat_ogm_emit: send scheduled OGM
+ * @bat_neigh_cmp: compare the metrics of two neighbors
+ * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or
+ * better than neigh2 from the metric perspective
+ * @bat_orig_print: print the originator table (optional)
+ * @bat_orig_free: free the resources allocated by the routing algorithm for an
+ * orig_node object
+ * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
+ * the orig_node due to a new hard-interface being added into the mesh
+ * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
+ * the orig_node due to a hard-interface being removed from the mesh
*/
struct batadv_algo_ops {
struct hlist_node list;
@@ -924,6 +1027,17 @@ struct batadv_algo_ops {
void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
+ int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
+ struct batadv_neigh_node *neigh2);
+ bool (*bat_neigh_is_equiv_or_better)(struct batadv_neigh_node *neigh1,
+ struct batadv_neigh_node *neigh2);
+ /* orig_node handling API */
+ void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq);
+ void (*bat_orig_free)(struct batadv_orig_node *orig_node);
+ int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
+ int max_if_num);
+ int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
+ int max_if_num, int del_if_num);
};
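For illustration, a routing algorithm would fill in the new hooks when declaring its batadv_algo_ops; the callback names below are placeholders, not symbols introduced by this patch:

static struct batadv_algo_ops my_algo_ops_sketch __read_mostly = {
	.name			      = "MY_ALGO",
	.bat_neigh_cmp		      = my_neigh_cmp,
	.bat_neigh_is_equiv_or_better = my_neigh_is_equiv_or_better,
	.bat_orig_print		      = my_orig_print,
	.bat_orig_free		      = my_orig_free,
	.bat_orig_add_if	      = my_orig_add_if,
	.bat_orig_del_if	      = my_orig_del_if,
	/* ... plus the pre-existing iface/OGM callbacks ... */
};

The struct is then passed once to batadv_algo_register() at module init time.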
/**
@@ -931,6 +1045,7 @@ struct batadv_algo_ops {
 * is used to store ARP entries needed for the global DAT cache
* @ip: the IPv4 corresponding to this DAT/ARP entry
* @mac_addr: the MAC address associated to the stored IPv4
+ * @vid: the VLAN ID associated with this entry
* @last_update: time in jiffies when this entry was refreshed last time
* @hash_entry: hlist node for batadv_priv_dat::hash
* @refcount: number of contexts the object is used
@@ -939,6 +1054,7 @@ struct batadv_algo_ops {
struct batadv_dat_entry {
__be32 ip;
uint8_t mac_addr[ETH_ALEN];
+ unsigned short vid;
unsigned long last_update;
struct hlist_node hash_entry;
atomic_t refcount;
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 9e6cc3553105..ab5241400cf7 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -182,7 +182,7 @@ struct hidp_session {
};
/* HIDP init defines */
-extern int __init hidp_init_sockets(void);
-extern void __exit hidp_cleanup_sockets(void);
+int __init hidp_init_sockets(void);
+void __exit hidp_cleanup_sockets(void);
#endif /* __HIDP_H */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ca04163635da..e6b7fecb3af1 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_flood_deliver(br, skb, false);
goto out;
}
- if (br_multicast_rcv(br, NULL, skb)) {
+ if (br_multicast_rcv(br, NULL, skb, vid)) {
kfree_skb(skb);
goto out;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ffd5874f2592..33e8f23acddd 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
vid = nla_get_u16(tb[NDA_VLAN]);
- if (vid >= VLAN_N_VID) {
+ if (!vid || vid >= VLAN_VID_MASK) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
@@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
vid = nla_get_u16(tb[NDA_VLAN]);
- if (vid >= VLAN_N_VID) {
+ if (!vid || vid >= VLAN_VID_MASK) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a2fd37ec35f7..7e73c32e205d 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
- br_multicast_rcv(br, p, skb))
+ br_multicast_rcv(br, p, skb, vid))
goto drop;
if (p->state == BR_STATE_LEARNING)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 85a09bb5ca51..b7b1914dfa25 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -453,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 005d876dd86c..4c214b2b88ef 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -272,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -620,7 +620,6 @@ rehash:
mp->br = br;
mp->addr = *group;
-
setup_timer(&mp->timer, br_multicast_group_expired,
(unsigned long)mp);
@@ -660,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ unsigned long now = jiffies;
int err;
spin_lock(&br->multicast_lock);
@@ -674,6 +674,7 @@ static int br_multicast_add_group(struct net_bridge *br,
if (!port) {
mp->mglist = true;
+ mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -681,7 +682,7 @@ static int br_multicast_add_group(struct net_bridge *br,
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->port == port)
- goto out;
+ goto found;
if ((unsigned long)p->port < (unsigned long)port)
break;
}
@@ -692,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+found:
+ mod_timer(&p->timer, now + br->multicast_membership_interval);
out:
err = 0;
@@ -944,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port)
static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
struct igmpv3_report *ih;
struct igmpv3_grec *grec;
@@ -954,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
int type;
int err = 0;
__be32 group;
- u16 vid = 0;
if (!pskb_may_pull(skb, sizeof(*ih)))
return -EINVAL;
- br_vlan_get_tag(skb, &vid);
ih = igmpv3_report_hdr(skb);
num = ntohs(ih->ngrec);
len = sizeof(*ih);
@@ -1002,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_mld2_report(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
struct icmp6hdr *icmp6h;
struct mld2_grec *grec;
@@ -1010,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
int len;
int num;
int err = 0;
- u16 vid = 0;
if (!pskb_may_pull(skb, sizeof(*icmp6h)))
return -EINVAL;
- br_vlan_get_tag(skb, &vid);
icmp6h = icmp6_hdr(skb);
num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
len = sizeof(*icmp6h);
@@ -1138,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br,
static int br_ip4_multicast_query(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
const struct iphdr *iph = ip_hdr(skb);
struct igmphdr *ih = igmp_hdr(skb);
@@ -1150,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
unsigned long now = jiffies;
__be32 group;
int err = 0;
- u16 vid = 0;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
@@ -1186,14 +1187,10 @@ static int br_ip4_multicast_query(struct net_bridge *br,
if (!group)
goto out;
- br_vlan_get_tag(skb, &vid);
mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
goto out;
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
- mp->timer_armed = true;
-
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
@@ -1219,7 +1216,8 @@ out:
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_query(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct mld_msg *mld;
@@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
unsigned long now = jiffies;
const struct in6_addr *group = NULL;
int err = 0;
- u16 vid = 0;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
@@ -1265,14 +1262,10 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (!group)
goto out;
- br_vlan_get_tag(skb, &vid);
mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
goto out;
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
- mp->timer_armed = true;
-
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
(timer_pending(&mp->timer) ?
@@ -1358,7 +1351,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
call_rcu_bh(&p->rcu, br_multicast_free_pg);
br_mdb_notify(br->dev, port, group, RTM_DELMDB);
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1370,12 +1363,30 @@ static void br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist && mp->timer_armed &&
+ if (mp->mglist &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
mod_timer(&mp->timer, time);
}
+
+ goto out;
+ }
+
+ for (p = mlock_dereference(mp->ports, br);
+ p != NULL;
+ p = mlock_dereference(p->next, br)) {
+ if (p->port != port)
+ continue;
+
+ if (!hlist_unhashed(&p->mglist) &&
+ (timer_pending(&p->timer) ?
+ time_after(p->timer.expires, time) :
+ try_to_del_timer_sync(&p->timer) >= 0)) {
+ mod_timer(&p->timer, time);
+ }
+
+ break;
}
out:
spin_unlock(&br->multicast_lock);
@@ -1424,7 +1435,8 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
struct sk_buff *skb2 = skb;
const struct iphdr *iph;
@@ -1432,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
unsigned int len;
unsigned int offset;
int err;
- u16 vid = 0;
/* We treat OOM as packet loss for now. */
if (!pskb_may_pull(skb, sizeof(*iph)))
@@ -1493,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = 0;
- br_vlan_get_tag(skb2, &vid);
BR_INPUT_SKB_CB(skb)->igmp = 1;
ih = igmp_hdr(skb2);
@@ -1504,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = br_ip4_multicast_add_group(br, port, ih->group, vid);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb2);
+ err = br_ip4_multicast_igmp3_report(br, port, skb2, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- err = br_ip4_multicast_query(br, port, skb2);
+ err = br_ip4_multicast_query(br, port, skb2, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
br_ip4_multicast_leave_group(br, port, ih->group, vid);
@@ -1525,7 +1535,8 @@ err_out:
#if IS_ENABLED(CONFIG_IPV6)
static int br_multicast_ipv6_rcv(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
struct sk_buff *skb2;
const struct ipv6hdr *ip6h;
@@ -1535,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
unsigned int len;
int offset;
int err;
- u16 vid = 0;
if (!pskb_may_pull(skb, sizeof(*ip6h)))
return -EINVAL;
@@ -1625,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
err = 0;
- br_vlan_get_tag(skb, &vid);
BR_INPUT_SKB_CB(skb)->igmp = 1;
switch (icmp6_type) {
@@ -1642,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
break;
}
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb2);
+ err = br_ip6_multicast_mld2_report(br, port, skb2, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb2);
+ err = br_ip6_multicast_query(br, port, skb2, vid);
break;
case ICMPV6_MGM_REDUCTION:
{
@@ -1666,7 +1675,7 @@ out:
#endif
int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb, u16 vid)
{
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
@@ -1676,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
switch (skb->protocol) {
case htons(ETH_P_IP):
- return br_multicast_ipv4_rcv(br, port, skb);
+ return br_multicast_ipv4_rcv(br, port, skb, vid);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return br_multicast_ipv6_rcv(br, port, skb);
+ return br_multicast_ipv6_rcv(br, port, skb, vid);
#endif
}
@@ -1798,7 +1807,6 @@ void br_multicast_stop(struct net_bridge *br)
hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
hlist[ver]) {
del_timer(&mp->timer);
- mp->timer_armed = false;
call_rcu_bh(&mp->rcu, br_multicast_free_group);
}
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 878f008afefa..80cad2cf02a7 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -559,6 +559,8 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
else if (skb->protocol == htons(ETH_P_PPP_SES))
nf_bridge->mask |= BRNF_PPPoE;
+ /* Must drop socket now because of tproxy. */
+ skb_orphan(skb);
return skb->dev;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e74ddc1c29a8..f75d92e4f96b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br,
vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
- if (vinfo->vid >= VLAN_N_VID)
+ if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
return -EINVAL;
switch (cmd) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index efb57d911569..229d820bdf0b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,7 +126,6 @@ struct net_bridge_mdb_entry
struct timer_list timer;
struct br_ip addr;
bool mglist;
- bool timer_armed;
};
struct net_bridge_mdb_htable
@@ -344,10 +343,9 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
}
/* br_device.c */
-extern void br_dev_setup(struct net_device *dev);
-extern void br_dev_delete(struct net_device *dev, struct list_head *list);
-extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
- struct net_device *dev);
+void br_dev_setup(struct net_device *dev);
+void br_dev_delete(struct net_device *dev, struct list_head *list);
+netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
#ifdef CONFIG_NET_POLL_CONTROLLER
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
@@ -358,8 +356,8 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
netpoll_send_skb(np, skb);
}
-extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
-extern void br_netpoll_disable(struct net_bridge_port *p);
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
+void br_netpoll_disable(struct net_bridge_port *p);
#else
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
@@ -377,116 +375,99 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
#endif
/* br_fdb.c */
-extern int br_fdb_init(void);
-extern void br_fdb_fini(void);
-extern void br_fdb_flush(struct net_bridge *br);
-extern void br_fdb_changeaddr(struct net_bridge_port *p,
- const unsigned char *newaddr);
-extern void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
-extern void br_fdb_cleanup(unsigned long arg);
-extern void br_fdb_delete_by_port(struct net_bridge *br,
- const struct net_bridge_port *p, int do_all);
-extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
- const unsigned char *addr,
- __u16 vid);
-extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
-extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
- unsigned long count, unsigned long off);
-extern int br_fdb_insert(struct net_bridge *br,
- struct net_bridge_port *source,
- const unsigned char *addr,
- u16 vid);
-extern void br_fdb_update(struct net_bridge *br,
- struct net_bridge_port *source,
- const unsigned char *addr,
- u16 vid);
-extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
-
-extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr);
-extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr,
- u16 nlh_flags);
-extern int br_fdb_dump(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct net_device *dev,
- int idx);
+int br_fdb_init(void);
+void br_fdb_fini(void);
+void br_fdb_flush(struct net_bridge *br);
+void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
+void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
+void br_fdb_cleanup(unsigned long arg);
+void br_fdb_delete_by_port(struct net_bridge *br,
+ const struct net_bridge_port *p, int do_all);
+struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
+ const unsigned char *addr, __u16 vid);
+int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
+int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
+ unsigned long off);
+int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid);
+void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid);
+int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
+
+int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr);
+int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
+ const unsigned char *addr, u16 nlh_flags);
+int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, int idx);
/* br_forward.c */
-extern void br_deliver(const struct net_bridge_port *to,
- struct sk_buff *skb);
-extern int br_dev_queue_push_xmit(struct sk_buff *skb);
-extern void br_forward(const struct net_bridge_port *to,
+void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
+int br_dev_queue_push_xmit(struct sk_buff *skb);
+void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, struct sk_buff *skb0);
-extern int br_forward_finish(struct sk_buff *skb);
-extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb,
- bool unicast);
-extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb2, bool unicast);
+int br_forward_finish(struct sk_buff *skb);
+void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
+void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
+ struct sk_buff *skb2, bool unicast);
/* br_if.c */
-extern void br_port_carrier_check(struct net_bridge_port *p);
-extern int br_add_bridge(struct net *net, const char *name);
-extern int br_del_bridge(struct net *net, const char *name);
-extern void br_net_exit(struct net *net);
-extern int br_add_if(struct net_bridge *br,
- struct net_device *dev);
-extern int br_del_if(struct net_bridge *br,
- struct net_device *dev);
-extern int br_min_mtu(const struct net_bridge *br);
-extern netdev_features_t br_features_recompute(struct net_bridge *br,
- netdev_features_t features);
+void br_port_carrier_check(struct net_bridge_port *p);
+int br_add_bridge(struct net *net, const char *name);
+int br_del_bridge(struct net *net, const char *name);
+void br_net_exit(struct net *net);
+int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_del_if(struct net_bridge *br, struct net_device *dev);
+int br_min_mtu(const struct net_bridge *br);
+netdev_features_t br_features_recompute(struct net_bridge *br,
+ netdev_features_t features);
/* br_input.c */
-extern int br_handle_frame_finish(struct sk_buff *skb);
-extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
+int br_handle_frame_finish(struct sk_buff *skb);
+rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
/* br_ioctl.c */
-extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg);
+int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
+ void __user *arg);
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
extern unsigned int br_mdb_rehash_seq;
-extern int br_multicast_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
- struct sk_buff *skb);
-extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
- struct sk_buff *skb, u16 vid);
-extern void br_multicast_add_port(struct net_bridge_port *port);
-extern void br_multicast_del_port(struct net_bridge_port *port);
-extern void br_multicast_enable_port(struct net_bridge_port *port);
-extern void br_multicast_disable_port(struct net_bridge_port *port);
-extern void br_multicast_init(struct net_bridge *br);
-extern void br_multicast_open(struct net_bridge *br);
-extern void br_multicast_stop(struct net_bridge *br);
-extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb);
-extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, struct sk_buff *skb2);
-extern int br_multicast_set_router(struct net_bridge *br, unsigned long val);
-extern int br_multicast_set_port_router(struct net_bridge_port *p,
- unsigned long val);
-extern int br_multicast_toggle(struct net_bridge *br, unsigned long val);
-extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
-extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
-extern struct net_bridge_mdb_entry *br_mdb_ip_get(
- struct net_bridge_mdb_htable *mdb,
- struct br_ip *dst);
-extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
- struct net_bridge_port *port, struct br_ip *group);
-extern void br_multicast_free_pg(struct rcu_head *head);
-extern struct net_bridge_port_group *br_multicast_new_port_group(
- struct net_bridge_port *port,
- struct br_ip *group,
- struct net_bridge_port_group __rcu *next,
- unsigned char state);
-extern void br_mdb_init(void);
-extern void br_mdb_uninit(void);
-extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type);
+int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+ struct sk_buff *skb, u16 vid);
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+ struct sk_buff *skb, u16 vid);
+void br_multicast_add_port(struct net_bridge_port *port);
+void br_multicast_del_port(struct net_bridge_port *port);
+void br_multicast_enable_port(struct net_bridge_port *port);
+void br_multicast_disable_port(struct net_bridge_port *port);
+void br_multicast_init(struct net_bridge *br);
+void br_multicast_open(struct net_bridge *br);
+void br_multicast_stop(struct net_bridge *br);
+void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
+ struct sk_buff *skb);
+void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
+ struct sk_buff *skb, struct sk_buff *skb2);
+int br_multicast_set_router(struct net_bridge *br, unsigned long val);
+int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
+int br_multicast_toggle(struct net_bridge *br, unsigned long val);
+int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
+int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+struct net_bridge_mdb_entry *
+br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
+struct net_bridge_mdb_entry *
+br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port,
+ struct br_ip *group);
+void br_multicast_free_pg(struct rcu_head *head);
+struct net_bridge_port_group *
+br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
+ struct net_bridge_port_group __rcu *next,
+ unsigned char state);
+void br_mdb_init(void);
+void br_mdb_uninit(void);
+void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
+ struct br_ip *group, int type);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -523,7 +504,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
#else
static inline int br_multicast_rcv(struct net_bridge *br,
struct net_bridge_port *port,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u16 vid)
{
return 0;
}
@@ -591,22 +573,21 @@ static inline void br_mdb_uninit(void)
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
-extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
- struct sk_buff *skb, u16 *vid);
-extern bool br_allowed_egress(struct net_bridge *br,
- const struct net_port_vlans *v,
- const struct sk_buff *skb);
-extern struct sk_buff *br_handle_vlan(struct net_bridge *br,
- const struct net_port_vlans *v,
- struct sk_buff *skb);
-extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
-extern int br_vlan_delete(struct net_bridge *br, u16 vid);
-extern void br_vlan_flush(struct net_bridge *br);
-extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
-extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
-extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
-extern void nbp_vlan_flush(struct net_bridge_port *port);
-extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
+bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
+ struct sk_buff *skb, u16 *vid);
+bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v,
+ const struct sk_buff *skb);
+struct sk_buff *br_handle_vlan(struct net_bridge *br,
+ const struct net_port_vlans *v,
+ struct sk_buff *skb);
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
+int br_vlan_delete(struct net_bridge *br, u16 vid);
+void br_vlan_flush(struct net_bridge *br);
+int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
+int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
+void nbp_vlan_flush(struct net_bridge_port *port);
+bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
static inline struct net_port_vlans *br_get_vlan_info(
const struct net_bridge *br)
@@ -643,9 +624,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
* vid wasn't set
*/
smp_rmb();
- return (v->pvid & VLAN_TAG_PRESENT) ?
- (v->pvid & ~VLAN_TAG_PRESENT) :
- VLAN_N_VID;
+ return v->pvid ?: VLAN_N_VID;
}
#else
@@ -727,9 +706,9 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
/* br_netfilter.c */
#ifdef CONFIG_BRIDGE_NETFILTER
-extern int br_netfilter_init(void);
-extern void br_netfilter_fini(void);
-extern void br_netfilter_rtable_init(struct net_bridge *);
+int br_netfilter_init(void);
+void br_netfilter_fini(void);
+void br_netfilter_rtable_init(struct net_bridge *);
#else
#define br_netfilter_init() (0)
#define br_netfilter_fini() do { } while(0)
@@ -737,43 +716,39 @@ extern void br_netfilter_rtable_init(struct net_bridge *);
#endif
/* br_stp.c */
-extern void br_log_state(const struct net_bridge_port *p);
-extern struct net_bridge_port *br_get_port(struct net_bridge *br,
- u16 port_no);
-extern void br_init_port(struct net_bridge_port *p);
-extern void br_become_designated_port(struct net_bridge_port *p);
+void br_log_state(const struct net_bridge_port *p);
+struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
+void br_init_port(struct net_bridge_port *p);
+void br_become_designated_port(struct net_bridge_port *p);
-extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
-extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
-extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
-extern int br_set_max_age(struct net_bridge *br, unsigned long x);
+void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
+int br_set_forward_delay(struct net_bridge *br, unsigned long x);
+int br_set_hello_time(struct net_bridge *br, unsigned long x);
+int br_set_max_age(struct net_bridge *br, unsigned long x);
/* br_stp_if.c */
-extern void br_stp_enable_bridge(struct net_bridge *br);
-extern void br_stp_disable_bridge(struct net_bridge *br);
-extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
-extern void br_stp_enable_port(struct net_bridge_port *p);
-extern void br_stp_disable_port(struct net_bridge_port *p);
-extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
-extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
-extern void br_stp_set_bridge_priority(struct net_bridge *br,
- u16 newprio);
-extern int br_stp_set_port_priority(struct net_bridge_port *p,
- unsigned long newprio);
-extern int br_stp_set_path_cost(struct net_bridge_port *p,
- unsigned long path_cost);
-extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
+void br_stp_enable_bridge(struct net_bridge *br);
+void br_stp_disable_bridge(struct net_bridge *br);
+void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
+void br_stp_enable_port(struct net_bridge_port *p);
+void br_stp_disable_port(struct net_bridge_port *p);
+bool br_stp_recalculate_bridge_id(struct net_bridge *br);
+void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
+void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio);
+int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio);
+int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost);
+ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
/* br_stp_bpdu.c */
struct stp_proto;
-extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
- struct net_device *dev);
+void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+ struct net_device *dev);
/* br_stp_timer.c */
-extern void br_stp_timer_init(struct net_bridge *br);
-extern void br_stp_port_timer_init(struct net_bridge_port *p);
-extern unsigned long br_timer_value(const struct timer_list *timer);
+void br_stp_timer_init(struct net_bridge *br);
+void br_stp_port_timer_init(struct net_bridge_port *p);
+unsigned long br_timer_value(const struct timer_list *timer);
/* br.c */
#if IS_ENABLED(CONFIG_ATM_LANE)
@@ -782,23 +757,23 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
/* br_netlink.c */
extern struct rtnl_link_ops br_link_ops;
-extern int br_netlink_init(void);
-extern void br_netlink_fini(void);
-extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
-extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
-extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
-extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask);
+int br_netlink_init(void);
+void br_netlink_fini(void);
+void br_ifinfo_notify(int event, struct net_bridge_port *port);
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
+int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
+ u32 filter_mask);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
extern const struct sysfs_ops brport_sysfs_ops;
-extern int br_sysfs_addif(struct net_bridge_port *p);
-extern int br_sysfs_renameif(struct net_bridge_port *p);
+int br_sysfs_addif(struct net_bridge_port *p);
+int br_sysfs_renameif(struct net_bridge_port *p);
/* br_sysfs_br.c */
-extern int br_sysfs_addbr(struct net_device *dev);
-extern void br_sysfs_delbr(struct net_device *dev);
+int br_sysfs_addbr(struct net_device *dev);
+void br_sysfs_delbr(struct net_device *dev);
#else
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 0c0fe36e7aa9..2fe910c4e170 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -51,19 +51,19 @@ static inline int br_is_designated_port(const struct net_bridge_port *p)
/* br_stp.c */
-extern void br_become_root_bridge(struct net_bridge *br);
-extern void br_config_bpdu_generation(struct net_bridge *);
-extern void br_configuration_update(struct net_bridge *);
-extern void br_port_state_selection(struct net_bridge *);
-extern void br_received_config_bpdu(struct net_bridge_port *p,
- const struct br_config_bpdu *bpdu);
-extern void br_received_tcn_bpdu(struct net_bridge_port *p);
-extern void br_transmit_config(struct net_bridge_port *p);
-extern void br_transmit_tcn(struct net_bridge *br);
-extern void br_topology_change_detection(struct net_bridge *br);
+void br_become_root_bridge(struct net_bridge *br);
+void br_config_bpdu_generation(struct net_bridge *);
+void br_configuration_update(struct net_bridge *);
+void br_port_state_selection(struct net_bridge *);
+void br_received_config_bpdu(struct net_bridge_port *p,
+ const struct br_config_bpdu *bpdu);
+void br_received_tcn_bpdu(struct net_bridge_port *p);
+void br_transmit_config(struct net_bridge_port *p);
+void br_transmit_tcn(struct net_bridge *br);
+void br_topology_change_detection(struct net_bridge *br);
/* br_stp_bpdu.c */
-extern void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
-extern void br_send_tcn_bpdu(struct net_bridge_port *);
+void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *);
+void br_send_tcn_bpdu(struct net_bridge_port *);
#endif
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 108084a04671..656a6f3e40de 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br)
if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
__br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
- else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY)
+ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
__br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
if (r == 0) {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9a9ffe7e4019..53f0990eab58 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
return 0;
}
- if (vid) {
- if (v->port_idx) {
- p = v->parent.port;
- br = p->br;
- dev = p->dev;
- } else {
- br = v->parent.br;
- dev = br->dev;
- }
- ops = dev->netdev_ops;
-
- if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- /* Add VLAN to the device filter if it is supported.
- * Stricly speaking, this is not necessary now, since
- * devices are made promiscuous by the bridge, but if
- * that ever changes this code will allow tagged
- * traffic to enter the bridge.
- */
- err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
- vid);
- if (err)
- return err;
- }
-
- err = br_fdb_insert(br, p, dev->dev_addr, vid);
- if (err) {
- br_err(br, "failed insert local address into bridge "
- "forwarding table\n");
- goto out_filt;
- }
+ if (v->port_idx) {
+ p = v->parent.port;
+ br = p->br;
+ dev = p->dev;
+ } else {
+ br = v->parent.br;
+ dev = br->dev;
+ }
+ ops = dev->netdev_ops;
+
+ if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
+ /* Add VLAN to the device filter if it is supported.
+ * Strictly speaking, this is not necessary now, since
+ * devices are made promiscuous by the bridge, but if
+ * that ever changes this code will allow tagged
+ * traffic to enter the bridge.
+ */
+ err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
+ vid);
+ if (err)
+ return err;
+ }
+ err = br_fdb_insert(br, p, dev->dev_addr, vid);
+ if (err) {
+ br_err(br, "failed insert local address into bridge "
+ "forwarding table\n");
+ goto out_filt;
}
set_bit(vid, v->vlan_bitmap);
@@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
__vlan_delete_pvid(v, vid);
clear_bit(vid, v->untagged_bitmap);
- if (v->port_idx && vid) {
+ if (v->port_idx) {
struct net_device *dev = v->parent.port->dev;
const struct net_device_ops *ops = dev->netdev_ops;
@@ -192,6 +189,8 @@ out:
bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid)
{
+ int err;
+
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
@@ -204,20 +203,32 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
if (!v)
return false;
- if (br_vlan_get_tag(skb, vid)) {
+ err = br_vlan_get_tag(skb, vid);
+ if (!*vid) {
u16 pvid = br_get_pvid(v);
- /* Frame did not have a tag. See if pvid is set
- * on this port. That tells us which vlan untagged
- * traffic belongs to.
+ /* Frame had a tag with VID 0 or did not have a tag.
+ * See if pvid is set on this port. That tells us which
+ * vlan untagged or priority-tagged traffic belongs to.
*/
if (pvid == VLAN_N_VID)
return false;
- /* PVID is set on this port. Any untagged ingress
- * frame is considered to belong to this vlan.
+ /* PVID is set on this port. Any untagged or priority-tagged
+ * ingress frame is considered to belong to this vlan.
*/
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ *vid = pvid;
+ if (likely(err))
+ /* Untagged frame. */
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ else
+ /* Priority-tagged frame.
+ * At this point, we know that skb->vlan_tci has the
+ * VLAN_TAG_PRESENT bit set and that its VID field is 0x000.
+ * We update only the VID field and preserve the PCP field.
+ */
+ skb->vlan_tci |= pvid;
+
return true;
}
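A standalone illustration of why the OR in the priority-tagged branch is safe (this is not code from the patch; bit positions are those of the in-kernel vlan_tci encoding): PCP lives in bits 13-15, VLAN_TAG_PRESENT at bit 12, and the VID in bits 0-11, so with a VID of 0x000 the OR can only fill the VID field:

u16 tci = skb->vlan_tci;	/* priority-tagged: VID bits are 0x000 */
tci |= pvid;			/* pvid is 1..4094, touches bits 0-11 only */
/* PCP and the VLAN_TAG_PRESENT bit are preserved unchanged */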
@@ -248,7 +259,9 @@ bool br_allowed_egress(struct net_bridge *br,
return false;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
{
struct net_port_vlans *pv = NULL;
@@ -278,7 +291,9 @@ out:
return err;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int br_vlan_delete(struct net_bridge *br, u16 vid)
{
struct net_port_vlans *pv;
@@ -289,14 +304,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
if (!pv)
return -EINVAL;
- if (vid) {
- /* If the VID !=0 remove fdb for this vid. VID 0 is special
- * in that it's the default and is always there in the fdb.
- */
- spin_lock_bh(&br->hash_lock);
- fdb_delete_by_addr(br, br->dev->dev_addr, vid);
- spin_unlock_bh(&br->hash_lock);
- }
+ spin_lock_bh(&br->hash_lock);
+ fdb_delete_by_addr(br, br->dev->dev_addr, vid);
+ spin_unlock_bh(&br->hash_lock);
__vlan_del(pv, vid);
return 0;
@@ -329,7 +339,9 @@ unlock:
return 0;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
struct net_port_vlans *pv = NULL;
@@ -363,7 +375,9 @@ clean_up:
return err;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
struct net_port_vlans *pv;
@@ -374,14 +388,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
if (!pv)
return -EINVAL;
- if (vid) {
- /* If the VID !=0 remove fdb for this vid. VID 0 is special
- * in that it's the default and is always there in the fdb.
- */
- spin_lock_bh(&port->br->hash_lock);
- fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
- spin_unlock_bh(&port->br->hash_lock);
- }
+ spin_lock_bh(&port->br->hash_lock);
+ fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
+ spin_unlock_bh(&port->br->hash_lock);
return __vlan_del(pv, vid);
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 518093802d1d..7c470c371e14 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
ub->qlen++;
pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* Fill in the ulog data */
pm->version = EBT_ULOG_VERSION;
@@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
pm->hook = hooknr;
if (uloginfo->prefix != NULL)
strcpy(pm->prefix, uloginfo->prefix);
- else
- *(pm->prefix) = '\0';
if (in) {
strcpy(pm->physindev, in->name);
@@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
else
strcpy(pm->indev, in->name);
- } else
- pm->indev[0] = pm->physindev[0] = '\0';
+ }
if (out) {
/* If out exists, then out is a bridge port */
strcpy(pm->physoutdev, out->name);
/* rcu_read_lock()ed by nf_hook_slow */
strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
- } else
- pm->outdev[0] = pm->physoutdev[0] = '\0';
+ }
if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0)
BUG();
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 1dccb4c33894..6de58b40535c 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -108,9 +108,9 @@ struct s_pstats {
extern struct dev_rcv_lists can_rx_alldev_list;
/* function prototypes for the CAN networklayer procfs (proc.c) */
-extern void can_init_proc(void);
-extern void can_remove_proc(void);
-extern void can_stat_update(unsigned long data);
+void can_init_proc(void);
+void can_remove_proc(void);
+void can_stat_update(unsigned long data);
/* structures and variables from af_can.c needed in proc.c for reading */
extern struct timer_list can_stattimer; /* timer for statistics update */
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index ed7d088b1bc9..059a3ce4b53f 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -23,7 +23,7 @@ struct ceph_auth_none_info {
struct ceph_none_authorizer au; /* we only need one; it's static */
};
-extern int ceph_auth_none_init(struct ceph_auth_client *ac);
+int ceph_auth_none_init(struct ceph_auth_client *ac);
#endif
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index c5a058da7ac8..65ee72082d99 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -45,7 +45,7 @@ struct ceph_x_info {
struct ceph_x_authorizer auth_authorizer;
};
-extern int ceph_x_init(struct ceph_auth_client *ac);
+int ceph_x_init(struct ceph_auth_client *ac);
#endif
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 3572dc518bc9..d1498224c49d 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -20,34 +20,32 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
kfree(key->key);
}
-extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
- const struct ceph_crypto_key *src);
-extern int ceph_crypto_key_encode(struct ceph_crypto_key *key,
- void **p, void *end);
-extern int ceph_crypto_key_decode(struct ceph_crypto_key *key,
- void **p, void *end);
-extern int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+ const struct ceph_crypto_key *src);
+int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
/* crypto.c */
-extern int ceph_decrypt(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len);
-extern int ceph_encrypt(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src, size_t src_len);
-extern int ceph_decrypt2(struct ceph_crypto_key *secret,
- void *dst1, size_t *dst1_len,
- void *dst2, size_t *dst2_len,
- const void *src, size_t src_len);
-extern int ceph_encrypt2(struct ceph_crypto_key *secret,
- void *dst, size_t *dst_len,
- const void *src1, size_t src1_len,
- const void *src2, size_t src2_len);
-extern int ceph_crypto_init(void);
-extern void ceph_crypto_shutdown(void);
+int ceph_decrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_encrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_decrypt2(struct ceph_crypto_key *secret,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len);
+int ceph_encrypt2(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len);
+int ceph_crypto_init(void);
+void ceph_crypto_shutdown(void);
/* armor.c */
-extern int ceph_armor(char *dst, const char *src, const char *end);
-extern int ceph_unarmor(char *dst, const char *src, const char *end);
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
#endif
diff --git a/net/core/datagram.c b/net/core/datagram.c
index af814e764206..a16ed7bbe376 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -577,7 +577,7 @@ EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
/**
* zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
* @skb: buffer to copy
- * @from: io vector to copy to
+ * @from: io vector to copy from
* @offset: offset in the io vector to start copying from
 * @count: number of vectors to copy into the buffer
*
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b6eadf69289..0e6136546a8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
call_netdevice_notifiers(NETDEV_CHANGE, dev);
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev)
if (ret < 0)
return ret;
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_UP, dev);
return ret;
@@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head)
__dev_close_many(head);
list_for_each_entry_safe(dev, tmp, head, close_list) {
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
call_netdevice_notifiers(NETDEV_DOWN, dev);
list_del_init(&dev->close_list);
}
@@ -2377,6 +2377,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
}
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+ SKB_GSO_CB(skb)->encap_level = 0;
+
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
@@ -4646,6 +4648,7 @@ remove_symlinks:
free_adj:
kfree(adj);
+ dev_put(adj_dev);
return ret;
}
@@ -5255,7 +5258,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
unsigned int changes = dev->flags ^ old_flags;
if (gchanges)
- rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
if (changes & IFF_UP) {
if (dev->flags & IFF_UP)
@@ -5487,7 +5490,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
/*
* Flush the unicast and multicast chains
@@ -5886,7 +5889,7 @@ int register_netdevice(struct net_device *dev)
*/
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
out:
return ret;
@@ -6193,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+void netdev_freemem(struct net_device *dev)
+{
+ char *addr = (char *)dev - dev->padded;
+
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -6236,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kzalloc(alloc_size, GFP_KERNEL);
+ p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!p)
+ p = vzalloc(alloc_size);
if (!p)
return NULL;
@@ -6245,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
- goto free_p;
+ goto free_dev;
if (dev_addr_init(dev))
goto free_pcpu;
@@ -6298,8 +6313,8 @@ free_pcpu:
kfree(dev->_rx);
#endif
-free_p:
- kfree(p);
+free_dev:
+ netdev_freemem(dev);
return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
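The allocation change above follows the common try-kmalloc-then-vmalloc pattern, with netdev_freemem() picking the matching free routine via is_vmalloc_addr(). A self-contained sketch of the pattern (helper names hypothetical):

static void *netdev_zalloc_sketch(size_t size)
{
	void *p;

	/* prefer physically contiguous memory, but fail quietly */
	p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!p)
		p = vzalloc(size);	/* fall back to vmalloc space */

	return p;
}

static void netdev_zfree_sketch(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}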
@@ -6336,7 +6351,7 @@ void free_netdev(struct net_device *dev)
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
- kfree((char *)dev - dev->padded);
+ netdev_freemem(dev);
return;
}
@@ -6498,7 +6513,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
/*
* Flush the unicast and multicast chains
@@ -6537,7 +6552,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
synchronize_net();
err = 0;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 6cda4e2c2132..ec40a849fc42 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -752,7 +752,7 @@ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
EXPORT_SYMBOL(dev_mc_del_global);
/**
- * dev_mc_sync - Synchronize device's unicast list to another device
+ * dev_mc_sync - Synchronize device's multicast list to another device
* @to: destination device
* @from: source device
*
@@ -780,7 +780,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_mc_sync);
/**
- * dev_mc_sync_multiple - Synchronize device's unicast list to another
+ * dev_mc_sync_multiple - Synchronize device's multicast list to another
* device, but allow for multiple calls to sync to multiple devices.
* @to: destination device
* @from: source device
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 78e9d9223e40..862989898f61 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+ [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation",
+ [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index f8e25ac41c6c..0242035192f1 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -66,7 +66,7 @@ again:
struct iphdr _iph;
ip:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
+ if (!iph || iph->ihl < 5)
return false;
if (ip_is_fragment(iph))
@@ -184,6 +184,22 @@ ipv6:
EXPORT_SYMBOL(skb_flow_dissect);
static u32 hashrnd __read_mostly;
+static __always_inline void __flow_hash_secret_init(void)
+{
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+}
+
+static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
+{
+ __flow_hash_secret_init();
+ return jhash_3words(a, b, c, hashrnd);
+}
+
+static __always_inline u32 __flow_hash_1word(u32 a)
+{
+ __flow_hash_secret_init();
+ return jhash_1word(a, hashrnd);
+}
/*
* __skb_get_rxhash: calculate a flow hash based on src/dst addresses
@@ -210,9 +226,9 @@ void __skb_get_rxhash(struct sk_buff *skb)
swap(keys.port16[0], keys.port16[1]);
}
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports, hashrnd);
+ hash = __flow_hash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports);
if (!hash)
hash = 1;
@@ -248,7 +264,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol;
- hash = jhash_1word(hash, hashrnd);
+ hash = __flow_hash_1word(hash);
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
@@ -340,7 +356,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
else
hash = (__force u16) skb->protocol ^
skb->rxhash;
- hash = jhash_1word(hash, hashrnd);
+ hash = __flow_hash_1word(hash);
queue_index = map->queues[
((u64)hash * map->len) >> 32];
}
@@ -395,11 +411,3 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
}
-
-static int __init initialize_hashrnd(void)
-{
- get_random_bytes(&hashrnd, sizeof(hashrnd));
- return 0;
-}
-
-late_initcall_sync(initialize_hashrnd);
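
Removing initialize_hashrnd() means hashrnd is no longer seeded unconditionally at boot; net_get_random_once() seeds it on first use instead (see the helper added in net/core/utils.c further down). A sketch of the same lazy-seed pattern for some other subsystem secret (my_hashrnd and my_hash are made-up names):

	#include <linux/types.h>
	#include <linux/jhash.h>
	#include <linux/net.h>		/* net_get_random_once() */

	static u32 my_hashrnd __read_mostly;

	static u32 my_hash(u32 a, u32 b, u32 c)
	{
		/* Fills my_hashrnd exactly once, on the first call that
		 * needs it; afterwards this is a patched-out branch.
		 */
		net_get_random_once(&my_hashrnd, sizeof(my_hashrnd));
		return jhash_3words(a, b, c, my_hashrnd);
	}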
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b77eeecc0011..4cdb7c48dad6 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -100,7 +100,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
EXPORT_SYMBOL(memcpy_toiovecend);
/*
- * Copy iovec from kernel. Returns -EFAULT on error.
+ * Copy iovec to kernel. Returns -EFAULT on error.
*/
int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d954b56b4e47..d03f2c9750fa 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d)
BUG_ON(dev->reg_state != NETREG_RELEASED);
kfree(dev->ifalias);
- kfree((char *)dev - dev->padded);
+ netdev_freemem(dev);
}
static const void *net_namespace(struct device *d)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e461b8..8f971990677c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
netpoll_send_skb(np, send_skb);
- /* If there are several rx_hooks for the same address,
- we're fine by sending a single reply */
+ /* If there are several rx_skb_hooks for the same
+ * address we're fine by sending a single reply
+ */
break;
}
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
netpoll_send_skb(np, send_skb);
- /* If there are several rx_hooks for the same address,
- we're fine by sending a single reply */
+ /* If there are several rx_skb_hooks for the same
+ * address, we're fine by sending a single reply
+ */
break;
}
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb)
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
- int proto, len, ulen;
- int hits = 0;
+ int proto, len, ulen, data_len;
+ int hits = 0, offset;
const struct iphdr *iph;
struct udphdr *uh;
struct netpoll *np, *tmp;
+ uint16_t source;
if (list_empty(&npinfo->rx_np))
goto out;
@@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
len -= iph->ihl*4;
uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+ offset = (unsigned char *)(uh + 1) - skb->data;
ulen = ntohs(uh->len);
+ data_len = skb->len - offset;
+ source = ntohs(uh->source);
if (ulen != len)
goto out;
@@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
if (np->local_port && np->local_port != ntohs(uh->dest))
continue;
- np->rx_hook(np, ntohs(uh->source),
- (char *)(uh+1),
- ulen - sizeof(struct udphdr));
+ np->rx_skb_hook(np, source, skb, offset, data_len);
hits++;
}
} else {
@@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
uh = udp_hdr(skb);
+ offset = (unsigned char *)(uh + 1) - skb->data;
ulen = ntohs(uh->len);
+ data_len = skb->len - offset;
+ source = ntohs(uh->source);
if (ulen != skb->len)
goto out;
if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
if (np->local_port && np->local_port != ntohs(uh->dest))
continue;
- np->rx_hook(np, ntohs(uh->source),
- (char *)(uh+1),
- ulen - sizeof(struct udphdr));
+ np->rx_skb_hook(np, source, skb, offset, data_len);
hits++;
}
#endif
@@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
npinfo->netpoll = np;
- if (np->rx_hook) {
+ if (np->rx_skb_hook) {
spin_lock_irqsave(&npinfo->rx_lock, flags);
npinfo->rx_flags |= NETPOLL_RX_ENABLED;
list_add_tail(&np->rx, &npinfo->rx_np);
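
The rx_hook to rx_skb_hook conversion stops passing a bare payload pointer and hands the consumer the skb itself plus an offset and length. Judging only from the call sites in this hunk, the new hook takes (np, source, skb, offset, data_len); a hypothetical consumer could look like this (the exact prototype lives in include/linux/netpoll.h, not shown here):

	static void my_rx_skb_hook(struct netpoll *np, int source,
				   struct sk_buff *skb, int offset,
				   int data_len)
	{
		/* 'offset' points just past the UDP header in the linear
		 * data; 'source' is already in host byte order.
		 */
		const unsigned char *payload = skb->data + offset;

		if (data_len > 0)
			pr_info("netpoll rx from port %d: %d bytes (first %#x)\n",
				source, data_len, payload[0]);
	}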
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03da052..cf67144d3e3c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
size_t if_info_size;
- skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+ skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
if (skb == NULL)
goto errout;
@@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
return;
errout:
if (err < 0)
@@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_JOIN:
break;
default:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
}
return NOTIFY_DONE;
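
rtmsg_ifinfo() previously hard-coded GFP_KERNEL for both the nlmsg allocation and the notify; threading a gfp_t through lets each call site state its own context. The hunks above pick GFP_ATOMIC only where the caller may be unable to sleep (e.g. __dev_notify_flags()), roughly:

	/* Process context under RTNL - a sleeping allocation is fine: */
	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);

	/* Possibly atomic context - must not sleep: */
	rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);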
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 3f1ec1586ae1..897da56f3aff 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -7,28 +7,20 @@
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/string.h>
+#include <linux/net.h>
#include <net/secure_seq.h>
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
-static void net_secret_init(void)
+static __always_inline void net_secret_init(void)
{
- u32 tmp;
- int i;
-
- if (likely(net_secret[0]))
- return;
-
- for (i = NET_SECRET_SIZE; i > 0;) {
- do {
- get_random_bytes(&tmp, sizeof(tmp));
- } while (!tmp);
- cmpxchg(&net_secret[--i], 0, tmp);
- }
+ net_get_random_once(net_secret, sizeof(net_secret));
}
+#endif
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8ead744fcc94..e4115597b38b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -903,6 +903,9 @@ EXPORT_SYMBOL(skb_clone);
static void skb_headers_offset_update(struct sk_buff *skb, int off)
{
+ /* Only adjust this if it actually is csum_start rather than csum */
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ skb->csum_start += off;
/* {transport,network,mac}_header and tail are relative to skb->head */
skb->transport_header += off;
skb->network_header += off;
@@ -1109,9 +1112,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
#endif
skb->tail += off;
skb_headers_offset_update(skb, nhead);
- /* Only adjust this if it actually is csum_start rather than csum */
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- skb->csum_start += nhead;
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
@@ -1176,7 +1176,6 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
NUMA_NO_NODE);
int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
- int off;
if (!n)
return NULL;
@@ -1200,11 +1199,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
copy_skb_header(n, skb);
- off = newheadroom - oldheadroom;
- if (n->ip_summed == CHECKSUM_PARTIAL)
- n->csum_start += off;
-
- skb_headers_offset_update(n, off);
+ skb_headers_offset_update(n, newheadroom - oldheadroom);
return n;
}
@@ -1933,9 +1928,8 @@ fault:
EXPORT_SYMBOL(skb_store_bits);
/* Checksum skb data. */
-
-__wsum skb_checksum(const struct sk_buff *skb, int offset,
- int len, __wsum csum)
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+ __wsum csum, const struct skb_checksum_ops *ops)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -1946,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
if (copy > 0) {
if (copy > len)
copy = len;
- csum = csum_partial(skb->data + offset, copy, csum);
+ csum = ops->update(skb->data + offset, copy, csum);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -1967,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
vaddr = kmap_atomic(skb_frag_page(frag));
- csum2 = csum_partial(vaddr + frag->page_offset +
- offset - start, copy, 0);
+ csum2 = ops->update(vaddr + frag->page_offset +
+ offset - start, copy, 0);
kunmap_atomic(vaddr);
- csum = csum_block_add(csum, csum2, pos);
+ csum = ops->combine(csum, csum2, pos, copy);
if (!(len -= copy))
return csum;
offset += copy;
@@ -1989,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
__wsum csum2;
if (copy > len)
copy = len;
- csum2 = skb_checksum(frag_iter, offset - start,
- copy, 0);
- csum = csum_block_add(csum, csum2, pos);
+ csum2 = __skb_checksum(frag_iter, offset - start,
+ copy, 0, ops);
+ csum = ops->combine(csum, csum2, pos, copy);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -2003,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
return csum;
}
+EXPORT_SYMBOL(__skb_checksum);
+
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+ int len, __wsum csum)
+{
+ const struct skb_checksum_ops ops = {
+ .update = csum_partial_ext,
+ .combine = csum_block_add_ext,
+ };
+
+ return __skb_checksum(skb, offset, len, csum, &ops);
+}
EXPORT_SYMBOL(skb_checksum);
/* Both of above in one bottle. */
@@ -2837,14 +2843,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
__copy_skb_header(nskb, skb);
nskb->mac_len = skb->mac_len;
- /* nskb and skb might have different headroom */
- if (nskb->ip_summed == CHECKSUM_PARTIAL)
- nskb->csum_start += skb_headroom(nskb) - headroom;
-
- skb_reset_mac_header(nskb);
- skb_set_network_header(nskb, skb->mac_len);
- nskb->transport_header = (nskb->network_header +
- skb_network_header_len(skb));
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
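
__skb_checksum() turns the checksum walk (linear data, page frags, frag_list) into a template parameterised by struct skb_checksum_ops; skb_checksum() above just instantiates it with the classic Internet-checksum primitives. A hedged sketch of a caller plugging in CRC32c instead - the crc32c()/crc32c_combine() helpers and the my_* names are assumptions here, not part of this diff:

	#include <linux/crc32c.h>	/* crc32c() */
	#include <linux/crc32.h>	/* crc32c_combine(), assumed available */
	#include <linux/skbuff.h>

	/* Carry a CRC32c through the __wsum slot; the __force casts exist
	 * only because __skb_checksum() is typed for Internet checksums.
	 */
	static __wsum my_crc_update(const void *mem, int len, __wsum wsum)
	{
		return (__force __wsum)crc32c((__force u32)wsum, mem, len);
	}

	static __wsum my_crc_combine(__wsum csum, __wsum csum2,
				     int offset, int len)
	{
		return (__force __wsum)crc32c_combine((__force u32)csum,
						      (__force u32)csum2, len);
	}

	static const struct skb_checksum_ops my_crc_ops = {
		.update  = my_crc_update,
		.combine = my_crc_combine,
	};

	static u32 my_skb_crc32c(const struct sk_buff *skb, u32 seed)
	{
		return (__force u32)__skb_checksum(skb, 0, skb->len,
						   (__force __wsum)seed,
						   &my_crc_ops);
	}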
diff --git a/net/core/sock.c b/net/core/sock.c
index fd6afa267475..ab20ed9b0f31 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -475,12 +475,6 @@ discard_and_relse:
}
EXPORT_SYMBOL(sk_receive_skb);
-void sk_reset_txq(struct sock *sk)
-{
- sk_tx_queue_clear(sk);
-}
-EXPORT_SYMBOL(sk_reset_txq);
-
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
@@ -1847,7 +1841,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
-bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+/**
+ * skb_page_frag_refill - check that a page_frag contains enough room
+ * @sz: minimum size of the fragment we want to get
+ * @pfrag: pointer to page_frag
+ * @prio: priority for memory allocation
+ *
+ * Note: While this allocator tries to use high order pages, there is
+ * no guarantee that allocations succeed. Therefore, @sz MUST be
+ * less than or equal to PAGE_SIZE.
+ */
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
{
int order;
@@ -1856,16 +1860,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
pfrag->offset = 0;
return true;
}
- if (pfrag->offset < pfrag->size)
+ if (pfrag->offset + sz <= pfrag->size)
return true;
put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
- order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+ order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
do {
- gfp_t gfp = sk->sk_allocation;
+ gfp_t gfp = prio;
if (order)
gfp |= __GFP_COMP | __GFP_NOWARN;
@@ -1877,6 +1881,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
} while (--order >= 0);
+ return false;
+}
+EXPORT_SYMBOL(skb_page_frag_refill);
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+ if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
+ return true;
+
sk_enter_memory_pressure(sk);
sk_stream_moderate_sndbuf(sk);
return false;
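
Splitting skb_page_frag_refill() out of sk_page_frag_refill() makes the page_frag refill logic usable without a struct sock; the socket variant above shrinks to a 32-byte refill plus the memory-pressure handling. A hypothetical non-socket user (frag_alloc is an illustrative name):

	#include <linux/mm.h>
	#include <linux/skbuff.h>

	/* Hand out sz-byte chunks from a shared page_frag, refilling from
	 * the page allocator when the current page runs out. Per the
	 * comment above, sz must not exceed PAGE_SIZE.
	 */
	static void *frag_alloc(struct page_frag *pfrag, unsigned int sz)
	{
		void *va;

		if (!skb_page_frag_refill(sz, pfrag, GFP_KERNEL))
			return NULL;	/* true OOM, not just a full page */

		/* GFP_KERNEL pages are lowmem, so page_address() is safe */
		va = page_address(pfrag->page) + pfrag->offset;
		pfrag->offset += sz;
		return va;
	}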
diff --git a/net/core/utils.c b/net/core/utils.c
index aa88e23fc87a..2f737bf90b3f 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,3 +338,52 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
csum_unfold(*sum)));
}
EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+struct __net_random_once_work {
+ struct work_struct work;
+ struct static_key *key;
+};
+
+static void __net_random_once_deferred(struct work_struct *w)
+{
+ struct __net_random_once_work *work =
+ container_of(w, struct __net_random_once_work, work);
+ if (!static_key_enabled(work->key))
+ static_key_slow_inc(work->key);
+ kfree(work);
+}
+
+static void __net_random_once_disable_jump(struct static_key *key)
+{
+ struct __net_random_once_work *w;
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return;
+
+ INIT_WORK(&w->work, __net_random_once_deferred);
+ w->key = key;
+ schedule_work(&w->work);
+}
+
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+ struct static_key *done_key)
+{
+ static DEFINE_SPINLOCK(lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock, flags);
+ if (*done) {
+ spin_unlock_irqrestore(&lock, flags);
+ return false;
+ }
+
+ get_random_bytes(buf, nbytes);
+ *done = true;
+ spin_unlock_irqrestore(&lock, flags);
+
+ __net_random_once_disable_jump(done_key);
+
+ return true;
+}
+EXPORT_SYMBOL(__net_get_random_once);
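
__net_get_random_once() is only the slow path: the spinlock serialises the very first callers, and the deferred work enables the static key afterwards so that subsequent calls hit a patched-out branch with no lock and no atomic. The per-call-site wrapper lives in include/linux/net.h; a simplified sketch of how such a wrapper fits together (the in-tree macro uses the jump-label branch primitives rather than static_key_enabled()):

	#define my_get_random_once(buf, nbytes)				\
	({								\
		static bool ___done;					\
		static struct static_key ___key;			\
		bool ___ret = false;					\
									\
		/* Cheap test; effectively free once the deferred	\
		 * work has enabled ___key.				\
		 */							\
		if (!static_key_enabled(&___key))			\
			___ret = __net_get_random_once(buf, nbytes,	\
						       &___done,	\
						       &___key);	\
		___ret;							\
	})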
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index a269aa7f7923..3284bfa988c0 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -101,16 +101,16 @@ struct dccp_ackvec_record {
u8 avr_ack_nonce:1;
};
-extern int dccp_ackvec_init(void);
-extern void dccp_ackvec_exit(void);
+int dccp_ackvec_init(void);
+void dccp_ackvec_exit(void);
-extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
-extern void dccp_ackvec_free(struct dccp_ackvec *av);
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
+void dccp_ackvec_free(struct dccp_ackvec *av);
-extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
-extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
-extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
-extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
{
@@ -133,7 +133,6 @@ struct dccp_ackvec_parsed {
struct list_head node;
};
-extern int dccp_ackvec_parsed_add(struct list_head *head,
- u8 *vec, u8 len, u8 nonce);
-extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce);
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index fb85d371a8de..6eb837a47b5c 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -93,8 +93,8 @@ extern struct ccid_operations ccid2_ops;
extern struct ccid_operations ccid3_ops;
#endif
-extern int ccid_initialize_builtins(void);
-extern void ccid_cleanup_builtins(void);
+int ccid_initialize_builtins(void);
+void ccid_cleanup_builtins(void);
struct ccid {
struct ccid_operations *ccid_ops;
@@ -106,12 +106,12 @@ static inline void *ccid_priv(const struct ccid *ccid)
return (void *)ccid->ccid_priv;
}
-extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
-extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
-extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
- char __user *, int __user *);
+bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *, int __user *);
-extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
+struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
{
@@ -131,8 +131,8 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
return ccid->ccid_ops->ccid_id;
}
-extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
-extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
+void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
+void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
/*
* Congestion control of queued data packets via CCID decision.
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index d1d2f5383b7d..57f631a86ccd 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -65,9 +65,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
struct tfrc_rx_hist;
-extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
- u32 (*first_li)(struct sock *), struct sock *);
-extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
-extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
+int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+ u32 (*first_li)(struct sock *), struct sock *);
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7ee4a9d9d335..ee362b0b630d 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -60,8 +60,8 @@ static inline struct tfrc_tx_hist_entry *
return head;
}
-extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
-extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
+int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
+void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
/* Subtraction a-b modulo-16, respects circular wrap-around */
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -139,20 +139,17 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
return h->loss_count > 0;
}
-extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
- const struct sk_buff *skb, const u64 ndp);
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb,
+ const u64 ndp);
-extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
struct tfrc_loss_hist;
-extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
- struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*first_li)(struct sock *sk),
- struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
- const struct sk_buff *skb);
-extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
-extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh,
+ struct sk_buff *skb, const u64 ndp,
+ u32 (*first_li)(struct sock *sk), struct sock *sk);
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb);
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ed698c42a5fb..40ee7d62b652 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -55,21 +55,21 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}
-extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
-extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
-extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
+u32 tfrc_calc_x(u16 s, u32 R, u32 p);
+u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
-extern int tfrc_tx_packet_history_init(void);
-extern void tfrc_tx_packet_history_exit(void);
-extern int tfrc_rx_packet_history_init(void);
-extern void tfrc_rx_packet_history_exit(void);
+int tfrc_tx_packet_history_init(void);
+void tfrc_tx_packet_history_exit(void);
+int tfrc_rx_packet_history_init(void);
+void tfrc_rx_packet_history_exit(void);
-extern int tfrc_li_init(void);
-extern void tfrc_li_exit(void);
+int tfrc_li_init(void);
+void tfrc_li_exit(void);
#ifdef CONFIG_IP_DCCP_TFRC_LIB
-extern int tfrc_lib_init(void);
-extern void tfrc_lib_exit(void);
+int tfrc_lib_init(void);
+void tfrc_lib_exit(void);
#else
#define tfrc_lib_init() (0)
#define tfrc_lib_exit()
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 708e75bf623d..30948784dd58 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -53,7 +53,7 @@ extern struct inet_hashinfo dccp_hashinfo;
extern struct percpu_counter dccp_orphan_count;
-extern void dccp_time_wait(struct sock *sk, int state, int timeo);
+void dccp_time_wait(struct sock *sk, int state, int timeo);
/*
* Set safe upper bounds for header and option length. Since Data Offset is 8
@@ -224,114 +224,108 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb)
skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
}
-extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
-extern int dccp_retransmit_skb(struct sock *sk);
+int dccp_retransmit_skb(struct sock *sk);
-extern void dccp_send_ack(struct sock *sk);
-extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
- struct request_sock *rsk);
+void dccp_send_ack(struct sock *sk);
+void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *rsk);
-extern void dccp_send_sync(struct sock *sk, const u64 seq,
- const enum dccp_pkt_type pkt_type);
+void dccp_send_sync(struct sock *sk, const u64 seq,
+ const enum dccp_pkt_type pkt_type);
/*
* TX Packet Dequeueing Interface
*/
-extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
-extern bool dccp_qpolicy_full(struct sock *sk);
-extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
-extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
-extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
-extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+bool dccp_qpolicy_full(struct sock *sk);
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *dccp_qpolicy_top(struct sock *sk);
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
+bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
/*
* TX Packet Output and TX Timers
*/
-extern void dccp_write_xmit(struct sock *sk);
-extern void dccp_write_space(struct sock *sk);
-extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
+void dccp_write_xmit(struct sock *sk);
+void dccp_write_space(struct sock *sk);
+void dccp_flush_write_queue(struct sock *sk, long *time_budget);
-extern void dccp_init_xmit_timers(struct sock *sk);
+void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
-extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
+unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
-extern const char *dccp_packet_name(const int type);
+const char *dccp_packet_name(const int type);
-extern void dccp_set_state(struct sock *sk, const int state);
-extern void dccp_done(struct sock *sk);
+void dccp_set_state(struct sock *sk, const int state);
+void dccp_done(struct sock *sk);
-extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
- struct sk_buff const *skb);
+int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+ struct sk_buff const *skb);
-extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
-extern struct sock *dccp_create_openreq_child(struct sock *sk,
- const struct request_sock *req,
- const struct sk_buff *skb);
+struct sock *dccp_create_openreq_child(struct sock *sk,
+ const struct request_sock *req,
+ const struct sk_buff *skb);
-extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst);
-extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct request_sock **prev);
+struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst);
+struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct request_sock **prev);
-extern int dccp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
-extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct dccp_hdr *dh, unsigned int len);
-extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned int len);
+int dccp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
+int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ struct dccp_hdr *dh, unsigned int len);
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned int len);
-extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-extern void dccp_destroy_sock(struct sock *sk);
+int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
+void dccp_destroy_sock(struct sock *sk);
-extern void dccp_close(struct sock *sk, long timeout);
-extern struct sk_buff *dccp_make_response(struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req);
+void dccp_close(struct sock *sk, long timeout);
+struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+ struct request_sock *req);
-extern int dccp_connect(struct sock *sk);
-extern int dccp_disconnect(struct sock *sk, int flags);
-extern int dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int dccp_connect(struct sock *sk);
+int dccp_disconnect(struct sock *sk, int flags);
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#ifdef CONFIG_COMPAT
-extern int compat_dccp_getsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, int __user *optlen);
-extern int compat_dccp_setsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, unsigned int optlen);
+int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#endif
-extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size);
-extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len);
-extern void dccp_shutdown(struct sock *sk, int how);
-extern int inet_dccp_listen(struct socket *sock, int backlog);
-extern unsigned int dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
-extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len);
-
-extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
- struct sk_buff *skb);
-extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
-extern void dccp_send_close(struct sock *sk, const int active);
-extern int dccp_invalid_packet(struct sk_buff *skb);
-extern u32 dccp_sample_rtt(struct sock *sk, long delta);
+int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t size);
+int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len, int nonblock, int flags,
+ int *addr_len);
+void dccp_shutdown(struct sock *sk, int how);
+int inet_dccp_listen(struct socket *sock, int backlog);
+unsigned int dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait);
+int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+
+struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb);
+int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
+void dccp_send_close(struct sock *sk, const int active);
+int dccp_invalid_packet(struct sk_buff *skb);
+u32 dccp_sample_rtt(struct sock *sk, long delta);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
@@ -475,25 +469,25 @@ static inline int dccp_ack_pending(const struct sock *sk)
return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
}
-extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
-extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
-extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
-extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
- struct sk_buff *skb);
-extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
-extern void dccp_feat_list_purge(struct list_head *fn_list);
-
-extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
-extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
-extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
-extern u32 dccp_timestamp(void);
-extern void dccp_timestamping_init(void);
-extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
- const void *value, unsigned char len);
+int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
+int dccp_feat_finalise_settings(struct dccp_sock *dp);
+int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+ struct sk_buff *skb);
+int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
+void dccp_feat_list_purge(struct list_head *fn_list);
+
+int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *);
+int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
+u32 dccp_timestamp(void);
+void dccp_timestamping_init(void);
+int dccp_insert_option(struct sk_buff *skb, unsigned char option,
+ const void *value, unsigned char len);
#ifdef CONFIG_SYSCTL
-extern int dccp_sysctl_init(void);
-extern void dccp_sysctl_exit(void);
+int dccp_sysctl_init(void);
+void dccp_sysctl_exit(void);
#else
static inline int dccp_sysctl_init(void)
{
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 90b957d34d26..0e75cebb2187 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -107,13 +107,13 @@ extern unsigned long sysctl_dccp_sequence_window;
extern int sysctl_dccp_rx_ccid;
extern int sysctl_dccp_tx_ccid;
-extern int dccp_feat_init(struct sock *sk);
-extern void dccp_feat_initialise_sysctls(void);
-extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
- u8 const *list, u8 len);
-extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
- u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
+int dccp_feat_init(struct sock *sk);
+void dccp_feat_initialise_sysctls(void);
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len);
+int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
+ u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
+int dccp_feat_clone_list(struct list_head const *, struct list_head *);
/*
* Encoding variable-length options and their maximum length.
@@ -127,11 +127,11 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
*/
#define DCCP_OPTVAL_MAXLEN 6
-extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
-extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
-extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
+void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
+u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
-extern int dccp_insert_option_mandatory(struct sk_buff *skb);
-extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
- u8 *val, u8 len, bool repeat_first);
+int dccp_insert_option_mandatory(struct sk_buff *skb);
+int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len,
+ bool repeat_first);
#endif /* _DCCP_FEAT_H */
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
new file mode 100644
index 000000000000..0d3d709052ca
--- /dev/null
+++ b/net/hsr/Kconfig
@@ -0,0 +1,27 @@
+#
+# IEC 62439-3 High-availability Seamless Redundancy
+#
+
+config HSR
+ tristate "High-availability Seamless Redundancy (HSR)"
+ ---help---
+ If you say Y here, then your Linux box will be able to act as a
+ DANH ("Doubly attached node implementing HSR"). For this to work,
+ your Linux box needs (at least) two physical Ethernet interfaces,
+ and it must be connected as a node in a ring network together with
+ other HSR capable nodes.
+
+ All Ethernet frames sent over the hsr device will be sent in both
+ directions on the ring (over both slave ports), giving a redundant,
+ instant fail-over network. Each HSR node in the ring acts like a
+ bridge for HSR frames, but filters frames that have been forwarded
+ earlier.
+
+ This code is a "best effort" to comply with the HSR standard as
+ described in IEC 62439-3:2010 (HSRv0), but no compliance tests have
+ been made.
+
+ You need to perform any and all necessary tests yourself before
+ relying on this code in a safety critical system!
+
+ If unsure, say N.
diff --git a/net/hsr/Makefile b/net/hsr/Makefile
new file mode 100644
index 000000000000..b68359f181cc
--- /dev/null
+++ b/net/hsr/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for HSR
+#
+
+obj-$(CONFIG_HSR) += hsr.o
+
+hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
new file mode 100644
index 000000000000..cac505f166d5
--- /dev/null
+++ b/net/hsr/hsr_device.c
@@ -0,0 +1,596 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * This file contains device methods for creating, using and destroying
+ * virtual HSR devices.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include "hsr_device.h"
+#include "hsr_framereg.h"
+#include "hsr_main.h"
+
+
+static bool is_admin_up(struct net_device *dev)
+{
+ return dev && (dev->flags & IFF_UP);
+}
+
+static bool is_slave_up(struct net_device *dev)
+{
+ return dev && is_admin_up(dev) && netif_oper_up(dev);
+}
+
+static void __hsr_set_operstate(struct net_device *dev, int transition)
+{
+ write_lock_bh(&dev_base_lock);
+ if (dev->operstate != transition) {
+ dev->operstate = transition;
+ write_unlock_bh(&dev_base_lock);
+ netdev_state_change(dev);
+ } else {
+ write_unlock_bh(&dev_base_lock);
+ }
+}
+
+void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
+ struct net_device *slave2)
+{
+ if (!is_admin_up(hsr_dev)) {
+ __hsr_set_operstate(hsr_dev, IF_OPER_DOWN);
+ return;
+ }
+
+ if (is_slave_up(slave1) || is_slave_up(slave2))
+ __hsr_set_operstate(hsr_dev, IF_OPER_UP);
+ else
+ __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN);
+}
+
+void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
+ struct net_device *slave2)
+{
+ if (is_slave_up(slave1) || is_slave_up(slave2))
+ netif_carrier_on(hsr_dev);
+ else
+ netif_carrier_off(hsr_dev);
+}
+
+
+void hsr_check_announce(struct net_device *hsr_dev, int old_operstate)
+{
+ struct hsr_priv *hsr_priv;
+
+ hsr_priv = netdev_priv(hsr_dev);
+
+ if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) {
+ /* Went up */
+ hsr_priv->announce_count = 0;
+ hsr_priv->announce_timer.expires = jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+ add_timer(&hsr_priv->announce_timer);
+ }
+
+ if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
+ /* Went down */
+ del_timer(&hsr_priv->announce_timer);
+}
+
+
+int hsr_get_max_mtu(struct hsr_priv *hsr_priv)
+{
+ int mtu_max;
+
+ if (hsr_priv->slave[0] && hsr_priv->slave[1])
+ mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu);
+ else if (hsr_priv->slave[0])
+ mtu_max = hsr_priv->slave[0]->mtu;
+ else if (hsr_priv->slave[1])
+ mtu_max = hsr_priv->slave[1]->mtu;
+ else
+ mtu_max = HSR_TAGLEN;
+
+ return mtu_max - HSR_TAGLEN;
+}
+
+static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct hsr_priv *hsr_priv;
+
+ hsr_priv = netdev_priv(dev);
+
+ if (new_mtu > hsr_get_max_mtu(hsr_priv)) {
+ netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
+ HSR_TAGLEN);
+ return -EINVAL;
+ }
+
+ dev->mtu = new_mtu;
+
+ return 0;
+}
+
+static int hsr_dev_open(struct net_device *dev)
+{
+ struct hsr_priv *hsr_priv;
+ int i;
+ char *slave_name;
+
+ hsr_priv = netdev_priv(dev);
+
+ for (i = 0; i < HSR_MAX_SLAVE; i++) {
+ if (hsr_priv->slave[i])
+ slave_name = hsr_priv->slave[i]->name;
+ else
+ slave_name = "null";
+
+ if (!is_slave_up(hsr_priv->slave[i]))
+ netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n",
+ 'A' + i, slave_name);
+ }
+
+ return 0;
+}
+
+static int hsr_dev_close(struct net_device *dev)
+{
+ /* Nothing to do here. We could try to restore the state of the slaves
+ * to what they were before the hsr master dev's state changed them,
+ * but they might have been changed manually in the meantime too, so
+ * taking them up or down here might be confusing and is probably not a
+ * good idea.
+ */
+ return 0;
+}
+
+
+static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv)
+{
+ unsigned long irqflags;
+
+ /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values
+ * between 0001-1001 ("ring identifier", for regular HSR frames),
+ * or 1111 ("HSR management", supervision frames). Unfortunately, the
+ * spec writers forgot to explain what a "ring identifier" is, or
+ * how it is used. So we just set this to 0001 for regular frames,
+ * and 1111 for supervision frames.
+ */
+ set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1);
+
+ /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet
+ * frame is the content of the frame located between the Length/Type
+ * field and the Frame Check Sequence."
+ *
+ * IEC 62439-3, p 48, specifies the "original LPDU" to include the
+ * original "LT" field (what "LT" means is not explained anywhere as
+ * far as I can see - perhaps "Length/Type"?). So LSDU_size might
+ * equal original length + 2.
+ * Also, the fact that this field is not used anywhere (might be used
+ * by a RedBox connecting HSR and PRP nets?) means I cannot test its
+ * correctness. Instead of guessing, I set this to 0 here, to make any
+ * problems immediately apparent. Anyone using this driver with PRP/HSR
+ * RedBoxes might need to fix this...
+ */
+ set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0);
+
+ spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
+ hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr);
+ hsr_priv->sequence_nr++;
+ spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+
+ hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
+
+ hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP);
+}
+
+static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv,
+ enum hsr_dev_idx dev_idx)
+{
+ struct hsr_ethhdr *hsr_ethhdr;
+
+ hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
+
+ skb->dev = hsr_priv->slave[dev_idx];
+
+ hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx);
+
+ /* Address substitution (IEC62439-3 pp 26, 50): replace mac
+ * address of outgoing frame with that of the outgoing slave's.
+ */
+ memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN);
+
+ return dev_queue_xmit(skb);
+}
+
+
+static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct hsr_priv *hsr_priv;
+ struct hsr_ethhdr *hsr_ethhdr;
+ struct sk_buff *skb2;
+ int res1, res2;
+
+ hsr_priv = netdev_priv(dev);
+ hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
+
+ if ((skb->protocol != htons(ETH_P_PRP)) ||
+ (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) {
+ hsr_fill_tag(hsr_ethhdr, hsr_priv);
+ skb->protocol = htons(ETH_P_PRP);
+ }
+
+ skb2 = pskb_copy(skb, GFP_ATOMIC);
+
+ res1 = NET_XMIT_DROP;
+ if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A]))
+ res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A);
+
+ res2 = NET_XMIT_DROP;
+ if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B]))
+ res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B);
+
+ if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN ||
+ res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) {
+ hsr_priv->dev->stats.tx_packets++;
+ hsr_priv->dev->stats.tx_bytes += skb->len;
+ } else {
+ hsr_priv->dev->stats.tx_dropped++;
+ }
+
+ return NETDEV_TX_OK;
+}
+
+
+static int hsr_header_create(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ int res;
+
+ /* Make room for the HSR tag now. We will fill it in later (in
+ * hsr_dev_xmit)
+ */
+ if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN)
+ return -ENOBUFS;
+ skb_push(skb, HSR_TAGLEN);
+
+ /* To allow VLAN/HSR combos we should probably use
+ * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
+ * here instead. It would require other changes too, though - e.g.
+ * separate headers for each slave etc...
+ */
+ res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
+ if (res <= 0)
+ return res;
+ skb_reset_mac_header(skb);
+
+ return res + HSR_TAGLEN;
+}
+
+
+static const struct header_ops hsr_header_ops = {
+ .create = hsr_header_create,
+ .parse = eth_header_parse,
+};
+
+
+/* HSR:2010 supervision frames should be padded so that the whole frame,
+ * including headers and FCS, is 64 bytes (without VLAN).
+ */
+static int hsr_pad(int size)
+{
+ const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN;
+
+ if (size >= min_size)
+ return size;
+ return min_size;
+}
+
+static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type)
+{
+ struct hsr_priv *hsr_priv;
+ struct sk_buff *skb;
+ int hlen, tlen;
+ struct hsr_sup_tag *hsr_stag;
+ struct hsr_sup_payload *hsr_sp;
+ unsigned long irqflags;
+
+ hlen = LL_RESERVED_SPACE(hsr_dev);
+ tlen = hsr_dev->needed_tailroom;
+ skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen,
+ GFP_ATOMIC);
+
+ if (skb == NULL)
+ return;
+
+ hsr_priv = netdev_priv(hsr_dev);
+
+ skb_reserve(skb, hlen);
+
+ skb->dev = hsr_dev;
+ skb->protocol = htons(ETH_P_PRP);
+ skb->priority = TC_PRIO_CONTROL;
+
+ if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
+ hsr_priv->sup_multicast_addr,
+ skb->dev->dev_addr, skb->len) < 0)
+ goto out;
+
+ skb_pull(skb, sizeof(struct ethhdr));
+ hsr_stag = (typeof(hsr_stag)) skb->data;
+
+ set_hsr_stag_path(hsr_stag, 0xf);
+ set_hsr_stag_HSR_Ver(hsr_stag, 0);
+
+ spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
+ hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr);
+ hsr_priv->sequence_nr++;
+ spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+
+ hsr_stag->HSR_TLV_Type = type;
+ hsr_stag->HSR_TLV_Length = 12;
+
+ skb_push(skb, sizeof(struct ethhdr));
+
+ /* Payload: MacAddressA */
+ hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp));
+ memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN);
+
+ dev_queue_xmit(skb);
+ return;
+
+out:
+ kfree_skb(skb);
+}
+
+
+/* Announce (supervision frame) timer function
+ */
+static void hsr_announce(unsigned long data)
+{
+ struct hsr_priv *hsr_priv;
+
+ hsr_priv = (struct hsr_priv *) data;
+
+ if (hsr_priv->announce_count < 3) {
+ send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE);
+ hsr_priv->announce_count++;
+ } else {
+ send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK);
+ }
+
+ if (hsr_priv->announce_count < 3)
+ hsr_priv->announce_timer.expires = jiffies +
+ msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+ else
+ hsr_priv->announce_timer.expires = jiffies +
+ msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+
+ if (is_admin_up(hsr_priv->dev))
+ add_timer(&hsr_priv->announce_timer);
+}
+
+
+static void restore_slaves(struct net_device *hsr_dev)
+{
+ struct hsr_priv *hsr_priv;
+ int i;
+ int res;
+
+ hsr_priv = netdev_priv(hsr_dev);
+
+ rtnl_lock();
+
+ /* Restore promiscuity */
+ for (i = 0; i < HSR_MAX_SLAVE; i++) {
+ if (!hsr_priv->slave[i])
+ continue;
+ res = dev_set_promiscuity(hsr_priv->slave[i], -1);
+ if (res)
+ netdev_info(hsr_dev,
+ "Cannot restore slave promiscuity (%s, %d)\n",
+ hsr_priv->slave[i]->name, res);
+ }
+
+ rtnl_unlock();
+}
+
+static void reclaim_hsr_dev(struct rcu_head *rh)
+{
+ struct hsr_priv *hsr_priv;
+
+ hsr_priv = container_of(rh, struct hsr_priv, rcu_head);
+ free_netdev(hsr_priv->dev);
+}
+
+
+/* According to comments in the declaration of struct net_device, this function
+ * is "Called from unregister, can be used to call free_netdev". Ok then...
+ */
+static void hsr_dev_destroy(struct net_device *hsr_dev)
+{
+ struct hsr_priv *hsr_priv;
+
+ hsr_priv = netdev_priv(hsr_dev);
+
+ del_timer(&hsr_priv->announce_timer);
+ unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */
+ restore_slaves(hsr_dev);
+ call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */
+}
+
+static const struct net_device_ops hsr_device_ops = {
+ .ndo_change_mtu = hsr_dev_change_mtu,
+ .ndo_open = hsr_dev_open,
+ .ndo_stop = hsr_dev_close,
+ .ndo_start_xmit = hsr_dev_xmit,
+};
+
+
+void hsr_dev_setup(struct net_device *dev)
+{
+ random_ether_addr(dev->dev_addr);
+
+ ether_setup(dev);
+ dev->header_ops = &hsr_header_ops;
+ dev->netdev_ops = &hsr_device_ops;
+ dev->tx_queue_len = 0;
+
+ dev->destructor = hsr_dev_destroy;
+}
+
+
+/* Return true if dev is a HSR master; return false otherwise.
+ */
+bool is_hsr_master(struct net_device *dev)
+{
+ return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit);
+}
+
+static int check_slave_ok(struct net_device *dev)
+{
+ /* Don't allow HSR on non-ethernet like devices */
+ if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) ||
+ (dev->addr_len != ETH_ALEN)) {
+ netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n");
+ return -EINVAL;
+ }
+
+ /* Don't allow enslaving hsr devices */
+ if (is_hsr_master(dev)) {
+ netdev_info(dev, "Cannot create trees of HSR devices.\n");
+ return -EINVAL;
+ }
+
+ if (is_hsr_slave(dev)) {
+ netdev_info(dev, "This device is already a HSR slave.\n");
+ return -EINVAL;
+ }
+
+ if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n");
+ return -EINVAL;
+ }
+
+ /* HSR over bonded devices has not been tested, but I'm not sure it
+ * won't work...
+ */
+
+ return 0;
+}
+
+
+/* Default multicast address for HSR Supervision frames */
+static const unsigned char def_multicast_addr[ETH_ALEN] = {
+ 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00
+};
+
+int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+ unsigned char multicast_spec)
+{
+ struct hsr_priv *hsr_priv;
+ int i;
+ int res;
+
+ hsr_priv = netdev_priv(hsr_dev);
+ hsr_priv->dev = hsr_dev;
+ INIT_LIST_HEAD(&hsr_priv->node_db);
+ INIT_LIST_HEAD(&hsr_priv->self_node_db);
+ for (i = 0; i < HSR_MAX_SLAVE; i++)
+ hsr_priv->slave[i] = slave[i];
+
+ spin_lock_init(&hsr_priv->seqnr_lock);
+ /* Overflow soon, to make bugs easier to find: */
+ hsr_priv->sequence_nr = USHRT_MAX - 1024;
+
+ init_timer(&hsr_priv->announce_timer);
+ hsr_priv->announce_timer.function = hsr_announce;
+ hsr_priv->announce_timer.data = (unsigned long) hsr_priv;
+
+ memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN);
+ hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
+
+/* FIXME: should I modify the value of these?
+ *
+ * - hsr_dev->flags - i.e.
+ * IFF_MASTER/SLAVE?
+ * - hsr_dev->priv_flags - i.e.
+ * IFF_EBRIDGE?
+ * IFF_TX_SKB_SHARING?
+ * IFF_HSR_MASTER/SLAVE?
+ */
+
+ for (i = 0; i < HSR_MAX_SLAVE; i++) {
+ res = check_slave_ok(slave[i]);
+ if (res)
+ return res;
+ }
+
+ hsr_dev->features = slave[0]->features & slave[1]->features;
+ /* Prevent recursive tx locking */
+ hsr_dev->features |= NETIF_F_LLTX;
+ /* VLAN on top of HSR needs testing and probably some work on
+ * hsr_header_create() etc.
+ */
+ hsr_dev->features |= NETIF_F_VLAN_CHALLENGED;
+
+ /* Set hsr_dev's MAC address to that of mac_slave1 */
+ memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+
+ /* Set required header length */
+ for (i = 0; i < HSR_MAX_SLAVE; i++) {
+ if (slave[i]->hard_header_len + HSR_TAGLEN >
+ hsr_dev->hard_header_len)
+ hsr_dev->hard_header_len =
+ slave[i]->hard_header_len + HSR_TAGLEN;
+ }
+
+ /* MTU */
+ for (i = 0; i < HSR_MAX_SLAVE; i++)
+ if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu)
+ hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN;
+
+ /* Make sure the 1st call to netif_carrier_on() gets through */
+ netif_carrier_off(hsr_dev);
+
+ /* Promiscuity */
+ for (i = 0; i < HSR_MAX_SLAVE; i++) {
+ res = dev_set_promiscuity(slave[i], 1);
+ if (res) {
+ netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n",
+ slave[i]->name, res);
+ goto fail;
+ }
+ }
+
+ /* Make sure we recognize frames from ourselves in hsr_rcv() */
+ res = hsr_create_self_node(&hsr_priv->self_node_db,
+ hsr_dev->dev_addr,
+ hsr_priv->slave[1]->dev_addr);
+ if (res < 0)
+ goto fail;
+
+ res = register_netdevice(hsr_dev);
+ if (res)
+ goto fail;
+
+ register_hsr_master(hsr_priv);
+
+ return 0;
+
+fail:
+ restore_slaves(hsr_dev);
+ return res;
+}
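
hsr_fill_tag() manipulates the tag through the set_hsr_tag_path()/set_hsr_tag_LSDU_size() accessors, which live in hsr_main.h outside this excerpt. The 6-octet on-wire layout those helpers imply (a sketch inferred from the code above; my_hsr_tag is an illustrative name, field packing per IEC 62439-3):

	/* Inserted after the Ethernet header; the outer h_proto becomes
	 * ETH_P_PRP and the original EtherType moves into encap_proto.
	 *
	 *   0      3 4            15
	 *  +--------+--------------+
	 *  |  path  |  LSDU_size   |  16 bits
	 *  +--------+--------------+
	 *  |     sequence_nr       |  16 bits
	 *  +-----------------------+
	 *  |     encap_proto       |  16 bits (original EtherType)
	 *  +-----------------------+
	 */
	struct my_hsr_tag {
		__be16	path_and_LSDU_size;	/* 4 + 12 bits */
		__be16	sequence_nr;
		__be16	encap_proto;
	} __packed;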
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
new file mode 100644
index 000000000000..2c7148e73914
--- /dev/null
+++ b/net/hsr/hsr_device.h
@@ -0,0 +1,29 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef __HSR_DEVICE_H
+#define __HSR_DEVICE_H
+
+#include <linux/netdevice.h>
+#include "hsr_main.h"
+
+void hsr_dev_setup(struct net_device *dev);
+int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+ unsigned char multicast_spec);
+void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
+ struct net_device *slave2);
+void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
+ struct net_device *slave2);
+void hsr_check_announce(struct net_device *hsr_dev, int old_operstate);
+bool is_hsr_master(struct net_device *dev);
+int hsr_get_max_mtu(struct hsr_priv *hsr_priv);
+
+#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
new file mode 100644
index 000000000000..003f5bb3acd2
--- /dev/null
+++ b/net/hsr/hsr_framereg.c
@@ -0,0 +1,503 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * The HSR spec says never to forward the same frame twice on the same
+ * interface. A frame is identified by its source MAC address and its HSR
+ * sequence number. This code keeps track of senders and their sequence numbers
+ * to allow filtering of duplicate frames, and to detect HSR ring errors.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include "hsr_main.h"
+#include "hsr_framereg.h"
+#include "hsr_netlink.h"
+
+
+struct node_entry {
+ struct list_head mac_list;
+ unsigned char MacAddressA[ETH_ALEN];
+ unsigned char MacAddressB[ETH_ALEN];
+ enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB
+ * frames are received from this node
+ */
+ unsigned long time_in[HSR_MAX_SLAVE];
+ bool time_in_stale[HSR_MAX_SLAVE];
+ u16 seq_out[HSR_MAX_DEV];
+ struct rcu_head rcu_head;
+};
+
+/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
+
+
+
+/* Search the node db for an entry with the given MacAddressA. Caller must
+ * hold the rcu read lock.
+ */
+static struct node_entry *find_node_by_AddrA(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
+{
+ struct node_entry *node;
+
+ list_for_each_entry_rcu(node, node_db, mac_list) {
+ if (ether_addr_equal(node->MacAddressA, addr))
+ return node;
+ }
+
+ return NULL;
+}
+
+
+/* Search the node db for an entry with the given MacAddressB. Caller must
+ * hold the rcu read lock.
+ */
+static struct node_entry *find_node_by_AddrB(struct list_head *node_db,
+ const unsigned char addr[ETH_ALEN])
+{
+ struct node_entry *node;
+
+ list_for_each_entry_rcu(node, node_db, mac_list) {
+ if (ether_addr_equal(node->MacAddressB, addr))
+ return node;
+ }
+
+ return NULL;
+}
+
+
+/* Search the node db for an entry whose MacAddressA or MacAddressB matches
+ * the frame's source address. Caller must hold the rcu read lock.
+ */
+struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb)
+{
+ struct node_entry *node;
+ struct ethhdr *ethhdr;
+
+ if (!skb_mac_header_was_set(skb))
+ return NULL;
+
+ ethhdr = (struct ethhdr *) skb_mac_header(skb);
+
+ list_for_each_entry_rcu(node, node_db, mac_list) {
+ if (ether_addr_equal(node->MacAddressA, ethhdr->h_source))
+ return node;
+ if (ether_addr_equal(node->MacAddressB, ethhdr->h_source))
+ return node;
+ }
+
+ return NULL;
+}
+
+
+/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
+ * frames from ourselves that have been looped back over the HSR ring.
+ */
+int hsr_create_self_node(struct list_head *self_node_db,
+ unsigned char addr_a[ETH_ALEN],
+ unsigned char addr_b[ETH_ALEN])
+{
+ struct node_entry *node, *oldnode;
+
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(node->MacAddressA, addr_a, ETH_ALEN);
+ memcpy(node->MacAddressB, addr_b, ETH_ALEN);
+
+ rcu_read_lock();
+ oldnode = list_first_or_null_rcu(self_node_db,
+ struct node_entry, mac_list);
+ if (oldnode) {
+ list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+ rcu_read_unlock();
+ synchronize_rcu();
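+ /* No RCU reader can still hold a reference to oldnode here */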
+ kfree(oldnode);
+ } else {
+ rcu_read_unlock();
+ list_add_tail_rcu(&node->mac_list, self_node_db);
+ }
+
+ return 0;
+}
+
+static void node_entry_reclaim(struct rcu_head *rh)
+{
+ kfree(container_of(rh, struct node_entry, rcu_head));
+}
+
+
+/* Add/merge node to the database of nodes. 'skb' must contain an HSR
+ * supervision frame.
+ * - If the supervision header's MacAddressA field is not yet in the database,
+ * this frame is from a hitherto unknown node - add it to the database.
+ * - If the sender's MAC address is not the same as its MacAddressA address,
+ * the node is using PICS_SUBS (address substitution). Record the sender's
+ * address as the node's MacAddressB.
+ *
+ * This function needs to work even if the sender node has changed one of its
+ * slaves' MAC addresses. There are then four cases, described by
+ * (Addr-changed, received-from) pairs as follows. Note that changing the
+ * SlaveA address is equivalent to changing the node's own address:
+ *
+ * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since
+ * node == NULL.
+ * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected
+ * from this frame).
+ *
+ * - (AddrA, SlaveB): The old node will be found. We need to detect this and
+ * remove the node.
+ * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first).
+ * The old one will be pruned after HSR_NODE_FORGET_TIME.
+ *
+ * We also need to detect if the sender's SlaveA and SlaveB cables have been
+ * swapped.
+ */
+struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
+ struct node_entry *node,
+ struct sk_buff *skb,
+ enum hsr_dev_idx dev_idx)
+{
+ struct hsr_sup_payload *hsr_sp;
+ struct hsr_ethhdr_sp *hsr_ethsup;
+ int i;
+ unsigned long now;
+
+ hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb);
+ hsr_sp = (struct hsr_sup_payload *) skb->data;
+
+ if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) {
+ /* Node has changed its AddrA, frame was received from SlaveB */
+ list_del_rcu(&node->mac_list);
+ call_rcu(&node->rcu_head, node_entry_reclaim);
+ node = NULL;
+ }
+
+ if (node && (dev_idx == node->AddrB_if) &&
+ !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) {
+ /* Cables have been swapped */
+ list_del_rcu(&node->mac_list);
+ call_rcu(&node->rcu_head, node_entry_reclaim);
+ node = NULL;
+ }
+
+ if (node && (dev_idx != node->AddrB_if) &&
+ (node->AddrB_if != HSR_DEV_NONE) &&
+ !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) {
+ /* Cables have been swapped */
+ list_del_rcu(&node->mac_list);
+ call_rcu(&node->rcu_head, node_entry_reclaim);
+ node = NULL;
+ }
+
+ if (node)
+ return node;
+
+ node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA);
+ if (node) {
+ /* Node is known, but frame was received from an unknown
+ * address. Node is PICS_SUBS capable; merge its AddrB.
+ */
+ memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ node->AddrB_if = dev_idx;
+ return node;
+ }
+
+ node = kzalloc(sizeof(*node), GFP_ATOMIC);
+ if (!node)
+ return NULL;
+
+ memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN);
+ memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))
+ node->AddrB_if = dev_idx;
+ else
+ node->AddrB_if = HSR_DEV_NONE;
+
+ /* We are only interested in time diffs here, so use current jiffies
+ * as initialization. (0 could trigger a spurious ring error warning.)
+ */
+ now = jiffies;
+ for (i = 0; i < HSR_MAX_SLAVE; i++)
+ node->time_in[i] = now;
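+ /* Start seq_out one step behind this frame's sequence number, so that
+ * the frame itself registers as new on all interfaces.
+ */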
+ for (i = 0; i < HSR_MAX_DEV; i++)
+ node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1;
+
+ list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db);
+
+ return node;
+}
+
+
+/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
+ *
+ * If the frame was sent by a node's B interface, replace the sender
+ * address with that node's "official" address (MacAddressA) so that upper
+ * layers recognize where it came from.
+ */
+void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
+{
+ struct ethhdr *ethhdr;
+ struct node_entry *node;
+
+ if (!skb_mac_header_was_set(skb)) {
+ WARN_ONCE(1, "%s: Mac header not set\n", __func__);
+ return;
+ }
+ ethhdr = (struct ethhdr *) skb_mac_header(skb);
+
+ rcu_read_lock();
+ node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source);
+ if (node)
+ memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN);
+ rcu_read_unlock();
+}
+
+
+/* 'skb' is a frame meant for another host.
+ * 'hsr_dev_idx' is the HSR index of the outgoing device
+ *
+ * Substitute the target (dest) MAC address if necessary, so that it matches
+ * the recipient interface MAC address, regardless of whether that is the
+ * recipient's A or B interface.
+ * This is needed to keep the packets flowing through switches that learn on
+ * which "side" the different interfaces are.
+ */
+void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
+ enum hsr_dev_idx dev_idx)
+{
+ struct node_entry *node;
+
+ rcu_read_lock();
+ node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest);
+ if (node && (node->AddrB_if == dev_idx))
+ memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN);
+ rcu_read_unlock();
+}
+
+
+/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
+ * false otherwise.
+ */
+static bool seq_nr_after(u16 a, u16 b)
+{
+ /* Remove inconsistency where
+ * seq_nr_after(a, b) == seq_nr_before(a, b)
+ */
+ if ((int) b - a == 32768)
+ return false;
+
+ return (((s16) (b - a)) < 0);
+}
+#define seq_nr_before(a, b) seq_nr_after((b), (a))
+#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b)))
+#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
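+
+/* Example: seq_nr_after(1, 65534) is true, since (s16) (65534 - 1) == -3;
+ * sequence number 1 thus counts as coming after 65534 across the u16
+ * wrap-around. When two numbers are exactly 32768 apart, the special case
+ * above makes only the numerically larger one count as "after" the other.
+ */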
+
+
+void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
+{
+ if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+ WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
+ return;
+ }
+ node->time_in[dev_idx] = jiffies;
+ node->time_in_stale[dev_idx] = false;
+}
+
+
+/* 'skb' is an HSR Ethernet frame (with an HSR tag inserted), with a valid
+ * ethhdr->h_source address and skb->mac_header set.
+ *
+ * Return:
+ * 1 if frame can be shown to have been sent recently on this interface,
+ * 0 otherwise, or
+ * negative error code on error
+ */
+int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
+ struct sk_buff *skb)
+{
+ struct hsr_ethhdr *hsr_ethhdr;
+ u16 sequence_nr;
+
+ if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+ WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
+ return -EINVAL;
+ }
+ if (!skb_mac_header_was_set(skb)) {
+ WARN_ONCE(1, "%s: Mac header not set\n", __func__);
+ return -EINVAL;
+ }
+ hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+
+ sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
+ if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx]))
+ return 1;
+
+ node->seq_out[dev_idx] = sequence_nr;
+ return 0;
+}
+
+
+
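+/* A slave interface is "late" for a node if the latest frame received from
+ * that node on this interface is stale, or more than MAX_SLAVE_DIFF ms older
+ * than the latest frame received on the other slave interface.
+ */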
+static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx)
+{
+ enum hsr_dev_idx other;
+
+ if (node->time_in_stale[dev_idx])
+ return true;
+
+ if (dev_idx == HSR_DEV_SLAVE_A)
+ other = HSR_DEV_SLAVE_B;
+ else
+ other = HSR_DEV_SLAVE_A;
+
+ if (node->time_in_stale[other])
+ return false;
+
+ if (time_after(node->time_in[other], node->time_in[dev_idx] +
+ msecs_to_jiffies(MAX_SLAVE_DIFF)))
+ return true;
+
+ return false;
+}
+
+
+/* Detect ring errors, and remove stale node entries. Called by the prune
+ * timer every PRUNE_PERIOD (3 seconds).
+ */
+void hsr_prune_nodes(struct hsr_priv *hsr_priv)
+{
+ struct node_entry *node;
+ unsigned long timestamp;
+ unsigned long time_a, time_b;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) {
+ /* Shorthand */
+ time_a = node->time_in[HSR_DEV_SLAVE_A];
+ time_b = node->time_in[HSR_DEV_SLAVE_B];
+
+ /* Check for timestamps old enough to risk wrap-around */
+ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2))
+ node->time_in_stale[HSR_DEV_SLAVE_A] = true;
+ if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2))
+ node->time_in_stale[HSR_DEV_SLAVE_B] = true;
+
+ /* Get age of newest frame from node.
+ * At least one time_in is OK here; nodes get pruned long
+ * before both time_ins can get stale
+ */
+ timestamp = time_a;
+ if (node->time_in_stale[HSR_DEV_SLAVE_A] ||
+ (!node->time_in_stale[HSR_DEV_SLAVE_B] &&
+ time_after(time_b, time_a)))
+ timestamp = time_b;
+
+ /* Warn of ring error only as long as we get frames at all */
+ if (time_is_after_jiffies(timestamp +
+ msecs_to_jiffies(MAX_SLAVE_DIFF * 3 / 2))) {
+
+ if (is_late(node, HSR_DEV_SLAVE_A))
+ hsr_nl_ringerror(hsr_priv, node->MacAddressA,
+ HSR_DEV_SLAVE_A);
+ else if (is_late(node, HSR_DEV_SLAVE_B))
+ hsr_nl_ringerror(hsr_priv, node->MacAddressA,
+ HSR_DEV_SLAVE_B);
+ }
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr_priv, node->MacAddressA);
+ list_del_rcu(&node->mac_list);
+ /* Note that we need to free this entry later: */
+ call_rcu(&node->rcu_head, node_entry_reclaim);
+ }
+ }
+ rcu_read_unlock();
+}
+
+
+void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+ unsigned char addr[ETH_ALEN])
+{
+ struct node_entry *node;
+
+ if (!_pos) {
+ node = list_first_or_null_rcu(&hsr_priv->node_db,
+ struct node_entry, mac_list);
+ if (node)
+ memcpy(addr, node->MacAddressA, ETH_ALEN);
+ return node;
+ }
+
+ node = _pos;
+ list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) {
+ memcpy(addr, node->MacAddressA, ETH_ALEN);
+ return node;
+ }
+
+ return NULL;
+}
+
+
+int hsr_get_node_data(struct hsr_priv *hsr_priv,
+ const unsigned char *addr,
+ unsigned char addr_b[ETH_ALEN],
+ unsigned int *addr_b_ifindex,
+ int *if1_age,
+ u16 *if1_seq,
+ int *if2_age,
+ u16 *if2_seq)
+{
+ struct node_entry *node;
+ unsigned long tdiff;
+
+
+ rcu_read_lock();
+ node = find_node_by_AddrA(&hsr_priv->node_db, addr);
+ if (!node) {
+ rcu_read_unlock();
+ return -ENOENT; /* No such entry */
+ }
+
+ memcpy(addr_b, node->MacAddressB, ETH_ALEN);
+
+ tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A];
+ if (node->time_in_stale[HSR_DEV_SLAVE_A])
+ *if1_age = INT_MAX;
+#if HZ <= MSEC_PER_SEC
+ else if (tdiff > msecs_to_jiffies(INT_MAX))
+ *if1_age = INT_MAX;
+#endif
+ else
+ *if1_age = jiffies_to_msecs(tdiff);
+
+ tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B];
+ if (node->time_in_stale[HSR_DEV_SLAVE_B])
+ *if2_age = INT_MAX;
+#if HZ <= MSEC_PER_SEC
+ else if (tdiff > msecs_to_jiffies(INT_MAX))
+ *if2_age = INT_MAX;
+#endif
+ else
+ *if2_age = jiffies_to_msecs(tdiff);
+
+ /* Present sequence numbers as if they were incoming on interface */
+ *if1_seq = node->seq_out[HSR_DEV_SLAVE_B];
+ *if2_seq = node->seq_out[HSR_DEV_SLAVE_A];
+
+ if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if])
+ *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex;
+ else
+ *addr_b_ifindex = -1;
+
+ rcu_read_unlock();
+
+ return 0;
+}
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
new file mode 100644
index 000000000000..e6c4022030ad
--- /dev/null
+++ b/net/hsr/hsr_framereg.h
@@ -0,0 +1,53 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef _HSR_FRAMEREG_H
+#define _HSR_FRAMEREG_H
+
+#include "hsr_main.h"
+
+struct node_entry;
+
+struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb);
+
+struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
+ struct node_entry *node,
+ struct sk_buff *skb,
+ enum hsr_dev_idx dev_idx);
+
+void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb);
+void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
+ enum hsr_dev_idx dev_idx);
+
+void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx);
+
+int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
+ struct sk_buff *skb);
+
+void hsr_prune_nodes(struct hsr_priv *hsr_priv);
+
+int hsr_create_self_node(struct list_head *self_node_db,
+ unsigned char addr_a[ETH_ALEN],
+ unsigned char addr_b[ETH_ALEN]);
+
+void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+ unsigned char addr[ETH_ALEN]);
+
+int hsr_get_node_data(struct hsr_priv *hsr_priv,
+ const unsigned char *addr,
+ unsigned char addr_b[ETH_ALEN],
+ unsigned int *addr_b_ifindex,
+ int *if1_age,
+ u16 *if1_seq,
+ int *if2_age,
+ u16 *if2_seq);
+
+#endif /* _HSR_FRAMEREG_H */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
new file mode 100644
index 000000000000..af68dd83a4e3
--- /dev/null
+++ b/net/hsr/hsr_main.c
@@ -0,0 +1,469 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * In addition to routines for registering and unregistering HSR support, this
+ * file also contains the receive routine that handles all incoming frames with
+ * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/timer.h>
+#include <linux/etherdevice.h>
+#include "hsr_main.h"
+#include "hsr_device.h"
+#include "hsr_netlink.h"
+#include "hsr_framereg.h"
+
+
+/* List of all registered virtual HSR devices */
+static LIST_HEAD(hsr_list);
+
+void register_hsr_master(struct hsr_priv *hsr_priv)
+{
+ list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list);
+}
+
+void unregister_hsr_master(struct hsr_priv *hsr_priv)
+{
+ struct hsr_priv *hsr_priv_it;
+
+ list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list)
+ if (hsr_priv_it == hsr_priv) {
+ list_del_rcu(&hsr_priv_it->hsr_list);
+ return;
+ }
+}
+
+bool is_hsr_slave(struct net_device *dev)
+{
+ struct hsr_priv *hsr_priv_it;
+
+ list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) {
+ if (dev == hsr_priv_it->slave[0])
+ return true;
+ if (dev == hsr_priv_it->slave[1])
+ return true;
+ }
+
+ return false;
+}
+
+
+/* If dev is an HSR slave device, return the virtual master device. Return NULL
+ * otherwise.
+ */
+static struct hsr_priv *get_hsr_master(struct net_device *dev)
+{
+ struct hsr_priv *hsr_priv;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
+ if ((dev == hsr_priv->slave[0]) ||
+ (dev == hsr_priv->slave[1])) {
+ rcu_read_unlock();
+ return hsr_priv;
+ }
+
+ rcu_read_unlock();
+ return NULL;
+}
+
+
+/* If dev is an HSR slave device, return the other slave device. Return NULL
+ * otherwise.
+ */
+static struct net_device *get_other_slave(struct hsr_priv *hsr_priv,
+ struct net_device *dev)
+{
+ if (dev == hsr_priv->slave[0])
+ return hsr_priv->slave[1];
+ if (dev == hsr_priv->slave[1])
+ return hsr_priv->slave[0];
+
+ return NULL;
+}
+
+
+static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *slave, *other_slave;
+ struct hsr_priv *hsr_priv;
+ int old_operstate;
+ int mtu_max;
+ int res;
+ struct net_device *dev;
+
+ dev = netdev_notifier_info_to_dev(ptr);
+
+ hsr_priv = get_hsr_master(dev);
+ if (hsr_priv) {
+ /* dev is a slave device */
+ slave = dev;
+ other_slave = get_other_slave(hsr_priv, slave);
+ } else {
+ if (!is_hsr_master(dev))
+ return NOTIFY_DONE;
+ hsr_priv = netdev_priv(dev);
+ slave = hsr_priv->slave[0];
+ other_slave = hsr_priv->slave[1];
+ }
+
+ switch (event) {
+ case NETDEV_UP: /* Administrative state UP */
+ case NETDEV_DOWN: /* Administrative state DOWN */
+ case NETDEV_CHANGE: /* Link (carrier) state changes */
+ old_operstate = hsr_priv->dev->operstate;
+ hsr_set_carrier(hsr_priv->dev, slave, other_slave);
+ /* netif_stacked_transfer_operstate() cannot be used here since
+ * it doesn't set IF_OPER_LOWERLAYERDOWN (?)
+ */
+ hsr_set_operstate(hsr_priv->dev, slave, other_slave);
+ hsr_check_announce(hsr_priv->dev, old_operstate);
+ break;
+ case NETDEV_CHANGEADDR:
+
+ /* This should not happen since there's no ndo_set_mac_address()
+ * for HSR devices - i.e. not supported.
+ */
+ if (dev == hsr_priv->dev)
+ break;
+
+ if (dev == hsr_priv->slave[0])
+ memcpy(hsr_priv->dev->dev_addr,
+ hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+
+ /* Make sure we recognize frames from ourselves in hsr_rcv() */
+ res = hsr_create_self_node(&hsr_priv->self_node_db,
+ hsr_priv->dev->dev_addr,
+ hsr_priv->slave[1] ?
+ hsr_priv->slave[1]->dev_addr :
+ hsr_priv->dev->dev_addr);
+ if (res)
+ netdev_warn(hsr_priv->dev,
+ "Could not update HSR node address.\n");
+
+ if (dev == hsr_priv->slave[0])
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev);
+ break;
+ case NETDEV_CHANGEMTU:
+ if (dev == hsr_priv->dev)
+ break; /* Handled in ndo_change_mtu() */
+ mtu_max = hsr_get_max_mtu(hsr_priv);
+ if (hsr_priv->dev->mtu > mtu_max)
+ dev_set_mtu(hsr_priv->dev, mtu_max);
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev == hsr_priv->slave[0])
+ hsr_priv->slave[0] = NULL;
+ if (dev == hsr_priv->slave[1])
+ hsr_priv->slave[1] = NULL;
+
+ /* There should really be a way to set a new slave device... */
+
+ break;
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* HSR works only on Ethernet devices. Refuse to let a slave
+ * change its type.
+ */
+ return NOTIFY_BAD;
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+static struct timer_list prune_timer;
+
+static void prune_nodes_all(unsigned long data)
+{
+ struct hsr_priv *hsr_priv;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
+ hsr_prune_nodes(hsr_priv);
+ rcu_read_unlock();
+
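+ /* Re-arm; the timer is finally stopped by del_timer() in hsr_exit() */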
+ prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
+ add_timer(&prune_timer);
+}
+
+
+static struct sk_buff *hsr_pull_tag(struct sk_buff *skb)
+{
+ struct hsr_tag *hsr_tag;
+ struct sk_buff *skb2;
+
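+ /* Get a private copy if the skb is shared, since it is modified below */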
+ skb2 = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb2))
+ goto err_free;
+ skb = skb2;
+
+ if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN)))
+ goto err_free;
+
+ hsr_tag = (struct hsr_tag *) skb->data;
+ skb->protocol = hsr_tag->encap_proto;
+ skb_pull(skb, HSR_TAGLEN);
+
+ return skb;
+
+err_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+
+/* The uses I can see for these HSR supervision frames are:
+ * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
+ * 22") to reset any sequence_nr counters belonging to that node. Useful if
+ * the other node's counter has been reset for some reason.
+ * --
+ * Or not - resetting the counter and bridging the frame would create a
+ * loop, unfortunately.
+ *
+ * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck
+ * frame is received from a particular node, we know something is wrong.
+ * We just register these (as with normal frames) and throw them away.
+ *
+ * 3) Allow different MAC addresses for the two slave interfaces, using the
+ * MacAddressA field.
+ */
+static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb)
+{
+ struct hsr_sup_tag *hsr_stag;
+
+ if (!ether_addr_equal(eth_hdr(skb)->h_dest,
+ hsr_priv->sup_multicast_addr))
+ return false;
+
+ hsr_stag = (struct hsr_sup_tag *) skb->data;
+ if (get_hsr_stag_path(hsr_stag) != 0x0f)
+ return false;
+ if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) &&
+ (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK))
+ return false;
+ if (hsr_stag->HSR_TLV_Length != 12)
+ return false;
+
+ return true;
+}
+
+
+/* Implementation somewhat according to IEC-62439-3, p. 43
+ */
+static int hsr_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct hsr_priv *hsr_priv;
+ struct net_device *other_slave;
+ struct node_entry *node;
+ bool deliver_to_self;
+ struct sk_buff *skb_deliver;
+ enum hsr_dev_idx dev_in_idx, dev_other_idx;
+ bool dup_out;
+ int ret;
+
+ hsr_priv = get_hsr_master(dev);
+
+ if (!hsr_priv) {
+ /* Non-HSR-slave device 'dev' is connected to an HSR network */
+ kfree_skb(skb);
+ dev->stats.rx_errors++;
+ return NET_RX_SUCCESS;
+ }
+
+ if (dev == hsr_priv->slave[0]) {
+ dev_in_idx = HSR_DEV_SLAVE_A;
+ dev_other_idx = HSR_DEV_SLAVE_B;
+ } else {
+ dev_in_idx = HSR_DEV_SLAVE_B;
+ dev_other_idx = HSR_DEV_SLAVE_A;
+ }
+
+ node = hsr_find_node(&hsr_priv->self_node_db, skb);
+ if (node) {
+ /* Always kill frames sent by ourselves */
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+ /* Is this frame a candidate for local reception? */
+ deliver_to_self = false;
+ if ((skb->pkt_type == PACKET_HOST) ||
+ (skb->pkt_type == PACKET_MULTICAST) ||
+ (skb->pkt_type == PACKET_BROADCAST))
+ deliver_to_self = true;
+ else if (ether_addr_equal(eth_hdr(skb)->h_dest,
+ hsr_priv->dev->dev_addr)) {
+ skb->pkt_type = PACKET_HOST;
+ deliver_to_self = true;
+ }
+
+
+ rcu_read_lock(); /* node_db */
+ node = hsr_find_node(&hsr_priv->node_db, skb);
+
+ if (is_supervision_frame(hsr_priv, skb)) {
+ skb_pull(skb, sizeof(struct hsr_sup_tag));
+ node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx);
+ if (!node) {
+ rcu_read_unlock(); /* node_db */
+ kfree_skb(skb);
+ hsr_priv->dev->stats.rx_dropped++;
+ return NET_RX_DROP;
+ }
+ skb_push(skb, sizeof(struct hsr_sup_tag));
+ deliver_to_self = false;
+ }
+
+ if (!node) {
+ /* Source node unknown; this might be an HSR frame from
+ * another net (different multicast address). Ignore it.
+ */
+ rcu_read_unlock(); /* node_db */
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+ /* Register ALL incoming frames as outgoing through the other interface.
+ * This allows us to register frames as incoming only if they are valid
+ * for the receiving interface, without using a specific counter for
+ * incoming frames.
+ */
+ dup_out = hsr_register_frame_out(node, dev_other_idx, skb);
+ if (!dup_out)
+ hsr_register_frame_in(node, dev_in_idx);
+
+ /* Forward this frame? */
+ if (!dup_out && (skb->pkt_type != PACKET_HOST))
+ other_slave = get_other_slave(hsr_priv, dev);
+ else
+ other_slave = NULL;
+
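+ /* Deliver each frame to the host only once: if this sequence number
+ * was already registered towards the master device, the duplicate
+ * arrived first on the other slave.
+ */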
+ if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb))
+ deliver_to_self = false;
+
+ rcu_read_unlock(); /* node_db */
+
+ if (!deliver_to_self && !other_slave) {
+ kfree_skb(skb);
+ /* Circulated frame; silently remove it. */
+ return NET_RX_SUCCESS;
+ }
+
+ skb_deliver = skb;
+ if (deliver_to_self && other_slave) {
+ /* skb_clone() is not enough since we will strip the hsr tag
+ * and do address substitution below
+ */
+ skb_deliver = pskb_copy(skb, GFP_ATOMIC);
+ if (!skb_deliver) {
+ deliver_to_self = false;
+ hsr_priv->dev->stats.rx_dropped++;
+ }
+ }
+
+ if (deliver_to_self) {
+ bool multicast_frame;
+
+ skb_deliver = hsr_pull_tag(skb_deliver);
+ if (!skb_deliver) {
+ hsr_priv->dev->stats.rx_dropped++;
+ goto forward;
+ }
+#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ /* Move everything in the header that is after the HSR tag,
+ * to work around alignment problems caused by the 6-byte HSR
+ * tag. In practice, this removes/overwrites the HSR tag in
+ * the header and restores a "standard" packet.
+ */
+ memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data,
+ skb_headlen(skb_deliver));
+
+ /* Adjust skb members so they correspond with the move above.
+ * This cannot possibly underflow skb->data since hsr_pull_tag()
+ * above succeeded.
+ * At this point in the protocol stack, the transport and
+ * network headers have not been set yet, and we haven't touched
+ * the mac header nor the head. So we only need to adjust data
+ * and tail:
+ */
+ skb_deliver->data -= HSR_TAGLEN;
+ skb_deliver->tail -= HSR_TAGLEN;
+#endif
+ skb_deliver->dev = hsr_priv->dev;
+ hsr_addr_subst_source(hsr_priv, skb_deliver);
+ multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST);
+ ret = netif_rx(skb_deliver);
+ if (ret == NET_RX_DROP) {
+ hsr_priv->dev->stats.rx_dropped++;
+ } else {
+ hsr_priv->dev->stats.rx_packets++;
+ hsr_priv->dev->stats.rx_bytes += skb->len;
+ if (multicast_frame)
+ hsr_priv->dev->stats.multicast++;
+ }
+ }
+
+forward:
+ if (other_slave) {
+ skb_push(skb, ETH_HLEN);
+ skb->dev = other_slave;
+ dev_queue_xmit(skb);
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+
+static struct packet_type hsr_pt __read_mostly = {
+ .type = htons(ETH_P_PRP),
+ .func = hsr_rcv,
+};
+
+static struct notifier_block hsr_nb = {
+ .notifier_call = hsr_netdev_notify, /* Slave event notifications */
+};
+
+
+static int __init hsr_init(void)
+{
+ int res;
+
+ BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN);
+
+ dev_add_pack(&hsr_pt);
+
+ init_timer(&prune_timer);
+ prune_timer.function = prune_nodes_all;
+ prune_timer.data = 0;
+ prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
+ add_timer(&prune_timer);
+
+ register_netdevice_notifier(&hsr_nb);
+
+ res = hsr_netlink_init();
+
+ return res;
+}
+
+static void __exit hsr_exit(void)
+{
+ unregister_netdevice_notifier(&hsr_nb);
+ del_timer(&prune_timer);
+ hsr_netlink_exit();
+ dev_remove_pack(&hsr_pt);
+}
+
+module_init(hsr_init);
+module_exit(hsr_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
new file mode 100644
index 000000000000..56fe060c0ab1
--- /dev/null
+++ b/net/hsr/hsr_main.h
@@ -0,0 +1,166 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef _HSR_PRIVATE_H
+#define _HSR_PRIVATE_H
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+
+
+/* Time constants as specified in the HSR specification (IEC-62439-3 2010)
+ * Table 8.
+ * All values in milliseconds.
+ */
+#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
+#define HSR_NODE_FORGET_TIME 60000 /* ms */
+#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
+
+
+/* By how much may the slave1 and slave2 timestamps of the latest frame
+ * received from each node differ before we notify of a communication problem?
+ */
+#define MAX_SLAVE_DIFF 3000 /* ms */
+
+
+/* How often shall we check for a broken ring, and remove node entries older
+ * than HSR_NODE_FORGET_TIME?
+ */
+#define PRUNE_PERIOD 3000 /* ms */
+
+
+#define HSR_TLV_ANNOUNCE 22
+#define HSR_TLV_LIFE_CHECK 23
+
+
+/* HSR Tag.
+ * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
+ * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
+ * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr,
+ * encapsulated protocol } instead.
+ */
+#define HSR_TAGLEN 6
+
+/* Field names below as defined in the IEC:2010 standard for HSR. */
+struct hsr_tag {
+ __be16 path_and_LSDU_size;
+ __be16 sequence_nr;
+ __be16 encap_proto;
+} __packed;
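+
+/* On the wire, a tagged frame thus carries (sizes in bytes):
+ * h_dest(6), h_source(6), 0x88FB(2), path+LSDU_size(2), sequence_nr(2),
+ * encap_proto(2), followed by the encapsulated frame's payload.
+ */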
+
+
+/* The helper functions below assume that 'path' occupies the 4 most
+ * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
+ * equivalently, the 4 most significant bits of HSR tag byte 14).
+ *
+ * This is unclear in the IEC specification; its definition of MAC addresses
+ * indicates the spec is written with the least significant bit first (to the
+ * left). This, however, would mean that the LSDU field would be split in two
+ * with the path field in-between, which seems strange. I'm guessing the MAC
+ * address definition is in error.
+ */
+static inline u16 get_hsr_tag_path(struct hsr_tag *ht)
+{
+ return ntohs(ht->path_and_LSDU_size) >> 12;
+}
+
+static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht)
+{
+ return ntohs(ht->path_and_LSDU_size) & 0x0FFF;
+}
+
+static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path)
+{
+ ht->path_and_LSDU_size = htons(
+ (ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12));
+}
+
+static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size)
+{
+ ht->path_and_LSDU_size = htons(
+ (ntohs(ht->path_and_LSDU_size) & 0xF000) |
+ (LSDU_size & 0x0FFF));
+}
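+
+/* Example (illustrative values): if path_and_LSDU_size holds htons(0x1234),
+ * get_hsr_tag_path() returns 0x1 and get_hsr_tag_LSDU_size() returns 0x234.
+ */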
+
+struct hsr_ethhdr {
+ struct ethhdr ethhdr;
+ struct hsr_tag hsr_tag;
+} __packed;
+
+
+/* HSR Supervision Frame data types.
+ * Field names as defined in the IEC:2010 standard for HSR.
+ */
+struct hsr_sup_tag {
+ __be16 path_and_HSR_Ver;
+ __be16 sequence_nr;
+ __u8 HSR_TLV_Type;
+ __u8 HSR_TLV_Length;
+} __packed;
+
+struct hsr_sup_payload {
+ unsigned char MacAddressA[ETH_ALEN];
+} __packed;
+
+static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst)
+{
+ return get_hsr_tag_path((struct hsr_tag *) hst);
+}
+
+static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst)
+{
+ return get_hsr_tag_LSDU_size((struct hsr_tag *) hst);
+}
+
+static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path)
+{
+ set_hsr_tag_path((struct hsr_tag *) hst, path);
+}
+
+static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver)
+{
+ set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver);
+}
+
+struct hsr_ethhdr_sp {
+ struct ethhdr ethhdr;
+ struct hsr_sup_tag hsr_sup;
+} __packed;
+
+
+enum hsr_dev_idx {
+ HSR_DEV_NONE = -1,
+ HSR_DEV_SLAVE_A = 0,
+ HSR_DEV_SLAVE_B,
+ HSR_DEV_MASTER,
+};
+#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1)
+#define HSR_MAX_DEV (HSR_DEV_MASTER + 1)
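+/* I.e. HSR_MAX_SLAVE == 2 (the two ring ports), HSR_MAX_DEV == 3 (+ master) */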
+
+struct hsr_priv {
+ struct list_head hsr_list; /* List of hsr devices */
+ struct rcu_head rcu_head;
+ struct net_device *dev;
+ struct net_device *slave[HSR_MAX_SLAVE];
+ struct list_head node_db; /* Other HSR nodes */
+ struct list_head self_node_db; /* MACs of slaves */
+ struct timer_list announce_timer; /* Supervision frame dispatch */
+ int announce_count;
+ u16 sequence_nr;
+ spinlock_t seqnr_lock; /* locking for sequence_nr */
+ unsigned char sup_multicast_addr[ETH_ALEN];
+};
+
+void register_hsr_master(struct hsr_priv *hsr_priv);
+void unregister_hsr_master(struct hsr_priv *hsr_priv);
+bool is_hsr_slave(struct net_device *dev);
+
+#endif /* _HSR_PRIVATE_H */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
new file mode 100644
index 000000000000..4e66bf61f585
--- /dev/null
+++ b/net/hsr/hsr_netlink.c
@@ -0,0 +1,457 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * Routines for handling Netlink messages for HSR.
+ */
+
+#include "hsr_netlink.h"
+#include <linux/kernel.h>
+#include <net/rtnetlink.h>
+#include <net/genetlink.h>
+#include "hsr_main.h"
+#include "hsr_device.h"
+#include "hsr_framereg.h"
+
+static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
+ [IFLA_HSR_SLAVE1] = { .type = NLA_U32 },
+ [IFLA_HSR_SLAVE2] = { .type = NLA_U32 },
+ [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 },
+};
+
+
+/* Here, it seems a netdevice has already been allocated for us, and the
+ * hsr_dev_setup routine has been executed. Nice!
+ */
+static int hsr_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_device *link[2];
+ unsigned char multicast_spec;
+
+ if (!data[IFLA_HSR_SLAVE1]) {
+ netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n");
+ return -EINVAL;
+ }
+ link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1]));
+ if (!data[IFLA_HSR_SLAVE2]) {
+ netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n");
+ return -EINVAL;
+ }
+ link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2]));
+
+ if (!link[0] || !link[1])
+ return -ENODEV;
+ if (link[0] == link[1])
+ return -EINVAL;
+
+ if (!data[IFLA_HSR_MULTICAST_SPEC])
+ multicast_spec = 0;
+ else
+ multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
+
+ return hsr_dev_finalize(dev, link, multicast_spec);
+}
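+
+/* With a matching iproute2, an HSR device can then be created from userspace
+ * with something like (illustrative; option names depend on the iproute2
+ * version):
+ *
+ * ip link add name hsr0 type hsr slave1 eth0 slave2 eth1
+ */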
+
+static struct rtnl_link_ops hsr_link_ops __read_mostly = {
+ .kind = "hsr",
+ .maxtype = IFLA_HSR_MAX,
+ .policy = hsr_policy,
+ .priv_size = sizeof(struct hsr_priv),
+ .setup = hsr_dev_setup,
+ .newlink = hsr_newlink,
+};
+
+
+
+/* attribute policy */
+/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */
+static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
+ [HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN },
+ [HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN },
+ [HSR_A_IFINDEX] = { .type = NLA_U32 },
+ [HSR_A_IF1_AGE] = { .type = NLA_U32 },
+ [HSR_A_IF2_AGE] = { .type = NLA_U32 },
+ [HSR_A_IF1_SEQ] = { .type = NLA_U16 },
+ [HSR_A_IF2_SEQ] = { .type = NLA_U16 },
+};
+
+static struct genl_family hsr_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = "HSR",
+ .version = 1,
+ .maxattr = HSR_A_MAX,
+};
+
+static struct genl_multicast_group hsr_network_genl_mcgrp = {
+ .name = "hsr-network",
+};
+
+
+
+/* This is called if, for some node with MAC address addr, we get frames over
+ * only one of the slave interfaces. This would indicate an open network ring
+ * (i.e. a link has failed somewhere).
+ */
+void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
+ enum hsr_dev_idx dev_idx)
+{
+ struct sk_buff *skb;
+ void *msg_head;
+ int res;
+ int ifindex;
+
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ goto fail;
+
+ msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR);
+ if (!msg_head)
+ goto nla_put_failure;
+
+ res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+ if (res < 0)
+ goto nla_put_failure;
+
+ if (hsr_priv->slave[dev_idx])
+ ifindex = hsr_priv->slave[dev_idx]->ifindex;
+ else
+ ifindex = -1;
+ res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, msg_head);
+ genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+
+fail:
+ netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n");
+}
+
+/* This is called when we haven't heard from the node with MAC address addr for
+ * some time (just before the node is removed from the node table/list).
+ */
+void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN])
+{
+ struct sk_buff *skb;
+ void *msg_head;
+ int res;
+
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ goto fail;
+
+ msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_NODE_DOWN);
+ if (!msg_head)
+ goto nla_put_failure;
+
+
+ res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+ if (res < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, msg_head);
+ genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+
+fail:
+ netdev_warn(hsr_priv->dev, "Could not send HSR node down\n");
+}
+
+
+/* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table
+ * about the status of a specific node in the network, defined by its MAC
+ * address.
+ *
+ * Input: hsr ifindex, node mac address
+ * Output: hsr ifindex, node mac address (copied from request),
+ * age of latest frame from node over slave 1, slave 2 [ms]
+ */
+static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
+{
+ /* For receiving */
+ struct nlattr *na;
+ struct net_device *hsr_dev;
+
+ /* For sending */
+ struct sk_buff *skb_out;
+ void *msg_head;
+ struct hsr_priv *hsr_priv;
+ unsigned char hsr_node_addr_b[ETH_ALEN];
+ int hsr_node_if1_age;
+ u16 hsr_node_if1_seq;
+ int hsr_node_if2_age;
+ u16 hsr_node_if2_seq;
+ int addr_b_ifindex;
+ int res;
+
+ if (!info)
+ goto invalid;
+
+ na = info->attrs[HSR_A_IFINDEX];
+ if (!na)
+ goto invalid;
+ na = info->attrs[HSR_A_NODE_ADDR];
+ if (!na)
+ goto invalid;
+
+ hsr_dev = __dev_get_by_index(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ if (!hsr_dev)
+ goto invalid;
+ if (!is_hsr_master(hsr_dev))
+ goto invalid;
+
+
+ /* Send reply */
+
+ skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb_out) {
+ res = -ENOMEM;
+ goto fail;
+ }
+
+ msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
+ info->snd_seq, &hsr_genl_family, 0,
+ HSR_C_SET_NODE_STATUS);
+ if (!msg_head) {
+ res = -ENOMEM;
+ goto nla_put_failure;
+ }
+
+ res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+
+ hsr_priv = netdev_priv(hsr_dev);
+ res = hsr_get_node_data(hsr_priv,
+ (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]),
+ hsr_node_addr_b,
+ &addr_b_ifindex,
+ &hsr_node_if1_age,
+ &hsr_node_if1_seq,
+ &hsr_node_if2_age,
+ &hsr_node_if2_seq);
+ if (res < 0)
+ goto fail;
+
+ res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN,
+ nla_data(info->attrs[HSR_A_NODE_ADDR]));
+ if (res < 0)
+ goto nla_put_failure;
+
+ if (addr_b_ifindex > -1) {
+ res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN,
+ hsr_node_addr_b);
+ if (res < 0)
+ goto nla_put_failure;
+
+ res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+ }
+
+ res = nla_put_u32(skb_out, HSR_A_IF1_AGE, hsr_node_if1_age);
+ if (res < 0)
+ goto nla_put_failure;
+ res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
+ if (res < 0)
+ goto nla_put_failure;
+ if (hsr_priv->slave[0])
+ res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
+ hsr_priv->slave[0]->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+
+ res = nla_put_u32(skb_out, HSR_A_IF2_AGE, hsr_node_if2_age);
+ if (res < 0)
+ goto nla_put_failure;
+ res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
+ if (res < 0)
+ goto nla_put_failure;
+ if (hsr_priv->slave[1])
+ res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
+ hsr_priv->slave[1]->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+
+ genlmsg_end(skb_out, msg_head);
+ genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
+
+ return 0;
+
+invalid:
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ return 0;
+
+nla_put_failure:
+ kfree_skb(skb_out);
+ /* Fall through */
+
+fail:
+ return res;
+}
+
+static struct genl_ops hsr_ops_get_node_status = {
+ .cmd = HSR_C_GET_NODE_STATUS,
+ .flags = 0,
+ .policy = hsr_genl_policy,
+ .doit = hsr_get_node_status,
+ .dumpit = NULL,
+};
+
+
+/* Get a list of MacAddressA of all nodes known to this node (other than self).
+ */
+static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
+{
+ /* For receiving */
+ struct nlattr *na;
+ struct net_device *hsr_dev;
+
+ /* For sending */
+ struct sk_buff *skb_out;
+ void *msg_head;
+ struct hsr_priv *hsr_priv;
+ void *pos;
+ unsigned char addr[ETH_ALEN];
+ int res;
+
+ if (!info)
+ goto invalid;
+
+ na = info->attrs[HSR_A_IFINDEX];
+ if (!na)
+ goto invalid;
+
+ hsr_dev = __dev_get_by_index(genl_info_net(info),
+ nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+ if (!hsr_dev)
+ goto invalid;
+ if (!is_hsr_master(hsr_dev))
+ goto invalid;
+
+
+ /* Send reply */
+
+ skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb_out) {
+ res = -ENOMEM;
+ goto fail;
+ }
+
+ msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
+ info->snd_seq, &hsr_genl_family, 0,
+ HSR_C_SET_NODE_LIST);
+ if (!msg_head) {
+ res = -ENOMEM;
+ goto nla_put_failure;
+ }
+
+ res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+ if (res < 0)
+ goto nla_put_failure;
+
+ hsr_priv = netdev_priv(hsr_dev);
+
+ rcu_read_lock();
+ pos = hsr_get_next_node(hsr_priv, NULL, addr);
+ while (pos) {
+ res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+ if (res < 0) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+ pos = hsr_get_next_node(hsr_priv, pos, addr);
+ }
+ rcu_read_unlock();
+
+ genlmsg_end(skb_out, msg_head);
+ genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
+
+ return 0;
+
+invalid:
+ netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+ return 0;
+
+nla_put_failure:
+ kfree_skb(skb_out);
+ /* Fall through */
+
+fail:
+ return res;
+}
+
+
+static struct genl_ops hsr_ops_get_node_list = {
+ .cmd = HSR_C_GET_NODE_LIST,
+ .flags = 0,
+ .policy = hsr_genl_policy,
+ .doit = hsr_get_node_list,
+ .dumpit = NULL,
+};
+
+int __init hsr_netlink_init(void)
+{
+ int rc;
+
+ rc = rtnl_link_register(&hsr_link_ops);
+ if (rc)
+ goto fail_rtnl_link_register;
+
+ rc = genl_register_family(&hsr_genl_family);
+ if (rc)
+ goto fail_genl_register_family;
+
+ rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+ if (rc)
+ goto fail_genl_register_ops;
+
+ rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_list);
+ if (rc)
+ goto fail_genl_register_ops_node_list;
+
+ rc = genl_register_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp);
+ if (rc)
+ goto fail_genl_register_mc_group;
+
+ return 0;
+
+fail_genl_register_mc_group:
+ genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_list);
+fail_genl_register_ops_node_list:
+ genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+fail_genl_register_ops:
+ genl_unregister_family(&hsr_genl_family);
+fail_genl_register_family:
+ rtnl_link_unregister(&hsr_link_ops);
+fail_rtnl_link_register:
+
+ return rc;
+}
+
+void __exit hsr_netlink_exit(void)
+{
+ genl_unregister_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp);
+ genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+ genl_unregister_family(&hsr_genl_family);
+
+ rtnl_link_unregister(&hsr_link_ops);
+}
+
+MODULE_ALIAS_RTNL_LINK("hsr");
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
new file mode 100644
index 000000000000..d4579dcc3c7d
--- /dev/null
+++ b/net/hsr/hsr_netlink.h
@@ -0,0 +1,30 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef __HSR_NETLINK_H
+#define __HSR_NETLINK_H
+
+#include <linux/if_ether.h>
+#include <linux/module.h>
+#include <uapi/linux/hsr_netlink.h>
+
+struct hsr_priv;
+
+int __init hsr_netlink_init(void);
+void __exit hsr_netlink_exit(void);
+
+void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
+ int dev_idx);
+void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]);
+void hsr_nl_framedrop(int dropcount, int dev_idx);
+void hsr_nl_linkdown(int dev_idx);
+
+#endif /* __HSR_NETLINK_H */
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index ff41b4d60d30..9497c6f3276b 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -440,7 +440,6 @@ lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
default:
pr_debug("ERROR: unknown UDP format\n");
goto err;
- break;
}
pr_debug("uncompressed UDP ports: src = %d, dst = %d\n",
@@ -655,7 +654,9 @@ static int lowpan_header_create(struct sk_buff *skb,
head[1] = iphc1;
skb_pull(skb, sizeof(struct ipv6hdr));
+ skb_reset_transport_header(skb);
memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head);
+ skb_reset_network_header(skb);
lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
skb->len);
@@ -738,7 +739,6 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
return -ENOMEM;
skb_push(new, sizeof(struct ipv6hdr));
- skb_reset_network_header(new);
skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr));
new->protocol = htons(ETH_P_IPV6);
@@ -785,7 +785,6 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
goto skb_err;
frame->skb->priority = skb->priority;
- frame->skb->dev = skb->dev;
/* reserve headroom for uncompressed ipv6 header */
skb_reserve(frame->skb, sizeof(struct ipv6hdr));
@@ -1061,7 +1060,6 @@ lowpan_process_data(struct sk_buff *skb)
skb = new;
skb_push(skb, sizeof(struct udphdr));
- skb_reset_transport_header(skb);
skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
lowpan_raw_dump_table(__func__, "raw UDP header dump",
@@ -1104,50 +1102,40 @@ static int lowpan_set_address(struct net_device *dev, void *p)
return 0;
}
-static int lowpan_get_mac_header_length(struct sk_buff *skb)
-{
- /*
- * Currently long addressing mode is supported only, so the overall
- * header size is 21:
- * FC SeqNum DPAN DA SA Sec
- * 2 + 1 + 2 + 8 + 8 + 0 = 21
- */
- return 21;
-}
-
static int
lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
int mlen, int plen, int offset, int type)
{
struct sk_buff *frag;
- int hlen, ret;
+ int hlen;
hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
- frag = dev_alloc_skb(hlen + mlen + plen + IEEE802154_MFR_SIZE);
+ frag = netdev_alloc_skb(skb->dev,
+ hlen + mlen + plen + IEEE802154_MFR_SIZE);
if (!frag)
return -ENOMEM;
frag->priority = skb->priority;
- frag->dev = skb->dev;
/* copy header, MFR and payload */
- memcpy(skb_put(frag, mlen), skb->data, mlen);
- memcpy(skb_put(frag, hlen), head, hlen);
+ skb_put(frag, mlen);
+ skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen);
- if (plen)
- skb_copy_from_linear_data_offset(skb, offset + mlen,
- skb_put(frag, plen), plen);
+ skb_put(frag, hlen);
+ skb_copy_to_linear_data_offset(frag, mlen, head, hlen);
+
+ skb_put(frag, plen);
+ skb_copy_to_linear_data_offset(frag, mlen + hlen,
+ skb_network_header(skb) + offset, plen);
lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data,
frag->len);
- ret = dev_queue_xmit(frag);
-
- return ret;
+ return dev_queue_xmit(frag);
}
static int
@@ -1156,7 +1144,7 @@ lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
int err, header_length, payload_length, tag, offset = 0;
u8 head[5];
- header_length = lowpan_get_mac_header_length(skb);
+ header_length = skb->mac_len;
payload_length = skb->len - header_length;
tag = lowpan_dev_info(dev)->fragment_tag++;
@@ -1181,7 +1169,7 @@ lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
head[0] &= ~LOWPAN_DISPATCH_FRAG1;
head[0] |= LOWPAN_DISPATCH_FRAGN;
- while ((payload_length - offset > 0) && (err >= 0)) {
+ while (payload_length - offset > 0) {
int len = LOWPAN_FRAG_SIZE;
head[4] = offset / 8;
@@ -1327,8 +1315,6 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
/* Pull off the 1-byte of 6lowpan header. */
skb_pull(local_skb, 1);
- skb_reset_network_header(local_skb);
- skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
lowpan_give_skb_to_devices(local_skb);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 35913fb77dc8..09d78d4a3cff 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -245,29 +245,6 @@ out:
}
EXPORT_SYMBOL(inet_listen);
-u32 inet_ehash_secret __read_mostly;
-EXPORT_SYMBOL(inet_ehash_secret);
-
-u32 ipv6_hash_secret __read_mostly;
-EXPORT_SYMBOL(ipv6_hash_secret);
-
-/*
- * inet_ehash_secret must be set exactly once, and to a non nul value
- * ipv6_hash_secret must be set exactly once.
- */
-void build_ehash_secret(void)
-{
- u32 rnd;
-
- do {
- get_random_bytes(&rnd, sizeof(rnd));
- } while (rnd == 0);
-
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
- get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-}
-EXPORT_SYMBOL(build_ehash_secret);
-
/*
* Create an inet socket.
*/
@@ -284,10 +261,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
- if (unlikely(!inet_ehash_secret))
- if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
- build_ehash_secret();
-
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
@@ -1254,36 +1227,36 @@ static int inet_gso_send_check(struct sk_buff *skb)
if (ihl < sizeof(*iph))
goto out;
+ proto = iph->protocol;
+
+ /* Warning: after this point, iph might no longer be valid */
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
-
__skb_pull(skb, ihl);
+
skb_reset_transport_header(skb);
- iph = ip_hdr(skb);
- proto = iph->protocol;
err = -EPROTONOSUPPORT;
- rcu_read_lock();
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_send_check))
err = ops->callbacks.gso_send_check(skb);
- rcu_read_unlock();
out:
return err;
}
static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
+ unsigned int offset = 0;
+ bool udpfrag, encap;
struct iphdr *iph;
int proto;
+ int nhoff;
int ihl;
int id;
- unsigned int offset = 0;
- bool tunnel;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_TCPV4 |
@@ -1291,12 +1264,16 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_MPLS |
0)))
goto out;
+ skb_reset_network_header(skb);
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
@@ -1305,42 +1282,48 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
if (ihl < sizeof(*iph))
goto out;
+ id = ntohs(iph->id);
+ proto = iph->protocol;
+
+ /* Warning: after this point, iph might no longer be valid */
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
+ __skb_pull(skb, ihl);
- tunnel = !!skb->encapsulation;
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
+ features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ SKB_GSO_CB(skb)->encap_level += ihl;
- __skb_pull(skb, ihl);
skb_reset_transport_header(skb);
- iph = ip_hdr(skb);
- id = ntohs(iph->id);
- proto = iph->protocol;
+
segs = ERR_PTR(-EPROTONOSUPPORT);
- rcu_read_lock();
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
segs = ops->callbacks.gso_segment(skb, features);
- rcu_read_unlock();
if (IS_ERR_OR_NULL(segs))
goto out;
+ udpfrag = !!skb->encapsulation && proto == IPPROTO_UDP;
skb = segs;
do {
- iph = ip_hdr(skb);
- if (!tunnel && proto == IPPROTO_UDP) {
+ iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
+ if (udpfrag) {
iph->id = htons(id);
iph->frag_off = htons(offset >> 3);
if (skb->next != NULL)
iph->frag_off |= htons(IP_MF);
- offset += (skb->len - skb->mac_len - iph->ihl * 4);
- } else {
+ offset += skb->len - nhoff - ihl;
+ } else {
iph->id = htons(id++);
}
- iph->tot_len = htons(skb->len - skb->mac_len);
- iph->check = 0;
- iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
+ iph->tot_len = htons(skb->len - nhoff);
+ ip_send_check(iph);
+ if (encap)
+ skb_reset_inner_headers(skb);
+ skb->network_header = (u8 *)iph - skb->head;
} while ((skb = skb->next));
out:
@@ -1647,6 +1630,13 @@ static struct packet_offload ip_packet_offload __read_mostly = {
},
};
+static const struct net_offload ipip_offload = {
+ .callbacks = {
+ .gso_send_check = inet_gso_send_check,
+ .gso_segment = inet_gso_segment,
+ },
+};
+
static int __init ipv4_offload_init(void)
{
/*
@@ -1658,6 +1648,7 @@ static int __init ipv4_offload_init(void)
pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
dev_add_offload(&ip_packet_offload);
+ inet_add_offload(&ipip_offload, IPPROTO_IPIP);
return 0;
}
@@ -1706,8 +1697,6 @@ static int __init inet_init(void)
ip_static_sysctl_init();
#endif
- tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
-
/*
* Add all the base protocols.
*/
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 109ee89f123e..7785b28061ac 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -121,7 +121,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct aead_givcrypt_request *req;
struct scatterlist *sg;
struct scatterlist *asg;
- struct esp_data *esp;
struct sk_buff *trailer;
void *tmp;
u8 *iv;
@@ -139,8 +138,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
/* skb is pure payload to encrypt */
- esp = x->data;
- aead = esp->aead;
+ aead = x->data;
alen = crypto_aead_authsize(aead);
tfclen = 0;
@@ -154,8 +152,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
}
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
- if (esp->padlen)
- clen = ALIGN(clen, esp->padlen);
plen = clen - skb->len - tfclen;
err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
@@ -280,8 +276,7 @@ static int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int elen = skb->len - hlen;
@@ -376,8 +371,7 @@ static void esp_input_done(struct crypto_async_request *base, int err)
static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
@@ -459,9 +453,8 @@ out:
static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
- struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
- u32 align = max_t(u32, blksize, esp->padlen);
+ struct crypto_aead *aead = x->data;
+ u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
unsigned int net_adj;
switch (x->props.mode) {
@@ -476,8 +469,8 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
BUG();
}
- return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
- net_adj) & ~(align - 1)) + net_adj - 2;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
}
static void esp4_err(struct sk_buff *skb, u32 info)
@@ -511,18 +504,16 @@ static void esp4_err(struct sk_buff *skb, u32 info)
static void esp_destroy(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
+ struct crypto_aead *aead = x->data;
- if (!esp)
+ if (!aead)
return;
- crypto_free_aead(esp->aead);
- kfree(esp);
+ crypto_free_aead(aead);
}
static int esp_init_aead(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
int err;
@@ -531,7 +522,7 @@ static int esp_init_aead(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
err = crypto_aead_setkey(aead, x->aead->alg_key,
(x->aead->alg_key_len + 7) / 8);
@@ -548,7 +539,6 @@ error:
static int esp_init_authenc(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
struct rtattr *rta;
@@ -583,7 +573,7 @@ static int esp_init_authenc(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
(x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
@@ -638,16 +628,11 @@ error:
static int esp_init_state(struct xfrm_state *x)
{
- struct esp_data *esp;
struct crypto_aead *aead;
u32 align;
int err;
- esp = kzalloc(sizeof(*esp), GFP_KERNEL);
- if (esp == NULL)
- return -ENOMEM;
-
- x->data = esp;
+ x->data = NULL;
if (x->aead)
err = esp_init_aead(x);
@@ -657,9 +642,7 @@ static int esp_init_state(struct xfrm_state *x)
if (err)
goto error;
- aead = esp->aead;
-
- esp->padlen = 0;
+ aead = x->data;
x->props.header_len = sizeof(struct ip_esp_hdr) +
crypto_aead_ivsize(aead);
@@ -683,9 +666,7 @@ static int esp_init_state(struct xfrm_state *x)
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
- if (esp->padlen)
- align = max_t(u32, align, esp->padlen);
- x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
error:
return err;
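
With esp->padlen gone, esp4_get_mtu() rounds only to the AEAD block size. A minimal userspace sketch of that arithmetic, assuming illustrative sizes (a 1500-byte link, 24 bytes of ESP header plus IV, a 12-byte ICV, AES's 16-byte blocks) rather than values taken from the patch:

#include <stdio.h>

/* Mirror of the rounding in esp4_get_mtu() after the padlen removal.
 * Assumes blksize is a power of two, as the kernel's mask expression
 * effectively does.
 */
static unsigned int esp_payload_mtu(unsigned int mtu, unsigned int header_len,
				    unsigned int authsize, unsigned int net_adj,
				    unsigned int blksize)
{
	/* Round the usable payload down to whole cipher blocks, then
	 * reserve the 2 trailer bytes (pad length + next header).
	 */
	return ((mtu - header_len - authsize - net_adj) & ~(blksize - 1))
	       + net_adj - 2;
}

int main(void)
{
	printf("%u\n", esp_payload_mtu(1500, 24, 12, 20, 16)); /* 1458 */
	return 0;
}
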
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b3f627ac4ed8..d846304b7b89 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -933,7 +933,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
local_bh_disable();
frn->tb_id = tb->tb_id;
- rcu_read_lock();
frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
if (!frn->err) {
@@ -942,7 +941,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
frn->type = res.type;
frn->scope = res.scope;
}
- rcu_read_unlock();
local_bh_enable();
}
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index af0f14aba169..388d113fd289 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -24,21 +24,17 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
}
/* Exported by fib_semantics.c */
-extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(struct fib_config *cfg);
-extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
-extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, __be32 dst,
- int dst_len, u8 tos, struct fib_info *fi,
- unsigned int);
-extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info,
- unsigned int nlm_flags);
-extern struct fib_alias *fib_find_alias(struct list_head *fah,
- u8 tos, u32 prio);
-extern int fib_detect_death(struct fib_info *fi, int order,
- struct fib_info **last_resort,
- int *last_idx, int dflt);
+void fib_release_info(struct fib_info *);
+struct fib_info *fib_create_info(struct fib_config *cfg);
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
+ u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
+ unsigned int);
+void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
+ u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
+struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
+int fib_detect_death(struct fib_info *fi, int order,
+ struct fib_info **last_resort, int *last_idx, int dflt);
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d5dbca5ecf62..e63f47a4e651 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -380,7 +380,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
}
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
- int dst_len, u32 tb_id, struct nl_info *info,
+ int dst_len, u32 tb_id, const struct nl_info *info,
unsigned int nlm_flags)
{
struct sk_buff *skb;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 736c9fc3ef93..5893e99e8299 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -93,35 +93,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
}
EXPORT_SYMBOL_GPL(gre_build_header);
-struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
-{
- int err;
-
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
-
- if (skb_is_gso(skb)) {
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- goto error;
- skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
- return skb;
- } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_NONE;
-
- return skb;
-error:
- kfree_skb(skb);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(gre_handle_offloads);
-
static __sum16 check_checksum(struct sk_buff *skb)
{
__sum16 csum = 0;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 55e6bfb3a289..e5d436188464 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -39,7 +39,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
- SKB_GSO_GRE)))
+ SKB_GSO_GRE |
+ SKB_GSO_IPIP)))
goto out;
if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c5313a9c019b..bb075fc9a14f 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -93,9 +93,6 @@ void inet_frags_init(struct inet_frags *f)
}
rwlock_init(&f->lock);
- f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^
- (jiffies ^ (jiffies >> 6)));
-
setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
(unsigned long)f);
f->secret_timer.expires = jiffies + f->secret_interval;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a4b66bbe4f21..8b9cf279450d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,31 @@
#include <net/secure_seq.h>
#include <net/ip.h>
+static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
+{
+ static u32 inet_ehash_secret __read_mostly;
+
+ net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
+
+ return __inet_ehashfn(laddr, lport, faddr, fport,
+ inet_ehash_secret + net_hash_mix(net));
+}
+
+
+static unsigned int inet_sk_ehashfn(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const __be32 laddr = inet->inet_rcv_saddr;
+ const __u16 lport = inet->inet_num;
+ const __be32 faddr = inet->inet_daddr;
+ const __be16 fport = inet->inet_dport;
+ struct net *net = sock_net(sk);
+
+ return inet_ehashfn(net, laddr, lport, faddr, fport);
+}
+
/*
* Allocate and initialize a new local port bind bucket.
* The bindhash mutex for snum's hash chain must be held here.
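
The inet_ehashfn() move above also switches its secret to net_get_random_once(), deferring key generation to first use instead of boot time. A rough userspace analogue of that once-only lazy initialization, using pthread_once() and getrandom() as stand-ins (the mixing below is a toy; the kernel's __inet_ehashfn() uses jhash):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/random.h>

static uint32_t ehash_secret;
static pthread_once_t secret_once = PTHREAD_ONCE_INIT;

static void secret_init(void)
{
	/* getrandom() blocks until the entropy pool is initialized,
	 * so a late first use cannot see a weak secret.
	 */
	getrandom(&ehash_secret, sizeof(ehash_secret), 0);
}

static uint32_t toy_ehashfn(uint32_t laddr, uint16_t lport,
			    uint32_t faddr, uint16_t fport)
{
	pthread_once(&secret_once, secret_init);
	/* Stand-in mix, keyed by the lazily generated secret. */
	return (laddr ^ faddr ^ ((uint32_t)lport << 16 | fport))
	       ^ ehash_secret;
}

int main(void)
{
	printf("%08x\n", toy_ehashfn(0x7f000001, 4242, 0x7f000001, 80));
	return 0;
}
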
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b66910aaef4d..2481993a4970 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -106,6 +106,7 @@ struct ip4_create_arg {
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
+ net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7d8357bb2ba6..51be64e18e32 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->transport_header = skb->network_header + fragheaderlen;
- skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- /* specify the length of each IP datagram fragment */
- skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
__skb_queue_tail(queue, skb);
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* specify the length of each IP datagram fragment */
+ skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
+append:
return skb_append_datato_frags(sk, skb, getfrag, from,
(length - transhdrlen));
}
@@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk,
int copy;
int err;
int offset = 0;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
@@ -818,8 +823,10 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+ maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
+ mtu : 0xFFFF;
- if (cork->length + length > 0xFFFF - fragheaderlen) {
+ if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
mtu-exthdrlen);
return -EMSGSIZE;
@@ -1117,7 +1124,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
int mtu;
int len;
int err;
- unsigned int maxfraglen, fragheaderlen, fraggap;
+ unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize;
if (inet->hdrincl)
return -EPERM;
@@ -1141,8 +1148,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
+ maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
+ mtu : 0xFFFF;
- if (cork->length + size > 0xFFFF - fragheaderlen) {
+ if (cork->length + size > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu);
return -EMSGSIZE;
}
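
The new maxnonfragsize check caps a corked send at the path MTU only when the socket requested DF via IP_PMTUDISC_DO; otherwise the classic 64 KiB IP datagram ceiling still applies. A compact sketch of that predicate (illustrative, not kernel code):

#include <stdio.h>

/* Returns nonzero when appending add_len bytes would overflow the
 * permitted datagram size, per the patched check.
 */
static int over_limit(unsigned int cork_len, unsigned int add_len,
		      unsigned int fragheaderlen, unsigned int mtu,
		      int pmtudisc_do)
{
	unsigned int maxnonfragsize = pmtudisc_do ? mtu : 0xFFFF;

	return cork_len + add_len > maxnonfragsize - fragheaderlen;
}

int main(void)
{
	/* 1500-byte MTU, 20-byte IP header: a 1600-byte send only fits
	 * while fragmentation is still allowed.
	 */
	printf("%d %d\n", over_limit(0, 1600, 20, 1500, 1),
			  over_limit(0, 1600, 20, 1500, 0)); /* 1 0 */
	return 0;
}
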
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c31e3ad98ef2..42ffbc8d65c6 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -116,3 +116,36 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
return 0;
}
EXPORT_SYMBOL_GPL(iptunnel_pull_header);
+
+struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
+ bool csum_help,
+ int gso_type_mask)
+{
+ int err;
+
+ if (likely(!skb->encapsulation)) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ if (skb_is_gso(skb)) {
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(err))
+ goto error;
+ skb_shinfo(skb)->gso_type |= gso_type_mask;
+ return skb;
+ }
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
+ goto error;
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 91f69bc883fe..5d9c845d288a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -61,8 +61,17 @@ static int vti_rcv(struct sk_buff *skb)
iph->saddr, iph->daddr, 0);
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
+ u32 oldmark = skb->mark;
+ int ret;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+
+ /* temporarily mark the skb with the tunnel o_key, to
+ * only match policies with this mark.
+ */
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
+ skb->mark = oldmark;
+ if (!ret)
return -1;
tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -71,7 +80,6 @@ static int vti_rcv(struct sk_buff *skb)
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- skb->mark = 0;
secpath_reset(skb);
skb->dev = tunnel->dev;
return 1;
@@ -103,7 +111,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&fl4, 0, sizeof(fl4));
flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
+ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
RT_SCOPE_UNIVERSE,
IPPROTO_IPIP, 0,
dst, tiph->saddr, 0, 0);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7f80fb4b82d3..fe3e9f7f1f0b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -220,17 +220,17 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(skb->protocol != htons(ETH_P_IP)))
goto tx_error;
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto out;
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
tx_error:
- dev->stats.tx_errors++;
dev_kfree_skb(skb);
+out:
+ dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
@@ -275,6 +275,7 @@ static const struct net_device_ops ipip_netdev_ops = {
#define IPIP_FEATURES (NETIF_F_SG | \
NETIF_F_FRAGLIST | \
NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
NETIF_F_HW_CSUM)
static void ipip_tunnel_setup(struct net_device *dev)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 85a4f21aac1a..59da7cde0724 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d23118d95ff9..718dfbd30cbe 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb,
addend = xt_write_recseq_begin();
private = table->private;
cpu = smp_processor_id();
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
table_base = private->entries[cpu];
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a2e2b61cd7da..2510c02c2d21 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -28,6 +28,7 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -57,15 +58,21 @@ struct clusterip_config {
struct rcu_head rcu;
};
-static LIST_HEAD(clusterip_configs);
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+#endif
-/* clusterip_lock protects the clusterip_configs list */
-static DEFINE_SPINLOCK(clusterip_lock);
+static int clusterip_net_id __read_mostly;
+
+struct clusterip_net {
+ struct list_head configs;
+ /* lock protects the configs list */
+ spinlock_t lock;
#ifdef CONFIG_PROC_FS
-static const struct file_operations clusterip_proc_fops;
-static struct proc_dir_entry *clusterip_procdir;
+ struct proc_dir_entry *procdir;
#endif
+};
static inline void
clusterip_config_get(struct clusterip_config *c)
@@ -92,10 +99,13 @@ clusterip_config_put(struct clusterip_config *c)
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
+ struct net *net = dev_net(c->dev);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
+ if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
- spin_unlock(&clusterip_lock);
+ spin_unlock(&cn->lock);
local_bh_enable();
dev_mc_del(c->dev, c->clustermac);
@@ -113,11 +123,12 @@ clusterip_config_entry_put(struct clusterip_config *c)
}
static struct clusterip_config *
-__clusterip_config_find(__be32 clusterip)
+__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- list_for_each_entry_rcu(c, &clusterip_configs, list) {
+ list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
return c;
}
@@ -126,12 +137,12 @@ __clusterip_config_find(__be32 clusterip)
}
static inline struct clusterip_config *
-clusterip_config_find_get(__be32 clusterip, int entry)
+clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
{
struct clusterip_config *c;
rcu_read_lock_bh();
- c = __clusterip_config_find(clusterip);
+ c = __clusterip_config_find(net, clusterip);
if (c) {
if (unlikely(!atomic_inc_not_zero(&c->refcount)))
c = NULL;
@@ -158,6 +169,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
@@ -180,7 +192,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
- clusterip_procdir,
+ cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
kfree(c);
@@ -189,9 +201,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
}
#endif
- spin_lock_bh(&clusterip_lock);
- list_add_rcu(&c->list, &clusterip_configs);
- spin_unlock_bh(&clusterip_lock);
+ spin_lock_bh(&cn->lock);
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
return c;
}
@@ -370,7 +382,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
/* FIXME: further sanity checks */
- config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+ config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
if (!config) {
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
pr_info("no config found for %pI4, need 'new'\n",
@@ -384,7 +396,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- dev = dev_get_by_name(&init_net, e->ip.iniface);
+ dev = dev_get_by_name(par->net, e->ip.iniface);
if (!dev) {
pr_info("no such interface %s\n",
e->ip.iniface);
@@ -492,6 +504,7 @@ arp_mangle(const struct nf_hook_ops *ops,
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
+ struct net *net = dev_net(in ? in : out);
/* we don't care about non-ethernet and non-ipv4 ARP */
if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
@@ -508,7 +521,7 @@ arp_mangle(const struct nf_hook_ops *ops,
/* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
- c = clusterip_config_find_get(payload->src_ip, 0);
+ c = clusterip_config_find_get(net, payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
@@ -698,48 +711,75 @@ static const struct file_operations clusterip_proc_fops = {
#endif /* CONFIG_PROC_FS */
+static int clusterip_net_init(struct net *net)
+{
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ INIT_LIST_HEAD(&cn->configs);
+
+ spin_lock_init(&cn->lock);
+
+#ifdef CONFIG_PROC_FS
+ cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
+ if (!cn->procdir) {
+ pr_err("Unable to proc dir entry\n");
+ return -ENOMEM;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ return 0;
+}
+
+static void clusterip_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ proc_remove(cn->procdir);
+#endif
+}
+
+static struct pernet_operations clusterip_net_ops = {
+ .init = clusterip_net_init,
+ .exit = clusterip_net_exit,
+ .id = &clusterip_net_id,
+ .size = sizeof(struct clusterip_net),
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
- ret = xt_register_target(&clusterip_tg_reg);
+ ret = register_pernet_subsys(&clusterip_net_ops);
if (ret < 0)
return ret;
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ goto cleanup_subsys;
+
ret = nf_register_hook(&cip_arp_ops);
if (ret < 0)
goto cleanup_target;
-#ifdef CONFIG_PROC_FS
- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
- if (!clusterip_procdir) {
- pr_err("Unable to proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_hook;
- }
-#endif /* CONFIG_PROC_FS */
-
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
+
return 0;
-#ifdef CONFIG_PROC_FS
-cleanup_hook:
- nf_unregister_hook(&cip_arp_ops);
-#endif /* CONFIG_PROC_FS */
cleanup_target:
xt_unregister_target(&clusterip_tg_reg);
+cleanup_subsys:
+ unregister_pernet_subsys(&clusterip_net_ops);
return ret;
}
static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
-#ifdef CONFIG_PROC_FS
- proc_remove(clusterip_procdir);
-#endif
+
nf_unregister_hook(&cip_arp_ops);
xt_unregister_target(&clusterip_tg_reg);
+ unregister_pernet_subsys(&clusterip_net_ops);
/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
rcu_barrier_bh();
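
The CLUSTERIP conversion follows the standard pernet pattern: setting .id and .size in pernet_operations makes the core allocate one private struct per namespace, retrieved with net_generic(). A stripped-down module skeleton of that pattern (a demo sketch, not code from the patch):

#include <linux/module.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct demo_net {
	struct list_head items;
	spinlock_t lock;	/* protects items */
};

static int demo_net_id __read_mostly;

static int __net_init demo_net_init(struct net *net)
{
	/* Storage was allocated by the core because .size is set. */
	struct demo_net *dn = net_generic(net, demo_net_id);

	INIT_LIST_HEAD(&dn->items);
	spin_lock_init(&dn->lock);
	return 0;
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.id   = &demo_net_id,
	.size = sizeof(struct demo_net),
};

static int __init demo_init(void)
{
	return register_pernet_subsys(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	unregister_pernet_subsys(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
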
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index cbc22158af49..9cb993cd224b 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net,
ub->qlen++;
pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* We might not have a timestamp, get one */
if (skb->tstamp.tv64 == 0)
@@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net,
}
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
- else
- *(pm->prefix) = '\0';
if (in && in->hard_header_len > 0 &&
skb->mac_header != skb->network_header &&
@@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net,
if (in)
strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
- else
- pm->indev_name[0] = '\0';
if (out)
strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
- else
- pm->outdev_name[0] = '\0';
/* copy_len <= skb->len, so can't fail. */
if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6011615e810d..d2d325382b13 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -295,7 +295,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
dst_entries_get_slow(&ipv4_dst_ops),
- st->in_hit,
+ 0, /* st->in_hit */
st->in_slow_tot,
st->in_slow_mc,
st->in_no_route,
@@ -303,16 +303,16 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
st->in_martian_dst,
st->in_martian_src,
- st->out_hit,
+ 0, /* st->out_hit */
st->out_slow_tot,
st->out_slow_mc,
- st->gc_total,
- st->gc_ignored,
- st->gc_goal_miss,
- st->gc_dst_overflow,
- st->in_hlist_search,
- st->out_hlist_search
+ 0, /* st->gc_total */
+ 0, /* st->gc_ignored */
+ 0, /* st->gc_goal_miss */
+ 0, /* st->gc_dst_overflow */
+ 0, /* st->in_hlist_search */
+ 0 /* st->out_hlist_search */
);
return 0;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3b64c59b4109..b95331e6c077 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -25,15 +25,7 @@
extern int sysctl_tcp_syncookies;
-__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-EXPORT_SYMBOL(syncookie_secret);
-
-static __init int init_syncookies(void)
-{
- get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
- return 0;
-}
-__initcall(init_syncookies);
+static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -44,8 +36,11 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(ipv4_cookie_scratch);
+ __u32 *tmp;
+
+ net_get_random_once(syncookie_secret, sizeof(syncookie_secret));
+ tmp = __get_cpu_var(ipv4_cookie_scratch);
memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
tmp[0] = (__force u32)saddr;
tmp[1] = (__force u32)daddr;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c08f096d46b5..d5b1390eebbe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -200,49 +200,6 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int ipv4_tcp_mem(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int ret;
- unsigned long vec[3];
- struct net *net = current->nsproxy->net_ns;
-#ifdef CONFIG_MEMCG_KMEM
- struct mem_cgroup *memcg;
-#endif
-
- struct ctl_table tmp = {
- .data = &vec,
- .maxlen = sizeof(vec),
- .mode = ctl->mode,
- };
-
- if (!write) {
- ctl->data = &net->ipv4.sysctl_tcp_mem;
- return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
- }
-
- ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
- if (ret)
- return ret;
-
-#ifdef CONFIG_MEMCG_KMEM
- rcu_read_lock();
- memcg = mem_cgroup_from_task(current);
-
- tcp_prot_mem(memcg, vec[0], 0);
- tcp_prot_mem(memcg, vec[1], 1);
- tcp_prot_mem(memcg, vec[2], 2);
- rcu_read_unlock();
-#endif
-
- net->ipv4.sysctl_tcp_mem[0] = vec[0];
- net->ipv4.sysctl_tcp_mem[1] = vec[1];
- net->ipv4.sysctl_tcp_mem[2] = vec[2];
-
- return 0;
-}
-
static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -274,6 +231,11 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
+ /* Generate a dummy secret but don't publish it. This
+ * is needed so we don't regenerate a new key on the
+ * first invocation of tcp_fastopen_cookie_gen
+ */
+ tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
@@ -552,6 +514,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_mem",
+ .maxlen = sizeof(sysctl_tcp_mem),
+ .data = &sysctl_tcp_mem,
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
@@ -860,12 +829,6 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
- {
- .procname = "tcp_mem",
- .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
- .mode = 0644,
- .proc_handler = ipv4_tcp_mem,
- },
{ }
};
@@ -875,32 +838,15 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
+ int i;
+
table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
- table[0].data =
- &net->ipv4.sysctl_icmp_echo_ignore_all;
- table[1].data =
- &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
- table[2].data =
- &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
- table[3].data =
- &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
- table[4].data =
- &net->ipv4.sysctl_icmp_ratelimit;
- table[5].data =
- &net->ipv4.sysctl_icmp_ratemask;
- table[6].data =
- &net->ipv4.sysctl_ping_group_range;
- table[7].data =
- &net->ipv4.sysctl_tcp_ecn;
- table[8].data =
- &net->ipv4.sysctl_local_ports.range;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
+ /* Update the variables to point into the current struct net */
+ for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
+ table[i].data += (void *)net - (void *)&init_net;
}
/*
@@ -917,8 +863,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_local_ports.range[0] = 32768;
net->ipv4.sysctl_local_ports.range[1] = 61000;
- tcp_init_mem(net);
-
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (net->ipv4.ipv4_hdr == NULL)
goto err_reg;
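
The rebasing loop above replaces nine hand-written table[i].data assignments: because every .data pointer in ipv4_net_table targets a field of init_net, adding the byte offset between the two struct net instances retargets it at the same field of the new namespace. A userspace illustration of that pointer arithmetic (toy structs assumed; arithmetic between distinct objects is the kernel's idiom here, not portable ISO C):

#include <assert.h>
#include <stdio.h>

struct netns { int icmp_ratelimit; int tcp_ecn; };

int main(void)
{
	struct netns init_net = { 100, 1 }, other_net = { 200, 2 };
	/* Table entry initialized against init_net, as in the source. */
	int *data = &init_net.tcp_ecn;

	/* Rebase: same offset, different struct instance. */
	data = (int *)((char *)data +
		       ((char *)&other_net - (char *)&init_net));

	assert(data == &other_net.tcp_ecn);
	printf("%d\n", *data);	/* prints 2 */
	return 0;
}
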
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index be4b161802e8..8e8529d3c8c9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -288,9 +288,11 @@ int sysctl_tcp_min_tso_segs __read_mostly = 2;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
+EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
@@ -3097,13 +3099,13 @@ static int __init set_thash_entries(char *str)
}
__setup("thash_entries=", set_thash_entries);
-void tcp_init_mem(struct net *net)
+static void tcp_init_mem(void)
{
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
- net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
- net->ipv4.sysctl_tcp_mem[1] = limit;
- net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+ sysctl_tcp_mem[0] = limit / 4 * 3;
+ sysctl_tcp_mem[1] = limit;
+ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
}
void __init tcp_init(void)
@@ -3165,7 +3167,7 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- tcp_init_mem(&init_net);
+ tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
max_wshare = min(4UL*1024*1024, limit);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ab7bd35bb312..766032b4a6c3 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -14,6 +14,20 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
+void tcp_fastopen_init_key_once(bool publish)
+{
+ static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+ /* tcp_fastopen_reset_cipher publishes the new context
+ * atomically, so we allow this race to happen here.
+ *
+ * All call sites of tcp_fastopen_cookie_gen also check
+ * for a valid cookie, so this is an acceptable risk.
+ */
+ if (net_get_random_once(key, sizeof(key)) && publish)
+ tcp_fastopen_reset_cipher(key, sizeof(key));
+}
+
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
struct tcp_fastopen_context *ctx =
@@ -70,6 +84,8 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
__be32 path[4] = { src, dst, 0, 0 };
struct tcp_fastopen_context *ctx;
+ tcp_fastopen_init_key_once(true);
+
rcu_read_lock();
ctx = rcu_dereference(tcp_fastopen_ctx);
if (ctx) {
@@ -78,14 +94,3 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
}
rcu_read_unlock();
}
-
-static int __init tcp_fastopen_init(void)
-{
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
-
- get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(key, sizeof(key));
- return 0;
-}
-
-late_initcall(tcp_fastopen_init);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb651a069a6c..63095b218b4a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2903,7 +2903,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* left edge of the send window.
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+ if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ flag & FLAG_ACKED)
seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
if (seq_rtt < 0)
@@ -2918,14 +2919,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
}
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
{
struct tcp_sock *tp = tcp_sk(sk);
s32 seq_rtt = -1;
- if (tp->lsndtime && !tp->total_retrans)
- seq_rtt = tcp_time_stamp - tp->lsndtime;
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+ if (synack_stamp && !tp->total_retrans)
+ seq_rtt = tcp_time_stamp - synack_stamp;
+
+ /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
+ * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
+ */
+ if (!tp->srtt)
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
}
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -3028,6 +3034,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
s32 seq_rtt = -1;
s32 ca_seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
+ bool rtt_update;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3104,14 +3111,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
- (flag & FLAG_ACKED))
- tcp_rearm_rto(sk);
+ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
if (flag & FLAG_ACKED) {
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
+ tcp_rearm_rto(sk);
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
tcp_mtup_probe_success(sk);
@@ -3150,6 +3156,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
}
+ } else if (skb && rtt_update && sack_rtt >= 0 &&
+ sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+ /* Do not re-arm RTO if the sack RTT is measured from data sent
+ * after the head was last (re)transmitted. Otherwise the
+ * timeout may continue to extend in loss recovery.
+ */
+ tcp_rearm_rto(sk);
}
#if FASTRETRANS_DEBUG > 0
@@ -3338,7 +3351,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
tcp_init_cwnd_reduction(sk, true);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
- tcp_set_ca_state(sk, TCP_CA_Open);
+ tcp_try_keep_open(sk);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBERECOVERY);
}
@@ -5626,6 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct request_sock *req;
int queued = 0;
bool acceptable;
+ u32 synack_stamp;
tp->rx_opt.saw_tstamp = 0;
@@ -5708,9 +5722,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* so release it.
*/
if (req) {
+ synack_stamp = tcp_rsk(req)->snt_synack;
tp->total_retrans = req->num_retrans;
reqsk_fastopen_remove(sk, req, false);
} else {
+ synack_stamp = tp->lsndtime;
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_congestion_control(sk);
@@ -5733,7 +5749,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- tcp_synack_rtt_meas(sk, req);
+ tcp_synack_rtt_meas(sk, synack_stamp);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5751,6 +5767,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
} else
tcp_init_metrics(sk);
+ tcp_update_pacing_rate(sk);
+
/* Prevent spurious tcp_cwnd_restart() on first data packet */
tp->lsndtime = tcp_time_stamp;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 114d1b748cbb..300ab2c93f29 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2749,6 +2749,7 @@ struct proto tcp_prot = {
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
+ .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 559d4ae6ebf4..03e9154f7e68 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -6,15 +6,10 @@
#include <linux/memcontrol.h>
#include <linux/module.h>
-static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
-{
- return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
-}
-
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
if (sk->sk_cgrp->memory_pressure)
- *sk->sk_cgrp->memory_pressure = 1;
+ sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
@@ -27,34 +22,24 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
*/
struct res_counter *res_parent = NULL;
struct cg_proto *cg_proto, *parent_cg;
- struct tcp_memcontrol *tcp;
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
- struct net *net = current->nsproxy->net_ns;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
- tcp = tcp_from_cgproto(cg_proto);
-
- tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
- tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
- tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
- tcp->tcp_memory_pressure = 0;
+ cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
+ cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
+ cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
+ cg_proto->memory_pressure = 0;
+ cg_proto->memcg = memcg;
parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
- res_parent = parent_cg->memory_allocated;
-
- res_counter_init(&tcp->tcp_memory_allocated, res_parent);
- percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
+ res_parent = &parent_cg->memory_allocated;
- cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
- cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
- cg_proto->sysctl_mem = tcp->tcp_prot_mem;
- cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
- cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
- cg_proto->memcg = memcg;
+ res_counter_init(&cg_proto->memory_allocated, res_parent);
+ percpu_counter_init(&cg_proto->sockets_allocated, 0);
return 0;
}
@@ -63,21 +48,17 @@ EXPORT_SYMBOL(tcp_init_cgroup);
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
struct cg_proto *cg_proto;
- struct tcp_memcontrol *tcp;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return;
- tcp = tcp_from_cgproto(cg_proto);
- percpu_counter_destroy(&tcp->tcp_sockets_allocated);
+ percpu_counter_destroy(&cg_proto->sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
{
- struct net *net = current->nsproxy->net_ns;
- struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
u64 old_lim;
int i;
@@ -90,16 +71,14 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (val > RES_COUNTER_MAX)
val = RES_COUNTER_MAX;
- tcp = tcp_from_cgproto(cg_proto);
-
- old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
- ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
+ old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT);
+ ret = res_counter_set_limit(&cg_proto->memory_allocated, val);
if (ret)
return ret;
for (i = 0; i < 3; i++)
- tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
- net->ipv4.sysctl_tcp_mem[i]);
+ cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT,
+ sysctl_tcp_mem[i]);
if (val == RES_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
@@ -156,28 +135,24 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
{
- struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return default_val;
- tcp = tcp_from_cgproto(cg_proto);
- return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
+ return res_counter_read_u64(&cg_proto->memory_allocated, type);
}
static u64 tcp_read_usage(struct mem_cgroup *memcg)
{
- struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
- tcp = tcp_from_cgproto(cg_proto);
- return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+ return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE);
}
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -205,54 +180,25 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
struct mem_cgroup *memcg;
- struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
memcg = mem_cgroup_from_css(css);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
- tcp = tcp_from_cgproto(cg_proto);
switch (event) {
case RES_MAX_USAGE:
- res_counter_reset_max(&tcp->tcp_memory_allocated);
+ res_counter_reset_max(&cg_proto->memory_allocated);
break;
case RES_FAILCNT:
- res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
+ res_counter_reset_failcnt(&cg_proto->memory_allocated);
break;
}
return 0;
}
-unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
-{
- struct tcp_memcontrol *tcp;
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
- if (!cg_proto)
- return 0;
-
- tcp = tcp_from_cgproto(cg_proto);
- return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
-}
-
-void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
-{
- struct tcp_memcontrol *tcp;
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return;
-
- tcp = tcp_from_cgproto(cg_proto);
-
- tcp->tcp_prot_mem[idx] = val;
-}
-
static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.limit_in_bytes",
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4a2a84110dfb..2ab09cbae74d 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -671,8 +671,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
write_seqlock_bh(&fastopen_seqlock);
- tfom->mss = mss;
- if (cookie->len > 0)
+ if (mss)
+ tfom->mss = mss;
+ if (cookie && cookie->len > 0)
tfom->cookie = *cookie;
if (syn_lost) {
++tfom->syn_loss;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 3a7525e6c086..a2b68a108eae 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -14,10 +14,11 @@
#include <net/tcp.h>
#include <net/protocol.h>
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int sum_truesize = 0;
struct tcphdr *th;
unsigned int thlen;
unsigned int seq;
@@ -56,6 +57,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_GRE |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
0) ||
@@ -102,13 +105,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
if (copy_destructor) {
skb->destructor = gso_skb->destructor;
skb->sk = gso_skb->sk;
- /* {tcp|sock}_wfree() use exact truesize accounting :
- * sum(skb->truesize) MUST be exactly be gso_skb->truesize
- * So we account mss bytes of 'true size' for each segment.
- * The last segment will contain the remaining.
- */
- skb->truesize = mss;
- gso_skb->truesize -= mss;
+ sum_truesize += skb->truesize;
}
skb = skb->next;
th = tcp_hdr(skb);
@@ -125,7 +122,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
if (copy_destructor) {
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
- swap(gso_skb->truesize, skb->truesize);
+ sum_truesize += skb->truesize;
+ atomic_add(sum_truesize - gso_skb->truesize,
+ &skb->sk->sk_wmem_alloc);
}
delta = htonl(oldlen + (skb_tail_pointer(skb) -
@@ -139,7 +138,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
out:
return segs;
}
-EXPORT_SYMBOL(tcp_tso_segment);
+EXPORT_SYMBOL(tcp_gso_segment);
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
@@ -320,7 +319,7 @@ static int tcp4_gro_complete(struct sk_buff *skb)
static const struct net_offload tcpv4_offload = {
.callbacks = {
.gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
+ .gso_segment = tcp_gso_segment,
.gro_receive = tcp4_gro_receive,
.gro_complete = tcp4_gro_complete,
},
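
The truesize change replaces the old per-mss truesize shuffling with one adjustment: each segment keeps its real truesize, and the socket's sk_wmem_alloc is corrected once by the difference between the segments' total and the original GSO skb's truesize. Toy arithmetic showing the invariant (illustrative numbers only):

#include <stdio.h>

int main(void)
{
	unsigned int wmem = 10000, gso_truesize = 4000;
	unsigned int seg_truesize[] = { 1500, 1500, 1800 };
	unsigned int i, sum = 0;

	for (i = 0; i < sizeof(seg_truesize) / sizeof(seg_truesize[0]); i++)
		sum += seg_truesize[i];

	/* Single atomic_add() in the patch; a plain add suffices here. */
	wmem += sum - gso_truesize;
	printf("%u\n", wmem);	/* 10800 */
	return 0;
}
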
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e5ce0e1d13b7..672854664ff5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -986,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
- if (skb->len <= mss_now || !sk_can_gso(sk) ||
- skb->ip_summed == CHECKSUM_NONE) {
+ /* Make sure we own this skb before messing with gso_size/gso_segs */
+ WARN_ON_ONCE(skb_cloned(skb));
+
+ if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1067,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_cloned(skb) &&
- skb_is_nonlinear(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
/* Get a new skb... force flag on. */
@@ -2344,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int oldpcount = tcp_skb_pcount(skb);
if (unlikely(oldpcount > 1)) {
+ if (skb_unclone(skb, GFP_ATOMIC))
+ return -ENOMEM;
tcp_init_tso_segs(sk, skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
@@ -2351,21 +2353,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
tcp_retrans_try_collapse(sk, skb, cur_mss);
- /* Some Solaris stacks overoptimize and ignore the FIN on a
- * retransmit when old data is attached. So strip it off
- * since it is cheap to do so and saves bytes on the network.
- */
- if (skb->len > 0 &&
- (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
- if (!pskb_trim(skb, 0)) {
- /* Reuse, even though it does some unnecessary work */
- tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
- TCP_SKB_CB(skb)->tcp_flags);
- skb->ip_summed = CHECKSUM_NONE;
- }
- }
-
/* Make a copy, if the first transmission SKB clone we made
* is still in somebody's hands, else make a clone.
*/
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index af07b5b23ebf..64f0354c84c7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -156,12 +156,16 @@ static bool retransmits_timed_out(struct sock *sk,
static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
int retry_until;
bool do_reset, syn_set = false;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
- if (icsk->icsk_retransmits)
+ if (icsk->icsk_retransmits) {
dst_negative_advice(sk);
+ if (tp->syn_fastopen || tp->syn_data)
+ tcp_fastopen_cache_set(sk, 0, NULL, true);
+ }
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
syn_set = true;
} else {
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 6c0eea2f8249..0531b99d8637 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -15,10 +15,10 @@ struct vegas {
u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
};
-extern void tcp_vegas_init(struct sock *sk);
-extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
-extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
-extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+void tcp_vegas_init(struct sock *sk);
+void tcp_vegas_state(struct sock *sk, u8 ca_state);
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
+void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
#endif /* __TCP_VEGAS_H */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9f27bb800607..89909dd730dd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -407,6 +407,18 @@ static inline int compute_score2(struct sock *sk, struct net *net,
return score;
}
+static unsigned int udp_ehashfn(struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
+{
+ static u32 udp_ehash_secret __read_mostly;
+
+ net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
+
+ return __inet_ehashfn(laddr, lport, faddr, fport,
+ udp_ehash_secret + net_hash_mix(net));
+}
+
/* called with read_rcu_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
@@ -430,8 +442,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
}
} else if (score == badness && reuseport) {
@@ -511,8 +523,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
}
} else if (score == badness && reuseport) {
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 5a681e298b90..f3c27899f62b 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -5,30 +5,30 @@
#include <net/protocol.h>
#include <net/inet_common.h>
-extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int );
-extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
+int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int);
+void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
-extern int udp_v4_get_port(struct sock *sk, unsigned short snum);
+int udp_v4_get_port(struct sock *sk, unsigned short snum);
-extern int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-extern int udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
+int udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+int udp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
-extern int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-extern int compat_udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
+int compat_udp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+int compat_udp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
#endif
-extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len);
-extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
- size_t size, int flags);
-extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-extern void udp_destroy_sock(struct sock *sk);
+int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
+ int flags);
+int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
-extern int udp4_seq_show(struct seq_file *seq, void *v);
+int udp4_seq_show(struct seq_file *seq, void *v);
#endif
#endif /* _UDP4_IMPL_H */
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f35eccaa855e..83206de2bc76 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -52,6 +52,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_IPIP |
SKB_GSO_GRE | SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9a459be24af7..e1a63930a967 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -104,9 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
const struct iphdr *iph = ip_hdr(skb);
u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
+ int oif = 0;
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
+ fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
@@ -235,7 +240,7 @@ static struct dst_ops xfrm4_dst_ops = {
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
- .gc_thresh = 1024,
+ .gc_thresh = 32768,
};
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11b13ea69db4..d92e5586783e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -21,24 +21,6 @@ menuconfig IPV6
if IPV6
-config IPV6_PRIVACY
- bool "IPv6: Privacy Extensions (RFC 3041) support"
- ---help---
- Privacy Extensions for Stateless Address Autoconfiguration in IPv6
- support. With this option, additional periodically-altered
- pseudo-random global-scope unicast address(es) will be assigned to
- your interface(s).
-
- We use our standard pseudo-random algorithm to generate the
- randomized interface identifier, instead of one described in RFC 3041.
-
- By default the kernel does not generate temporary addresses.
- To use temporary addresses, do
-
- echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
-
- See <file:Documentation/networking/ip-sysctl.txt> for details.
-
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
---help---
@@ -153,6 +135,17 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
---help---
Support for MIPv6 route optimization mode.
+config IPV6_VTI
+ tristate "Virtual (secure) IPv6: tunneling"
+ select IPV6_TUNNEL
+ depends on INET6_XFRM_MODE_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This can be used with xfrm mode tunnel to
+ provide a secure tunnel for IPsec and then run a routing protocol
+ on top of it.
+
config IPV6_SIT
tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
select INET_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 470a9c008e9b..17bb830872db 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_NETFILTER) += netfilter/
+obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cd3fb301da38..542d09561ed6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -83,11 +83,7 @@
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
#include <linux/random.h>
-#endif
-
#include <linux/uaccess.h>
#include <asm/unaligned.h>
@@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
static void __ipv6_regen_rndid(struct inet6_dev *idev);
static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static void ipv6_regen_rndid(unsigned long data);
-#endif
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
-#endif
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_pinfo = 1,
@@ -371,7 +361,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
#endif
-#ifdef CONFIG_IPV6_PRIVACY
INIT_LIST_HEAD(&ndev->tempaddr_list);
setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
if ((dev->flags&IFF_LOOPBACK) ||
@@ -384,7 +373,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
in6_dev_hold(ndev);
ipv6_regen_rndid((unsigned long) ndev);
}
-#endif
+
ndev->token = in6addr_any;
if (netif_running(dev) && addrconf_qdisc_ok(dev))
@@ -865,12 +854,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
-#ifdef CONFIG_IPV6_PRIVACY
if (ifa->flags&IFA_F_TEMPORARY) {
list_add(&ifa->tmp_list, &idev->tempaddr_list);
in6_ifa_hold(ifa);
}
-#endif
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
@@ -913,7 +900,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
spin_unlock_bh(&addrconf_hash_lock);
write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
+
if (ifp->flags&IFA_F_TEMPORARY) {
list_del(&ifp->tmp_list);
if (ifp->ifpub) {
@@ -922,7 +909,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
__in6_ifa_put(ifp);
}
-#endif
list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
if (ifa == ifp) {
@@ -1013,7 +999,6 @@ out:
in6_ifa_put(ifp);
}
-#ifdef CONFIG_IPV6_PRIVACY
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
{
struct inet6_dev *idev = ifp->idev;
@@ -1116,7 +1101,6 @@ retry:
out:
return ret;
}
-#endif
/*
* Choose an appropriate source address (RFC3484)
@@ -1131,9 +1115,7 @@ enum {
#endif
IPV6_SADDR_RULE_OIF,
IPV6_SADDR_RULE_LABEL,
-#ifdef CONFIG_IPV6_PRIVACY
IPV6_SADDR_RULE_PRIVACY,
-#endif
IPV6_SADDR_RULE_ORCHID,
IPV6_SADDR_RULE_PREFIX,
IPV6_SADDR_RULE_MAX
@@ -1247,7 +1229,6 @@ static int ipv6_get_saddr_eval(struct net *net,
&score->ifa->addr, score->addr_type,
score->ifa->idev->dev->ifindex) == dst->label;
break;
-#ifdef CONFIG_IPV6_PRIVACY
case IPV6_SADDR_RULE_PRIVACY:
{
/* Rule 7: Prefer public address
@@ -1259,7 +1240,6 @@ static int ipv6_get_saddr_eval(struct net *net,
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
-#endif
case IPV6_SADDR_RULE_ORCHID:
/* Rule 8-: Prefer ORCHID vs ORCHID or
* non-ORCHID vs non-ORCHID
@@ -1588,7 +1568,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
} else if (ifp->flags&IFA_F_TEMPORARY) {
struct inet6_ifaddr *ifpub;
spin_lock_bh(&ifp->lock);
@@ -1602,7 +1581,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
-#endif
} else
ipv6_del_addr(ifp);
}
@@ -1851,7 +1829,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
return err;
}
-#ifdef CONFIG_IPV6_PRIVACY
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
static void __ipv6_regen_rndid(struct inet6_dev *idev)
{
@@ -1919,7 +1896,6 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
__ipv6_regen_rndid(idev);
}
-#endif
/*
* Add prefix route.
@@ -2207,9 +2183,7 @@ ok:
if (ifp) {
int flags;
unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
struct inet6_ifaddr *ift;
-#endif
u32 stored_lft;
/* update lifetime (RFC2462 5.5.3 e) */
@@ -2250,7 +2224,6 @@ ok:
} else
spin_unlock(&ifp->lock);
-#ifdef CONFIG_IPV6_PRIVACY
read_lock_bh(&in6_dev->lock);
/* update all temporary addresses in the list */
list_for_each_entry(ift, &in6_dev->tempaddr_list,
@@ -2315,7 +2288,7 @@ ok:
} else {
read_unlock_bh(&in6_dev->lock);
}
-#endif
+
in6_ifa_put(ifp);
addrconf_verify(0);
}
@@ -2995,7 +2968,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
-#ifdef CONFIG_IPV6_PRIVACY
if (how && del_timer(&idev->regen_timer))
in6_dev_put(idev);
@@ -3015,7 +2987,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
in6_ifa_put(ifa);
write_lock_bh(&idev->lock);
}
-#endif
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
@@ -3528,7 +3499,6 @@ restart:
in6_ifa_put(ifp);
goto restart;
}
-#ifdef CONFIG_IPV6_PRIVACY
} else if ((ifp->flags&IFA_F_TEMPORARY) &&
!(ifp->flags&IFA_F_TENTATIVE)) {
unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
@@ -3556,7 +3526,6 @@ restart:
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
spin_unlock(&ifp->lock);
-#endif
} else {
/* ifp->prefered_lft <= ifp->valid_lft */
if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -4128,13 +4097,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
-#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
@@ -4828,7 +4795,6 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
-#ifdef CONFIG_IPV6_PRIVACY
{
.procname = "use_tempaddr",
.data = &ipv6_devconf.use_tempaddr,
@@ -4864,7 +4830,6 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "max_addresses",
.data = &ipv6_devconf.max_addresses,
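
The addrconf.c changes above drop every CONFIG_IPV6_PRIVACY conditional, so the RFC 4941 privacy-extension machinery (temporary address lists, randomized interface identifiers, source-address rule 7) is now compiled in unconditionally. As a rough user-space sketch of the idea behind __ipv6_regen_rndid(), hypothetical example code rather than anything in this patch, a randomized interface identifier is simply strong randomness with the EUI-64 universal/local bit cleared:

#include <stdio.h>
#include <stdint.h>
#include <sys/random.h>

int main(void)
{
	uint8_t rndid[8];

	if (getrandom(rndid, sizeof(rndid), 0) != sizeof(rndid))
		return 1;

	/* A randomized IID must not claim universal scope: clear the
	 * EUI-64 "u" bit, as __ipv6_regen_rndid() does. */
	rndid[0] &= ~0x02;

	printf("temporary IID: ");
	for (int i = 0; i < 8; i++)
		printf("%02x%s", rndid[i], i == 7 ? "\n" : i % 2 ? ":" : "");
	return 0;
}

A real implementation also avoids the reserved identifier ranges, which the kernel handles with a regenerate-and-retry check in __ipv6_regen_rndid().
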
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a2cb07cd3850..6468bda1f2b9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -110,11 +110,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
- if (sock->type != SOCK_RAW &&
- sock->type != SOCK_DGRAM &&
- !inet_ehash_secret)
- build_ehash_secret();
-
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -870,8 +865,6 @@ static int __init inet6_init(void)
if (err)
goto out_sock_register_fail;
- tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
-
/*
* ipngwg API draft makes clear that the correct semantics
* for TCP and UDP is to consider one TCP and UDP instance
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 73784c3d4642..82e1da3a40b9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d3618a78fcac..b8719df0366e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -164,10 +164,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
u8 *iv;
u8 *tail;
__be32 *seqhi;
- struct esp_data *esp = x->data;
/* skb is pure payload to encrypt */
- aead = esp->aead;
+ aead = x->data;
alen = crypto_aead_authsize(aead);
tfclen = 0;
@@ -181,8 +180,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
}
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
- if (esp->padlen)
- clen = ALIGN(clen, esp->padlen);
plen = clen - skb->len - tfclen;
err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
@@ -271,8 +268,7 @@ error:
static int esp_input_done2(struct sk_buff *skb, int err)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int elen = skb->len - hlen;
@@ -325,8 +321,7 @@ static void esp_input_done(struct crypto_async_request *base, int err)
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
- struct esp_data *esp = x->data;
- struct crypto_aead *aead = esp->aead;
+ struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
@@ -414,9 +409,8 @@ out:
static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
- struct esp_data *esp = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
- u32 align = max_t(u32, blksize, esp->padlen);
+ struct crypto_aead *aead = x->data;
+ u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
unsigned int net_adj;
if (x->props.mode != XFRM_MODE_TUNNEL)
@@ -424,8 +418,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
else
net_adj = 0;
- return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
- net_adj) & ~(align - 1)) + net_adj - 2;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -436,8 +430,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
@@ -455,18 +448,16 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static void esp6_destroy(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
+ struct crypto_aead *aead = x->data;
- if (!esp)
+ if (!aead)
return;
- crypto_free_aead(esp->aead);
- kfree(esp);
+ crypto_free_aead(aead);
}
static int esp_init_aead(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
int err;
@@ -475,7 +466,7 @@ static int esp_init_aead(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
err = crypto_aead_setkey(aead, x->aead->alg_key,
(x->aead->alg_key_len + 7) / 8);
@@ -492,7 +483,6 @@ error:
static int esp_init_authenc(struct xfrm_state *x)
{
- struct esp_data *esp = x->data;
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
struct rtattr *rta;
@@ -527,7 +517,7 @@ static int esp_init_authenc(struct xfrm_state *x)
if (IS_ERR(aead))
goto error;
- esp->aead = aead;
+ x->data = aead;
keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
(x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
@@ -582,7 +572,6 @@ error:
static int esp6_init_state(struct xfrm_state *x)
{
- struct esp_data *esp;
struct crypto_aead *aead;
u32 align;
int err;
@@ -590,11 +579,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (x->encap)
return -EINVAL;
- esp = kzalloc(sizeof(*esp), GFP_KERNEL);
- if (esp == NULL)
- return -ENOMEM;
-
- x->data = esp;
+ x->data = NULL;
if (x->aead)
err = esp_init_aead(x);
@@ -604,9 +589,7 @@ static int esp6_init_state(struct xfrm_state *x)
if (err)
goto error;
- aead = esp->aead;
-
- esp->padlen = 0;
+ aead = x->data;
x->props.header_len = sizeof(struct ip_esp_hdr) +
crypto_aead_ivsize(aead);
@@ -626,9 +609,7 @@ static int esp6_init_state(struct xfrm_state *x)
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
- if (esp->padlen)
- align = max_t(u32, align, esp->padlen);
- x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
+ x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
error:
return err;
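
With the private struct esp_data gone, esp6_get_mtu() above also loses the esp->padlen alignment term: the usable payload now aligns to the AEAD block size alone. A worked example of the new arithmetic (standalone C with made-up values; esp6_payload_mtu() is a name invented here, not a kernel function):

#include <stdio.h>
#include <stdint.h>

/* Mirror of the reworked esp6_get_mtu() arithmetic: align the ESP
 * payload down to the AEAD block size, then account for the trailer
 * (2 bytes for pad length + next header). */
static uint32_t esp6_payload_mtu(uint32_t mtu, uint32_t header_len,
				 uint32_t authsize, uint32_t blksize,
				 uint32_t net_adj)
{
	return ((mtu - header_len - authsize - net_adj) &
		~(blksize - 1)) + net_adj - 2;
}

int main(void)
{
	/* Example values only: 1500-byte link MTU, 8-byte ESP header plus
	 * 8-byte IV, 12-byte ICV, 16-byte block size, transport mode
	 * (net_adj = 40 for the IPv6 header). */
	printf("%u\n", esp6_payload_mtu(1500, 16, 12, 16, 40));
	return 0;
}

For these inputs the result is 1462: the 1432 usable bytes round down to the 16-byte block grid (1424), the 40-byte net_adj comes back, and the 2 trailer bytes come off.
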
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 842d833dfc18..262e13c02ec2 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,6 +23,39 @@
#include <net/secure_seq.h>
#include <net/ip.h>
+static unsigned int inet6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 inet6_ehash_secret __read_mostly;
+ static u32 ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+ net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ inet6_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *faddr = &sk->sk_v6_daddr;
+ const __u16 lport = inet->inet_num;
+ const __be16 fport = inet->inet_dport;
+ struct net *net = sock_net(sk);
+
+ return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
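
inet6_ehashfn() above seeds its two secrets lazily via net_get_random_once(), which is why the eager build_ehash_secret() call could be removed from af_inet6.c earlier in this series. A user-space approximation of the initialize-once pattern, assuming POSIX threads and getrandom(2); toy_ehashfn() and the xor mixing are stand-ins, the kernel uses jhash:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/random.h>

static uint32_t ehash_secret;
static pthread_once_t secret_once = PTHREAD_ONCE_INIT;

static void seed_secret(void)
{
	/* Runs exactly once across all threads, like net_get_random_once(). */
	(void)getrandom(&ehash_secret, sizeof(ehash_secret), 0);
}

static uint32_t toy_ehashfn(uint32_t laddr_hash, uint16_t lport,
			    uint32_t faddr_hash, uint16_t fport)
{
	pthread_once(&secret_once, seed_secret);
	/* Crude mix for illustration; the kernel mixes with jhash. */
	return laddr_hash ^ faddr_hash ^ ((uint32_t)lport << 16 | fport) ^
	       ehash_secret;
}

int main(void)
{
	printf("%08x\n", toy_ehashfn(0x0a000001, 80, 0x0a000002, 54321));
	return 0;
}
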
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1ef1fa2b22a6..bf4a9a084de5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -976,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
if (t->parms.o_flags&GRE_SEQ)
addend += 4;
}
+ t->hlen = addend;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
@@ -1002,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
}
ip6_rt_put(rt);
}
-
- t->hlen = addend;
}
static int ip6gre_tnl_change(struct ip6_tnl *t,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d82de7228100..4b851692b1f6 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -66,7 +66,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
__skb_pull(skb, sizeof(*ipv6h));
err = -EPROTONOSUPPORT;
- rcu_read_lock();
ops = rcu_dereference(inet6_offloads[
ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
@@ -74,7 +73,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
skb_reset_transport_header(skb);
err = ops->callbacks.gso_send_check(skb);
}
- rcu_read_unlock();
out:
return err;
@@ -92,46 +90,58 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
u8 *prevhdr;
int offset = 0;
bool tunnel;
+ int nhoff;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
goto out;
+ skb_reset_network_header(skb);
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- tunnel = skb->encapsulation;
+ tunnel = SKB_GSO_CB(skb)->encap_level > 0;
+ if (tunnel)
+ features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
+
ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
- rcu_read_lock();
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
}
- rcu_read_unlock();
if (IS_ERR(segs))
goto out;
for (skb = segs; skb; skb = skb->next) {
- ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(skb->len - skb->mac_len -
- sizeof(*ipv6h));
+ ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+ ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+ if (tunnel) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+ skb->network_header = (u8 *)ipv6h - skb->head;
+
if (!tunnel && proto == IPPROTO_UDP) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)(skb_network_header(skb) +
- unfrag_ip6hlen);
+ fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
fptr->frag_off = htons(offset);
if (skb->next != NULL)
fptr->frag_off |= htons(IP6_MF);
@@ -267,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
},
};
+static const struct net_offload sit_offload = {
+ .callbacks = {
+ .gso_send_check = ipv6_gso_send_check,
+ .gso_segment = ipv6_gso_segment,
+ },
+};
+
static int __init ipv6_offload_init(void)
{
@@ -278,6 +295,9 @@ static int __init ipv6_offload_init(void)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
dev_add_offload(&ipv6_packet_offload);
+
+ inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
return 0;
}
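
Registering sit_offload with inet_add_offload() for IPPROTO_IPV6 means an IPv6-in-IPv4 (SIT) packet hitting the IPv4 GSO path is dispatched to the IPv6 segmentation callbacks above. A toy model of that per-protocol dispatch table (names invented for illustration, not kernel API):

#include <stdio.h>

struct toy_offload {
	const char *(*segment)(void);
};

static const char *ipv6_segment(void) { return "ipv6_gso_segment"; }

/* One slot per IP protocol number, like the kernel's inet_offloads[]. */
static const struct toy_offload *offloads[256];
static const struct toy_offload toy_sit_offload = { .segment = ipv6_segment };

int main(void)
{
	offloads[41] = &toy_sit_offload;	/* IPPROTO_IPV6 == 41 */

	if (offloads[41])
		printf("proto 41 -> %s\n", offloads[41]->segment());
	return 0;
}
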
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a54c45ce4a48..91fb4e8212f5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ nexthop = rt6_nexthop((struct rt6_info *)dst);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -1008,6 +1008,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
{
struct sk_buff *skb;
+ struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1015,8 +1016,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* udp datagram
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
- struct frag_hdr fhdr;
-
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
@@ -1036,20 +1035,24 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->transport_header = skb->network_header + fragheaderlen;
skb->protocol = htons(ETH_P_IPV6);
- skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(&fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* Specify the length of each IPv6 datagram fragment.
+ * It has to be a multiple of 8.
+ */
+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+ sizeof(struct frag_hdr)) & ~7;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ ipv6_select_ident(&fhdr, rt);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
return skb_append_datato_frags(sk, skb, getfrag, from,
(length - transhdrlen));
}
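
The ip6_output.c hunk moves the gso_size/gso_type setup out of the first-skb branch so it also runs when appending to an existing GSO skb via the new append label. The sizing rule itself is unchanged; a worked example with illustrative header lengths:

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500;
	unsigned int fragheaderlen = 48;	/* illustrative: IPv6 + UDP */
	unsigned int frag_hdr_len = 8;		/* sizeof(struct frag_hdr) */

	/* Same expression as ip6_ufo_append_data(): each UFO fragment's
	 * payload must be a multiple of 8 bytes. */
	unsigned int gso_size = (mtu - fragheaderlen - frag_hdr_len) & ~7u;

	printf("gso_size = %u\n", gso_size);	/* 1440 for these values */
	return 0;
}
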
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
new file mode 100644
index 000000000000..ed94ba61dda0
--- /dev/null
+++ b/net/ipv6/ip6_vti.c
@@ -0,0 +1,1056 @@
+/*
+ * IPv6 virtual tunneling interface
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv6/ip6_tunnel.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+{
+ u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+static int vti6_dev_init(struct net_device *dev);
+static void vti6_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops vti6_link_ops __read_mostly;
+
+static int vti6_net_id __read_mostly;
+struct vti6_net {
+ /* the vti6 tunnel fallback device */
+ struct net_device *fb_tnl_dev;
+ /* lists for storing tunnels in use */
+ struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_wc[1];
+ struct ip6_tnl __rcu **tnls[2];
+};
+
+static struct net_device_stats *vti6_get_stats(struct net_device *dev)
+{
+ struct pcpu_tstats sum = { 0 };
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+
+ sum.rx_packets += tstats->rx_packets;
+ sum.rx_bytes += tstats->rx_bytes;
+ sum.tx_packets += tstats->tx_packets;
+ sum.tx_bytes += tstats->tx_bytes;
+ }
+ dev->stats.rx_packets = sum.rx_packets;
+ dev->stats.rx_bytes = sum.rx_bytes;
+ dev->stats.tx_packets = sum.tx_packets;
+ dev->stats.tx_bytes = sum.tx_bytes;
+ return &dev->stats;
+}
+
+#define for_each_vti6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/**
+ * vti6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @net: network namespace
+ * @remote: the address of the tunnel exit-point
+ * @local: the address of the tunnel entry-point
+ *
+ * Return:
+ * tunnel matching given end-points if found,
+ * else fallback tunnel if its device is up,
+ * else %NULL
+ **/
+static struct ip6_tnl *
+vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
+ const struct in6_addr *local)
+{
+ unsigned int hash = HASH(remote, local);
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+ t = rcu_dereference(ip6n->tnls_wc[0]);
+ if (t && (t->dev->flags & IFF_UP))
+ return t;
+
+ return NULL;
+}
+
+/**
+ * vti6_tnl_bucket - get head of list matching given tunnel parameters
+ * @ip6n: the per-net vti6 state holding the hash buckets
+ * @p: parameters containing tunnel end-points
+ *
+ * Description:
+ * vti6_tnl_bucket() returns the head of the list matching the
+ * &struct in6_addr entries laddr and raddr in @p.
+ *
+ * Return: head of IPv6 tunnel list
+ **/
+static struct ip6_tnl __rcu **
+vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = 0;
+ int prio = 0;
+
+ if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
+ prio = 1;
+ h = HASH(remote, local);
+ }
+ return &ip6n->tnls[prio][h];
+}
+
+static void
+vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void
+vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = vti6_tnl_bucket(ip6n, &t->parms);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static void vti6_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static int vti6_tnl_create2(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ err = vti6_dev_init(dev);
+ if (err < 0)
+ goto out;
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto out;
+
+ strcpy(t->parms.name, dev->name);
+ dev->rtnl_link_ops = &vti6_link_ops;
+
+ dev_hold(dev);
+ vti6_tnl_link(ip6n, t);
+
+ return 0;
+
+out:
+ return err;
+}
+
+static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
+{
+ struct net_device *dev;
+ struct ip6_tnl *t;
+ char name[IFNAMSIZ];
+ int err;
+
+ if (p->name[0])
+ strlcpy(name, p->name, IFNAMSIZ);
+ else
+ sprintf(name, "ip6_vti%%d");
+
+ dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup);
+ if (dev == NULL)
+ goto failed;
+
+ dev_net_set(dev, net);
+
+ t = netdev_priv(dev);
+ t->parms = *p;
+ t->net = dev_net(dev);
+
+ err = vti6_tnl_create2(dev);
+ if (err < 0)
+ goto failed_free;
+
+ return t;
+
+failed_free:
+ vti6_dev_free(dev);
+failed:
+ return NULL;
+}
+
+/**
+ * vti6_locate - find or create tunnel matching given parameters
+ * @net: network namespace
+ * @p: tunnel parameters
+ * @create: != 0 if allowed to create a new tunnel when no match is found
+ *
+ * Description:
+ * vti6_locate() first tries to locate an existing tunnel
+ * based on @p. If this is unsuccessful, but @create is set, a new
+ * tunnel device is created and registered for use.
+ *
+ * Return:
+ * matching tunnel or NULL
+ **/
+static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
+ int create)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *t;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ for (tp = vti6_tnl_bucket(ip6n, p);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr))
+ return t;
+ }
+ if (!create)
+ return NULL;
+ return vti6_tnl_create(net, p);
+}
+
+/**
+ * vti6_dev_uninit - tunnel device uninitializer
+ * @dev: the device to be destroyed
+ *
+ * Description:
+ * vti6_dev_uninit() removes tunnel from its list
+ **/
+static void vti6_dev_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
+ else
+ vti6_tnl_unlink(ip6n, t);
+ ip6_tnl_dst_reset(t);
+ dev_put(dev);
+}
+
+static int vti6_rcv(struct sk_buff *skb)
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ rcu_read_lock();
+
+ if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
+ &ipv6h->daddr)) != NULL) {
+ struct pcpu_tstats *tstats;
+
+ if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
+ t->dev->stats.rx_dropped++;
+ rcu_read_unlock();
+ goto discard;
+ }
+
+ tstats = this_cpu_ptr(t->dev->tstats);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+
+ skb->mark = 0;
+ secpath_reset(skb);
+ skb->dev = t->dev;
+
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+ return 1;
+
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+/**
+ * vti6_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * else 0
+ **/
+static inline bool
+vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+/**
+ * vti6_xmit - send a packet
+ * @skb: the outgoing socket buffer
+ * @dev: the outgoing tunnel device
+ **/
+static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ struct dst_entry *dst = NULL, *ndst = NULL;
+ struct flowi6 fl6;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct net_device *tdev;
+ int err = -1;
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ return err;
+
+ dst = ip6_tnl_dst_check(t);
+ if (!dst) {
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+
+ ndst = ip6_route_output(net, NULL, &fl6);
+
+ if (ndst->error)
+ goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
+ }
+
+ if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ goto tx_err_link_failure;
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ t->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set_noref(skb, dst);
+
+ ip6tunnel_xmit(skb, dev);
+ if (ndst) {
+ dev->mtu = dst_mtu(ndst);
+ ip6_tnl_dst_store(t, ndst);
+ }
+
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(ndst);
+ return err;
+}
+
+static netdev_tx_t
+vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ int ret;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IPV6):
+ ret = vti6_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void vti6_link_config(struct ip6_tnl *t)
+{
+ struct dst_entry *dst;
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+
+ /* Set up flowi template */
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowi6_mark = be32_to_cpu(p->i_key);
+ fl6->flowi6_proto = p->proto;
+ fl6->flowlabel = 0;
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
+ IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+
+ dst = ip6_route_output(dev_net(dev), NULL, fl6);
+ if (dst->error)
+ return;
+
+ dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
+ NULL, 0);
+ if (IS_ERR(dst))
+ return;
+
+ if (dst->dev) {
+ dev->hard_header_len = dst->dev->hard_header_len;
+
+ dev->mtu = dst_mtu(dst);
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ dst_release(dst);
+ }
+}
+
+/**
+ * vti6_tnl_change - update the tunnel parameters
+ * @t: tunnel to be changed
+ * @p: tunnel configuration parameters
+ *
+ * Description:
+ * vti6_tnl_change() updates the tunnel parameters
+ **/
+static int
+vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.link = p->link;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.proto = p->proto;
+ ip6_tnl_dst_reset(t);
+ vti6_link_config(t);
+ return 0;
+}
+
+static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ struct net *net = dev_net(t->dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err;
+
+ vti6_tnl_unlink(ip6n, t);
+ synchronize_net();
+ err = vti6_tnl_change(t, p);
+ vti6_tnl_link(ip6n, t);
+ netdev_state_change(t->dev);
+ return err;
+}
+
+static void
+vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->proto = u->proto;
+
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->proto = p->proto;
+
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+/**
+ * vti6_ioctl - configure vti6 tunnels from userspace
+ * @dev: virtual device associated with tunnel
+ * @ifr: parameters passed from userspace
+ * @cmd: command to be performed
+ *
+ * Description:
+ * vti6_ioctl() is used for managing vti6 tunnels
+ * from userspace.
+ *
+ * The possible commands are the following:
+ * %SIOCGETTUNNEL: get tunnel parameters for device
+ * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ * %SIOCCHGTUNNEL: change tunnel parameters to those given
+ * %SIOCDELTUNNEL: delete tunnel
+ *
+ * The fallback device "ip6_vti0", created during module
+ * initialization, can be used for creating other tunnel devices.
+ *
+ * Return:
+ * 0 on success,
+ * %-EFAULT if unable to copy data to or from userspace,
+ * %-EPERM if current process lacks %CAP_NET_ADMIN,
+ * %-EINVAL if passed tunnel parameters are invalid,
+ * %-EEXIST if changing a tunnel's parameters would cause a conflict,
+ * %-ENODEV if attempting to change or delete a nonexistent device
+ **/
+static int
+vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t = NULL;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6n->fb_tnl_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ } else {
+ memset(&p, 0, sizeof(p));
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -EINVAL;
+ if (p.proto != IPPROTO_IPV6 && p.proto != 0)
+ break;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
+ if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else
+ t = netdev_priv(dev);
+
+ err = vti6_update(t, &p1);
+ }
+ if (t) {
+ err = 0;
+ vti6_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ break;
+
+ if (dev == ip6n->fb_tnl_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ break;
+ err = -ENOENT;
+ vti6_parm_from_user(&p1, &p);
+ t = vti6_locate(net, &p1, 0);
+ if (t == NULL)
+ break;
+ err = -EPERM;
+ if (t->dev == ip6n->fb_tnl_dev)
+ break;
+ dev = t->dev;
+ }
+ err = 0;
+ unregister_netdevice(dev);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ return err;
+}
+
+/**
+ * vti6_change_mtu - change MTU manually for tunnel device
+ * @dev: virtual device associated with tunnel
+ * @new_mtu: the new mtu
+ *
+ * Return:
+ * 0 on success,
+ * %-EINVAL if mtu too small
+ **/
+static int vti6_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops vti6_netdev_ops = {
+ .ndo_uninit = vti6_dev_uninit,
+ .ndo_start_xmit = vti6_tnl_xmit,
+ .ndo_do_ioctl = vti6_ioctl,
+ .ndo_change_mtu = vti6_change_mtu,
+ .ndo_get_stats = vti6_get_stats,
+};
+
+/**
+ * vti6_dev_setup - setup virtual tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Initialize function pointers and device parameters
+ **/
+static void vti6_dev_setup(struct net_device *dev)
+{
+ struct ip6_tnl *t;
+
+ dev->netdev_ops = &vti6_netdev_ops;
+ dev->destructor = vti6_dev_free;
+
+ dev->type = ARPHRD_TUNNEL6;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
+ dev->mtu = ETH_DATA_LEN;
+ t = netdev_priv(dev);
+ dev->flags |= IFF_NOARP;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+/**
+ * vti6_dev_init_gen - general initializer for all tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static inline int vti6_dev_init_gen(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ t->dev = dev;
+ t->net = dev_net(dev);
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * vti6_dev_init - initializer for all non-fallback tunnel devices
+ * @dev: virtual device associated with tunnel
+ **/
+static int vti6_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+ vti6_link_config(t);
+ return 0;
+}
+
+/**
+ * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * @dev: fallback device
+ *
+ * Return: 0
+ **/
+static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ int err = vti6_dev_init_gen(dev);
+
+ if (err)
+ return err;
+
+ t->parms.proto = IPPROTO_IPV6;
+ dev_hold(dev);
+
+ vti6_link_config(t);
+
+ rcu_assign_pointer(ip6n->tnls_wc[0], t);
+ return 0;
+}
+
+static int vti6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ return 0;
+}
+
+static void vti6_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_VTI_LINK])
+ parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
+
+ if (data[IFLA_VTI_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
+ sizeof(struct in6_addr));
+
+ if (data[IFLA_VTI_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
+
+ if (data[IFLA_VTI_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+}
+
+static int vti6_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *nt;
+
+ nt = netdev_priv(dev);
+ vti6_netlink_parms(data, &nt->parms);
+
+ nt->parms.proto = IPPROTO_IPV6;
+
+ if (vti6_locate(net, &nt->parms, 0))
+ return -EEXIST;
+
+ return vti6_tnl_create2(dev);
+}
+
+static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t;
+ struct __ip6_tnl_parm p;
+ struct net *net = dev_net(dev);
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ if (dev == ip6n->fb_tnl_dev)
+ return -EINVAL;
+
+ vti6_netlink_parms(data, &p);
+
+ t = vti6_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else
+ t = netdev_priv(dev);
+
+ return vti6_update(t, &p);
+}
+
+static size_t vti6_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_VTI_LINK */
+ nla_total_size(4) +
+ /* IFLA_VTI_LOCAL */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_REMOTE */
+ nla_total_size(sizeof(struct in6_addr)) +
+ /* IFLA_VTI_IKEY */
+ nla_total_size(4) +
+ /* IFLA_VTI_OKEY */
+ nla_total_size(4) +
+ 0;
+}
+
+static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct __ip6_tnl_parm *parm = &tunnel->parms;
+
+ if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
+ nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
+ &parm->laddr) ||
+ nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
+ nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
+ nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
+ [IFLA_VTI_LINK] = { .type = NLA_U32 },
+ [IFLA_VTI_LOCAL] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) },
+ [IFLA_VTI_IKEY] = { .type = NLA_U32 },
+ [IFLA_VTI_OKEY] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops vti6_link_ops __read_mostly = {
+ .kind = "vti6",
+ .maxtype = IFLA_VTI_MAX,
+ .policy = vti6_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = vti6_dev_setup,
+ .validate = vti6_validate,
+ .newlink = vti6_newlink,
+ .changelink = vti6_changelink,
+ .get_size = vti6_get_size,
+ .fill_info = vti6_fill_info,
+};
+
+static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
+ .handler = vti6_rcv,
+ .priority = 1,
+};
+
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+{
+ int h;
+ struct ip6_tnl *t;
+ LIST_HEAD(list);
+
+ for (h = 0; h < HASH_SIZE; h++) {
+ t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, &list);
+ t = rtnl_dereference(t->next);
+ }
+ }
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_many(&list);
+}
+
+static int __net_init vti6_init_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct ip6_tnl *t = NULL;
+ int err;
+
+ ip6n->tnls[0] = ip6n->tnls_wc;
+ ip6n->tnls[1] = ip6n->tnls_r_l;
+
+ err = -ENOMEM;
+ ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
+ vti6_dev_setup);
+
+ if (!ip6n->fb_tnl_dev)
+ goto err_alloc_dev;
+ dev_net_set(ip6n->fb_tnl_dev, net);
+
+ err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ err = register_netdev(ip6n->fb_tnl_dev);
+ if (err < 0)
+ goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
+ return 0;
+
+err_register:
+ vti6_dev_free(ip6n->fb_tnl_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit vti6_exit_net(struct net *net)
+{
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+
+ rtnl_lock();
+ vti6_destroy_tunnels(ip6n);
+ rtnl_unlock();
+}
+
+static struct pernet_operations vti6_net_ops = {
+ .init = vti6_init_net,
+ .exit = vti6_exit_net,
+ .id = &vti6_net_id,
+ .size = sizeof(struct vti6_net),
+};
+
+/**
+ * vti6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Return: 0 on success
+ **/
+static int __init vti6_tunnel_init(void)
+{
+ int err;
+
+ err = register_pernet_device(&vti6_net_ops);
+ if (err < 0)
+ goto out_pernet;
+
+ err = xfrm6_mode_tunnel_input_register(&vti6_handler);
+ if (err < 0) {
+ pr_err("%s: can't register vti6\n", __func__);
+ goto out;
+ }
+ err = rtnl_link_register(&vti6_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ return 0;
+
+rtnl_link_failed:
+ xfrm6_mode_tunnel_input_deregister(&vti6_handler);
+out:
+ unregister_pernet_device(&vti6_net_ops);
+out_pernet:
+ return err;
+}
+
+/**
+ * vti6_tunnel_cleanup - free resources and unregister protocol
+ **/
+static void __exit vti6_tunnel_cleanup(void)
+{
+ rtnl_link_unregister(&vti6_link_ops);
+ if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
+ pr_info("%s: can't deregister vti6\n", __func__);
+
+ unregister_pernet_device(&vti6_net_ops);
+}
+
+module_init(vti6_tunnel_init);
+module_exit(vti6_tunnel_cleanup);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("vti6");
+MODULE_ALIAS_NETDEV("ip6_vti0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
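
The new driver above buckets tunnels with HASH(), i.e. hash_32() over the XOR of the two endpoint address hashes. A standalone sketch of that multiplicative bucketing; the fold in addr_hash() and the golden-ratio constant are illustrative reconstructions, not copied from the kernel:

#include <stdio.h>
#include <stdint.h>

#define HASH_SIZE_SHIFT	5
#define HASH_SIZE	(1 << HASH_SIZE_SHIFT)

/* XOR-fold a 128-bit address to 32 bits, roughly what ipv6_addr_hash()
 * does with the four 32-bit words of an in6_addr. */
static uint32_t addr_hash(const uint32_t a[4])
{
	return a[0] ^ a[1] ^ a[2] ^ a[3];
}

/* Multiplicative hashing: multiply by a 32-bit golden-ratio constant and
 * keep the top HASH_SIZE_SHIFT bits, the same shape as hash_32(). */
static uint32_t bucket(const uint32_t laddr[4], const uint32_t raddr[4])
{
	uint32_t h = addr_hash(laddr) ^ addr_hash(raddr);

	return (h * 0x61C88647u) >> (32 - HASH_SIZE_SHIFT);
}

int main(void)
{
	uint32_t local[4]  = { 0x20010db8, 0, 0, 1 };
	uint32_t remote[4] = { 0x20010db8, 0, 0, 2 };

	printf("bucket %u of %u\n", bucket(local, remote), HASH_SIZE);
	return 0;
}

Keeping the top bits of the product spreads nearby addresses across the 32 buckets far better than masking the low bits of a plain XOR would.
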
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5636a912074a..ce507d9e1c90 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 44400c216dc6..710238f58aa9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
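
The barrier added above pairs with the writer's publication of table->private: the reader must not observe private-> members older than the pointer it just loaded. In C11 terms the reader needs at least a consume/acquire load. A self-contained user-space sketch of the pairing (illustrative, single-threaded driver):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct table_info {
	int entries;
};

static _Atomic(struct table_info *) table_private;

static void publisher(void)
{
	struct table_info *priv = malloc(sizeof(*priv));

	priv->entries = 42;
	/* Release: the initialized members become visible before the
	 * pointer does, the counterpart of the reader's barrier. */
	atomic_store_explicit(&table_private, priv, memory_order_release);
}

static void reader(void)
{
	/* Acquire stands in for smp_read_barrier_depends(): members read
	 * through priv are no older than the pointer we loaded. */
	struct table_info *priv =
		atomic_load_explicit(&table_private, memory_order_acquire);

	if (priv)
		printf("entries = %d\n", priv->entries);
}

int main(void)
{
	publisher();
	reader();
	return 0;
}
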
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 56eef30ee5f6..da00a2ecde55 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
/* Send RST reply */
-static void send_reset(struct net *net, struct sk_buff *oldskb)
+static void send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct tcphdr otcph, *tcph;
@@ -88,8 +88,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
}
/* Check checksum. */
- if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
- skb_checksum(oldskb, tcphoff, otcplen, 0))) {
+ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
pr_debug("TCP checksum is invalid\n");
return;
}
@@ -227,7 +226,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
/* Do nothing */
break;
case IP6T_TCP_RESET:
- send_reset(net, skb);
+ send_reset(net, skb, par->hooknum);
break;
default:
net_info_ratelimited("case %u not handled yet\n", reject->with);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index dffdc1a389c5..4a258263d8ec 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -144,12 +144,24 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
+static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
+{
+ u32 c;
+
+ net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
+ c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+ (__force u32)id, nf_frags.rnd);
+ return c & (INETFRAGS_HASHSZ - 1);
+}
+
static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
const struct frag_queue *nq;
nq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+ return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
static void nf_skb_free(struct sk_buff *skb)
@@ -185,7 +197,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.ecn = ecn;
read_lock_bh(&nf_frags.lock);
- hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
+ hash = nf_hash_frag(id, src, dst);
q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1aeb473b2cc6..cc85a9ba5010 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -82,24 +82,24 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
*/
-unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr, u32 rnd)
+static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
u32 c;
+ net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, rnd);
+ (__force u32)id, ip6_frags.rnd);
return c & (INETFRAGS_HASHSZ - 1);
}
-EXPORT_SYMBOL_GPL(inet6_hash_frag);
static unsigned int ip6_hashfn(struct inet_frag_queue *q)
{
struct frag_queue *fq;
fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
+ return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
}
bool ip6_frag_match(struct inet_frag_queue *q, void *a)
@@ -193,7 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.ecn = ecn;
read_lock(&ip6_frags.lock);
- hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
+ hash = inet6_hash_frag(id, src, dst);
q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
if (IS_ERR_OR_NULL(q)) {
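
Both reassembly paths now derive their bucket from a table-private seed that net_get_random_once() fills on first use, so ip6_frags and nf_frags no longer share inet6_hash_frag() or its rnd. A sketch of the effect; mix3() is a simplified stand-in for jhash_3words(), and the seeds are hard-coded here only for illustration:

#include <stdint.h>
#include <stdio.h>

#define INETFRAGS_HASHSZ 1024

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
	uint32_t h = seed ^ 0x9e3779b9u;

	h = (h ^ a) * 0x85ebca6bu;
	h = (h ^ b) * 0xc2b2ae35u;
	h = (h ^ c) * 0x27d4eb2fu;
	return h ^ (h >> 16);
}

int main(void)
{
	uint32_t saddr_hash = 0x20010db8, daddr_hash = 0x20010db9, id = 7;
	uint32_t ip6_seed = 0x12345678, nf_seed = 0x9abcdef0; /* stand-ins */

	/* The same fragment triple lands in different, unrelated buckets
	 * in the two tables once each keeps its own secret. */
	printf("ip6_frags bucket: %u\n",
	       mix3(saddr_hash, daddr_hash, id, ip6_seed) &
	       (INETFRAGS_HASHSZ - 1));
	printf("nf_frags bucket:  %u\n",
	       mix3(saddr_hash, daddr_hash, id, nf_seed) &
	       (INETFRAGS_HASHSZ - 1));
	return 0;
}
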
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c3130ffc3bca..fd399ac6c1f7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -476,6 +476,24 @@ out:
}
#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+ struct work_struct work;
+ struct in6_addr target;
+ struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+ struct in6_addr mcaddr;
+ struct __rt6_probe_work *work =
+ container_of(w, struct __rt6_probe_work, work);
+
+ addrconf_addr_solict_mult(&work->target, &mcaddr);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ dev_put(work->dev);
+ kfree(w);
+}
+
static void rt6_probe(struct rt6_info *rt)
{
struct neighbour *neigh;
@@ -499,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt)
if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct in6_addr mcaddr;
- struct in6_addr *target;
+ struct __rt6_probe_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (neigh) {
+ if (neigh && work)
neigh->updated = jiffies;
+
+ if (neigh)
write_unlock(&neigh->lock);
- }
- target = (struct in6_addr *)&rt->rt6i_gateway;
- addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
} else {
out:
write_unlock(&neigh->lock);
@@ -595,7 +619,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
goto out;
m = rt6_score_route(rt, oif, strict);
- if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
+ if (m == RT6_NUD_FAIL_SOFT) {
match_do_rr = true;
m = 0; /* lowest valid score */
} else if (m < 0) {
@@ -851,7 +875,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
- rt->rt6i_gateway = *daddr;
}
rt->rt6i_flags |= RTF_CACHE;
@@ -1064,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
return NULL;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
- return dst;
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
- return NULL;
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return dst;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1335,6 +1361,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
@@ -1870,7 +1897,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_gateway = ort->rt6i_gateway;
+ if (ort->rt6i_flags & RTF_GATEWAY)
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ else
+ rt->rt6i_gateway = *dest;
rt->rt6i_flags = ort->rt6i_flags;
if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
(RTF_DEFAULT | RTF_ADDRCONF))
@@ -2157,6 +2187,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
else
rt->rt6i_flags |= RTF_LOCAL;
+ rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
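
The reworked ip6_dst_check() above now also rejects a cached dst whose route has expired, on top of the existing generation-id and fib-serial checks. A condensed sketch of the resulting validation order (field names simplified; not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct toy_rt6 {
	unsigned int genid;
	bool has_node;
	unsigned int fn_sernum;
	bool expired;
};

/* A cached route stays usable only if the namespace generation, the fib
 * node serial number, and the expiry all still hold. */
static bool dst_still_valid(const struct toy_rt6 *rt,
			    unsigned int net_genid, unsigned int cookie)
{
	if (rt->genid != net_genid)
		return false;
	if (!rt->has_node || rt->fn_sernum != cookie)
		return false;
	if (rt->expired)	/* the check this hunk adds */
		return false;
	return true;
}

int main(void)
{
	struct toy_rt6 rt = { .genid = 3, .has_node = true,
			      .fn_sernum = 7, .expired = true };

	printf("valid: %d\n", dst_still_valid(&rt, 3, 7));	/* 0: expired */
	return 0;
}
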
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 19269453a8ea..3a9038dd818d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -933,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb))
+ goto out;
err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
ttl, df, !net_eq(tunnel->net, dev_net(dev)));
@@ -946,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
tx_error_icmp:
dst_link_failure(skb);
tx_error:
- dev->stats.tx_errors++;
dev_kfree_skb(skb);
+out:
+ dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
@@ -956,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto out;
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
+out:
+ dev->stats.tx_errors++;
+ return NETDEV_TX_OK;
}
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
@@ -1292,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev)
free_netdev(dev);
}
+#define SIT_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static void ipip6_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip6_netdev_ops;
@@ -1305,6 +1313,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_LLTX;
+ dev->features |= SIT_FEATURES;
+ dev->hw_features |= SIT_FEATURES;
}
static int ipip6_tunnel_init(struct net_device *dev)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d04d3f1dd9b7..535a3ad262f1 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,6 +24,8 @@
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+
/* RFC 2460, Section 8.3:
* [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
*
@@ -61,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
__be16 sport, __be16 dport, u32 count, int c)
{
- __u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
+ __u32 *tmp;
+
+ net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
+
+ tmp = __get_cpu_var(ipv6_cookie_scratch);
/*
* we have 320 bits of information to hash, copy in the remaining
- * 192 bits required for sha_transform, from the syncookie_secret
+ * 192 bits required for sha_transform, from the syncookie6_secret
* and overwrite the digest with the secret
*/
- memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp + 10, syncookie6_secret[c], 44);
memcpy(tmp, saddr, 16);
memcpy(tmp + 4, daddr, 16);
tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b996ee2005a9..0740f93a114a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1929,6 +1929,7 @@ struct proto tcpv6_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
+ .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 2ec6bf6a0aa0..c1097c798900 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -83,7 +83,7 @@ static int tcp6_gro_complete(struct sk_buff *skb)
static const struct net_offload tcpv6_offload = {
.callbacks = {
.gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_tso_segment,
+ .gso_segment = tcp_gso_segment,
.gro_receive = tcp6_gro_receive,
.gro_complete = tcp6_gro_complete,
},
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b496de19a341..f3893e897f72 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,6 +53,29 @@
#include <trace/events/skb.h>
#include "udp_impl.h"
+static unsigned int udp6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
+{
+ static u32 udp6_ehash_secret __read_mostly;
+ static u32 udp_ipv6_hash_secret __read_mostly;
+
+ u32 lhash, fhash;
+
+ net_get_random_once(&udp6_ehash_secret,
+ sizeof(udp6_ehash_secret));
+ net_get_random_once(&udp_ipv6_hash_secret,
+ sizeof(udp_ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ udp_ipv6_hash_secret + net_hash_mix(net));
+}
+
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
@@ -214,8 +237,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
} else if (score == SCORE2_MAX)
goto exact_match;
@@ -295,8 +318,8 @@ begin:
badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- hash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
+ hash = udp6_ehashfn(net, daddr, hnum,
+ saddr, sport);
matches = 1;
}
} else if (score == badness && reuseport) {
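(Both reuseport hunks swap inet6_ehashfn() for the new udp6_ehashfn(), so the per-flow hash that spreads datagrams across SO_REUSEPORT sockets no longer shares a secret with the established-connection hash. The lookup then reduces that hash onto the group of equally-scored sockets; a standalone sketch of that multiply-shift reduction — the reciprocal_scale-style reduction assumed from the surrounding lookup code, which is not shown in this hunk:)

#include <stdint.h>
#include <stdio.h>

/* pick one of 'matches' equally-scored reuseport sockets for a flow */
static unsigned int reuseport_select(uint32_t flow_hash, unsigned int matches)
{
	/* map the 32-bit hash uniformly onto [0, matches) without a division */
	return (unsigned int)(((uint64_t)flow_hash * matches) >> 32);
}

int main(void)
{
	printf("socket index: %u\n", reuseport_select(0xdeadbeef, 4));
	return 0;
}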
@@ -1220,9 +1243,6 @@ do_udp_sendmsg:
if (tclass < 0)
tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
@@ -1241,6 +1261,8 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 4691ed50a928..c779c3c90b9d 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -7,33 +7,32 @@
#include <net/inet_common.h>
#include <net/transp_v6.h>
-extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
-extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
- u8 , u8 , int , __be32 , struct udp_table *);
+int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
+void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+ __be32, struct udp_table *);
-extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
+int udp_v6_get_port(struct sock *sk, unsigned short snum);
-extern int udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#ifdef CONFIG_COMPAT
-extern int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-extern int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
+int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
+int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
#endif
-extern int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len);
-extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len);
-extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern void udpv6_destroy_sock(struct sock *sk);
+int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len);
+int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void udpv6_destroy_sock(struct sock *sk);
-extern void udp_v6_clear_sk(struct sock *sk, int size);
+void udp_v6_clear_sk(struct sock *sk, int size);
#ifdef CONFIG_PROC_FS
-extern int udp6_seq_show(struct seq_file *seq, void *v);
+int udp6_seq_show(struct seq_file *seq, void *v);
#endif
#endif /* _UDP6_IMPL_H */
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 60559511bd9c..08e23b0bf302 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -64,6 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_GRE |
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT |
SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
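(udp6_ufo_fragment() white-lists the GSO type bits it knows how to handle, so every new encapsulation — here SKB_GSO_IPIP and SKB_GSO_SIT — must be added explicitly or tunneled UFO packets are rejected. A minimal model of that allow-list gating; the flag values are illustrative, not the kernel's:)

#include <stdio.h>

#define GSO_UDP    (1 << 0)
#define GSO_DODGY  (1 << 1)
#define GSO_GRE    (1 << 2)
#define GSO_IPIP   (1 << 3)
#define GSO_SIT    (1 << 4)

static int ufo_acceptable(unsigned int type)
{
	unsigned int allowed = GSO_UDP | GSO_DODGY | GSO_GRE |
			       GSO_IPIP | GSO_SIT;

	/* any unknown bit, or a missing UDP bit, means reject */
	if ((type & ~allowed) || !(type & GSO_UDP))
		return 0;
	return 1;
}

int main(void)
{
	printf("UDP over SIT: %d\n", ufo_acceptable(GSO_UDP | GSO_SIT));
	printf("unknown bit:  %d\n", ufo_acceptable(GSO_UDP | (1 << 7)));
	return 0;
}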
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4770d515c2c8..cb04f7a16b5e 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,6 +18,65 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
+/* Informational hook. The decap is still done here. */
+static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
+
+int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
+{
+ struct xfrm_tunnel_notifier __rcu **pprev;
+ struct xfrm_tunnel_notifier *t;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ mutex_lock(&xfrm6_mode_tunnel_input_mutex);
+
+ for (pprev = &rcv_notify_handlers;
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+
+ }
+
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
+
+int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
+{
+ struct xfrm_tunnel_notifier __rcu **pprev;
+ struct xfrm_tunnel_notifier *t;
+ int ret = -ENOENT;
+
+ mutex_lock(&xfrm6_mode_tunnel_input_mutex);
+ for (pprev = &rcv_notify_handlers;
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
+
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
@@ -63,8 +122,15 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+#define for_each_input_rcu(head, handler) \
+ for (handler = rcu_dereference(head); \
+ handler != NULL; \
+ handler = rcu_dereference(handler->next))
+
+
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
@@ -72,6 +138,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
+ for_each_input_rcu(rcv_notify_handlers, handler)
+ handler->handler(skb);
+
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
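(The notifier list added above is a standard RCU-protected chain: registration and deregistration serialize on a mutex and publish with rcu_assign_pointer(), deregistration waits out readers via synchronize_net(), and the hot input path walks the chain under rcu_read_lock() only, through the for_each_input_rcu() helper. A compressed userspace sketch of the same writer-locked, reader-lockless discipline, using C11 acquire/release atomics in place of RCU — illustrative only; node reclamation is omitted, which is exactly what the grace period handles in the real code:)

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct notifier {
	int priority;
	void (*fn)(int event);
	struct notifier *_Atomic next;
};

static struct notifier *_Atomic chain;	/* readers load-acquire this */
static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

static int notifier_register(struct notifier *n)
{
	struct notifier *_Atomic *pprev = &chain;
	struct notifier *t;

	pthread_mutex_lock(&chain_lock);
	for (t = atomic_load(pprev); t;
	     pprev = &t->next, t = atomic_load(pprev)) {
		if (t->priority > n->priority)
			break;			/* insert before t */
		if (t->priority == n->priority) {
			pthread_mutex_unlock(&chain_lock);
			return -1;		/* -EEXIST in the patch */
		}
	}
	atomic_store(&n->next, t);
	/* release-publish, like rcu_assign_pointer() */
	atomic_store_explicit(pprev, n, memory_order_release);
	pthread_mutex_unlock(&chain_lock);
	return 0;
}

static void notifier_call_chain(int event)
{
	struct notifier *t;

	/* lockless walk; safe because nodes are never freed here */
	for (t = atomic_load_explicit(&chain, memory_order_acquire); t;
	     t = atomic_load_explicit(&t->next, memory_order_acquire))
		t->fn(event);
}

static void show(int ev) { printf("event %d\n", ev); }

int main(void)
{
	static struct notifier n = { .priority = 1, .fn = show };

	notifier_register(&n);
	notifier_call_chain(42);
	return 0;
}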
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 23ed03d786c8..5f8e128c512d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u8 nexthdr = nh[IP6CB(skb)->nhoff];
+ int oif = 0;
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
+ fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
@@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = {
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = 1024,
+ .gc_thresh = 32768,
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 564eb0b8afa3..8d65bb9477fc 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -509,16 +509,11 @@ typedef struct irnet_ctrl_channel
*/
/* -------------------------- IRDA PART -------------------------- */
-extern int
- irda_irnet_create(irnet_socket *); /* Initialise a IrNET socket */
-extern int
- irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */
-extern void
- irda_irnet_destroy(irnet_socket *); /* Teardown a IrNET socket */
-extern int
- irda_irnet_init(void); /* Initialise IrDA part of IrNET */
-extern void
- irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */
+int irda_irnet_create(irnet_socket *); /* Initialise an IrNET socket */
+int irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */
+void irda_irnet_destroy(irnet_socket *); /* Teardown an IrNET socket */
+int irda_irnet_init(void); /* Initialise IrDA part of IrNET */
+void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */
/**************************** VARIABLES ****************************/
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d585370c5b4..911ef03bf8fb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1098,7 +1098,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->id.proto = proto;
x->id.spi = sa->sadb_sa_spi;
- x->props.replay_window = sa->sadb_sa_replay;
+ x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay,
+ (sizeof(x->replay.bitmap) * 8));
if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
x->props.flags |= XFRM_STATE_NOECN;
if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
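(The af_key clamp matters because sadb_sa_replay is userspace-controlled: without the min_t(), a replay window wider than x->replay.bitmap would make the anti-replay check index past its state. A self-contained sketch of the clamp plus the bitmap test it protects — simplified from the xfrm replay logic, struct layout illustrative:)

#include <stdint.h>
#include <stdio.h>

struct replay_state {
	uint32_t seq;     /* highest sequence number seen */
	uint32_t bitmap;  /* window covering the last 32 sequence numbers */
};

static unsigned int clamp_window(unsigned int requested)
{
	unsigned int max = sizeof(((struct replay_state *)0)->bitmap) * 8;

	return requested < max ? requested : max; /* the min_t() in the patch */
}

/* returns 1 if seq is a replay (already seen, or older than the window) */
static int replay_check(const struct replay_state *st, uint32_t seq,
			unsigned int window)
{
	uint32_t diff;

	if (seq > st->seq)
		return 0;		/* new highest: always fresh */
	diff = st->seq - seq;
	if (diff >= window)
		return 1;		/* fell off the window: drop */
	return (st->bitmap >> diff) & 1; /* bit set => duplicate */
}

int main(void)
{
	struct replay_state st = { .seq = 100, .bitmap = 0x5 };

	printf("window clamps to %u\n", clamp_window(128)); /* 32 */
	printf("seq 100 replayed: %d\n", replay_check(&st, 100, 32));
	return 0;
}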
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 6f251cbc2ed7..1ee9f6965d68 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -238,29 +238,40 @@ out:
return tunnel;
}
-extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel);
-extern void l2tp_tunnel_sock_put(struct sock *sk);
-extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
-extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
-extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
-extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
-extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
-
-extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
-extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
-extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
-extern void __l2tp_session_unhash(struct l2tp_session *session);
-extern int l2tp_session_delete(struct l2tp_session *session);
-extern void l2tp_session_free(struct l2tp_session *session);
-extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
-extern int l2tp_session_queue_purge(struct l2tp_session *session);
-extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
-
-extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
-
-extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
-extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_sock_put(struct sock *sk);
+struct l2tp_session *l2tp_session_find(struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id);
+struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
+struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
+ u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
+ struct l2tp_tunnel **tunnelp);
+void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+struct l2tp_session *l2tp_session_create(int priv_size,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg);
+void __l2tp_session_unhash(struct l2tp_session *session);
+int l2tp_session_delete(struct l2tp_session *session);
+void l2tp_session_free(struct l2tp_session *session);
+void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
+ unsigned char *ptr, unsigned char *optr, u16 hdrflags,
+ int length, int (*payload_hook)(struct sk_buff *skb));
+int l2tp_session_queue_purge(struct l2tp_session *session);
+int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
+
+int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
+ int hdr_len);
+
+int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
+ const struct l2tp_nl_cmd_ops *ops);
+void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* Session reference counts. Incremented when code obtains a reference
* to a session.
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f0a7adaef2ea..ffda81ef1a70 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
goto error_put_sess_tun;
}
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(ps->tunnel_sock);
sock_put(sk);
@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
skb->data[0] = ppph[0];
skb->data[1] = ppph[1];
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(sk_tun);
sock_put(sk);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 2e7855a1b10d..b0a651cc389f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2865,30 +2865,43 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
if (!ieee80211_sdata_running(sdata))
return;
- if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
- return;
-
sdata->radar_required = sdata->csa_radar_required;
err = ieee80211_vif_change_channel(sdata, &local->csa_chandef,
&changed);
if (WARN_ON(err < 0))
return;
- err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
- if (err < 0)
- return;
+ if (!local->use_chanctx) {
+ local->_oper_chandef = local->csa_chandef;
+ ieee80211_hw_config(local, 0);
+ }
- changed |= err;
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_bss_info_change_notify(sdata, changed);
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
+ if (err < 0)
+ return;
+ changed |= err;
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+
+ ieee80211_bss_info_change_notify(sdata, err);
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ ieee80211_ibss_finish_csa(sdata);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
sdata->vif.csa_active = false;
ieee80211_wake_queues_by_reason(&sdata->local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
- ieee80211_bss_info_change_notify(sdata, changed);
-
cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef);
}
@@ -2936,20 +2949,56 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.csa_active)
return -EBUSY;
- /* only handle AP for now. */
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
+ sdata->csa_counter_offset_beacon =
+ params->counter_offset_beacon;
+ sdata->csa_counter_offset_presp = params->counter_offset_presp;
+ sdata->u.ap.next_beacon =
+ cfg80211_beacon_dup(&params->beacon_after);
+ if (!sdata->u.ap.next_beacon)
+ return -ENOMEM;
+
+ err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
+ if (err < 0) {
+ kfree(sdata->u.ap.next_beacon);
+ return err;
+ }
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ if (!sdata->vif.bss_conf.ibss_joined)
+ return -EINVAL;
+
+ if (params->chandef.width != sdata->u.ibss.chandef.width)
+ return -EINVAL;
+
+ switch (params->chandef.width) {
+ case NL80211_CHAN_WIDTH_40:
+ if (cfg80211_get_chandef_type(&params->chandef) !=
+ cfg80211_get_chandef_type(&sdata->u.ibss.chandef))
+ return -EINVAL;
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* changes into another band are not supported */
+ if (sdata->u.ibss.chandef.chan->band !=
+ params->chandef.chan->band)
+ return -EINVAL;
+
+ err = ieee80211_ibss_csa_beacon(sdata, params);
+ if (err < 0)
+ return err;
break;
default:
return -EOPNOTSUPP;
}
- sdata->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after);
- if (!sdata->u.ap.next_beacon)
- return -ENOMEM;
-
- sdata->csa_counter_offset_beacon = params->counter_offset_beacon;
- sdata->csa_counter_offset_presp = params->counter_offset_presp;
sdata->csa_radar_required = params->radar_required;
if (params->block_tx)
@@ -2957,10 +3006,6 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
- err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
- if (err < 0)
- return err;
-
local->csa_chandef = params->chandef;
sdata->vif.csa_active = true;
@@ -3014,7 +3059,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
need_offchan = true;
if (!ieee80211_is_action(mgmt->frame_control) ||
mgmt->u.action.category == WLAN_CATEGORY_PUBLIC ||
- mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED)
+ mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED ||
+ mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
break;
rcu_read_lock();
sta = sta_info_get(sdata, mgmt->da);
@@ -3518,7 +3564,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
return -EINVAL;
}
band = chanctx_conf->def.chan->band;
- sta = sta_info_get(sdata, peer);
+ sta = sta_info_get_bss(sdata, peer);
if (sta) {
qos = test_sta_flag(sta, WLAN_STA_WME);
} else {
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 3a4764b2869e..03ba6b5c5373 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -453,11 +453,6 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
drv_change_chanctx(local, ctx, chanctx_changed);
- if (!local->use_chanctx) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
- }
-
ieee80211_recalc_chanctx_chantype(local, ctx);
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b0e32d628114..5c090e41d9bb 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -103,54 +103,57 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
if (!buf)
return 0;
- sf += snprintf(buf, mxln - sf, "0x%x\n", local->hw.flags);
+ sf += scnprintf(buf, mxln - sf, "0x%x\n", local->hw.flags);
if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
- sf += snprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n");
+ sf += scnprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n");
if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
- sf += snprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n");
+ sf += scnprintf(buf + sf, mxln - sf, "RX_INCLUDES_FCS\n");
if (local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)
- sf += snprintf(buf + sf, mxln - sf,
- "HOST_BCAST_PS_BUFFERING\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "HOST_BCAST_PS_BUFFERING\n");
if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)
- sf += snprintf(buf + sf, mxln - sf,
- "2GHZ_SHORT_SLOT_INCAPABLE\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "2GHZ_SHORT_SLOT_INCAPABLE\n");
if (local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)
- sf += snprintf(buf + sf, mxln - sf,
- "2GHZ_SHORT_PREAMBLE_INCAPABLE\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "2GHZ_SHORT_PREAMBLE_INCAPABLE\n");
if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
- sf += snprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_UNSPEC\n");
if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
- sf += snprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SIGNAL_DBM\n");
if (local->hw.flags & IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC)
- sf += snprintf(buf + sf, mxln - sf, "NEED_DTIM_BEFORE_ASSOC\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "NEED_DTIM_BEFORE_ASSOC\n");
if (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)
- sf += snprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SPECTRUM_MGMT\n");
if (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)
- sf += snprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n");
+ sf += scnprintf(buf + sf, mxln - sf, "AMPDU_AGGREGATION\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PS\n");
if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
- sf += snprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n");
+ sf += scnprintf(buf + sf, mxln - sf, "PS_NULLFUNC_STACK\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n");
if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE)
- sf += snprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
+ sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_STATIC_SMPS\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_SMPS\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "SUPPORTS_DYNAMIC_SMPS\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n");
if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
- sf += snprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n");
+ sf += scnprintf(buf + sf, mxln - sf,
+ "REPORTS_TX_ACK_STATUS\n");
if (local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
- sf += snprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n");
+ sf += scnprintf(buf + sf, mxln - sf, "CONNECTION_MONITOR\n");
if (local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)
- sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n");
+ sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n");
if (local->hw.flags & IEEE80211_HW_AP_LINK_PS)
- sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n");
+ sf += scnprintf(buf + sf, mxln - sf, "AP_LINK_PS\n");
if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)
- sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n");
+ sf += scnprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n");
rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
kfree(buf);
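(The snprintf()→scnprintf() conversion above fixes a classic accumulation bug: snprintf() returns the length the output *would* have had, so summing its return values can push the offset past the end of the buffer, after which `mxln - sf` underflows and the next call is handed a huge size. scnprintf() returns the bytes actually stored. A userspace demonstration — scnprintf() is kernel-only, so an equivalent is defined here:)

#include <stdarg.h>
#include <stdio.h>

/* userspace stand-in for the kernel's scnprintf(): returns the number
 * of bytes actually written, never more than size - 1 */
static int scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int i;

	va_start(args, fmt);
	i = vsnprintf(buf, size, fmt, args);
	va_end(args);
	if (i >= (int)size)
		i = size ? (int)size - 1 : 0;
	return i;
}

int main(void)
{
	char buf[16];
	size_t off;

	/* snprintf returns 20 even though only 15 chars + NUL fit, so
	 * 'off' overshoots and 'sizeof(buf) - off' would wrap around */
	off = (size_t)snprintf(buf, sizeof(buf), "AAAAAAAAAAAAAAAAAAAA");
	printf("snprintf off=%zu (past the 16-byte buffer!)\n", off);

	off = (size_t)scnprintf(buf, sizeof(buf), "AAAAAAAAAAAAAAAAAAAA");
	printf("scnprintf off=%zu (clamped to what was stored)\n", off);
	return 0;
}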
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index b3ea11f3d526..5d03c47c0a4c 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1085,4 +1085,31 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata,
}
}
+static inline int drv_join_ibss(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ int ret = 0;
+
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
+ if (local->ops->join_ibss)
+ ret = local->ops->join_ibss(&local->hw, &sdata->vif);
+ trace_drv_return_int(local, ret);
+ return ret;
+}
+
+static inline void drv_leave_ibss(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ trace_drv_leave_ibss(local, sdata);
+ if (local->ops->leave_ibss)
+ local->ops->leave_ibss(&local->hw, &sdata->vif);
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a12afe77bb26..21a0b8835cb3 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -39,7 +39,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
const int beacon_int, const u32 basic_rates,
const u16 capability, u64 tsf,
struct cfg80211_chan_def *chandef,
- bool *have_higher_than_11mbit)
+ bool *have_higher_than_11mbit,
+ struct cfg80211_csa_settings *csa_settings)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
@@ -59,6 +60,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
2 + 8 /* max Supported Rates */ +
3 /* max DS params */ +
4 /* IBSS params */ +
+ 5 /* Channel Switch Announcement */ +
2 + (IEEE80211_MAX_SUPP_RATES - 8) +
2 + sizeof(struct ieee80211_ht_cap) +
2 + sizeof(struct ieee80211_ht_operation) +
@@ -135,6 +137,16 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = 0;
*pos++ = 0;
+ if (csa_settings) {
+ *pos++ = WLAN_EID_CHANNEL_SWITCH;
+ *pos++ = 3;
+ *pos++ = csa_settings->block_tx ? 1 : 0;
+ *pos++ = ieee80211_frequency_to_channel(
+ csa_settings->chandef.chan->center_freq);
+ sdata->csa_counter_offset_beacon = (pos - presp->head);
+ *pos++ = csa_settings->count;
+ }
+
/* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
if (rates_n > 8) {
*pos++ = WLAN_EID_EXT_SUPP_RATES;
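(The five bytes appended for csa_settings form the fixed-size Channel Switch Announcement element of IEEE 802.11-2012 8.4.2.21, with the counter's offset recorded in csa_counter_offset_beacon so the beacon path can decrement it in place. A standalone packing sketch — buffer handling simplified from the skb-based code:)

#include <stdint.h>
#include <stdio.h>

#define WLAN_EID_CHANNEL_SWITCH 37

/* pack a Channel Switch Announcement element; returns bytes written */
static size_t put_csa_ie(uint8_t *pos, int block_tx, int new_chan, int count)
{
	uint8_t *start = pos;

	*pos++ = WLAN_EID_CHANNEL_SWITCH; /* element ID */
	*pos++ = 3;                       /* payload length */
	*pos++ = block_tx ? 1 : 0;        /* switch mode: 1 = stop TX */
	*pos++ = (uint8_t)new_chan;       /* new channel number */
	*pos++ = (uint8_t)count;          /* TBTTs until the switch */
	return (size_t)(pos - start);
}

int main(void)
{
	uint8_t buf[5];
	size_t n = put_csa_ie(buf, 1, 40, 5);

	for (size_t i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n"); /* 25 03 01 28 05 */
	return 0;
}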
@@ -217,6 +229,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct beacon_data *presp;
enum nl80211_bss_scan_width scan_width;
bool have_higher_than_11mbit;
+ int err;
sdata_assert_lock(sdata);
@@ -235,6 +248,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
ieee80211_bss_info_change_notify(sdata,
BSS_CHANGED_IBSS |
BSS_CHANGED_BEACON_ENABLED);
+ drv_leave_ibss(local, sdata);
}
presp = rcu_dereference_protected(ifibss->presp,
@@ -276,7 +290,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
capability, tsf, &chandef,
- &have_higher_than_11mbit);
+ &have_higher_than_11mbit, NULL);
if (!presp)
return;
@@ -317,11 +331,26 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
else
sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
+ ieee80211_set_wmm_default(sdata, true);
+
sdata->vif.bss_conf.ibss_joined = true;
sdata->vif.bss_conf.ibss_creator = creator;
- ieee80211_bss_info_change_notify(sdata, bss_change);
- ieee80211_set_wmm_default(sdata, true);
+ err = drv_join_ibss(local, sdata);
+ if (err) {
+ sdata->vif.bss_conf.ibss_joined = false;
+ sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.bss_conf.enable_beacon = false;
+ sdata->vif.bss_conf.ssid_len = 0;
+ RCU_INIT_POINTER(ifibss->presp, NULL);
+ kfree_rcu(presp, rcu_head);
+ ieee80211_vif_release_channel(sdata);
+ sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n",
+ err);
+ return;
+ }
+
+ ieee80211_bss_info_change_notify(sdata, bss_change);
ifibss->state = IEEE80211_IBSS_MLME_JOINED;
mod_timer(&ifibss->timer,
@@ -416,6 +445,169 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
tsf, false);
}
+static int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_csa_settings *csa_settings)
+{
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *mgmt;
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct ieee80211_local *local = sdata->local;
+ int freq;
+ int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) +
+ sizeof(mgmt->u.action.u.chan_switch);
+ u8 *pos;
+
+ skb = dev_alloc_skb(local->tx_headroom + hdr_len +
+ 5 + /* channel switch announcement element */
+ 3); /* secondary channel offset element */
+ if (!skb)
+ return -1;
+
+ skb_reserve(skb, local->tx_headroom);
+ mgmt = (struct ieee80211_mgmt *)skb_put(skb, hdr_len);
+ memset(mgmt, 0, hdr_len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+
+ eth_broadcast_addr(mgmt->da);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
+ mgmt->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT;
+ mgmt->u.action.u.chan_switch.action_code = WLAN_ACTION_SPCT_CHL_SWITCH;
+ pos = skb_put(skb, 5);
+ *pos++ = WLAN_EID_CHANNEL_SWITCH; /* EID */
+ *pos++ = 3; /* IE length */
+ *pos++ = csa_settings->block_tx ? 1 : 0; /* CSA mode */
+ freq = csa_settings->chandef.chan->center_freq;
+ *pos++ = ieee80211_frequency_to_channel(freq); /* channel */
+ *pos++ = csa_settings->count; /* count */
+
+ if (csa_settings->chandef.width == NL80211_CHAN_WIDTH_40) {
+ enum nl80211_channel_type ch_type;
+
+ skb_put(skb, 3);
+ *pos++ = WLAN_EID_SECONDARY_CHANNEL_OFFSET; /* EID */
+ *pos++ = 1; /* IE length */
+ ch_type = cfg80211_get_chandef_type(&csa_settings->chandef);
+ if (ch_type == NL80211_CHAN_HT40PLUS)
+ *pos++ = IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+ else
+ *pos++ = IEEE80211_HT_PARAM_CHA_SEC_BELOW;
+ }
+
+ ieee80211_tx_skb(sdata, skb);
+ return 0;
+}
+
+int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_csa_settings *csa_settings)
+{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct beacon_data *presp, *old_presp;
+ struct cfg80211_bss *cbss;
+ const struct cfg80211_bss_ies *ies;
+ u16 capability;
+ u64 tsf;
+ int ret = 0;
+
+ sdata_assert_lock(sdata);
+
+ capability = WLAN_CAPABILITY_IBSS;
+
+ if (ifibss->privacy)
+ capability |= WLAN_CAPABILITY_PRIVACY;
+
+ cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan,
+ ifibss->bssid, ifibss->ssid,
+ ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
+ WLAN_CAPABILITY_PRIVACY,
+ capability);
+
+ if (WARN_ON(!cbss)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+ tsf = ies->tsf;
+ rcu_read_unlock();
+ cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
+
+ old_presp = rcu_dereference_protected(ifibss->presp,
+ lockdep_is_held(&sdata->wdev.mtx));
+
+ presp = ieee80211_ibss_build_presp(sdata,
+ sdata->vif.bss_conf.beacon_int,
+ sdata->vif.bss_conf.basic_rates,
+ capability, tsf, &ifibss->chandef,
+ NULL, csa_settings);
+ if (!presp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rcu_assign_pointer(ifibss->presp, presp);
+ if (old_presp)
+ kfree_rcu(old_presp, rcu_head);
+
+ /* the beacon might not be transmitted for a while; send an action
+ * frame immediately to announce the channel switch
+ */
+ if (csa_settings)
+ ieee80211_send_action_csa(sdata, csa_settings);
+
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ out:
+ return ret;
+}
+
+int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct cfg80211_bss *cbss;
+ int err;
+ u16 capability;
+
+ sdata_lock(sdata);
+ /* update cfg80211 bss information with the new channel */
+ if (!is_zero_ether_addr(ifibss->bssid)) {
+ capability = WLAN_CAPABILITY_IBSS;
+
+ if (ifibss->privacy)
+ capability |= WLAN_CAPABILITY_PRIVACY;
+
+ cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
+ ifibss->chandef.chan,
+ ifibss->bssid, ifibss->ssid,
+ ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
+ WLAN_CAPABILITY_PRIVACY,
+ capability);
+ /* XXX: should not really modify cfg80211 data */
+ if (cbss) {
+ cbss->channel = sdata->local->csa_chandef.chan;
+ cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
+ }
+ }
+
+ ifibss->chandef = sdata->local->csa_chandef;
+
+ /* generate the beacon */
+ err = ieee80211_ibss_csa_beacon(sdata, NULL);
+ sdata_unlock(sdata);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+
+ cancel_work_sync(&ifibss->csa_connection_drop_work);
+}
+
static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta)
__acquires(RCU)
{
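(ieee80211_send_action_csa() converts the target chandef's center frequency back to a channel number for the IE via ieee80211_frequency_to_channel(). A simplified standalone version of that mapping for the common 2.4/5 GHz cases — the real helper also covers other bands such as 60 GHz:)

#include <stdio.h>

/* simplified ieee80211_frequency_to_channel(): MHz -> channel number */
static int freq_to_channel(int freq_mhz)
{
	if (freq_mhz == 2484)
		return 14;                      /* 2.4 GHz channel 14 */
	if (freq_mhz >= 2412 && freq_mhz < 2484)
		return (freq_mhz - 2407) / 5;   /* 2.4 GHz: ch 1..13 */
	if (freq_mhz >= 4910 && freq_mhz <= 4980)
		return (freq_mhz - 4000) / 5;   /* 4.9 GHz public safety */
	if (freq_mhz >= 5000 && freq_mhz < 5925)
		return (freq_mhz - 5000) / 5;   /* 5 GHz: ch 36.. */
	return -1;                              /* outside the modeled range */
}

int main(void)
{
	printf("2412 MHz -> ch %d\n", freq_to_channel(2412)); /* 1 */
	printf("5180 MHz -> ch %d\n", freq_to_channel(5180)); /* 36 */
	return 0;
}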
@@ -499,6 +691,295 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
return ieee80211_ibss_finish_sta(sta);
}
+static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ int active = 0;
+ struct sta_info *sta;
+
+ sdata_assert_lock(sdata);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sta->sdata == sdata &&
+ time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
+ jiffies)) {
+ active++;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return active;
+}
+
+static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct ieee80211_local *local = sdata->local;
+ struct cfg80211_bss *cbss;
+ struct beacon_data *presp;
+ struct sta_info *sta;
+ int active_ibss;
+ u16 capability;
+
+ active_ibss = ieee80211_sta_active_ibss(sdata);
+
+ if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
+ capability = WLAN_CAPABILITY_IBSS;
+
+ if (ifibss->privacy)
+ capability |= WLAN_CAPABILITY_PRIVACY;
+
+ cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan,
+ ifibss->bssid, ifibss->ssid,
+ ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
+ WLAN_CAPABILITY_PRIVACY,
+ capability);
+
+ if (cbss) {
+ cfg80211_unlink_bss(local->hw.wiphy, cbss);
+ cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
+ }
+ }
+
+ ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
+
+ sta_info_flush(sdata);
+
+ spin_lock_bh(&ifibss->incomplete_lock);
+ while (!list_empty(&ifibss->incomplete_stations)) {
+ sta = list_first_entry(&ifibss->incomplete_stations,
+ struct sta_info, list);
+ list_del(&sta->list);
+ spin_unlock_bh(&ifibss->incomplete_lock);
+
+ sta_info_free(local, sta);
+ spin_lock_bh(&ifibss->incomplete_lock);
+ }
+ spin_unlock_bh(&ifibss->incomplete_lock);
+
+ netif_carrier_off(sdata->dev);
+
+ sdata->vif.bss_conf.ibss_joined = false;
+ sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.bss_conf.enable_beacon = false;
+ sdata->vif.bss_conf.ssid_len = 0;
+
+ /* remove beacon */
+ presp = rcu_dereference_protected(ifibss->presp,
+ lockdep_is_held(&sdata->wdev.mtx));
+ RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
+ if (presp)
+ kfree_rcu(presp, rcu_head);
+
+ clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
+ BSS_CHANGED_IBSS);
+ drv_leave_ibss(local, sdata);
+ ieee80211_vif_release_channel(sdata);
+}
+
+static void ieee80211_csa_connection_drop_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.ibss.csa_connection_drop_work);
+
+ ieee80211_ibss_disconnect(sdata);
+ synchronize_rcu();
+ skb_queue_purge(&sdata->skb_queue);
+
+ /* trigger a scan to find another IBSS network to join */
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+}
+
+static bool
+ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+ struct ieee802_11_elems *elems,
+ bool beacon)
+{
+ struct cfg80211_csa_settings params;
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_chanctx *chanctx;
+ enum nl80211_channel_type ch_type;
+ int err, num_chanctx;
+ u32 sta_flags;
+ u8 mode;
+
+ if (sdata->vif.csa_active)
+ return true;
+
+ if (!sdata->vif.bss_conf.ibss_joined)
+ return false;
+
+ sta_flags = IEEE80211_STA_DISABLE_VHT;
+ switch (ifibss->chandef.width) {
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ sta_flags |= IEEE80211_STA_DISABLE_HT;
+ /* fall through */
+ case NL80211_CHAN_WIDTH_20:
+ sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+ break;
+ default:
+ break;
+ }
+
+ memset(&params, 0, sizeof(params));
+ err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon,
+ ifibss->chandef.chan->band,
+ sta_flags, ifibss->bssid,
+ &params.count, &mode,
+ &params.chandef);
+
+ /* can't switch to destination channel, fail */
+ if (err < 0)
+ goto disconnect;
+
+ /* did not contain a CSA */
+ if (err)
+ return false;
+
+ if (ifibss->chandef.chan->band != params.chandef.chan->band)
+ goto disconnect;
+
+ switch (ifibss->chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ /* keep our current HT mode (HT20/HT40+/HT40-), even if
+ * another mode has been announced. The mode is not adopted
+ * from the beacon while a CSA is in progress, so we keep
+ * announcing the mode we are actually using.
+ */
+ ch_type = cfg80211_get_chandef_type(&ifibss->chandef);
+ cfg80211_chandef_create(&params.chandef, params.chandef.chan,
+ ch_type);
+ break;
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ if (params.chandef.width != ifibss->chandef.width) {
+ sdata_info(sdata,
+ "IBSS %pM received channel switch from incompatible channel width (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+ ifibss->bssid,
+ params.chandef.chan->center_freq,
+ params.chandef.width,
+ params.chandef.center_freq1,
+ params.chandef.center_freq2);
+ goto disconnect;
+ }
+ break;
+ default:
+ /* should not happen, sta_flags should prevent VHT modes. */
+ WARN_ON(1);
+ goto disconnect;
+ }
+
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
+ IEEE80211_CHAN_DISABLED)) {
+ sdata_info(sdata,
+ "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+ ifibss->bssid,
+ params.chandef.chan->center_freq,
+ params.chandef.width,
+ params.chandef.center_freq1,
+ params.chandef.center_freq2);
+ goto disconnect;
+ }
+
+ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
+ &params.chandef);
+ if (err < 0)
+ goto disconnect;
+ if (err) {
+ params.radar_required = true;
+
+ /* TODO: IBSS-DFS not (yet) supported, disconnect. */
+ goto disconnect;
+ }
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ goto disconnect;
+ }
+
+ /* don't handle for multi-VIF cases */
+ chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
+ if (chanctx->refcount > 1) {
+ rcu_read_unlock();
+ goto disconnect;
+ }
+ num_chanctx = 0;
+ list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list)
+ num_chanctx++;
+
+ if (num_chanctx > 1) {
+ rcu_read_unlock();
+ goto disconnect;
+ }
+ rcu_read_unlock();
+
+ /* all checks done, now perform the channel switch. */
+ ibss_dbg(sdata,
+ "received channel switch announcement to go to channel %d MHz\n",
+ params.chandef.chan->center_freq);
+
+ params.block_tx = !!mode;
+
+ ieee80211_ibss_csa_beacon(sdata, &params);
+ sdata->csa_radar_required = params.radar_required;
+
+ if (params.block_tx)
+ ieee80211_stop_queues_by_reason(&sdata->local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+
+ sdata->local->csa_chandef = params.chandef;
+ sdata->vif.csa_active = true;
+
+ ieee80211_bss_info_change_notify(sdata, err);
+ drv_channel_switch_beacon(sdata, &params.chandef);
+
+ return true;
+disconnect:
+ ibss_dbg(sdata, "Can't handle channel switch, disconnect\n");
+ ieee80211_queue_work(&sdata->local->hw,
+ &ifibss->csa_connection_drop_work);
+
+ return true;
+}
+
+static void
+ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee802_11_elems *elems)
+{
+ int required_len;
+
+ if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+ return;
+
+ /* CSA is the only action we handle for now */
+ if (mgmt->u.action.u.measurement.action_code !=
+ WLAN_ACTION_SPCT_CHL_SWITCH)
+ return;
+
+ required_len = IEEE80211_MIN_ACTION_SIZE +
+ sizeof(mgmt->u.action.u.chan_switch);
+ if (len < required_len)
+ return;
+
+ ieee80211_ibss_process_chanswitch(sdata, elems, false);
+}
+
static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
size_t len)
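(ieee80211_ibss_disconnect() above drains incomplete_stations with the usual drop-the-lock-to-free pattern: pop one entry under the spinlock, release the lock for the heavier sta_info_free(), then re-acquire and continue. A compact pthread rendition of the same loop — types and names illustrative:)

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct sta {
	struct sta *next;
	int id;
};

static struct sta *incomplete;	/* singly-linked list head */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void drain_incomplete(void)
{
	pthread_mutex_lock(&lock);
	while (incomplete) {
		struct sta *sta = incomplete;

		incomplete = sta->next;
		/* free outside the lock, like sta_info_free() after
		 * spin_unlock_bh() in the patch */
		pthread_mutex_unlock(&lock);
		printf("freeing sta %d\n", sta->id);
		free(sta);
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct sta *s = malloc(sizeof(*s));

		if (!s)
			abort();
		s->id = i;
		s->next = incomplete;
		incomplete = s;
	}
	drain_incomplete();
	return 0;
}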
@@ -661,10 +1142,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
/* check if we need to merge IBSS */
- /* we use a fixed BSSID */
- if (sdata->u.ibss.fixed_bssid)
- goto put_bss;
-
/* not an IBSS */
if (!(cbss->capability & WLAN_CAPABILITY_IBSS))
goto put_bss;
@@ -680,10 +1157,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.ssid_len))
goto put_bss;
+ /* process channel switch */
+ if (ieee80211_ibss_process_chanswitch(sdata, elems, true))
+ goto put_bss;
+
/* same BSSID */
if (ether_addr_equal(cbss->bssid, sdata->u.ibss.bssid))
goto put_bss;
+ /* we use a fixed BSSID */
+ if (sdata->u.ibss.fixed_bssid)
+ goto put_bss;
+
if (ieee80211_have_rx_timestamp(rx_status)) {
/* time when timestamp field was received */
rx_timestamp =
@@ -775,30 +1260,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
ieee80211_queue_work(&local->hw, &sdata->work);
}
-static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- int active = 0;
- struct sta_info *sta;
-
- sdata_assert_lock(sdata);
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (sta->sdata == sdata &&
- time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
- jiffies)) {
- active++;
- break;
- }
- }
-
- rcu_read_unlock();
-
- return active;
-}
-
static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
@@ -1076,6 +1537,8 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct ieee80211_rx_status *rx_status;
struct ieee80211_mgmt *mgmt;
u16 fc;
+ struct ieee802_11_elems elems;
+ int ies_len;
rx_status = IEEE80211_SKB_RXCB(skb);
mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -1101,6 +1564,27 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
case IEEE80211_STYPE_DEAUTH:
ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len);
break;
+ case IEEE80211_STYPE_ACTION:
+ switch (mgmt->u.action.category) {
+ case WLAN_CATEGORY_SPECTRUM_MGMT:
+ ies_len = skb->len -
+ offsetof(struct ieee80211_mgmt,
+ u.action.u.chan_switch.variable);
+
+ if (ies_len < 0)
+ break;
+
+ ieee802_11_parse_elems(
+ mgmt->u.action.u.chan_switch.variable,
+ ies_len, true, &elems);
+
+ if (elems.parse_error)
+ break;
+
+ ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt, skb->len,
+ rx_status, &elems);
+ break;
+ }
}
mgmt_out:
@@ -1167,6 +1651,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
(unsigned long) sdata);
INIT_LIST_HEAD(&ifibss->incomplete_stations);
spin_lock_init(&ifibss->incomplete_lock);
+ INIT_WORK(&ifibss->csa_connection_drop_work,
+ ieee80211_csa_connection_drop_work);
}
/* scan finished notification */
@@ -1265,73 +1751,19 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- struct ieee80211_local *local = sdata->local;
- struct cfg80211_bss *cbss;
- u16 capability;
- int active_ibss;
- struct sta_info *sta;
- struct beacon_data *presp;
-
- active_ibss = ieee80211_sta_active_ibss(sdata);
-
- if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
- capability = WLAN_CAPABILITY_IBSS;
-
- if (ifibss->privacy)
- capability |= WLAN_CAPABILITY_PRIVACY;
-
- cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan,
- ifibss->bssid, ifibss->ssid,
- ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
- WLAN_CAPABILITY_PRIVACY,
- capability);
- if (cbss) {
- cfg80211_unlink_bss(local->hw.wiphy, cbss);
- cfg80211_put_bss(local->hw.wiphy, cbss);
- }
- }
-
- ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
- memset(ifibss->bssid, 0, ETH_ALEN);
+ ieee80211_ibss_disconnect(sdata);
ifibss->ssid_len = 0;
-
- sta_info_flush(sdata);
-
- spin_lock_bh(&ifibss->incomplete_lock);
- while (!list_empty(&ifibss->incomplete_stations)) {
- sta = list_first_entry(&ifibss->incomplete_stations,
- struct sta_info, list);
- list_del(&sta->list);
- spin_unlock_bh(&ifibss->incomplete_lock);
-
- sta_info_free(local, sta);
- spin_lock_bh(&ifibss->incomplete_lock);
- }
- spin_unlock_bh(&ifibss->incomplete_lock);
-
- netif_carrier_off(sdata->dev);
+ memset(ifibss->bssid, 0, ETH_ALEN);
/* remove beacon */
kfree(sdata->u.ibss.ie);
- presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
- RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
/* on the next join, re-program HT parameters */
memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask));
- sdata->vif.bss_conf.ibss_joined = false;
- sdata->vif.bss_conf.ibss_creator = false;
- sdata->vif.bss_conf.enable_beacon = false;
- sdata->vif.bss_conf.ssid_len = 0;
- clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
- BSS_CHANGED_IBSS);
- ieee80211_vif_release_channel(sdata);
synchronize_rcu();
- kfree(presp);
skb_queue_purge(&sdata->skb_queue);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b6186517ec56..e73cd0637f3b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -322,7 +322,6 @@ struct ieee80211_roc_work {
/* flags used in struct ieee80211_if_managed.flags */
enum ieee80211_sta_flags {
- IEEE80211_STA_BEACON_POLL = BIT(0),
IEEE80211_STA_CONNECTION_POLL = BIT(1),
IEEE80211_STA_CONTROL_PORT = BIT(2),
IEEE80211_STA_DISABLE_HT = BIT(4),
@@ -487,6 +486,7 @@ struct ieee80211_if_managed {
struct ieee80211_if_ibss {
struct timer_list timer;
+ struct work_struct csa_connection_drop_work;
unsigned long last_scan_completed;
@@ -893,6 +893,8 @@ struct tpt_led_trigger {
* that the scan completed.
* @SCAN_ABORTED: Set for our scan work function when the driver reported
* a scan complete for an aborted scan.
+ * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
+ * cancelled.
*/
enum {
SCAN_SW_SCANNING,
@@ -900,6 +902,7 @@ enum {
SCAN_ONCHANNEL_SCANNING,
SCAN_COMPLETED,
SCAN_ABORTED,
+ SCAN_HW_CANCELLED,
};
/**
@@ -1330,6 +1333,10 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata);
void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
+int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_csa_settings *csa_settings);
+int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata);
+void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata);
/* mesh code */
void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata);
@@ -1481,6 +1488,29 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
size_t len);
+/**
+ * ieee80211_parse_ch_switch_ie - parses channel switch IEs
+ * @sdata: the sdata of the interface which has received the frame
+ * @elems: parsed 802.11 elements received with the frame
+ * @beacon: indicates if the frame was a beacon or probe response
+ * @current_band: indicates the current band
+ * @sta_flags: contains information about own capabilities and restrictions
+ * to decide which channel switch announcements can be accepted. Only the
* following subset of &enum ieee80211_sta_flags is evaluated:
+ * %IEEE80211_STA_DISABLE_HT, %IEEE80211_STA_DISABLE_VHT,
+ * %IEEE80211_STA_DISABLE_40MHZ, %IEEE80211_STA_DISABLE_80P80MHZ,
+ * %IEEE80211_STA_DISABLE_160MHZ.
+ * @count: to be filled with the counter until the switch (on success only)
+ * @bssid: the currently connected bssid (for reporting)
+ * @mode: to be filled with CSA mode (on success only)
+ * @new_chandef: to be filled with destination chandef (on success only)
+ * Return: 0 on success, <0 on error and >0 if there is nothing to parse.
+ */
+int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
+ struct ieee802_11_elems *elems, bool beacon,
+ enum ieee80211_band current_band,
+ u32 sta_flags, u8 *bssid, u8 *count, u8 *mode,
+ struct cfg80211_chan_def *new_chandef);
/* Suspend/resume and hw reconfiguration */
int ieee80211_reconfig(struct ieee80211_local *local);
@@ -1654,6 +1684,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
const struct ieee80211_ht_operation *ht_oper,
struct cfg80211_chan_def *chandef);
+u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
int __must_check
ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index fcecd633514e..e48f103b9ade 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -766,6 +766,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_mgd_stop(sdata);
+ if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ ieee80211_ibss_stop(sdata);
+
+
/*
* Remove all stations associated with this interface.
*
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 620677e897bd..3e51dd7d98b3 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -879,7 +879,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
keyconf->keylen, keyconf->key,
0, NULL);
if (IS_ERR(key))
- return ERR_PTR(PTR_ERR(key));
+ return ERR_CAST(key);
if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 86e4ad56b573..91cc8281e266 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,66 +145,6 @@ static int ecw2cw(int ecw)
return (1 << ecw) - 1;
}
-static u32 chandef_downgrade(struct cfg80211_chan_def *c)
-{
- u32 ret;
- int tmp;
-
- switch (c->width) {
- case NL80211_CHAN_WIDTH_20:
- c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
- break;
- case NL80211_CHAN_WIDTH_40:
- c->width = NL80211_CHAN_WIDTH_20;
- c->center_freq1 = c->chan->center_freq;
- ret = IEEE80211_STA_DISABLE_40MHZ |
- IEEE80211_STA_DISABLE_VHT;
- break;
- case NL80211_CHAN_WIDTH_80:
- tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P40 */
- tmp /= 2;
- /* freq_P40 */
- c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
- c->width = NL80211_CHAN_WIDTH_40;
- ret = IEEE80211_STA_DISABLE_VHT;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- c->center_freq2 = 0;
- c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ;
- break;
- case NL80211_CHAN_WIDTH_160:
- /* n_P20 */
- tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
- /* n_P80 */
- tmp /= 4;
- c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
- c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ;
- break;
- default:
- case NL80211_CHAN_WIDTH_20_NOHT:
- WARN_ON_ONCE(1);
- c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
- break;
- case NL80211_CHAN_WIDTH_5:
- case NL80211_CHAN_WIDTH_10:
- WARN_ON_ONCE(1);
- /* keep c->width */
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
- break;
- }
-
- WARN_ON_ONCE(!cfg80211_chandef_valid(c));
-
- return ret;
-}
-
static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
@@ -352,7 +292,7 @@ out:
break;
}
- ret |= chandef_downgrade(chandef);
+ ret |= ieee80211_chandef_downgrade(chandef);
}
if (chandef->width != vht_chandef.width && !tracking)
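(The removed chandef_downgrade() lives on as ieee80211_chandef_downgrade(), declared in ieee80211_i.h above, so its bandwidth-stepping arithmetic is now shared with the IBSS CSA code. The 80→40 MHz case computes which 40 MHz subblock contains the control channel; a standalone check of that arithmetic:)

#include <stdio.h>

/* stepping an 80 MHz chandef down to the 40 MHz subblock that
 * contains the control channel (the tmp arithmetic from the patch) */
static int downgrade_80_to_40_cf1(int control_freq, int center_freq1)
{
	int tmp = (30 + control_freq - center_freq1) / 20; /* n_P20 */

	tmp /= 2;                                          /* n_P40 */
	return center_freq1 - 20 + 40 * tmp;               /* freq_P40 */
}

int main(void)
{
	/* 80 MHz block centered at 5210 (ch 42), control channel 5180
	 * (ch 36): the containing 40 MHz block is centered at 5190 (ch 38) */
	printf("new CF1 = %d MHz\n", downgrade_80_to_40_cf1(5180, 5210));
	return 0;
}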
@@ -406,13 +346,13 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
*/
if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ &&
chandef.width == NL80211_CHAN_WIDTH_80P80)
- flags |= chandef_downgrade(&chandef);
+ flags |= ieee80211_chandef_downgrade(&chandef);
if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ &&
chandef.width == NL80211_CHAN_WIDTH_160)
- flags |= chandef_downgrade(&chandef);
+ flags |= ieee80211_chandef_downgrade(&chandef);
if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ &&
chandef.width > NL80211_CHAN_WIDTH_20)
- flags |= chandef_downgrade(&chandef);
+ flags |= ieee80211_chandef_downgrade(&chandef);
if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef))
return 0;
@@ -893,8 +833,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL))
+ if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE;
ieee80211_tx_skb(sdata, skb);
@@ -937,6 +876,8 @@ static void ieee80211_chswitch_work(struct work_struct *work)
container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ u32 changed = 0;
+ int ret;
if (!ieee80211_sdata_running(sdata))
return;
@@ -945,24 +886,39 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (!ifmgd->associated)
goto out;
- local->_oper_chandef = local->csa_chandef;
+ ret = ieee80211_vif_change_channel(sdata, &local->csa_chandef,
+ &changed);
+ if (ret) {
+ sdata_info(sdata,
+ "vif channel switch failed, disconnecting\n");
+ ieee80211_queue_work(&sdata->local->hw,
+ &ifmgd->csa_connection_drop_work);
+ goto out;
+ }
- if (!local->ops->channel_switch) {
- /* call "hw_config" only if doing sw channel switch */
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- } else {
- /* update the device channel directly */
- local->hw.conf.chandef = local->_oper_chandef;
+ if (!local->use_chanctx) {
+ local->_oper_chandef = local->csa_chandef;
+ /* Call "hw_config" only if doing sw channel switch.
+ * Otherwise update the channel directly
+ */
+ if (!local->ops->channel_switch)
+ ieee80211_hw_config(local, 0);
+ else
+ local->hw.conf.chandef = local->_oper_chandef;
}
/* XXX: shouldn't really modify cfg80211-owned data! */
- ifmgd->associated->channel = local->_oper_chandef.chan;
+ ifmgd->associated->channel = local->csa_chandef.chan;
/* XXX: wait for a beacon first? */
ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
+
+ ieee80211_bss_info_change_notify(sdata, changed);
+
out:
+ sdata->vif.csa_active = false;
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
sdata_unlock(sdata);
}
@@ -1000,20 +956,12 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct cfg80211_bss *cbss = ifmgd->associated;
- struct ieee80211_bss *bss;
struct ieee80211_chanctx *chanctx;
- enum ieee80211_band new_band;
- int new_freq;
- u8 new_chan_no;
+ enum ieee80211_band current_band;
u8 count;
u8 mode;
- struct ieee80211_channel *new_chan;
struct cfg80211_chan_def new_chandef = {};
- struct cfg80211_chan_def new_vht_chandef = {};
- const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
- const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
- const struct ieee80211_ht_operation *ht_oper;
- int secondary_channel_offset = -1;
+ int res;
sdata_assert_lock(sdata);
@@ -1027,162 +975,23 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
return;
- sec_chan_offs = elems->sec_chan_offs;
- wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
- ht_oper = elems->ht_operation;
-
- if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_40MHZ)) {
- sec_chan_offs = NULL;
- wide_bw_chansw_ie = NULL;
- /* only used for bandwidth here */
- ht_oper = NULL;
- }
-
- if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
- wide_bw_chansw_ie = NULL;
-
- if (elems->ext_chansw_ie) {
- if (!ieee80211_operating_class_to_band(
- elems->ext_chansw_ie->new_operating_class,
- &new_band)) {
- sdata_info(sdata,
- "cannot understand ECSA IE operating class %d, disconnecting\n",
- elems->ext_chansw_ie->new_operating_class);
- ieee80211_queue_work(&local->hw,
- &ifmgd->csa_connection_drop_work);
- }
- new_chan_no = elems->ext_chansw_ie->new_ch_num;
- count = elems->ext_chansw_ie->count;
- mode = elems->ext_chansw_ie->mode;
- } else if (elems->ch_switch_ie) {
- new_band = cbss->channel->band;
- new_chan_no = elems->ch_switch_ie->new_ch_num;
- count = elems->ch_switch_ie->count;
- mode = elems->ch_switch_ie->mode;
- } else {
- /* nothing here we understand */
- return;
- }
-
- bss = (void *)cbss->priv;
-
- new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
- new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
- if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {
- sdata_info(sdata,
- "AP %pM switches to unsupported channel (%d MHz), disconnecting\n",
- ifmgd->associated->bssid, new_freq);
+ current_band = cbss->channel->band;
+ res = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, current_band,
+ ifmgd->flags,
+ ifmgd->associated->bssid, &count,
+ &mode, &new_chandef);
+ if (res < 0)
ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
+ if (res)
return;
- }
-
- if (!beacon && sec_chan_offs) {
- secondary_channel_offset = sec_chan_offs->sec_chan_offs;
- } else if (beacon && ht_oper) {
- secondary_channel_offset =
- ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
- } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- /*
- * If it's not a beacon, HT is enabled and the IE not present,
- * it's 20 MHz, 802.11-2012 8.5.2.6:
- * This element [the Secondary Channel Offset Element] is
- * present when switching to a 40 MHz channel. It may be
- * present when switching to a 20 MHz channel (in which
- * case the secondary channel offset is set to SCN).
- */
- secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
- }
-
- switch (secondary_channel_offset) {
- default:
- /* secondary_channel_offset was present but is invalid */
- case IEEE80211_HT_PARAM_CHA_SEC_NONE:
- cfg80211_chandef_create(&new_chandef, new_chan,
- NL80211_CHAN_HT20);
- break;
- case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- cfg80211_chandef_create(&new_chandef, new_chan,
- NL80211_CHAN_HT40PLUS);
- break;
- case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- cfg80211_chandef_create(&new_chandef, new_chan,
- NL80211_CHAN_HT40MINUS);
- break;
- case -1:
- cfg80211_chandef_create(&new_chandef, new_chan,
- NL80211_CHAN_NO_HT);
- /* keep width for 5/10 MHz channels */
- switch (sdata->vif.bss_conf.chandef.width) {
- case NL80211_CHAN_WIDTH_5:
- case NL80211_CHAN_WIDTH_10:
- new_chandef.width = sdata->vif.bss_conf.chandef.width;
- break;
- default:
- break;
- }
- break;
- }
-
- if (wide_bw_chansw_ie) {
- new_vht_chandef.chan = new_chan;
- new_vht_chandef.center_freq1 =
- ieee80211_channel_to_frequency(
- wide_bw_chansw_ie->new_center_freq_seg0,
- new_band);
-
- switch (wide_bw_chansw_ie->new_channel_width) {
- default:
- /* hmmm, ignore VHT and use HT if present */
- case IEEE80211_VHT_CHANWIDTH_USE_HT:
- new_vht_chandef.chan = NULL;
- break;
- case IEEE80211_VHT_CHANWIDTH_80MHZ:
- new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
- break;
- case IEEE80211_VHT_CHANWIDTH_160MHZ:
- new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
- break;
- case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
- /* field is otherwise reserved */
- new_vht_chandef.center_freq2 =
- ieee80211_channel_to_frequency(
- wide_bw_chansw_ie->new_center_freq_seg1,
- new_band);
- new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
- break;
- }
- if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
- chandef_downgrade(&new_vht_chandef);
- if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ &&
- new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
- chandef_downgrade(&new_vht_chandef);
- if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ &&
- new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
- chandef_downgrade(&new_vht_chandef);
- }
-
- /* if VHT data is there validate & use it */
- if (new_vht_chandef.chan) {
- if (!cfg80211_chandef_compatible(&new_vht_chandef,
- &new_chandef)) {
- sdata_info(sdata,
- "AP %pM CSA has inconsistent channel data, disconnecting\n",
- ifmgd->associated->bssid);
- ieee80211_queue_work(&local->hw,
- &ifmgd->csa_connection_drop_work);
- return;
- }
- new_chandef = new_vht_chandef;
- }
if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef,
IEEE80211_CHAN_DISABLED)) {
sdata_info(sdata,
"AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
- ifmgd->associated->bssid, new_freq,
+ ifmgd->associated->bssid,
+ new_chandef.chan->center_freq,
new_chandef.width, new_chandef.center_freq1,
new_chandef.center_freq2);
ieee80211_queue_work(&local->hw,
@@ -1191,17 +1000,28 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+ sdata->vif.csa_active = true;
+ mutex_lock(&local->chanctx_mtx);
if (local->use_chanctx) {
- sdata_info(sdata,
- "not handling channel switch with channel contexts\n");
- ieee80211_queue_work(&local->hw,
- &ifmgd->csa_connection_drop_work);
- return;
+ u32 num_chanctx = 0;
+ list_for_each_entry(chanctx, &local->chanctx_list, list)
+ num_chanctx++;
+
+ if (num_chanctx > 1 ||
+ !(local->hw.flags & IEEE80211_HW_CHANCTX_STA_CSA)) {
+ sdata_info(sdata,
+ "not handling chan-switch with channel contexts\n");
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
+ mutex_unlock(&local->chanctx_mtx);
+ return;
+ }
}
- mutex_lock(&local->chanctx_mtx);
if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) {
+ ieee80211_queue_work(&local->hw,
+ &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
return;
}
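
Editorial note on the gate added in this hunk: when channel contexts are in use, a station-side channel switch is now attempted only if a single context exists and the hardware opts in. A minimal userspace sketch of that predicate, with an illustrative flag value standing in for IEEE80211_HW_CHANCTX_STA_CSA rather than the kernel's definition:

#include <stdbool.h>
#include <stdio.h>

/* illustrative only: stands in for IEEE80211_HW_CHANCTX_STA_CSA */
#define HW_CHANCTX_STA_CSA	0x1u

/* CSA with channel contexts is handled only for the single-context,
 * driver-opted-in case; everything else falls back to disconnecting. */
static bool csa_allowed(unsigned int num_chanctx, unsigned int hw_flags)
{
	return num_chanctx <= 1 && (hw_flags & HW_CHANCTX_STA_CSA);
}

int main(void)
{
	printf("%d\n", csa_allowed(1, HW_CHANCTX_STA_CSA));	/* 1: handled */
	printf("%d\n", csa_allowed(2, HW_CHANCTX_STA_CSA));	/* 0: disconnect */
	printf("%d\n", csa_allowed(1, 0));			/* 0: disconnect */
	return 0;
}
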
@@ -1374,8 +1194,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
if (!mgd->associated)
return false;
- if (mgd->flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL))
+ if (mgd->flags & IEEE80211_STA_CONNECTION_POLL)
return false;
if (!mgd->have_beacon)
@@ -1691,8 +1510,7 @@ static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
{
lockdep_assert_held(&sdata->local->mtx);
- sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
- IEEE80211_STA_BEACON_POLL);
+ sdata->u.mgd.flags &= ~IEEE80211_STA_CONNECTION_POLL;
ieee80211_run_deferred_scan(sdata->local);
}
@@ -1954,11 +1772,8 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
mutex_lock(&local->mtx);
- if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL))) {
- mutex_unlock(&local->mtx);
- return;
- }
+ if (!(ifmgd->flags & IEEE80211_STA_CONNECTION_POLL))
+ goto out;
__ieee80211_stop_poll(sdata);
@@ -2094,15 +1909,9 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
* because otherwise we would reset the timer every time and
* never check whether we received a probe response!
*/
- if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL))
+ if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)
already = true;
- if (beacon)
- ifmgd->flags |= IEEE80211_STA_BEACON_POLL;
- else
- ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL;
-
mutex_unlock(&sdata->local->mtx);
if (already)
@@ -2174,6 +1983,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
true, frame_buf);
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+ sdata->vif.csa_active = false;
ieee80211_wake_queues_by_reason(&sdata->local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
@@ -3061,17 +2871,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
}
- if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) {
+ if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
mlme_dbg_ratelimited(sdata,
"cancelling AP probe due to a received beacon\n");
- mutex_lock(&local->mtx);
- ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL;
- ieee80211_run_deferred_scan(local);
- mutex_unlock(&local->mtx);
-
- mutex_lock(&local->iflist_mtx);
- ieee80211_recalc_ps(local, -1);
- mutex_unlock(&local->iflist_mtx);
+ ieee80211_reset_ap_probe(sdata);
}
/*
@@ -3543,8 +3346,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
} else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started)
run_again(sdata, ifmgd->assoc_data->timeout);
- if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL) &&
+ if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL &&
ifmgd->associated) {
u8 bssid[ETH_ALEN];
int max_tries;
@@ -3876,7 +3678,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
return ret;
while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
- ifmgd->flags |= chandef_downgrade(&chandef);
+ ifmgd->flags |= ieee80211_chandef_downgrade(&chandef);
ret = ieee80211_vif_use_channel(sdata, &chandef,
IEEE80211_CHANCTX_SHARED);
}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index acd1f71adc03..0c2a29484c07 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -394,6 +394,8 @@ void ieee80211_sw_roc_work(struct work_struct *work)
if (started)
ieee80211_start_next_roc(local);
+ else if (list_empty(&local->roc_list))
+ ieee80211_run_deferred_scan(local);
}
out_unlock:
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5dedc56c94db..505bc0dea074 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -144,8 +144,8 @@ void rate_control_deinitialize(struct ieee80211_local *local);
/* Rate control algorithms */
#ifdef CONFIG_MAC80211_RC_PID
-extern int rc80211_pid_init(void);
-extern void rc80211_pid_exit(void);
+int rc80211_pid_init(void);
+void rc80211_pid_exit(void);
#else
static inline int rc80211_pid_init(void)
{
@@ -157,8 +157,8 @@ static inline void rc80211_pid_exit(void)
#endif
#ifdef CONFIG_MAC80211_RC_MINSTREL
-extern int rc80211_minstrel_init(void);
-extern void rc80211_minstrel_exit(void);
+int rc80211_minstrel_init(void);
+void rc80211_minstrel_exit(void);
#else
static inline int rc80211_minstrel_init(void)
{
@@ -170,8 +170,8 @@ static inline void rc80211_minstrel_exit(void)
#endif
#ifdef CONFIG_MAC80211_RC_MINSTREL_HT
-extern int rc80211_minstrel_ht_init(void);
-extern void rc80211_minstrel_ht_exit(void);
+int rc80211_minstrel_ht_init(void);
+void rc80211_minstrel_ht_exit(void);
#else
static inline int rc80211_minstrel_ht_init(void)
{
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 8b5f7ef7c0c9..7fa1b36e6202 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -203,6 +203,15 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
mi->max_prob_rate = tmp_prob_rate;
+#ifdef CONFIG_MAC80211_DEBUGFS
+ /* use fixed index if set */
+ if (mp->fixed_rate_idx != -1) {
+ mi->max_tp_rate[0] = mp->fixed_rate_idx;
+ mi->max_tp_rate[1] = mp->fixed_rate_idx;
+ mi->max_prob_rate = mp->fixed_rate_idx;
+ }
+#endif
+
/* Reset update timer */
mi->stats_update = jiffies;
@@ -310,6 +319,11 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
/* increase sum packet counter */
mi->packet_count++;
+#ifdef CONFIG_MAC80211_DEBUGFS
+ if (mp->fixed_rate_idx != -1)
+ return;
+#endif
+
delta = (mi->packet_count * sampling_ratio / 100) -
(mi->sample_count + mi->sample_deferred / 2);
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 7c323f27ba23..5d60779a0c1b 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -365,6 +365,14 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
}
}
+#ifdef CONFIG_MAC80211_DEBUGFS
+ /* use fixed index if set */
+ if (mp->fixed_rate_idx != -1) {
+ mi->max_tp_rate = mp->fixed_rate_idx;
+ mi->max_tp_rate2 = mp->fixed_rate_idx;
+ mi->max_prob_rate = mp->fixed_rate_idx;
+ }
+#endif
mi->stats_update = jiffies;
}
@@ -774,6 +782,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
info->flags |= mi->tx_flags;
minstrel_ht_check_cck_shortpreamble(mp, mi, txrc->short_preamble);
+#ifdef CONFIG_MAC80211_DEBUGFS
+ if (mp->fixed_rate_idx != -1)
+ return;
+#endif
+
/* Don't use EAPOL frames for sampling on non-mrr hw */
if (mp->hw->max_rates == 1 &&
(info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
@@ -781,16 +794,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
else
sample_idx = minstrel_get_sample_rate(mp, mi);
-#ifdef CONFIG_MAC80211_DEBUGFS
- /* use fixed index if set */
- if (mp->fixed_rate_idx != -1) {
- mi->max_tp_rate = mp->fixed_rate_idx;
- mi->max_tp_rate2 = mp->fixed_rate_idx;
- mi->max_prob_rate = mp->fixed_rate_idx;
- sample_idx = -1;
- }
-#endif
-
mi->total_packets++;
/* wraparound */
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index c97a0657c043..6ff134650a84 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -167,29 +167,29 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
* provide large enough buffers. */
length = length < RC_PID_PRINT_BUF_SIZE ?
length : RC_PID_PRINT_BUF_SIZE;
- p = snprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
+ p = scnprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
switch (ev->type) {
case RC_PID_EVENT_TYPE_TX_STATUS:
- p += snprintf(pb + p, length - p, "tx_status %u %u",
- !(ev->data.flags & IEEE80211_TX_STAT_ACK),
- ev->data.tx_status.status.rates[0].idx);
+ p += scnprintf(pb + p, length - p, "tx_status %u %u",
+ !(ev->data.flags & IEEE80211_TX_STAT_ACK),
+ ev->data.tx_status.status.rates[0].idx);
break;
case RC_PID_EVENT_TYPE_RATE_CHANGE:
- p += snprintf(pb + p, length - p, "rate_change %d %d",
- ev->data.index, ev->data.rate);
+ p += scnprintf(pb + p, length - p, "rate_change %d %d",
+ ev->data.index, ev->data.rate);
break;
case RC_PID_EVENT_TYPE_TX_RATE:
- p += snprintf(pb + p, length - p, "tx_rate %d %d",
- ev->data.index, ev->data.rate);
+ p += scnprintf(pb + p, length - p, "tx_rate %d %d",
+ ev->data.index, ev->data.rate);
break;
case RC_PID_EVENT_TYPE_PF_SAMPLE:
- p += snprintf(pb + p, length - p,
- "pf_sample %d %d %d %d",
- ev->data.pf_sample, ev->data.prop_err,
- ev->data.int_err, ev->data.der_err);
+ p += scnprintf(pb + p, length - p,
+ "pf_sample %d %d %d %d",
+ ev->data.pf_sample, ev->data.prop_err,
+ ev->data.int_err, ev->data.der_err);
break;
}
- p += snprintf(pb + p, length - p, "\n");
+ p += scnprintf(pb + p, length - p, "\n");
spin_unlock_irqrestore(&events->lock, status);
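
Editorial note on the conversion in this hunk: snprintf() returns the length the output would have had, so the accumulating "p += snprintf(pb + p, length - p, ...)" pattern can push p past length and turn the next size argument into a huge unsigned value; scnprintf() returns only what was actually written. A self-contained userspace stand-in (my_scnprintf is a hypothetical helper mirroring the kernel's scnprintf semantics):

#include <stdarg.h>
#include <stdio.h>

/* userspace stand-in for the kernel's scnprintf(): returns the number
 * of characters actually written, never the would-be length */
static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int i;

	if (size == 0)
		return 0;
	va_start(args, fmt);
	i = vsnprintf(buf, size, fmt, args);
	va_end(args);
	return i < (int)size ? i : (int)size - 1;
}

int main(void)
{
	char pb[8];
	int p;

	/* snprintf() reports the would-be length: p can run past the
	 * buffer, so a following "pb + p, length - p" call is bogus */
	p = snprintf(pb, sizeof(pb), "0123456789");
	printf("snprintf returned %d (buffer holds %zu)\n", p, sizeof(pb));

	/* the scnprintf-style return stays within the buffer */
	p = my_scnprintf(pb, sizeof(pb), "0123456789");
	printf("scnprintf-style returned %d\n", p);
	return 0;
}
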
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 674eac1f996c..0011ac815097 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -995,8 +995,9 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
rx->sta->num_duplicates++;
}
return RX_DROP_UNUSABLE;
- } else
+ } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
+ }
}
if (unlikely(rx->skb->len < 16)) {
@@ -2402,7 +2403,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC &&
- mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED)
+ mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED &&
+ mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
return RX_DROP_UNUSABLE;
if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
@@ -2566,31 +2568,46 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
goto queue;
case WLAN_CATEGORY_SPECTRUM_MGMT:
- if (status->band != IEEE80211_BAND_5GHZ)
- break;
-
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
- break;
-
/* verify action_code is present */
if (len < IEEE80211_MIN_ACTION_SIZE + 1)
break;
switch (mgmt->u.action.u.measurement.action_code) {
case WLAN_ACTION_SPCT_MSR_REQ:
+ if (status->band != IEEE80211_BAND_5GHZ)
+ break;
+
if (len < (IEEE80211_MIN_ACTION_SIZE +
sizeof(mgmt->u.action.u.measurement)))
break;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+
ieee80211_process_measurement_req(sdata, mgmt, len);
goto handled;
- case WLAN_ACTION_SPCT_CHL_SWITCH:
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ case WLAN_ACTION_SPCT_CHL_SWITCH: {
+ u8 *bssid;
+ if (len < (IEEE80211_MIN_ACTION_SIZE +
+ sizeof(mgmt->u.action.u.chan_switch)))
+ break;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC)
break;
- if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid))
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ bssid = sdata->u.mgd.bssid;
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ bssid = sdata->u.ibss.bssid;
+ else
+ break;
+
+ if (!ether_addr_equal(mgmt->bssid, bssid))
break;
goto queue;
+ }
}
break;
case WLAN_CATEGORY_SA_QUERY:
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 08afe74b98f4..5ad66a83ef7f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -238,6 +238,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
enum ieee80211_band band;
int i, ielen, n_chans;
+ if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
+ return false;
+
do {
if (local->hw_scan_band == IEEE80211_NUM_BANDS)
return false;
@@ -391,8 +394,7 @@ static bool ieee80211_can_scan(struct ieee80211_local *local,
return false;
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- sdata->u.mgd.flags & (IEEE80211_STA_BEACON_POLL |
- IEEE80211_STA_CONNECTION_POLL))
+ sdata->u.mgd.flags & IEEE80211_STA_CONNECTION_POLL)
return false;
return true;
@@ -940,7 +942,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
if (!local->scan_req)
goto out;
+ /*
+ * We have a scan running and the driver already reported completion,
+ * but the worker hasn't run yet or is stuck on the mutex - mark it as
+ * cancelled.
+ */
+ if (test_bit(SCAN_HW_SCANNING, &local->scanning) &&
+ test_bit(SCAN_COMPLETED, &local->scanning)) {
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
+ goto out;
+ }
+
if (test_bit(SCAN_HW_SCANNING, &local->scanning)) {
+ /*
+ * Make sure that __ieee80211_scan_completed doesn't trigger a
+ * scan on another band.
+ */
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
if (local->ops->cancel_hw_scan)
drv_cancel_hw_scan(local,
rcu_dereference_protected(local->scan_sdata,
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 578eea3fc04d..921597e279a3 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -21,6 +21,168 @@
#include "sta_info.h"
#include "wme.h"
+int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
+ struct ieee802_11_elems *elems, bool beacon,
+ enum ieee80211_band current_band,
+ u32 sta_flags, u8 *bssid, u8 *count, u8 *mode,
+ struct cfg80211_chan_def *new_chandef)
+{
+ enum ieee80211_band new_band;
+ int new_freq;
+ u8 new_chan_no;
+ struct ieee80211_channel *new_chan;
+ struct cfg80211_chan_def new_vht_chandef = {};
+ const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
+ const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+ const struct ieee80211_ht_operation *ht_oper;
+ int secondary_channel_offset = -1;
+
+ sec_chan_offs = elems->sec_chan_offs;
+ wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
+ ht_oper = elems->ht_operation;
+
+ if (sta_flags & (IEEE80211_STA_DISABLE_HT |
+ IEEE80211_STA_DISABLE_40MHZ)) {
+ sec_chan_offs = NULL;
+ wide_bw_chansw_ie = NULL;
+ /* only used for bandwidth here */
+ ht_oper = NULL;
+ }
+
+ if (sta_flags & IEEE80211_STA_DISABLE_VHT)
+ wide_bw_chansw_ie = NULL;
+
+ if (elems->ext_chansw_ie) {
+ if (!ieee80211_operating_class_to_band(
+ elems->ext_chansw_ie->new_operating_class,
+ &new_band)) {
+ sdata_info(sdata,
+ "cannot understand ECSA IE operating class %d, disconnecting\n",
+ elems->ext_chansw_ie->new_operating_class);
+ return -EINVAL;
+ }
+ new_chan_no = elems->ext_chansw_ie->new_ch_num;
+ *count = elems->ext_chansw_ie->count;
+ *mode = elems->ext_chansw_ie->mode;
+ } else if (elems->ch_switch_ie) {
+ new_band = current_band;
+ new_chan_no = elems->ch_switch_ie->new_ch_num;
+ *count = elems->ch_switch_ie->count;
+ *mode = elems->ch_switch_ie->mode;
+ } else {
+ /* nothing here we understand */
+ return 1;
+ }
+
+ new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
+ new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
+ if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {
+ sdata_info(sdata,
+ "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n",
+ bssid, new_freq);
+ return -EINVAL;
+ }
+
+ if (!beacon && sec_chan_offs) {
+ secondary_channel_offset = sec_chan_offs->sec_chan_offs;
+ } else if (beacon && ht_oper) {
+ secondary_channel_offset =
+ ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
+ } else if (!(sta_flags & IEEE80211_STA_DISABLE_HT)) {
+ /* If it's not a beacon, HT is enabled and the IE not present,
+ * it's 20 MHz, 802.11-2012 8.5.2.6:
+ * This element [the Secondary Channel Offset Element] is
+ * present when switching to a 40 MHz channel. It may be
+ * present when switching to a 20 MHz channel (in which
+ * case the secondary channel offset is set to SCN).
+ */
+ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+ }
+
+ switch (secondary_channel_offset) {
+ default:
+ /* secondary_channel_offset was present but is invalid */
+ case IEEE80211_HT_PARAM_CHA_SEC_NONE:
+ cfg80211_chandef_create(new_chandef, new_chan,
+ NL80211_CHAN_HT20);
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+ cfg80211_chandef_create(new_chandef, new_chan,
+ NL80211_CHAN_HT40PLUS);
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+ cfg80211_chandef_create(new_chandef, new_chan,
+ NL80211_CHAN_HT40MINUS);
+ break;
+ case -1:
+ cfg80211_chandef_create(new_chandef, new_chan,
+ NL80211_CHAN_NO_HT);
+ /* keep width for 5/10 MHz channels */
+ switch (sdata->vif.bss_conf.chandef.width) {
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ new_chandef->width = sdata->vif.bss_conf.chandef.width;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ if (wide_bw_chansw_ie) {
+ new_vht_chandef.chan = new_chan;
+ new_vht_chandef.center_freq1 =
+ ieee80211_channel_to_frequency(
+ wide_bw_chansw_ie->new_center_freq_seg0,
+ new_band);
+
+ switch (wide_bw_chansw_ie->new_channel_width) {
+ default:
+ /* hmmm, ignore VHT and use HT if present */
+ case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ new_vht_chandef.chan = NULL;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80MHZ:
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ /* field is otherwise reserved */
+ new_vht_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(
+ wide_bw_chansw_ie->new_center_freq_seg1,
+ new_band);
+ new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
+ break;
+ }
+ if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
+ new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
+ ieee80211_chandef_downgrade(&new_vht_chandef);
+ if (sta_flags & IEEE80211_STA_DISABLE_160MHZ &&
+ new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
+ ieee80211_chandef_downgrade(&new_vht_chandef);
+ if (sta_flags & IEEE80211_STA_DISABLE_40MHZ &&
+ new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
+ ieee80211_chandef_downgrade(&new_vht_chandef);
+ }
+
+ /* if VHT data is there validate & use it */
+ if (new_vht_chandef.chan) {
+ if (!cfg80211_chandef_compatible(&new_vht_chandef,
+ new_chandef)) {
+ sdata_info(sdata,
+ "BSS %pM: CSA has inconsistent channel data, disconnecting\n",
+ bssid);
+ return -EINVAL;
+ }
+ *new_chandef = new_vht_chandef;
+ }
+
+ return 0;
+}
+
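
Worth spelling out the return convention of the helper just added: negative means a hard error (the caller schedules a disconnect), positive means no recognizable switch IE (silently ignored), zero means a switch announcement was parsed. A toy caller in the same shape as the mlme.c hunk earlier in this patch, with purely illustrative names:

#include <stdio.h>

/* tri-state convention assumed from ieee80211_parse_ch_switch_ie():
 *  < 0  -> hard error, disconnect
 *    0  -> a switch announcement was parsed
 *  > 0  -> no IE we understand, silently ignore */
static int parse_csa(int have_ie, int ie_valid)
{
	if (!have_ie)
		return 1;
	if (!ie_valid)
		return -1;
	return 0;
}

static void process(int have_ie, int ie_valid)
{
	int res = parse_csa(have_ie, ie_valid);

	if (res < 0)
		printf("error: schedule disconnect\n");
	if (res)
		return;	/* error or nothing to do */
	printf("apply channel switch\n");
}

int main(void)
{
	process(1, 1);	/* apply channel switch */
	process(1, 0);	/* error: schedule disconnect */
	process(0, 0);	/* silently ignored */
	return 0;
}
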
static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata,
struct ieee80211_msrment_ie *request_ie,
const u8 *da, const u8 *bssid,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 368837fe3b80..78dc2e99027e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ sta->last_rx = jiffies;
+
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
u8 *qc = ieee80211_get_qos_ctl(hdr);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3fb9dd6d02fc..d4cee98533fd 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1475,6 +1475,41 @@ DEFINE_EVENT(local_sdata_evt, drv_ipv6_addr_change,
);
#endif
+TRACE_EVENT(drv_join_ibss,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *info),
+
+ TP_ARGS(local, sdata, info),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u8, dtimper)
+ __field(u16, bcnint)
+ __dynamic_array(u8, ssid, info->ssid_len);
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->dtimper = info->dtim_period;
+ __entry->bcnint = info->beacon_int;
+ memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG
+ )
+);
+
+DEFINE_EVENT(local_sdata_evt, drv_leave_ibss,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
+
/*
* Tracing for API calls that drivers call.
*/
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3456c0486b48..9993fcb19ecd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1120,7 +1120,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
tx->sta = rcu_dereference(sdata->u.vlan.sta);
if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr)
return TX_DROP;
- } else if (info->flags & IEEE80211_TX_CTL_INJECTED ||
+ } else if (info->flags & (IEEE80211_TX_CTL_INJECTED |
+ IEEE80211_TX_INTFL_NL80211_FRAME_TX) ||
tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
@@ -1981,7 +1982,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
* EAPOL frames from the local station.
*/
if (unlikely(!ieee80211_vif_is_mesh(&sdata->vif) &&
- !is_multicast_ether_addr(hdr.addr1) && !authorized &&
+ !multicast && !authorized &&
(cpu_to_be16(ethertype) != sdata->control_port_protocol ||
!ether_addr_equal(sdata->vif.addr, skb->data + ETH_ALEN)))) {
#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2357,15 +2358,31 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
struct probe_resp *resp;
int counter_offset_beacon = sdata->csa_counter_offset_beacon;
int counter_offset_presp = sdata->csa_counter_offset_presp;
+ u8 *beacon_data;
+ size_t beacon_data_len;
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ beacon_data = beacon->tail;
+ beacon_data_len = beacon->tail_len;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ beacon_data = beacon->head;
+ beacon_data_len = beacon->head_len;
+ break;
+ default:
+ return;
+ }
+ if (WARN_ON(counter_offset_beacon >= beacon_data_len))
+ return;
/* warn if the driver did not check for/react to csa completeness */
- if (WARN_ON(((u8 *)beacon->tail)[counter_offset_beacon] == 0))
+ if (WARN_ON(beacon_data[counter_offset_beacon] == 0))
return;
- ((u8 *)beacon->tail)[counter_offset_beacon]--;
+ beacon_data[counter_offset_beacon]--;
- if (sdata->vif.type == NL80211_IFTYPE_AP &&
- counter_offset_presp) {
+ if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) {
rcu_read_lock();
resp = rcu_dereference(sdata->u.ap.probe_resp);
@@ -2400,6 +2417,15 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
goto out;
beacon_data = beacon->tail;
beacon_data_len = beacon->tail_len;
+ } else if (vif->type == NL80211_IFTYPE_ADHOC) {
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+
+ beacon = rcu_dereference(ifibss->presp);
+ if (!beacon)
+ goto out;
+
+ beacon_data = beacon->head;
+ beacon_data_len = beacon->head_len;
} else {
WARN_ON(1);
goto out;
@@ -2484,6 +2510,10 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!presp)
goto out;
+ if (sdata->vif.csa_active)
+ ieee80211_update_csa(sdata, presp);
+

skb = dev_alloc_skb(local->tx_headroom + presp->head_len);
if (!skb)
goto out;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9c3200bcfc02..aefb9d5b9620 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -567,18 +567,15 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
IEEE80211_QUEUE_STOP_REASON_FLUSH);
}
-void ieee80211_iterate_active_interfaces(
- struct ieee80211_hw *hw, u32 iter_flags,
- void (*iterator)(void *data, u8 *mac,
- struct ieee80211_vif *vif),
- void *data)
+static void __iterate_active_interfaces(struct ieee80211_local *local,
+ u32 iter_flags,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
{
- struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
- mutex_lock(&local->iflist_mtx);
-
- list_for_each_entry(sdata, &local->interfaces, list) {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
@@ -597,13 +594,25 @@ void ieee80211_iterate_active_interfaces(
&sdata->vif);
}
- sdata = rcu_dereference_protected(local->monitor_sdata,
- lockdep_is_held(&local->iflist_mtx));
+ sdata = rcu_dereference_check(local->monitor_sdata,
+ lockdep_is_held(&local->iflist_mtx) ||
+ lockdep_rtnl_is_held());
if (sdata &&
(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL ||
sdata->flags & IEEE80211_SDATA_IN_DRIVER))
iterator(data, sdata->vif.addr, &sdata->vif);
+}
+
+void ieee80211_iterate_active_interfaces(
+ struct ieee80211_hw *hw, u32 iter_flags,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ mutex_lock(&local->iflist_mtx);
+ __iterate_active_interfaces(local, iter_flags, iterator, data);
mutex_unlock(&local->iflist_mtx);
}
EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
@@ -615,38 +624,26 @@ void ieee80211_iterate_active_interfaces_atomic(
void *data)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_sub_if_data *sdata;
rcu_read_lock();
+ __iterate_active_interfaces(local, iter_flags, iterator, data);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- switch (sdata->vif.type) {
- case NL80211_IFTYPE_MONITOR:
- if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
- continue;
- break;
- case NL80211_IFTYPE_AP_VLAN:
- continue;
- default:
- break;
- }
- if (!(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL) &&
- !(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
- continue;
- if (ieee80211_sdata_running(sdata))
- iterator(data, sdata->vif.addr,
- &sdata->vif);
- }
+void ieee80211_iterate_active_interfaces_rtnl(
+ struct ieee80211_hw *hw, u32 iter_flags,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
- sdata = rcu_dereference(local->monitor_sdata);
- if (sdata &&
- (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL ||
- sdata->flags & IEEE80211_SDATA_IN_DRIVER))
- iterator(data, sdata->vif.addr, &sdata->vif);
+ ASSERT_RTNL();
- rcu_read_unlock();
+ __iterate_active_interfaces(local, iter_flags, iterator, data);
}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl);
/*
* Nothing should have been stuffed into the workqueue during
@@ -1007,14 +1004,21 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
*/
enable_qos = (sdata->vif.type != NL80211_IFTYPE_STATION);
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- /* Set defaults according to 802.11-2007 Table 7-37 */
- aCWmax = 1023;
- if (use_11b)
- aCWmin = 31;
- else
- aCWmin = 15;
+ /* Set defaults according to 802.11-2007 Table 7-37 */
+ aCWmax = 1023;
+ if (use_11b)
+ aCWmin = 31;
+ else
+ aCWmin = 15;
+
+ /* Configure old 802.11b/g medium access rules. */
+ qparam.cw_max = aCWmax;
+ qparam.cw_min = aCWmin;
+ qparam.txop = 0;
+ qparam.aifs = 2;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ /* Update if QoS is enabled. */
if (enable_qos) {
switch (ac) {
case IEEE80211_AC_BK:
@@ -1050,12 +1054,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
qparam.aifs = 2;
break;
}
- } else {
- /* Confiure old 802.11b/g medium access rules. */
- qparam.cw_max = aCWmax;
- qparam.cw_min = aCWmin;
- qparam.txop = 0;
- qparam.aifs = 2;
}
qparam.uapsd = false;
@@ -1084,8 +1082,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
int err;
- skb = dev_alloc_skb(local->hw.extra_tx_headroom +
- sizeof(*mgmt) + 6 + extra_len);
+ /* 24 + 6 = header + auth_algo + auth_transaction + status_code */
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len);
if (!skb)
return;
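
The size change above is worth a note: struct ieee80211_mgmt ends in a union of every management frame body, so sizeof(*mgmt) reserves room for the largest variant, while an authentication frame needs only the 24-byte header plus three 2-byte fields. A simplified, purely illustrative layout (not the real struct) showing the effect:

#include <stdio.h>

/* fake layout, only to show why sizeof() over-allocates */
struct hdr { unsigned char b[24]; };
struct auth_body { unsigned short algo, trans, status; };
struct big_body { unsigned char b[200]; };

struct mgmt {
	struct hdr h;
	union {
		struct auth_body auth;	/* what auth frames use: 6 bytes */
		struct big_body other;	/* the variant that sets sizeof() */
	} u;
};

int main(void)
{
	printf("sizeof(struct mgmt) = %zu\n", sizeof(struct mgmt)); /* ~224 */
	printf("needed for auth     = %d\n", 24 + 6);
	return 0;
}
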
@@ -2238,6 +2236,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
}
rate = cfg80211_calculate_bitrate(&ri);
+ if (WARN_ONCE(!rate,
+ "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n",
+ status->flag, status->rate_idx, status->vht_nss))
+ return 0;
/* rewind from end of MPDU */
if (status->flag & RX_FLAG_MACTIME_END)
@@ -2292,3 +2294,63 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
ieee80211_queue_work(hw, &local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
+
+u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
+{
+ u32 ret;
+ int tmp;
+
+ switch (c->width) {
+ case NL80211_CHAN_WIDTH_20:
+ c->width = NL80211_CHAN_WIDTH_20_NOHT;
+ ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ c->width = NL80211_CHAN_WIDTH_20;
+ c->center_freq1 = c->chan->center_freq;
+ ret = IEEE80211_STA_DISABLE_40MHZ |
+ IEEE80211_STA_DISABLE_VHT;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
+ /* n_P40 */
+ tmp /= 2;
+ /* freq_P40 */
+ c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
+ c->width = NL80211_CHAN_WIDTH_40;
+ ret = IEEE80211_STA_DISABLE_VHT;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ c->center_freq2 = 0;
+ c->width = NL80211_CHAN_WIDTH_80;
+ ret = IEEE80211_STA_DISABLE_80P80MHZ |
+ IEEE80211_STA_DISABLE_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ /* n_P20 */
+ tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
+ /* n_P80 */
+ tmp /= 4;
+ c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
+ c->width = NL80211_CHAN_WIDTH_80;
+ ret = IEEE80211_STA_DISABLE_80P80MHZ |
+ IEEE80211_STA_DISABLE_160MHZ;
+ break;
+ default:
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ WARN_ON_ONCE(1);
+ c->width = NL80211_CHAN_WIDTH_20_NOHT;
+ ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ break;
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ WARN_ON_ONCE(1);
+ /* keep c->width */
+ ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ break;
+ }
+
+ WARN_ON_ONCE(!cfg80211_chandef_valid(c));
+
+ return ret;
+}
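
The 80 MHz arm of the downgrade deserves a worked example: with primary channel 36 (5180 MHz) in an 80 MHz block centred at 5210 MHz, n_P20 = (30 + 5180 - 5210)/20 = 0, so n_P40 = 0 and the new 40 MHz centre is 5210 - 20 + 40*0 = 5190 MHz. A runnable check of that arithmetic (a sketch modelling the step above, not kernel code):

#include <assert.h>
#include <stdio.h>

/* model of the 80 MHz -> 40 MHz step in ieee80211_chandef_downgrade() */
static int downgrade_80_to_40(int primary_freq, int cf1_80)
{
	int tmp = (30 + primary_freq - cf1_80) / 20;	/* n_P20: 0..3 */
	tmp /= 2;					/* n_P40: 0..1 */
	return cf1_80 - 20 + 40 * tmp;			/* freq_P40 */
}

int main(void)
{
	/* channel 36 (5180 MHz), lower half of an 80 MHz block at 5210 */
	assert(downgrade_80_to_40(5180, 5210) == 5190);
	/* channel 48 (5240 MHz), upper half of the same block */
	assert(downgrade_80_to_40(5240, 5210) == 5230);
	printf("ok\n");
	return 0;
}
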
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 97c289414e32..de0112785aae 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -185,13 +185,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
vht_cap->cap |= cap_info &
(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
- IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
}
if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
vht_cap->cap |= cap_info &
- IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+ (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+ IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX);
if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
vht_cap->cap |= cap_info &
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index b7c7f815deae..52ae6646a411 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -174,8 +174,7 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops)
if (!ops || !ops->xmit || !ops->ed || !ops->start ||
!ops->stop || !ops->set_channel) {
- printk(KERN_ERR
- "undefined IEEE802.15.4 device operations\n");
+ pr_err("undefined IEEE802.15.4 device operations\n");
return NULL;
}
@@ -201,8 +200,7 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops)
phy = wpan_phy_alloc(priv_size);
if (!phy) {
- printk(KERN_ERR
- "failure to allocate master IEEE802.15.4 device\n");
+ pr_err("failure to allocate master IEEE802.15.4 device\n");
return NULL;
}
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 2ca2f4dceab7..e24bcf977296 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -208,6 +208,8 @@ static int mac802154_header_create(struct sk_buff *skb,
head[1] = fc >> 8;
memcpy(skb_push(skb, pos), head, pos);
+ skb_reset_mac_header(skb);
+ skb->mac_len = pos;
return pos;
}
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 1bec1219ab81..851cd880b0c0 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_IPIP |
SKB_GSO_MPLS)))
goto out;
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index a13e15be7911..f2c7d83dc23f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set,
struct mtype *map = set->data;
struct nlattr *adt, *nested;
void *x;
- u32 id, first = cb->args[2];
+ u32 id, first = cb->args[IPSET_CB_ARG0];
adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!adt)
return -EMSGSIZE;
- for (; cb->args[2] < map->elements; cb->args[2]++) {
- id = cb->args[2];
+ for (; cb->args[IPSET_CB_ARG0] < map->elements;
+ cb->args[IPSET_CB_ARG0]++) {
+ id = cb->args[IPSET_CB_ARG0];
x = get_ext(set, map, id);
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
@@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set,
ipset_nest_end(skb, adt);
/* Set listing finished */
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return 0;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(id == first)) {
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
}
ipset_nest_end(skb, adt);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index e7603c5b53d7..cf99676e69f8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -254,7 +254,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = last_port - first_port + 1;
- map->memsize = map->elements * sizeof(unsigned long);
+ map->memsize = bitmap_bytes(0, map->elements);
set->variant = &bitmap_port;
set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_port(set, map, first_port, last_port)) {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index dc9284bdd2dd..bac7e01df67f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->data;
- if (cb->args[2]) {
- pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
- __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
+ struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ if (cb->args[IPSET_CB_ARG0]) {
+ pr_debug("release set %s\n",
+ nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+ __ip_set_put_byindex(inst,
+ (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
}
return 0;
}
@@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh)
}
static int
-dump_init(struct netlink_callback *cb)
+dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
{
struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
@@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb)
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
ip_set_id_t index;
- struct ip_set_net *inst = (struct ip_set_net *)cb->data;
/* Second pass, so parser can't fail */
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[0] : dump single set/all sets
- * [1] : set index
- * [..]: type specific
+ /* cb->args[IPSET_CB_NET]: net namespace
+ * [IPSET_CB_DUMP]: dump single set/all sets
+ * [IPSET_CB_INDEX]: set index
+ * [IPSET_CB_ARG0]: type specific
*/
if (cda[IPSET_ATTR_SETNAME]) {
@@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb)
return -ENOENT;
dump_type = DUMP_ONE;
- cb->args[1] = index;
+ cb->args[IPSET_CB_INDEX] = index;
} else
dump_type = DUMP_ALL;
@@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb)
u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
dump_type |= (f << 16);
}
- cb->args[0] = dump_type;
+ cb->args[IPSET_CB_NET] = (unsigned long)inst;
+ cb->args[IPSET_CB_DUMP] = dump_type;
return 0;
}
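
For readers following the cb->args[] renaming in this and the following hunks: the named slots replace the magic indices 0/1/2 with the layout documented in the comment above. A sketch of the corresponding enum, with values inferred from this patch (the real definition lives in the ipset headers):

/* inferred slot layout for netlink_callback args in ipset dumps */
enum {
	IPSET_CB_NET = 0,	/* net namespace instance */
	IPSET_CB_DUMP,		/* dump single set / all sets */
	IPSET_CB_INDEX,		/* set index */
	IPSET_CB_ARG0,		/* type-specific argument */
};
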
@@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
struct ip_set *set = NULL;
struct nlmsghdr *nlh = NULL;
unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
+ struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
u32 dump_type, dump_flags;
int ret = 0;
- struct ip_set_net *inst = (struct ip_set_net *)cb->data;
- if (!cb->args[0]) {
- ret = dump_init(cb);
+ if (!cb->args[IPSET_CB_DUMP]) {
+ ret = dump_init(cb, inst);
if (ret < 0) {
nlh = nlmsg_hdr(cb->skb);
/* We have to create and send the error message
@@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
}
}
- if (cb->args[1] >= inst->ip_set_max)
+ if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
goto out;
- dump_type = DUMP_TYPE(cb->args[0]);
- dump_flags = DUMP_FLAGS(cb->args[0]);
- max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
+ dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
+ dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
+ max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
+ : inst->ip_set_max;
dump_last:
- pr_debug("args[0]: %u %u args[1]: %ld\n",
- dump_type, dump_flags, cb->args[1]);
- for (; cb->args[1] < max; cb->args[1]++) {
- index = (ip_set_id_t) cb->args[1];
+ pr_debug("dump type, flag: %u %u index: %ld\n",
+ dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
+ for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
+ index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
set = nfnl_set(inst, index);
if (set == NULL) {
if (dump_type == DUMP_ONE) {
@@ -1294,7 +1298,7 @@ dump_last:
!!(set->type->features & IPSET_DUMP_LAST)))
continue;
pr_debug("List set: %s\n", set->name);
- if (!cb->args[2]) {
+ if (!cb->args[IPSET_CB_ARG0]) {
/* Start listing: make sure set won't be destroyed */
pr_debug("reference set\n");
__ip_set_get(set);
@@ -1311,7 +1315,7 @@ dump_last:
goto nla_put_failure;
if (dump_flags & IPSET_FLAG_LIST_SETNAME)
goto next_set;
- switch (cb->args[2]) {
+ switch (cb->args[IPSET_CB_ARG0]) {
case 0:
/* Core header data */
if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
@@ -1331,7 +1335,7 @@ dump_last:
read_lock_bh(&set->lock);
ret = set->variant->list(set, skb, cb);
read_unlock_bh(&set->lock);
- if (!cb->args[2])
+ if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
goto release_refcount;
@@ -1340,8 +1344,8 @@ dump_last:
/* If we dump all sets, continue with dumping last ones */
if (dump_type == DUMP_ALL) {
dump_type = DUMP_LAST;
- cb->args[0] = dump_type | (dump_flags << 16);
- cb->args[1] = 0;
+ cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
+ cb->args[IPSET_CB_INDEX] = 0;
goto dump_last;
}
goto out;
@@ -1350,15 +1354,15 @@ nla_put_failure:
ret = -EFAULT;
next_set:
if (dump_type == DUMP_ONE)
- cb->args[1] = IPSET_INVALID_ID;
+ cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
else
- cb->args[1]++;
+ cb->args[IPSET_CB_INDEX]++;
release_refcount:
/* If there was an error or set is done, release set */
- if (ret || !cb->args[2]) {
+ if (ret || !cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n", nfnl_set(inst, index)->name);
__ip_set_put_byindex(inst, index);
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
}
out:
if (nlh) {
@@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
-
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
struct netlink_dump_control c = {
.dump = ip_set_dump_start,
.done = ip_set_dump_done,
- .data = (void *)inst
};
return netlink_dump_start(ctnl, skb, nlh, &c);
}
@@ -1961,7 +1962,6 @@ static int __net_init
ip_set_net_init(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);
-
struct ip_set **list;
inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6a80dbd30df7..be6932ad3a86 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -234,7 +234,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
#define mtype MTYPE
-#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
#define mtype_add IPSET_TOKEN(MTYPE, _add)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
@@ -931,7 +930,7 @@ mtype_list(const struct ip_set *set,
struct nlattr *atd, *nested;
const struct hbucket *n;
const struct mtype_elem *e;
- u32 first = cb->args[2];
+ u32 first = cb->args[IPSET_CB_ARG0];
/* We assume that one hash bucket fills into one page */
void *incomplete;
int i;
@@ -940,20 +939,22 @@ mtype_list(const struct ip_set *set,
if (!atd)
return -EMSGSIZE;
pr_debug("list hash set %s\n", set->name);
- for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
+ for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
+ cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
- n = hbucket(t, cb->args[2]);
- pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
+ n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+ pr_debug("cb->arg bucket: %lu, t %p n %p\n",
+ cb->args[IPSET_CB_ARG0], t, n);
for (i = 0; i < n->pos; i++) {
e = ahash_data(n, i, set->dsize);
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
- cb->args[2], n, i, e);
+ cb->args[IPSET_CB_ARG0], n, i, e);
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) {
- if (cb->args[2] == first) {
+ if (cb->args[IPSET_CB_ARG0] == first) {
nla_nest_cancel(skb, atd);
return -EMSGSIZE;
} else
@@ -968,16 +969,16 @@ mtype_list(const struct ip_set *set,
}
ipset_nest_end(skb, atd);
/* Set listing finished */
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return 0;
nla_put_failure:
nlmsg_trim(skb, incomplete);
- if (unlikely(first == cb->args[2])) {
+ if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
pr_warning("Can't list set %s: one bucket does not fit into "
"a message. Please report it!\n", set->name);
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
}
ipset_nest_end(skb, atd);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 426032706ca9..2bc2dec20b00 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -137,12 +137,11 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet4_elem e = {
- .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK,
- .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK,
- };
+ struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -160,14 +159,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK,
- .cidr[1] = HOST_MASK };
+ struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
u8 cidr, cidr2;
int ret;
+ e.cidr[0] = e.cidr[1] = HOST_MASK;
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
@@ -364,12 +363,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet6_elem e = {
- .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK,
- .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK
- };
+ struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
@@ -386,11 +384,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK,
- .cidr[1] = HOST_MASK };
+ struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
+ e.cidr[0] = e.cidr[1] = HOST_MASK;
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 363fab933d48..703d1192a6a2 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -147,12 +147,11 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet4_elem e = {
- .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
- .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK),
- };
+ struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -174,8 +173,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK,
- .cidr[1] = HOST_MASK };
+ struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
@@ -183,6 +181,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr, cidr2;
int ret;
+ e.cidr[0] = e.cidr[1] = HOST_MASK;
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -419,12 +418,11 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet6_elem e = {
- .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
- .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK),
- };
+ struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
@@ -446,13 +444,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK,
- .cidr[1] = HOST_MASK };
+ struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
int ret;
+ e.cidr[0] = e.cidr[1] = HOST_MASK;
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index ec6f6d15dded..3e2317f3cf68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set,
{
const struct list_set *map = set->data;
struct nlattr *atd, *nested;
- u32 i, first = cb->args[2];
+ u32 i, first = cb->args[IPSET_CB_ARG0];
const struct set_elem *e;
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
- for (; cb->args[2] < map->size; cb->args[2]++) {
- i = cb->args[2];
+ for (; cb->args[IPSET_CB_ARG0] < map->size;
+ cb->args[IPSET_CB_ARG0]++) {
+ i = cb->args[IPSET_CB_ARG0];
e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
goto finish;
@@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set,
finish:
ipset_nest_end(skb, atd);
/* Set listing finished */
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return 0;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(i == first)) {
- cb->args[2] = 0;
+ cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
}
ipset_nest_end(skb, atd);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a3df9bddc4f7..62786a495cea 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
__ip_vs_dst_cache_reset(dest);
__ip_vs_svc_put(svc, false);
free_percpu(dest->stats.cpustats);
- kfree(dest);
+ ip_vs_dest_put_and_free(dest);
}
/*
@@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
- /* Some dest can be in grace period even before cleanup, we have to
- * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
- */
- rcu_barrier();
ip_vs_trash_cleanup(net);
ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index eff13c94498e..ca056a331e60 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head)
struct ip_vs_lblc_entry,
rcu_head);
- ip_vs_dest_put(en->dest);
+ ip_vs_dest_put_and_free(en->dest);
kfree(en);
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0b8550089a2e..3f21a2f47de1 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
struct ip_vs_dest_set_elem *e;
e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
- ip_vs_dest_put(e->dest);
+ ip_vs_dest_put_and_free(e->dest);
kfree(e);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 23e596e438b3..2f7ea7564044 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
sctp_sctphdr_t *sh, _sctph;
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (sh == NULL)
+ if (sh == NULL) {
+ *verdict = NF_DROP;
return 0;
+ }
sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
sizeof(_schunkh), &_schunkh);
- if (sch == NULL)
+ if (sch == NULL) {
+ *verdict = NF_DROP;
return 0;
+ }
+
net = skb_net(skb);
ipvs = net_ipvs(net);
rcu_read_lock();
@@ -76,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
{
sctp_sctphdr_t *sctph;
unsigned int sctphoff = iph->len;
+ bool payload_csum = false;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -87,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
if (unlikely(cp->app != NULL)) {
+ int ret;
+
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
- if (!ip_vs_app_pkt_out(cp, skb))
+ ret = ip_vs_app_pkt_out(cp, skb);
+ if (ret == 0)
return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 2)
+ payload_csum = true;
}
sctph = (void *) skb_network_header(skb) + sctphoff;
- sctph->source = cp->vport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ /* Only update csum if we really have to */
+ if (sctph->source != cp->vport || payload_csum ||
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ sctph->source = cp->vport;
+ sctp_nat_csum(skb, sctph, sctphoff);
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
return 1;
}
@@ -110,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
{
sctp_sctphdr_t *sctph;
unsigned int sctphoff = iph->len;
+ bool payload_csum = false;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -121,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
if (unlikely(cp->app != NULL)) {
+ int ret;
+
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
- if (!ip_vs_app_pkt_in(cp, skb))
+ ret = ip_vs_app_pkt_in(cp, skb);
+ if (ret == 0)
return 0;
+ /* ret=2: csum update is needed after payload mangling */
+ if (ret == 2)
+ payload_csum = true;
}
sctph = (void *) skb_network_header(skb) + sctphoff;
- sctph->dest = cp->dport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ /* Only update csum if we really have to */
+ if (sctph->dest != cp->dport || payload_csum ||
+ (skb->ip_summed == CHECKSUM_PARTIAL &&
+ !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
+ sctph->dest = cp->dport;
+ sctp_nat_csum(skb, sctph, sctphoff);
+ } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
return 1;
}
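
Aside: both NAT handlers now recompute the SCTP CRC32c only when something actually changed: the port differs, an application helper mangled the payload (ret == 2), or checksum offload state forces it. The decision distilled into a userspace sketch (types are stand-ins for skb state):

#include <stdbool.h>
#include <stdint.h>

enum csum_state { CSUM_NONE, CSUM_PARTIAL, CSUM_UNNECESSARY };

/* True when an SCTP SNAT rewrite must run the (costly) CRC32c pass;
 * otherwise the checksum is known-good and can be left alone. */
static bool need_sctp_recsum(uint16_t cur_port, uint16_t nat_port,
			     bool payload_mangled, enum csum_state st)
{
	return cur_port != nat_port || payload_mangled ||
	       st == CSUM_PARTIAL;
}
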
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 3588faebe529..cc65b2f42cd4 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
}
-/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * The fallback strategy loops around the table starting from a "random"
+ * point (in fact, it is chosen to be the original hash value to make the
+ * algorithm deterministic) to find a new server.
+ */
static inline struct ip_vs_dest *
ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
const union nf_inet_addr *addr, __be16 port)
{
- unsigned int offset;
- unsigned int hash;
+ unsigned int offset, roffset;
+ unsigned int hash, ihash;
struct ip_vs_dest *dest;
+ /* first try the dest it's supposed to go to */
+ ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+ dest = rcu_dereference(s->buckets[ihash].dest);
+ if (!dest)
+ return NULL;
+ if (!is_unavailable(dest))
+ return dest;
+
+ IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
+ IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+
+ /* if the original dest is unavailable, loop around the table
+ * starting from ihash to find a new dest
+ */
for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
- hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
+ roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
+ hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
dest = rcu_dereference(s->buckets[hash].dest);
if (!dest)
break;
- if (is_unavailable(dest))
- IP_VS_DBG_BUF(6, "SH: selected unavailable server "
- "%s:%d (offset %d)",
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
- ntohs(dest->port), offset);
- else
+ if (!is_unavailable(dest))
return dest;
+ IP_VS_DBG_BUF(6, "SH: selected unavailable "
+ "server %s:%d (offset %d), reselecting",
+ IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ ntohs(dest->port), roffset);
}
return NULL;
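
Aside: the fallback is a deterministic linear probe over the bucket table, starting at the bucket the flow originally hashed to, so the same input always reaches the same fallback server. Self-contained sketch (table size and availability test are illustrative):

#include <stddef.h>
#include <stdbool.h>

#define TAB_SIZE 256			/* stands in for IP_VS_SH_TAB_SIZE */

struct server { bool available; };

static struct server *sh_fallback(struct server *tab[TAB_SIZE],
				  unsigned int ihash)
{
	for (unsigned int off = 0; off < TAB_SIZE; off++) {
		struct server *s = tab[(ihash + off) % TAB_SIZE];

		if (!s)
			return NULL;	/* empty bucket ends the walk */
		if (s->available)
			return s;	/* first usable server wins */
	}
	return NULL;
}
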
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 2d3030ab5b61..a4b5e2a435ac 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = {
unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
{
- struct nf_conn_counter *acct;
+ struct nf_conn_acct *acct;
+ struct nf_conn_counter *counter;
acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
+ counter = acct->counter;
return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)atomic64_read(&acct[dir].packets),
- (unsigned long long)atomic64_read(&acct[dir].bytes));
+ (unsigned long long)atomic64_read(&counter[dir].packets),
+ (unsigned long long)atomic64_read(&counter[dir].bytes));
};
EXPORT_SYMBOL_GPL(seq_print_acct);
static struct nf_ct_ext_type acct_extend __read_mostly = {
- .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]),
- .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]),
+ .len = sizeof(struct nf_conn_acct),
+ .align = __alignof__(struct nf_conn_acct),
.id = NF_CT_EXT_ACCT,
};
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5d892febd64c..e22d950c60b3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1109,12 +1109,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
acct:
if (do_acct) {
- struct nf_conn_counter *acct;
+ struct nf_conn_acct *acct;
acct = nf_conn_acct_find(ct);
if (acct) {
- atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
- atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
+ struct nf_conn_counter *counter = acct->counter;
+
+ atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
}
}
}
@@ -1126,13 +1128,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
int do_acct)
{
if (do_acct) {
- struct nf_conn_counter *acct;
+ struct nf_conn_acct *acct;
acct = nf_conn_acct_find(ct);
if (acct) {
- atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+ struct nf_conn_counter *counter = acct->counter;
+
+ atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
atomic64_add(skb->len - skb_network_offset(skb),
- &acct[CTINFO2DIR(ctinfo)].bytes);
+ &counter[CTINFO2DIR(ctinfo)].bytes);
}
}
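
Aside: the accounting extension now wraps the per-direction counter array in struct nf_conn_acct so the extension can grow without touching every counter user. A compact model of the layout and the per-direction update (C11 sketch, direction constants illustrative):

#include <stdatomic.h>
#include <stdint.h>

enum { DIR_ORIGINAL, DIR_REPLY, DIR_MAX };

struct conn_counter {
	atomic_ullong packets;
	atomic_ullong bytes;
};

/* Wrapper: lookups hand out 'acct'; users index acct->counter[dir]
 * instead of treating the extension itself as a bare array. */
struct conn_acct {
	struct conn_counter counter[DIR_MAX];
};

static void acct_update(struct conn_acct *acct, int dir, uint32_t len)
{
	atomic_fetch_add(&acct->counter[dir].packets, 1);
	atomic_fetch_add(&acct->counter[dir].bytes, len);
}
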
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bdebd03bc8cd..70866d192efc 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) {
- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
- sizeof(rt1->rt6i_gateway)) &&
+ if (ipv6_addr_equal(rt6_nexthop(rt1),
+ rt6_nexthop(rt2)) &&
rt1->dst.dev == rt2->dst.dev)
ret = 1;
dst_release(&rt2->dst);
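
Aside: rt6_nexthop() resolves to the gateway when the route has one and falls back to the destination for on-link routes, so comparing it on both routes is more robust than memcmp() of rt6i_gateway. Simplified model (struct layout hypothetical):

#include <stdbool.h>
#include <string.h>

struct addr6 { unsigned char s6_addr[16]; };

struct rt6 {
	bool has_gateway;
	struct addr6 gateway;
	struct addr6 dst;
};

/* Effective next hop: the gateway if present, else the (directly
 * connected) destination itself. */
static const struct addr6 *nexthop(const struct rt6 *rt)
{
	return rt->has_gateway ? &rt->gateway : &rt->dst;
}

static bool same_nexthop(const struct rt6 *a, const struct rt6 *b)
{
	return !memcmp(nexthop(a), nexthop(b), sizeof(struct addr6));
}
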
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eea936b70d15..08870b859046 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -211,13 +211,23 @@ nla_put_failure:
}
static int
-dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes,
- enum ip_conntrack_dir dir)
+dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct,
+ enum ip_conntrack_dir dir, int type)
{
- enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+ enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+ struct nf_conn_counter *counter = acct->counter;
struct nlattr *nest_count;
+ u64 pkts, bytes;
- nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
+ if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&counter[dir].packets, 0);
+ bytes = atomic64_xchg(&counter[dir].bytes, 0);
+ } else {
+ pkts = atomic64_read(&counter[dir].packets);
+ bytes = atomic64_read(&counter[dir].bytes);
+ }
+
+ nest_count = nla_nest_start(skb, attr | NLA_F_NESTED);
if (!nest_count)
goto nla_put_failure;
@@ -234,24 +244,19 @@ nla_put_failure:
}
static int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
- enum ip_conntrack_dir dir, int type)
+ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type)
{
- struct nf_conn_counter *acct;
- u64 pkts, bytes;
+ struct nf_conn_acct *acct = nf_conn_acct_find(ct);
- acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
- if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
- pkts = atomic64_xchg(&acct[dir].packets, 0);
- bytes = atomic64_xchg(&acct[dir].bytes, 0);
- } else {
- pkts = atomic64_read(&acct[dir].packets);
- bytes = atomic64_read(&acct[dir].bytes);
- }
- return dump_counters(skb, pkts, bytes, dir);
+ if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0)
+ return -1;
+ if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0)
+ return -1;
+
+ return 0;
}
static int
@@ -488,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||
+ ctnetlink_dump_acct(skb, ct, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
@@ -530,7 +534,7 @@ ctnetlink_proto_size(const struct nf_conn *ct)
}
static inline size_t
-ctnetlink_counters_size(const struct nf_conn *ct)
+ctnetlink_acct_size(const struct nf_conn *ct)
{
if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
return 0;
@@ -579,7 +583,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
- + ctnetlink_counters_size(ct)
+ + ctnetlink_acct_size(ct)
+ ctnetlink_timestamp_size(ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ nla_total_size(0) /* CTA_PROTOINFO */
@@ -673,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (events & (1 << IPCT_DESTROY)) {
- if (ctnetlink_dump_counters(skb, ct,
- IP_CT_DIR_ORIGINAL, type) < 0 ||
- ctnetlink_dump_counters(skb, ct,
- IP_CT_DIR_REPLY, type) < 0 ||
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
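
Aside: for IPCTNL_MSG_CT_GET_CTRZERO the counter is read and zeroed in a single atomic exchange, so increments racing in between the read and the reset cannot be lost. Minimal C11 illustration:

#include <stdatomic.h>

/* Fetch the current count and reset it atomically; a plain read
 * followed by a store could drop concurrent increments. */
static unsigned long long read_and_zero(atomic_ullong *ctr)
{
	return atomic_exchange(ctr, 0);
}
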
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 3deec997be89..61a3c927e63c 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,26 +13,20 @@
/* core.c */
-extern unsigned int nf_iterate(struct list_head *head,
- struct sk_buff *skb,
- unsigned int hook,
- const struct net_device *indev,
- const struct net_device *outdev,
- struct nf_hook_ops **elemp,
- int (*okfn)(struct sk_buff *),
- int hook_thresh);
+unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
+ unsigned int hook, const struct net_device *indev,
+ const struct net_device *outdev,
+ struct nf_hook_ops **elemp,
+ int (*okfn)(struct sk_buff *), int hook_thresh);
/* nf_queue.c */
-extern int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
- u_int8_t pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- unsigned int queuenum);
-extern int __init netfilter_queue_init(void);
+int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
+ unsigned int hook, struct net_device *indev,
+ struct net_device *outdev, int (*okfn)(struct sk_buff *),
+ unsigned int queuenum);
+int __init netfilter_queue_init(void);
/* nf_log.c */
-extern int __init netfilter_log_init(void);
+int __init netfilter_log_init(void);
#endif
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8b03028cca69..227aa11e8409 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table,
return NULL;
}
- table->private = newinfo;
newinfo->initial_entries = private->initial_entries;
+ /*
+ * Ensure contents of newinfo are visible before assigning to
+ * private.
+ */
+ smp_wmb();
+ table->private = newinfo;
/*
* Even though table entries have now been swapped, other CPU's
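
Aside: the smp_wmb() guarantees that a reader who sees the new table->private pointer also sees a fully initialised newinfo. In portable C11 the same publish step is a release store paired with an acquire load on the reader side; a hedged sketch:

#include <stdatomic.h>

struct table_info { int initial_entries; /* ... */ };

static _Atomic(struct table_info *) table_private;

/* Writer: after the caller has filled in *newinfo, publish it with
 * release ordering so its contents become visible no later than the
 * pointer itself. */
static void publish(struct table_info *newinfo)
{
	atomic_store_explicit(&table_private, newinfo,
			      memory_order_release);
}

/* Reader: the acquire load pairs with the release store above. */
static struct table_info *lookup(void)
{
	return atomic_load_explicit(&table_private,
				    memory_order_acquire);
}
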
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 1e2fae32f81b..ed00fef58996 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_NFQ_info_v3 *info = par->targinfo;
u32 queue = info->queuenum;
+ int ret;
if (info->queues_total > 1) {
if (info->flags & NFQ_FLAG_CPU_FANOUT) {
@@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
queue = nfqueue_hash(skb, par);
}
- return NF_QUEUE_NR(queue);
+ ret = NF_QUEUE_NR(queue);
+ if (info->flags & NFQ_FLAG_BYPASS)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+ return ret;
}
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
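
Aside: an NF_QUEUE verdict packs the queue number into the upper 16 bits and carries flags in between; OR-ing in the bypass flag tells the core to accept the packet when no userspace listener is bound instead of dropping it. Sketch of the encoding, with constants mirroring the uapi layout (treat them as illustrative):

#include <stdint.h>

#define NF_QUEUE			3
#define NF_VERDICT_QBITS		16
#define NF_VERDICT_FLAG_QUEUE_BYPASS	0x00008000

/* Verdict word: queue number in the top bits, verdict code in the
 * low byte, flag bits in between. */
static uint32_t queue_verdict(uint16_t queue, int bypass)
{
	uint32_t v = ((uint32_t)queue << NF_VERDICT_QBITS) | NF_QUEUE;

	if (bypass)
		v |= NF_VERDICT_FLAG_QUEUE_BYPASS;
	return v;
}
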
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index e595e07a759b..1e634615ab9d 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
u_int64_t what = 0; /* initialize to make gcc happy */
u_int64_t bytes = 0;
u_int64_t pkts = 0;
+ const struct nf_conn_acct *acct;
const struct nf_conn_counter *counters;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return false;
- counters = nf_conn_acct_find(ct);
- if (!counters)
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
return false;
+ counters = acct->counter;
switch (sinfo->what) {
case XT_CONNBYTES_PKTS:
switch (sinfo->direction) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 3dd0e374bc2b..1ba67931eb1b 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,15 +35,6 @@
#include <net/netfilter/nf_conntrack.h>
#endif
-static void
-xt_socket_put_sk(struct sock *sk)
-{
- if (sk->sk_state == TCP_TIME_WAIT)
- inet_twsk_put(inet_twsk(sk));
- else
- sock_put(sk);
-}
-
static int
extract_icmp4_fields(const struct sk_buff *skb,
u8 *protocol,
@@ -216,7 +207,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
inet_twsk(sk)->tw_transparent));
if (sk != skb->sk)
- xt_socket_put_sk(sk);
+ sock_gen_put(sk);
if (wildcard || !transparent)
sk = NULL;
@@ -381,7 +372,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
inet_twsk(sk)->tw_transparent));
if (sk != skb->sk)
- xt_socket_put_sk(sk);
+ sock_gen_put(sk);
if (wildcard || !transparent)
sk = NULL;
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index ea36e99089af..3591cb5dae91 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -9,6 +9,8 @@ openvswitch-y := \
datapath.o \
dp_notify.o \
flow.o \
+ flow_netlink.o \
+ flow_table.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2aa13bd7f2b2..1408adc2a2a7 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,14 +55,10 @@
#include "datapath.h"
#include "flow.h"
+#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
-
-#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-static void rehash_flow_table(struct work_struct *work);
-static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
-
int ovs_net_id __read_mostly;
static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
@@ -165,7 +161,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
+ ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -225,6 +221,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
struct dp_stats_percpu *stats;
struct sw_flow_key key;
u64 *stats_counter;
+ u32 n_mask_hit;
int error;
stats = this_cpu_ptr(dp->stats_percpu);
@@ -237,7 +234,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
}
/* Look up flow. */
- flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
+ flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -262,6 +259,7 @@ out:
/* Update datapath statistics. */
u64_stats_update_begin(&stats->sync);
(*stats_counter)++;
+ stats->n_mask_hit += n_mask_hit;
u64_stats_update_end(&stats->sync);
}
@@ -435,7 +433,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
+ ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
@@ -455,398 +453,6 @@ out:
return err;
}
-/* Called with ovs_mutex. */
-static int flush_flows(struct datapath *dp)
-{
- struct flow_table *old_table;
- struct flow_table *new_table;
-
- old_table = ovsl_dereference(dp->table);
- new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
- if (!new_table)
- return -ENOMEM;
-
- rcu_assign_pointer(dp->table, new_table);
-
- ovs_flow_tbl_destroy(old_table, true);
- return 0;
-}
-
-static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
-{
-
- struct sw_flow_actions *acts;
- int new_acts_size;
- int req_size = NLA_ALIGN(attr_len);
- int next_offset = offsetof(struct sw_flow_actions, actions) +
- (*sfa)->actions_len;
-
- if (req_size <= (ksize(*sfa) - next_offset))
- goto out;
-
- new_acts_size = ksize(*sfa) * 2;
-
- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
- return ERR_PTR(-EMSGSIZE);
- new_acts_size = MAX_ACTIONS_BUFSIZE;
- }
-
- acts = ovs_flow_actions_alloc(new_acts_size);
- if (IS_ERR(acts))
- return (void *)acts;
-
- memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
- acts->actions_len = (*sfa)->actions_len;
- kfree(*sfa);
- *sfa = acts;
-
-out:
- (*sfa)->actions_len += req_size;
- return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
-}
-
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
-{
- struct nlattr *a;
-
- a = reserve_sfa_size(sfa, nla_attr_size(len));
- if (IS_ERR(a))
- return PTR_ERR(a);
-
- a->nla_type = attrtype;
- a->nla_len = nla_attr_size(len);
-
- if (data)
- memcpy(nla_data(a), data, len);
- memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
-
- return 0;
-}
-
-static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
-{
- int used = (*sfa)->actions_len;
- int err;
-
- err = add_action(sfa, attrtype, NULL, 0);
- if (err)
- return err;
-
- return used;
-}
-
-static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
-{
- struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
-
- a->nla_len = sfa->actions_len - st_offset;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa);
-
-static int validate_and_copy_sample(const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa)
-{
- const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
- const struct nlattr *probability, *actions;
- const struct nlattr *a;
- int rem, start, err, st_acts;
-
- memset(attrs, 0, sizeof(attrs));
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
- return -EINVAL;
- attrs[type] = a;
- }
- if (rem)
- return -EINVAL;
-
- probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
- if (!probability || nla_len(probability) != sizeof(u32))
- return -EINVAL;
-
- actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
- if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
- return -EINVAL;
-
- /* validation done, copy sample action. */
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
- if (start < 0)
- return start;
- err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
- if (err)
- return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
- if (st_acts < 0)
- return st_acts;
-
- err = validate_and_copy_actions(actions, key, depth + 1, sfa);
- if (err)
- return err;
-
- add_nested_action_end(*sfa, st_acts);
- add_nested_action_end(*sfa, start);
-
- return 0;
-}
-
-static int validate_tp_port(const struct sw_flow_key *flow_key)
-{
- if (flow_key->eth.type == htons(ETH_P_IP)) {
- if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
- return 0;
- } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
- if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
- return 0;
- }
-
- return -EINVAL;
-}
-
-static int validate_and_copy_set_tun(const struct nlattr *attr,
- struct sw_flow_actions **sfa)
-{
- struct sw_flow_match match;
- struct sw_flow_key key;
- int err, start;
-
- ovs_match_init(&match, &key, NULL);
- err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
- if (err)
- return err;
-
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
- if (start < 0)
- return start;
-
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
- sizeof(match.key->tun_key));
- add_nested_action_end(*sfa, start);
-
- return err;
-}
-
-static int validate_set(const struct nlattr *a,
- const struct sw_flow_key *flow_key,
- struct sw_flow_actions **sfa,
- bool *set_tun)
-{
- const struct nlattr *ovs_key = nla_data(a);
- int key_type = nla_type(ovs_key);
-
- /* There can be only one key in an action */
- if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
- return -EINVAL;
-
- if (key_type > OVS_KEY_ATTR_MAX ||
- (ovs_key_lens[key_type] != nla_len(ovs_key) &&
- ovs_key_lens[key_type] != -1))
- return -EINVAL;
-
- switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
- case OVS_KEY_ATTR_PRIORITY:
- case OVS_KEY_ATTR_SKB_MARK:
- case OVS_KEY_ATTR_ETHERNET:
- break;
-
- case OVS_KEY_ATTR_TUNNEL:
- *set_tun = true;
- err = validate_and_copy_set_tun(a, sfa);
- if (err)
- return err;
- break;
-
- case OVS_KEY_ATTR_IPV4:
- if (flow_key->eth.type != htons(ETH_P_IP))
- return -EINVAL;
-
- if (!flow_key->ip.proto)
- return -EINVAL;
-
- ipv4_key = nla_data(ovs_key);
- if (ipv4_key->ipv4_proto != flow_key->ip.proto)
- return -EINVAL;
-
- if (ipv4_key->ipv4_frag != flow_key->ip.frag)
- return -EINVAL;
-
- break;
-
- case OVS_KEY_ATTR_IPV6:
- if (flow_key->eth.type != htons(ETH_P_IPV6))
- return -EINVAL;
-
- if (!flow_key->ip.proto)
- return -EINVAL;
-
- ipv6_key = nla_data(ovs_key);
- if (ipv6_key->ipv6_proto != flow_key->ip.proto)
- return -EINVAL;
-
- if (ipv6_key->ipv6_frag != flow_key->ip.frag)
- return -EINVAL;
-
- if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
- return -EINVAL;
-
- break;
-
- case OVS_KEY_ATTR_TCP:
- if (flow_key->ip.proto != IPPROTO_TCP)
- return -EINVAL;
-
- return validate_tp_port(flow_key);
-
- case OVS_KEY_ATTR_UDP:
- if (flow_key->ip.proto != IPPROTO_UDP)
- return -EINVAL;
-
- return validate_tp_port(flow_key);
-
- case OVS_KEY_ATTR_SCTP:
- if (flow_key->ip.proto != IPPROTO_SCTP)
- return -EINVAL;
-
- return validate_tp_port(flow_key);
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int validate_userspace(const struct nlattr *attr)
-{
- static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
- [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
- [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
- };
- struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
- int error;
-
- error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
- attr, userspace_policy);
- if (error)
- return error;
-
- if (!a[OVS_USERSPACE_ATTR_PID] ||
- !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
- return -EINVAL;
-
- return 0;
-}
-
-static int copy_action(const struct nlattr *from,
- struct sw_flow_actions **sfa)
-{
- int totlen = NLA_ALIGN(from->nla_len);
- struct nlattr *to;
-
- to = reserve_sfa_size(sfa, from->nla_len);
- if (IS_ERR(to))
- return PTR_ERR(to);
-
- memcpy(to, from, totlen);
- return 0;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key,
- int depth,
- struct sw_flow_actions **sfa)
-{
- const struct nlattr *a;
- int rem, err;
-
- if (depth >= SAMPLE_ACTION_DEPTH)
- return -EOVERFLOW;
-
- nla_for_each_nested(a, attr, rem) {
- /* Expected argument lengths, (u32)-1 for variable length. */
- static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
- [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
- [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
- [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
- [OVS_ACTION_ATTR_POP_VLAN] = 0,
- [OVS_ACTION_ATTR_SET] = (u32)-1,
- [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
- };
- const struct ovs_action_push_vlan *vlan;
- int type = nla_type(a);
- bool skip_copy;
-
- if (type > OVS_ACTION_ATTR_MAX ||
- (action_lens[type] != nla_len(a) &&
- action_lens[type] != (u32)-1))
- return -EINVAL;
-
- skip_copy = false;
- switch (type) {
- case OVS_ACTION_ATTR_UNSPEC:
- return -EINVAL;
-
- case OVS_ACTION_ATTR_USERSPACE:
- err = validate_userspace(a);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_OUTPUT:
- if (nla_get_u32(a) >= DP_MAX_PORTS)
- return -EINVAL;
- break;
-
-
- case OVS_ACTION_ATTR_POP_VLAN:
- break;
-
- case OVS_ACTION_ATTR_PUSH_VLAN:
- vlan = nla_data(a);
- if (vlan->vlan_tpid != htons(ETH_P_8021Q))
- return -EINVAL;
- if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
- return -EINVAL;
- break;
-
- case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa, &skip_copy);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa);
- if (err)
- return err;
- skip_copy = true;
- break;
-
- default:
- return -EINVAL;
- }
- if (!skip_copy) {
- err = copy_action(a, sfa);
- if (err)
- return err;
- }
- }
-
- if (rem > 0)
- return -EINVAL;
-
- return 0;
-}
-
static void clear_stats(struct sw_flow *flow)
{
flow->used = 0;
@@ -902,15 +508,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_flow_free;
- err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
+ err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
if (err)
goto err_flow_free;
- acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
err = PTR_ERR(acts);
if (IS_ERR(acts))
goto err_flow_free;
- err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
+ err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+ &flow->key, 0, &acts);
rcu_assign_pointer(flow->sf_acts, acts);
if (err)
goto err_flow_free;
@@ -958,15 +565,18 @@ static struct genl_ops dp_packet_genl_ops[] = {
}
};
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+ struct ovs_dp_megaflow_stats *mega_stats)
{
- struct flow_table *table;
int i;
- table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
- stats->n_flows = ovs_flow_tbl_count(table);
+ memset(mega_stats, 0, sizeof(*mega_stats));
+
+ stats->n_flows = ovs_flow_tbl_count(&dp->table);
+ mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
stats->n_hit = stats->n_missed = stats->n_lost = 0;
+
for_each_possible_cpu(i) {
const struct dp_stats_percpu *percpu_stats;
struct dp_stats_percpu local_stats;
@@ -982,6 +592,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
stats->n_hit += local_stats.n_hit;
stats->n_missed += local_stats.n_missed;
stats->n_lost += local_stats.n_lost;
+ mega_stats->n_mask_hit += local_stats.n_mask_hit;
}
}
@@ -1005,100 +616,6 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
.name = OVS_FLOW_MCGROUP
};
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
-{
- const struct nlattr *a;
- struct nlattr *start;
- int err = 0, rem;
-
- start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
- if (!start)
- return -EMSGSIZE;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- struct nlattr *st_sample;
-
- switch (type) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
- return -EMSGSIZE;
- break;
- case OVS_SAMPLE_ATTR_ACTIONS:
- st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!st_sample)
- return -EMSGSIZE;
- err = actions_to_attr(nla_data(a), nla_len(a), skb);
- if (err)
- return err;
- nla_nest_end(skb, st_sample);
- break;
- }
- }
-
- nla_nest_end(skb, start);
- return err;
-}
-
-static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
-{
- const struct nlattr *ovs_key = nla_data(a);
- int key_type = nla_type(ovs_key);
- struct nlattr *start;
- int err;
-
- switch (key_type) {
- case OVS_KEY_ATTR_IPV4_TUNNEL:
- start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
- if (!start)
- return -EMSGSIZE;
-
- err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
- nla_data(ovs_key));
- if (err)
- return err;
- nla_nest_end(skb, start);
- break;
- default:
- if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
- return -EMSGSIZE;
- break;
- }
-
- return 0;
-}
-
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
-{
- const struct nlattr *a;
- int rem, err;
-
- nla_for_each_attr(a, attr, len, rem) {
- int type = nla_type(a);
-
- switch (type) {
- case OVS_ACTION_ATTR_SET:
- err = set_action_to_attr(a, skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SAMPLE:
- err = sample_action_to_attr(a, skb);
- if (err)
- return err;
- break;
- default:
- if (nla_put(skb, type, nla_len(a), nla_data(a)))
- return -EMSGSIZE;
- break;
- }
- }
-
- return 0;
-}
-
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -1135,8 +652,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
if (!nla)
goto nla_put_failure;
- err = ovs_flow_to_nlattrs(&flow->unmasked_key,
- &flow->unmasked_key, skb);
+ err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
if (err)
goto error;
nla_nest_end(skb, nla);
@@ -1145,7 +661,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
if (!nla)
goto nla_put_failure;
- err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
+ err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
if (err)
goto error;
@@ -1155,7 +671,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
used = flow->used;
stats.n_packets = flow->packet_count;
stats.n_bytes = flow->byte_count;
- tcp_flags = flow->tcp_flags;
+ tcp_flags = (u8)ntohs(flow->tcp_flags);
spin_unlock_bh(&flow->lock);
if (used &&
@@ -1188,7 +704,8 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
sf_acts = rcu_dereference_check(flow->sf_acts,
lockdep_ovsl_is_held());
- err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
+ err = ovs_nla_put_actions(sf_acts->actions,
+ sf_acts->actions_len, skb);
if (!err)
nla_nest_end(skb, start);
else {
@@ -1234,6 +751,14 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
return skb;
}
+static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl,
+ const struct sw_flow_key *key)
+{
+ u32 __always_unused n_mask_hit;
+
+ return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit);
+}
+
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1243,7 +768,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
- struct flow_table *table;
struct sw_flow_actions *acts = NULL;
struct sw_flow_match match;
int error;
@@ -1254,21 +778,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
ovs_match_init(&match, &key, &mask);
- error = ovs_match_from_nlattrs(&match,
- a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ error = ovs_nla_get_match(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
error = PTR_ERR(acts);
if (IS_ERR(acts))
goto error;
- ovs_flow_key_mask(&masked_key, &key, &mask);
- error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
- &masked_key, 0, &acts);
+ ovs_flow_mask_key(&masked_key, &key, &mask);
+ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree;
@@ -1284,29 +808,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (!dp)
goto err_unlock_ovs;
- table = ovsl_dereference(dp->table);
-
/* Check if this is a duplicate flow */
- flow = ovs_flow_lookup(table, &key);
+ flow = __ovs_flow_tbl_lookup(&dp->table, &key);
if (!flow) {
- struct sw_flow_mask *mask_p;
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
goto err_unlock_ovs;
- /* Expand table, if necessary, to make room. */
- if (ovs_flow_tbl_need_to_expand(table)) {
- struct flow_table *new_table;
-
- new_table = ovs_flow_tbl_expand(table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_destroy(table, true);
- table = ovsl_dereference(dp->table);
- }
- }
-
/* Allocate flow. */
flow = ovs_flow_alloc();
if (IS_ERR(flow)) {
@@ -1317,25 +826,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
flow->key = masked_key;
flow->unmasked_key = key;
-
- /* Make sure mask is unique in the system */
- mask_p = ovs_sw_flow_mask_find(table, &mask);
- if (!mask_p) {
- /* Allocate a new mask if none exists. */
- mask_p = ovs_sw_flow_mask_alloc();
- if (!mask_p)
- goto err_flow_free;
- mask_p->key = mask.key;
- mask_p->range = mask.range;
- ovs_sw_flow_mask_insert(table, mask_p);
- }
-
- ovs_sw_flow_mask_add_ref(mask_p);
- flow->mask = mask_p;
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- ovs_flow_insert(table, flow);
+ error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
+ if (error) {
+ acts = NULL;
+ goto err_flow_free;
+ }
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1356,7 +854,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
/* The unmasked key has to be the same for flow updates. */
error = -EINVAL;
- if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
+ if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
goto err_unlock_ovs;
}
@@ -1364,7 +862,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
- ovs_flow_deferred_free_acts(old_acts);
+ ovs_nla_free_flow_actions(old_acts);
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1403,7 +901,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct sw_flow *flow;
struct datapath *dp;
- struct flow_table *table;
struct sw_flow_match match;
int err;
@@ -1413,7 +910,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -1424,9 +921,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- table = ovsl_dereference(dp->table);
- flow = ovs_flow_lookup_unmasked_key(table, &match);
- if (!flow) {
+ flow = __ovs_flow_tbl_lookup(&dp->table, &key);
+ if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
err = -ENOENT;
goto unlock;
}
@@ -1453,7 +949,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct sw_flow *flow;
struct datapath *dp;
- struct flow_table *table;
struct sw_flow_match match;
int err;
@@ -1465,18 +960,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
if (!a[OVS_FLOW_ATTR_KEY]) {
- err = flush_flows(dp);
+ err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
ovs_match_init(&match, &key, NULL);
- err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
- table = ovsl_dereference(dp->table);
- flow = ovs_flow_lookup_unmasked_key(table, &match);
- if (!flow) {
+ flow = __ovs_flow_tbl_lookup(&dp->table, &key);
+ if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
err = -ENOENT;
goto unlock;
}
@@ -1487,7 +981,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- ovs_flow_remove(table, flow);
+ ovs_flow_tbl_remove(&dp->table, flow);
err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_FLOW_CMD_DEL);
@@ -1506,8 +1000,8 @@ unlock:
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+ struct table_instance *ti;
struct datapath *dp;
- struct flow_table *table;
rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1516,14 +1010,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
return -ENODEV;
}
- table = rcu_dereference(dp->table);
+ ti = rcu_dereference(dp->table.ti);
for (;;) {
struct sw_flow *flow;
u32 bucket, obj;
bucket = cb->args[0];
obj = cb->args[1];
- flow = ovs_flow_dump_next(table, &bucket, &obj);
+ flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
if (!flow)
break;
@@ -1589,6 +1083,7 @@ static size_t ovs_dp_cmd_msg_size(void)
msgsize += nla_total_size(IFNAMSIZ);
msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
+ msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
return msgsize;
}
@@ -1598,6 +1093,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
{
struct ovs_header *ovs_header;
struct ovs_dp_stats dp_stats;
+ struct ovs_dp_megaflow_stats dp_megaflow_stats;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
@@ -1613,8 +1109,14 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
if (err)
goto nla_put_failure;
- get_dp_stats(dp, &dp_stats);
- if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
+ get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
+ if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
+ &dp_stats))
+ goto nla_put_failure;
+
+ if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
+ sizeof(struct ovs_dp_megaflow_stats),
+ &dp_megaflow_stats))
goto nla_put_failure;
return genlmsg_end(skb, ovs_header);
@@ -1687,9 +1189,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
/* Allocate table. */
- err = -ENOMEM;
- rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
- if (!dp->table)
+ err = ovs_flow_tbl_init(&dp->table);
+ if (err)
goto err_free_dp;
dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
@@ -1699,7 +1200,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
- GFP_KERNEL);
+ GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
@@ -1746,7 +1247,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
+ ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -2336,32 +1837,6 @@ error:
return err;
}
-static void rehash_flow_table(struct work_struct *work)
-{
- struct datapath *dp;
- struct net *net;
-
- ovs_lock();
- rtnl_lock();
- for_each_net(net) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
- struct flow_table *old_table = ovsl_dereference(dp->table);
- struct flow_table *new_table;
-
- new_table = ovs_flow_tbl_rehash(old_table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_destroy(old_table, true);
- }
- }
- }
- rtnl_unlock();
- ovs_unlock();
- schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
-}
-
static int __net_init ovs_init_net(struct net *net)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
@@ -2419,8 +1894,6 @@ static int __init dp_init(void)
if (err < 0)
goto error_unreg_notifier;
- schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
-
return 0;
error_unreg_notifier:
@@ -2437,7 +1910,6 @@ error:
static void dp_cleanup(void)
{
- cancel_delayed_work_sync(&rehash_flow_wq);
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4d109c176ef3..d3d14a58aa91 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,6 +27,7 @@
#include <linux/u64_stats_sync.h>
#include "flow.h"
+#include "flow_table.h"
#include "vport.h"
#define DP_MAX_PORTS USHRT_MAX
@@ -45,11 +46,15 @@
* @n_lost: Number of received packets that had no matching flow in the flow
* table that could not be sent to userspace (normally due to an overflow in
* one of the datapath's queues).
+ * @n_mask_hit: Number of masks looked up for flow match.
+ * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked
+ * up per packet.
*/
struct dp_stats_percpu {
u64 n_hit;
u64 n_missed;
u64 n_lost;
+ u64 n_mask_hit;
struct u64_stats_sync sync;
};
@@ -57,7 +62,7 @@ struct dp_stats_percpu {
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
- * @table: Current flow table. Protected by ovs_mutex and RCU.
+ * @table: flow table.
* @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
@@ -71,7 +76,7 @@ struct datapath {
struct list_head list_node;
/* Flow table. */
- struct flow_table __rcu *table;
+ struct flow_table table;
/* Switch ports. */
struct hlist_head *ports;
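
Aside: per the kernel-doc above, the average number of megaflow masks probed per packet is n_mask_hit divided by the total lookups (hits plus misses). The arithmetic, guarded for an idle datapath:

#include <stdint.h>

static double avg_masks_per_packet(uint64_t n_mask_hit,
				   uint64_t n_hit, uint64_t n_missed)
{
	uint64_t lookups = n_hit + n_missed;

	return lookups ? (double)n_mask_hit / lookups : 0.0;
}
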
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index c3235675f359..5c2dab276109 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
continue;
netdev_vport = netdev_vport_priv(vport);
- if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
- netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+ if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH))
dp_detach_port_notify(vport);
}
}
@@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
return NOTIFY_DONE;
if (event == NETDEV_UNREGISTER) {
+ /* upper_dev_unlink and decrement promisc immediately */
+ ovs_netdev_detach_dev(vport);
+
+ /* schedule vport destroy, dev_put and genl notification */
ovs_net = net_generic(dev_net(dev), ovs_net_id);
queue_work(system_wq, &ovs_net->dp_notify_work);
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 410db90db73d..b409f5279601 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -45,202 +45,38 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
-static struct kmem_cache *flow_cache;
-
-static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
- struct sw_flow_key_range *range, u8 val);
-
-static void update_range__(struct sw_flow_match *match,
- size_t offset, size_t size, bool is_mask)
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
- struct sw_flow_key_range *range = NULL;
- size_t start = rounddown(offset, sizeof(long));
- size_t end = roundup(offset + size, sizeof(long));
-
- if (!is_mask)
- range = &match->range;
- else if (match->mask)
- range = &match->mask->range;
-
- if (!range)
- return;
-
- if (range->start == range->end) {
- range->start = start;
- range->end = end;
- return;
- }
-
- if (range->start > start)
- range->start = start;
+ struct timespec cur_ts;
+ u64 cur_ms, idle_ms;
- if (range->end < end)
- range->end = end;
-}
+ ktime_get_ts(&cur_ts);
+ idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+ cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ cur_ts.tv_nsec / NSEC_PER_MSEC;
-#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
- do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- (match)->mask->key.field = value; \
- } else { \
- (match)->key->field = value; \
- } \
- } while (0)
-
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
- do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- len, is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- memcpy(&(match)->mask->key.field, value_p, len);\
- } else { \
- memcpy(&(match)->key->field, value_p, len); \
- } \
- } while (0)
-
-static u16 range_n_bytes(const struct sw_flow_key_range *range)
-{
- return range->end - range->start;
+ return cur_ms - idle_ms;
}
-void ovs_match_init(struct sw_flow_match *match,
- struct sw_flow_key *key,
- struct sw_flow_mask *mask)
-{
- memset(match, 0, sizeof(*match));
- match->key = key;
- match->mask = mask;
+#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
- memset(key, 0, sizeof(*key));
-
- if (mask) {
- memset(&mask->key, 0, sizeof(mask->key));
- mask->range.start = mask->range.end = 0;
- }
-}
-
-static bool ovs_match_validate(const struct sw_flow_match *match,
- u64 key_attrs, u64 mask_attrs)
+void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
- u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
- u64 mask_allowed = key_attrs; /* At most allow all key attributes */
-
- /* The following mask attributes allowed only if they
- * pass the validation tests. */
- mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
- | (1 << OVS_KEY_ATTR_IPV6)
- | (1 << OVS_KEY_ATTR_TCP)
- | (1 << OVS_KEY_ATTR_UDP)
- | (1 << OVS_KEY_ATTR_SCTP)
- | (1 << OVS_KEY_ATTR_ICMP)
- | (1 << OVS_KEY_ATTR_ICMPV6)
- | (1 << OVS_KEY_ATTR_ARP)
- | (1 << OVS_KEY_ATTR_ND));
-
- /* Always allowed mask fields. */
- mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
- | (1 << OVS_KEY_ATTR_IN_PORT)
- | (1 << OVS_KEY_ATTR_ETHERTYPE));
-
- /* Check key attributes. */
- if (match->key->eth.type == htons(ETH_P_ARP)
- || match->key->eth.type == htons(ETH_P_RARP)) {
- key_expected |= 1 << OVS_KEY_ATTR_ARP;
- if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
- mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
- }
+ __be16 tcp_flags = 0;
- if (match->key->eth.type == htons(ETH_P_IP)) {
- key_expected |= 1 << OVS_KEY_ATTR_IPV4;
- if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
- mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
-
- if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
- if (match->key->ip.proto == IPPROTO_UDP) {
- key_expected |= 1 << OVS_KEY_ATTR_UDP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
- }
-
- if (match->key->ip.proto == IPPROTO_SCTP) {
- key_expected |= 1 << OVS_KEY_ATTR_SCTP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
- }
-
- if (match->key->ip.proto == IPPROTO_TCP) {
- key_expected |= 1 << OVS_KEY_ATTR_TCP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
- }
-
- if (match->key->ip.proto == IPPROTO_ICMP) {
- key_expected |= 1 << OVS_KEY_ATTR_ICMP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
- }
- }
- }
-
- if (match->key->eth.type == htons(ETH_P_IPV6)) {
- key_expected |= 1 << OVS_KEY_ATTR_IPV6;
- if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
- mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
-
- if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
- if (match->key->ip.proto == IPPROTO_UDP) {
- key_expected |= 1 << OVS_KEY_ATTR_UDP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
- }
-
- if (match->key->ip.proto == IPPROTO_SCTP) {
- key_expected |= 1 << OVS_KEY_ATTR_SCTP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
- }
-
- if (match->key->ip.proto == IPPROTO_TCP) {
- key_expected |= 1 << OVS_KEY_ATTR_TCP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
- }
-
- if (match->key->ip.proto == IPPROTO_ICMPV6) {
- key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
-
- if (match->key->ipv6.tp.src ==
- htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
- key_expected |= 1 << OVS_KEY_ATTR_ND;
- if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
- mask_allowed |= 1 << OVS_KEY_ATTR_ND;
- }
- }
- }
- }
-
- if ((key_attrs & key_expected) != key_expected) {
- /* Key attributes check failed. */
- OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
- key_attrs, key_expected);
- return false;
- }
-
- if ((mask_attrs & mask_allowed) != mask_attrs) {
- /* Mask attributes check failed. */
- OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
- mask_attrs, mask_allowed);
- return false;
+ if ((flow->key.eth.type == htons(ETH_P_IP) ||
+ flow->key.eth.type == htons(ETH_P_IPV6)) &&
+ flow->key.ip.proto == IPPROTO_TCP &&
+ likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
+ tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
}
- return true;
+ spin_lock(&flow->lock);
+ flow->used = jiffies;
+ flow->packet_count++;
+ flow->byte_count += skb->len;
+ flow->tcp_flags |= tcp_flags;
+ spin_unlock(&flow->lock);
}
static int check_header(struct sk_buff *skb, int len)
@@ -311,19 +147,6 @@ static bool icmphdr_ok(struct sk_buff *skb)
sizeof(struct icmphdr));
}
-u64 ovs_flow_used_time(unsigned long flow_jiffies)
-{
- struct timespec cur_ts;
- u64 cur_ms, idle_ms;
-
- ktime_get_ts(&cur_ts);
- idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
- cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
- cur_ts.tv_nsec / NSEC_PER_MSEC;
-
- return cur_ms - idle_ms;
-}
-
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned int nh_ofs = skb_network_offset(skb);
@@ -372,311 +195,6 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
sizeof(struct icmp6hdr));
}
-void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask)
-{
- const long *m = (long *)((u8 *)&mask->key + mask->range.start);
- const long *s = (long *)((u8 *)src + mask->range.start);
- long *d = (long *)((u8 *)dst + mask->range.start);
- int i;
-
- /* The memory outside of 'mask->range' is not set, since
- * further operations on 'dst' only use contents within
- * 'mask->range'.
- */
- for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
- *d++ = *s++ & *m++;
-}
-
-#define TCP_FLAGS_OFFSET 13
-#define TCP_FLAG_MASK 0x3f
-
-void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
-{
- u8 tcp_flags = 0;
-
- if ((flow->key.eth.type == htons(ETH_P_IP) ||
- flow->key.eth.type == htons(ETH_P_IPV6)) &&
- flow->key.ip.proto == IPPROTO_TCP &&
- likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
- u8 *tcp = (u8 *)tcp_hdr(skb);
- tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
- }
-
- spin_lock(&flow->lock);
- flow->used = jiffies;
- flow->packet_count++;
- flow->byte_count += skb->len;
- flow->tcp_flags |= tcp_flags;
- spin_unlock(&flow->lock);
-}
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int size)
-{
- struct sw_flow_actions *sfa;
-
- if (size > MAX_ACTIONS_BUFSIZE)
- return ERR_PTR(-EINVAL);
-
- sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
- if (!sfa)
- return ERR_PTR(-ENOMEM);
-
- sfa->actions_len = 0;
- return sfa;
-}
-
-struct sw_flow *ovs_flow_alloc(void)
-{
- struct sw_flow *flow;
-
- flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
- if (!flow)
- return ERR_PTR(-ENOMEM);
-
- spin_lock_init(&flow->lock);
- flow->sf_acts = NULL;
- flow->mask = NULL;
-
- return flow;
-}
-
-static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
-{
- hash = jhash_1word(hash, table->hash_seed);
- return flex_array_get(table->buckets,
- (hash & (table->n_buckets - 1)));
-}
-
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
- struct flex_array *buckets;
- int i, err;
-
- buckets = flex_array_alloc(sizeof(struct hlist_head),
- n_buckets, GFP_KERNEL);
- if (!buckets)
- return NULL;
-
- err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
- if (err) {
- flex_array_free(buckets);
- return NULL;
- }
-
- for (i = 0; i < n_buckets; i++)
- INIT_HLIST_HEAD((struct hlist_head *)
- flex_array_get(buckets, i));
-
- return buckets;
-}
-
-static void free_buckets(struct flex_array *buckets)
-{
- flex_array_free(buckets);
-}
-
-static struct flow_table *__flow_tbl_alloc(int new_size)
-{
- struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
-
- if (!table)
- return NULL;
-
- table->buckets = alloc_buckets(new_size);
-
- if (!table->buckets) {
- kfree(table);
- return NULL;
- }
- table->n_buckets = new_size;
- table->count = 0;
- table->node_ver = 0;
- table->keep_flows = false;
- get_random_bytes(&table->hash_seed, sizeof(u32));
- table->mask_list = NULL;
-
- return table;
-}
-
-static void __flow_tbl_destroy(struct flow_table *table)
-{
- int i;
-
- if (table->keep_flows)
- goto skip_flows;
-
- for (i = 0; i < table->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head = flex_array_get(table->buckets, i);
- struct hlist_node *n;
- int ver = table->node_ver;
-
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del(&flow->hash_node[ver]);
- ovs_flow_free(flow, false);
- }
- }
-
- BUG_ON(!list_empty(table->mask_list));
- kfree(table->mask_list);
-
-skip_flows:
- free_buckets(table->buckets);
- kfree(table);
-}
-
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
-{
- struct flow_table *table = __flow_tbl_alloc(new_size);
-
- if (!table)
- return NULL;
-
- table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
- if (!table->mask_list) {
- table->keep_flows = true;
- __flow_tbl_destroy(table);
- return NULL;
- }
- INIT_LIST_HEAD(table->mask_list);
-
- return table;
-}
-
-static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
-{
- struct flow_table *table = container_of(rcu, struct flow_table, rcu);
-
- __flow_tbl_destroy(table);
-}
-
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
-{
- if (!table)
- return;
-
- if (deferred)
- call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
- else
- __flow_tbl_destroy(table);
-}
-
-struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
-{
- struct sw_flow *flow;
- struct hlist_head *head;
- int ver;
- int i;
-
- ver = table->node_ver;
- while (*bucket < table->n_buckets) {
- i = 0;
- head = flex_array_get(table->buckets, *bucket);
- hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
- if (i < *last) {
- i++;
- continue;
- }
- *last = i + 1;
- return flow;
- }
- (*bucket)++;
- *last = 0;
- }
-
- return NULL;
-}
-
-static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
-{
- struct hlist_head *head;
-
- head = find_bucket(table, flow->hash);
- hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
-
- table->count++;
-}
-
-static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
-{
- int old_ver;
- int i;
-
- old_ver = old->node_ver;
- new->node_ver = !old_ver;
-
- /* Insert in new table. */
- for (i = 0; i < old->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head;
-
- head = flex_array_get(old->buckets, i);
-
- hlist_for_each_entry(flow, head, hash_node[old_ver])
- __tbl_insert(new, flow);
- }
-
- new->mask_list = old->mask_list;
- old->keep_flows = true;
-}
-
-static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
-{
- struct flow_table *new_table;
-
- new_table = __flow_tbl_alloc(n_buckets);
- if (!new_table)
- return ERR_PTR(-ENOMEM);
-
- flow_table_copy_flows(table, new_table);
-
- return new_table;
-}
-
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
-{
- return __flow_tbl_rehash(table, table->n_buckets);
-}
-
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
-{
- return __flow_tbl_rehash(table, table->n_buckets * 2);
-}
-
-static void __flow_free(struct sw_flow *flow)
-{
- kfree((struct sf_flow_acts __force *)flow->sf_acts);
- kmem_cache_free(flow_cache, flow);
-}
-
-static void rcu_free_flow_callback(struct rcu_head *rcu)
-{
- struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-
- __flow_free(flow);
-}
-
-void ovs_flow_free(struct sw_flow *flow, bool deferred)
-{
- if (!flow)
- return;
-
- ovs_sw_flow_mask_del_ref(flow->mask, deferred);
-
- if (deferred)
- call_rcu(&flow->rcu, rcu_free_flow_callback);
- else
- __flow_free(flow);
-}
-
-/* Schedules 'sf_acts' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
-{
- kfree_rcu(sf_acts, rcu);
-}
-
static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
struct qtag_prefix {
@@ -910,6 +428,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
+ key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
@@ -978,6 +497,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
+ key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
@@ -1002,1080 +522,3 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
return 0;
}
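
The two hunks above begin recording TCP flags in the new tp.flags key field. As a rough userspace sketch (not the kernel macro itself, whose exact definition is assumed here), TCP_FLAGS_BE16() can be read as taking the 16-bit data-offset/flags word at byte 12 of the TCP header and masking off the data-offset nibble, leaving the 12 flag and reserved bits in network byte order:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	/* Hypothetical stand-in for TCP_FLAGS_BE16(): keep only the low
	 * 12 bits of the doff/flags word, still in network order. */
	static uint16_t tcp_flags_be16(const uint8_t *tcph)
	{
		uint16_t word;

		memcpy(&word, tcph + 12, sizeof(word));
		return word & htons(0x0fff);
	}

	int main(void)
	{
		uint8_t tcph[20] = {0};

		tcph[13] = 0x12;	/* SYN | ACK */
		printf("tp.flags = 0x%04x\n", ntohs(tcp_flags_be16(tcph)));
		return 0;
	}
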
-
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
- int key_end)
-{
- u32 *hash_key = (u32 *)((u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
-
- /* Make sure the number of hash bytes is a multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
-
- return jhash2(hash_key, hash_u32s, 0);
-}
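
ovs_flow_hash() deliberately hashes only the bytes between key_start and key_end, so fields outside the mask's range never influence bucket selection. The same idea in a self-contained form, with a toy FNV-1a standing in for the kernel's jhash2():

	#include <stdio.h>
	#include <stdint.h>

	struct toy_key { uint32_t in_port; uint32_t ip_src; uint32_t ip_dst; };

	/* Toy FNV-1a over [start, end); the kernel hashes u32 words instead. */
	static uint32_t hash_range(const void *key, int start, int end)
	{
		const uint8_t *p = (const uint8_t *)key + start;
		uint32_t h = 2166136261u;

		while (start++ < end) {
			h ^= *p++;
			h *= 16777619u;
		}
		return h;
	}

	int main(void)
	{
		struct toy_key k = { 1, 0x0a000001, 0x0a000002 };

		/* Hash only the bytes the mask covers (here the first 8). */
		printf("hash = %08x\n", hash_range(&k, 0, 8));
		return 0;
	}
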
-
-static int flow_key_start(const struct sw_flow_key *key)
-{
- if (key->tun_key.ipv4_dst)
- return 0;
- else
- return rounddown(offsetof(struct sw_flow_key, phy),
- sizeof(long));
-}
-
-static bool __cmp_key(const struct sw_flow_key *key1,
- const struct sw_flow_key *key2, int key_start, int key_end)
-{
- const long *cp1 = (long *)((u8 *)key1 + key_start);
- const long *cp2 = (long *)((u8 *)key2 + key_start);
- long diffs = 0;
- int i;
-
- for (i = key_start; i < key_end; i += sizeof(long))
- diffs |= *cp1++ ^ *cp2++;
-
- return diffs == 0;
-}
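
__cmp_key() compares the keys a long at a time and OR-accumulates the XOR of each chunk instead of branching on every word; any differing bit leaves diffs non-zero, and there is no data-dependent early exit. The pattern in isolation:

	#include <stdio.h>

	/* Branch-light equality over long-sized chunks, as in __cmp_key().
	 * Assumes len is a multiple of sizeof(long); in the kernel the
	 * __aligned(BITS_PER_LONG/8) flow key layout guarantees this. */
	static int longs_equal(const void *a, const void *b, unsigned long len)
	{
		const long *pa = a, *pb = b;
		long diffs = 0;
		unsigned long i;

		for (i = 0; i < len; i += sizeof(long))
			diffs |= *pa++ ^ *pb++;
		return diffs == 0;
	}

	int main(void)
	{
		long x[4] = {1, 2, 3, 4}, y[4] = {1, 2, 3, 5};

		printf("%d %d\n", longs_equal(x, x, sizeof(x)),
		       longs_equal(x, y, sizeof(y)));
		return 0;
	}
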
-
-static bool __flow_cmp_masked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key, int key_start, int key_end)
-{
- return __cmp_key(&flow->key, key, key_start, key_end);
-}
-
-static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key, int key_start, int key_end)
-{
- return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
-}
-
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key, int key_end)
-{
- int key_start;
- key_start = flow_key_start(key);
-
- return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
-
-}
-
-struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
- struct sw_flow_match *match)
-{
- struct sw_flow_key *unmasked = match->key;
- int key_end = match->range.end;
- struct sw_flow *flow;
-
- flow = ovs_flow_lookup(table, unmasked);
- if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
- flow = NULL;
-
- return flow;
-}
-
-static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
- const struct sw_flow_key *unmasked,
- struct sw_flow_mask *mask)
-{
- struct sw_flow *flow;
- struct hlist_head *head;
- int key_start = mask->range.start;
- int key_end = mask->range.end;
- u32 hash;
- struct sw_flow_key masked_key;
-
- ovs_flow_key_mask(&masked_key, unmasked, mask);
- hash = ovs_flow_hash(&masked_key, key_start, key_end);
- head = find_bucket(table, hash);
- hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
- if (flow->mask == mask &&
- __flow_cmp_masked_key(flow, &masked_key,
- key_start, key_end))
- return flow;
- }
- return NULL;
-}
-
-struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
- const struct sw_flow_key *key)
-{
- struct sw_flow *flow = NULL;
- struct sw_flow_mask *mask;
-
- list_for_each_entry_rcu(mask, tbl->mask_list, list) {
- flow = ovs_masked_flow_lookup(tbl, key, mask);
- if (flow) /* Found */
- break;
- }
-
- return flow;
-}
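
ovs_flow_lookup() tries each mask on the shared list in turn: mask the packet key, hash the masked range, probe that bucket, and stop at the first hit. A compact sketch of the masking step, byte-wise here where ovs_flow_key_mask() works long-at-a-time:

	#include <stdio.h>
	#include <stdint.h>

	/* dst = src & mask over the mask's byte range; bytes outside the
	 * range are left as-is (zeroed by the caller in this sketch). */
	static void key_mask(uint8_t *dst, const uint8_t *src,
			     const uint8_t *mask, unsigned start, unsigned end)
	{
		unsigned i;

		for (i = start; i < end; i++)
			dst[i] = src[i] & mask[i];
	}

	int main(void)
	{
		uint8_t pkt[4]  = { 10, 0, 0, 7 };		/* 10.0.0.7 */
		uint8_t mask[4] = { 0xff, 0xff, 0xff, 0 };	/* /24 */
		uint8_t out[4]  = {0};

		key_mask(out, pkt, mask, 0, sizeof(pkt));
		printf("%d.%d.%d.%d\n", out[0], out[1], out[2], out[3]);
		return 0;
	}
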
-
-
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
-{
- flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
- flow->mask->range.end);
- __tbl_insert(table, flow);
-}
-
-void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
-{
- BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->hash_node[table->node_ver]);
- table->count--;
-}
-
-/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
-const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
- [OVS_KEY_ATTR_ENCAP] = -1,
- [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
- [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
- [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
- [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
- [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
- [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
- [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
- [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
- [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
- [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
- [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
- [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
- [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
- [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
- [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
- [OVS_KEY_ATTR_TUNNEL] = -1,
-};
-
-static bool is_all_zero(const u8 *fp, size_t size)
-{
- int i;
-
- if (!fp)
- return false;
-
- for (i = 0; i < size; i++)
- if (fp[i])
- return false;
-
- return true;
-}
-
-static int __parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[],
- u64 *attrsp, bool nz)
-{
- const struct nlattr *nla;
- u32 attrs;
- int rem;
-
- attrs = *attrsp;
- nla_for_each_nested(nla, attr, rem) {
- u16 type = nla_type(nla);
- int expected_len;
-
- if (type > OVS_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
- type, OVS_KEY_ATTR_MAX);
- return -EINVAL;
- }
-
- if (attrs & (1 << type)) {
- OVS_NLERR("Duplicate key attribute (type %d).\n", type);
- return -EINVAL;
- }
-
- expected_len = ovs_key_lens[type];
- if (nla_len(nla) != expected_len && expected_len != -1) {
- OVS_NLERR("Key attribute has unexpected length (type=%d"
- ", length=%d, expected=%d).\n", type,
- nla_len(nla), expected_len);
- return -EINVAL;
- }
-
- if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
- attrs |= 1 << type;
- a[type] = nla;
- }
- }
- if (rem) {
- OVS_NLERR("Message has %d unknown bytes.\n", rem);
- return -EINVAL;
- }
-
- *attrsp = attrs;
- return 0;
-}
-
-static int parse_flow_mask_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
-{
- return __parse_flow_nlattrs(attr, a, attrsp, true);
-}
-
-static int parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
-{
- return __parse_flow_nlattrs(attr, a, attrsp, false);
-}
-
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask)
-{
- struct nlattr *a;
- int rem;
- bool ttl = false;
- __be16 tun_flags = 0;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
- [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
- [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
- [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
- [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
- [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
- };
-
- if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
- type, OVS_TUNNEL_KEY_ATTR_MAX);
- return -EINVAL;
- }
-
- if (ovs_tunnel_key_lens[type] != nla_len(a)) {
- OVS_NLERR("IPv4 tunnel attribute type has unexpected "
- " length (type=%d, length=%d, expected=%d).\n",
- type, nla_len(a), ovs_tunnel_key_lens[type]);
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_TUNNEL_KEY_ATTR_ID:
- SW_FLOW_KEY_PUT(match, tun_key.tun_id,
- nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
- nla_get_be32(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
- nla_get_be32(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
- nla_get_u8(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
- nla_get_u8(a), is_mask);
- ttl = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
- break;
- case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
- break;
- default:
- return -EINVAL;
- }
- }
-
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
-
- if (rem > 0) {
- OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
- return -EINVAL;
- }
-
- if (!is_mask) {
- if (!match->key->tun_key.ipv4_dst) {
- OVS_NLERR("IPv4 tunnel destination address is zero.\n");
- return -EINVAL;
- }
-
- if (!ttl) {
- OVS_NLERR("IPv4 tunnel TTL not specified.\n");
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output)
-{
- struct nlattr *nla;
-
- nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
- if (!nla)
- return -EMSGSIZE;
-
- if (output->tun_flags & TUNNEL_KEY &&
- nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
- return -EMSGSIZE;
- if (output->ipv4_src &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
- return -EMSGSIZE;
- if (output->ipv4_dst &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
- return -EMSGSIZE;
- if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
- return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
- return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
- return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
- return -EMSGSIZE;
-
- nla_nest_end(skb, nla);
- return 0;
-}
-
-static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask)
-{
- if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
- SW_FLOW_KEY_PUT(match, phy.priority,
- nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
- *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
- }
-
- if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
- u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
-
- if (is_mask)
- in_port = 0xffffffff; /* Always exact match in_port. */
- else if (in_port >= DP_MAX_PORTS)
- return -EINVAL;
-
- SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
- *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
- } else if (!is_mask) {
- SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
- }
-
- if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
- uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
-
- SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
- *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
- }
- if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
- }
- return 0;
-}
-
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask)
-{
- int err;
- u64 orig_attrs = attrs;
-
- err = metadata_from_nlattrs(match, &attrs, a, is_mask);
- if (err)
- return err;
-
- if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
- const struct ovs_key_ethernet *eth_key;
-
- eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
- SW_FLOW_KEY_MEMCPY(match, eth.src,
- eth_key->eth_src, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, eth.dst,
- eth_key->eth_dst, ETH_ALEN, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
- __be16 tci;
-
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- if (!(tci & htons(VLAN_TAG_PRESENT))) {
- if (is_mask)
- OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
- else
- OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
-
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
- } else if (!is_mask)
- SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
-
- if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
- __be16 eth_type;
-
- eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (is_mask) {
- /* Always exact match EtherType. */
- eth_type = htons(0xffff);
- } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
- ntohs(eth_type), ETH_P_802_3_MIN);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- } else if (!is_mask) {
- SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- const struct ovs_key_ipv4 *ipv4_key;
-
- ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
- if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
- ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
- return -EINVAL;
- }
- SW_FLOW_KEY_PUT(match, ip.proto,
- ipv4_key->ipv4_proto, is_mask);
- SW_FLOW_KEY_PUT(match, ip.tos,
- ipv4_key->ipv4_tos, is_mask);
- SW_FLOW_KEY_PUT(match, ip.ttl,
- ipv4_key->ipv4_ttl, is_mask);
- SW_FLOW_KEY_PUT(match, ip.frag,
- ipv4_key->ipv4_frag, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.src,
- ipv4_key->ipv4_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
- ipv4_key->ipv4_dst, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
- const struct ovs_key_ipv6 *ipv6_key;
-
- ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
- if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
- ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
- return -EINVAL;
- }
- SW_FLOW_KEY_PUT(match, ipv6.label,
- ipv6_key->ipv6_label, is_mask);
- SW_FLOW_KEY_PUT(match, ip.proto,
- ipv6_key->ipv6_proto, is_mask);
- SW_FLOW_KEY_PUT(match, ip.tos,
- ipv6_key->ipv6_tclass, is_mask);
- SW_FLOW_KEY_PUT(match, ip.ttl,
- ipv6_key->ipv6_hlimit, is_mask);
- SW_FLOW_KEY_PUT(match, ip.frag,
- ipv6_key->ipv6_frag, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
- ipv6_key->ipv6_src,
- sizeof(match->key->ipv6.addr.src),
- is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
- ipv6_key->ipv6_dst,
- sizeof(match->key->ipv6.addr.dst),
- is_mask);
-
- attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
- const struct ovs_key_arp *arp_key;
-
- arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
- if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
- OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
- arp_key->arp_op);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, ipv4.addr.src,
- arp_key->arp_sip, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
- arp_key->arp_tip, is_mask);
- SW_FLOW_KEY_PUT(match, ip.proto,
- ntohs(arp_key->arp_op), is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
- arp_key->arp_sha, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
- arp_key->arp_tha, ETH_ALEN, is_mask);
-
- attrs &= ~(1 << OVS_KEY_ATTR_ARP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
- const struct ovs_key_tcp *tcp_key;
-
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- tcp_key->tcp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- tcp_key->tcp_dst, is_mask);
- }
- attrs &= ~(1 << OVS_KEY_ATTR_TCP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
- const struct ovs_key_udp *udp_key;
-
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- udp_key->udp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- udp_key->udp_dst, is_mask);
- }
- attrs &= ~(1 << OVS_KEY_ATTR_UDP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
- const struct ovs_key_sctp *sctp_key;
-
- sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- sctp_key->sctp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- sctp_key->sctp_dst, is_mask);
- }
- attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
- const struct ovs_key_icmp *icmp_key;
-
- icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- htons(icmp_key->icmp_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- htons(icmp_key->icmp_code), is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
- const struct ovs_key_icmpv6 *icmpv6_key;
-
- icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- htons(icmpv6_key->icmpv6_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- htons(icmpv6_key->icmpv6_code), is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_ND)) {
- const struct ovs_key_nd *nd_key;
-
- nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
- nd_key->nd_target,
- sizeof(match->key->ipv6.nd.target),
- is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
- nd_key->nd_sll, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
- nd_key->nd_tll, ETH_ALEN, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_ND);
- }
-
- if (attrs != 0)
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
- * mask. In case the 'mask' is NULL, the flow is treated as an exact match
- * flow. Otherwise, it is treated as a wildcarded flow, except in the
- * fields where the mask contains no don't-care bits.
- * @match: receives the extracted flow match information.
- * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence. The fields should be those of the packet that triggered the
- * creation of this flow.
- * @mask: Optional. Netlink attribute holding a nested %OVS_KEY_ATTR_*
- * attribute sequence that specifies the mask of the wildcarded flow.
- */
-int ovs_match_from_nlattrs(struct sw_flow_match *match,
- const struct nlattr *key,
- const struct nlattr *mask)
-{
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- const struct nlattr *encap;
- u64 key_attrs = 0;
- u64 mask_attrs = 0;
- bool encap_valid = false;
- int err;
-
- err = parse_flow_nlattrs(key, a, &key_attrs);
- if (err)
- return err;
-
- if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
- (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
- __be16 tci;
-
- if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
- OVS_NLERR("Invalid Vlan frame.\n");
- return -EINVAL;
- }
-
- key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- encap = a[OVS_KEY_ATTR_ENCAP];
- key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
- encap_valid = true;
-
- if (tci & htons(VLAN_TAG_PRESENT)) {
- err = parse_flow_nlattrs(encap, a, &key_attrs);
- if (err)
- return err;
- } else if (!tci) {
- /* Corner case for truncated 802.1Q header. */
- if (nla_len(encap)) {
- OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
- return -EINVAL;
- }
- } else {
- OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
- return -EINVAL;
- }
- }
-
- err = ovs_key_from_nlattrs(match, key_attrs, a, false);
- if (err)
- return err;
-
- if (mask) {
- err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
- if (err)
- return err;
-
- if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
- __be16 eth_type = 0;
- __be16 tci = 0;
-
- if (!encap_valid) {
- OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
- return -EINVAL;
- }
-
- mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
- if (a[OVS_KEY_ATTR_ETHERTYPE])
- eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
- if (eth_type == htons(0xffff)) {
- mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- encap = a[OVS_KEY_ATTR_ENCAP];
- err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
- } else {
- OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
- ntohs(eth_type));
- return -EINVAL;
- }
-
- if (a[OVS_KEY_ATTR_VLAN])
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
- if (!(tci & htons(VLAN_TAG_PRESENT))) {
- OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
- return -EINVAL;
- }
- }
-
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
- if (err)
- return err;
- } else {
- /* Populate exact match flow's key mask. */
- if (match->mask)
- ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
- }
-
- if (!ovs_match_validate(match, key_attrs, mask_attrs))
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
- * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- *
- * This parses a series of Netlink attributes that form a flow key, which must
- * take the same form accepted by flow_from_nlattrs(), but only enough of it to
- * get the metadata, that is, the parts of the flow key that cannot be
- * extracted from the packet itself.
- */
-
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
- const struct nlattr *attr)
-{
- struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- u64 attrs = 0;
- int err;
- struct sw_flow_match match;
-
- flow->key.phy.in_port = DP_MAX_PORTS;
- flow->key.phy.priority = 0;
- flow->key.phy.skb_mark = 0;
- memset(tun_key, 0, sizeof(flow->key.tun_key));
-
- err = parse_flow_nlattrs(attr, a, &attrs);
- if (err)
- return -EINVAL;
-
- memset(&match, 0, sizeof(match));
- match.key = &flow->key;
-
- err = metadata_from_nlattrs(&match, &attrs, a, false);
- if (err)
- return err;
-
- return 0;
-}
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, struct sk_buff *skb)
-{
- struct ovs_key_ethernet *eth_key;
- struct nlattr *nla, *encap;
- bool is_mask = (swkey != output);
-
- if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
- goto nla_put_failure;
-
- if ((swkey->tun_key.ipv4_dst || is_mask) &&
- ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
- goto nla_put_failure;
-
- if (swkey->phy.in_port == DP_MAX_PORTS) {
- if (is_mask && (output->phy.in_port == 0xffff))
- if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
- goto nla_put_failure;
- } else {
- u16 upper_u16;
- upper_u16 = !is_mask ? 0 : 0xffff;
-
- if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
- (upper_u16 << 16) | output->phy.in_port))
- goto nla_put_failure;
- }
-
- if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
- goto nla_put_failure;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
- if (!nla)
- goto nla_put_failure;
-
- eth_key = nla_data(nla);
- memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
- memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
-
- if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
- __be16 eth_type;
- eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
- nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
- goto nla_put_failure;
- encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
- if (!swkey->eth.tci)
- goto unencap;
- } else
- encap = NULL;
-
- if (swkey->eth.type == htons(ETH_P_802_2)) {
- /*
- * Ethertype 802.2 is represented in Netlink by omitting
- * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute, and by
- * 0xffff in the mask attribute. The Ethertype can also
- * be wildcarded.
- */
- if (is_mask && output->eth.type)
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
- output->eth.type))
- goto nla_put_failure;
- goto unencap;
- }
-
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
- goto nla_put_failure;
-
- if (swkey->eth.type == htons(ETH_P_IP)) {
- struct ovs_key_ipv4 *ipv4_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
- if (!nla)
- goto nla_put_failure;
- ipv4_key = nla_data(nla);
- ipv4_key->ipv4_src = output->ipv4.addr.src;
- ipv4_key->ipv4_dst = output->ipv4.addr.dst;
- ipv4_key->ipv4_proto = output->ip.proto;
- ipv4_key->ipv4_tos = output->ip.tos;
- ipv4_key->ipv4_ttl = output->ip.ttl;
- ipv4_key->ipv4_frag = output->ip.frag;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- struct ovs_key_ipv6 *ipv6_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
- if (!nla)
- goto nla_put_failure;
- ipv6_key = nla_data(nla);
- memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
- sizeof(ipv6_key->ipv6_src));
- memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
- sizeof(ipv6_key->ipv6_dst));
- ipv6_key->ipv6_label = output->ipv6.label;
- ipv6_key->ipv6_proto = output->ip.proto;
- ipv6_key->ipv6_tclass = output->ip.tos;
- ipv6_key->ipv6_hlimit = output->ip.ttl;
- ipv6_key->ipv6_frag = output->ip.frag;
- } else if (swkey->eth.type == htons(ETH_P_ARP) ||
- swkey->eth.type == htons(ETH_P_RARP)) {
- struct ovs_key_arp *arp_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
- if (!nla)
- goto nla_put_failure;
- arp_key = nla_data(nla);
- memset(arp_key, 0, sizeof(struct ovs_key_arp));
- arp_key->arp_sip = output->ipv4.addr.src;
- arp_key->arp_tip = output->ipv4.addr.dst;
- arp_key->arp_op = htons(output->ip.proto);
- memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
- memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
- }
-
- if ((swkey->eth.type == htons(ETH_P_IP) ||
- swkey->eth.type == htons(ETH_P_IPV6)) &&
- swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
-
- if (swkey->ip.proto == IPPROTO_TCP) {
- struct ovs_key_tcp *tcp_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
- if (!nla)
- goto nla_put_failure;
- tcp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = output->ipv4.tp.src;
- tcp_key->tcp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = output->ipv6.tp.src;
- tcp_key->tcp_dst = output->ipv6.tp.dst;
- }
- } else if (swkey->ip.proto == IPPROTO_UDP) {
- struct ovs_key_udp *udp_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
- if (!nla)
- goto nla_put_failure;
- udp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = output->ipv4.tp.src;
- udp_key->udp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = output->ipv6.tp.src;
- udp_key->udp_dst = output->ipv6.tp.dst;
- }
- } else if (swkey->ip.proto == IPPROTO_SCTP) {
- struct ovs_key_sctp *sctp_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
- if (!nla)
- goto nla_put_failure;
- sctp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- sctp_key->sctp_src = swkey->ipv4.tp.src;
- sctp_key->sctp_dst = swkey->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- sctp_key->sctp_src = swkey->ipv6.tp.src;
- sctp_key->sctp_dst = swkey->ipv6.tp.dst;
- }
- } else if (swkey->eth.type == htons(ETH_P_IP) &&
- swkey->ip.proto == IPPROTO_ICMP) {
- struct ovs_key_icmp *icmp_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
- if (!nla)
- goto nla_put_failure;
- icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
- } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
- swkey->ip.proto == IPPROTO_ICMPV6) {
- struct ovs_key_icmpv6 *icmpv6_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
- sizeof(*icmpv6_key));
- if (!nla)
- goto nla_put_failure;
- icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
-
- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
- struct ovs_key_nd *nd_key;
-
- nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
- if (!nla)
- goto nla_put_failure;
- nd_key = nla_data(nla);
- memcpy(nd_key->nd_target, &output->ipv6.nd.target,
- sizeof(nd_key->nd_target));
- memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
- memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
- }
- }
- }
-
-unencap:
- if (encap)
- nla_nest_end(skb, encap);
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-/* Initializes the flow module.
- * Returns zero if successful or a negative error code. */
-int ovs_flow_init(void)
-{
- BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
- BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
-
- flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
- 0, NULL);
- if (flow_cache == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Uninitializes the flow module. */
-void ovs_flow_exit(void)
-{
- kmem_cache_destroy(flow_cache);
-}
-
-struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
-{
- struct sw_flow_mask *mask;
-
- mask = kmalloc(sizeof(*mask), GFP_KERNEL);
- if (mask)
- mask->ref_count = 0;
-
- return mask;
-}
-
-void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
-{
- mask->ref_count++;
-}
-
-void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
-{
- if (!mask)
- return;
-
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
-}
-
-static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
- const struct sw_flow_mask *b)
-{
- u8 *a_ = (u8 *)&a->key + a->range.start;
- u8 *b_ = (u8 *)&b->key + b->range.start;
-
- return (a->range.end == b->range.end)
- && (a->range.start == b->range.start)
- && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
-}
-
-struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
- const struct sw_flow_mask *mask)
-{
- struct list_head *ml;
-
- list_for_each(ml, tbl->mask_list) {
- struct sw_flow_mask *m;
- m = container_of(ml, struct sw_flow_mask, list);
- if (ovs_sw_flow_mask_equal(mask, m))
- return m;
- }
-
- return NULL;
-}
-
-/**
- * Add a new mask to the mask list.
- * The caller needs to make sure that 'mask' is not the same
- * as any mask that is already on the list.
- */
-void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- list_add_rcu(&mask->list, tbl->mask_list);
-}
-
-/**
- * Set the bytes of the mask key covered by 'range' to 'val'.
- */
-static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
- struct sw_flow_key_range *range, u8 val)
-{
- u8 *m = (u8 *)&mask->key + range->start;
-
- mask->range = *range;
- memset(m, val, range_n_bytes(range));
-}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 212fbf7510c4..1510f51dbf74 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -33,14 +33,6 @@
#include <net/inet_ecn.h>
struct sk_buff;
-struct sw_flow_mask;
-struct flow_table;
-
-struct sw_flow_actions {
- struct rcu_head rcu;
- u32 actions_len;
- struct nlattr actions[];
-};
/* Used to memset ovs_key_ipv4_tunnel padding. */
#define OVS_TUNNEL_KEY_SIZE \
@@ -101,6 +93,7 @@ struct sw_flow_key {
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
+ __be16 flags; /* TCP flags. */
} tp;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
@@ -117,6 +110,7 @@ struct sw_flow_key {
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
+ __be16 flags; /* TCP flags. */
} tp;
struct {
struct in6_addr target; /* ND target address. */
@@ -127,6 +121,31 @@ struct sw_flow_key {
};
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
+struct sw_flow_key_range {
+ size_t start;
+ size_t end;
+};
+
+struct sw_flow_mask {
+ int ref_count;
+ struct rcu_head rcu;
+ struct list_head list;
+ struct sw_flow_key_range range;
+ struct sw_flow_key key;
+};
+
+struct sw_flow_match {
+ struct sw_flow_key *key;
+ struct sw_flow_key_range range;
+ struct sw_flow_mask *mask;
+};
+
+struct sw_flow_actions {
+ struct rcu_head rcu;
+ u32 actions_len;
+ struct nlattr actions[];
+};
+
struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
@@ -141,23 +160,9 @@ struct sw_flow {
unsigned long used; /* Last used time (in jiffies). */
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
- u8 tcp_flags; /* Union of seen TCP flags. */
-};
-
-struct sw_flow_key_range {
- size_t start;
- size_t end;
+ __be16 tcp_flags; /* Union of seen TCP flags. */
};
-struct sw_flow_match {
- struct sw_flow_key *key;
- struct sw_flow_key_range range;
- struct sw_flow_mask *mask;
-};
-
-void ovs_match_init(struct sw_flow_match *match,
- struct sw_flow_key *key, struct sw_flow_mask *mask);
-
struct arp_eth_header {
__be16 ar_hrd; /* format of hardware address */
__be16 ar_pro; /* format of protocol address */
@@ -172,88 +177,9 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
-int ovs_flow_init(void);
-void ovs_flow_exit(void);
-
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *, bool deferred);
-
-struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
-void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-int ovs_flow_to_nlattrs(const struct sw_flow_key *,
- const struct sw_flow_key *, struct sk_buff *);
-int ovs_match_from_nlattrs(struct sw_flow_match *match,
- const struct nlattr *,
- const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
- const struct nlattr *attr);
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-#define TBL_MIN_BUCKETS 1024
-
-struct flow_table {
- struct flex_array *buckets;
- unsigned int count, n_buckets;
- struct rcu_head rcu;
- struct list_head *mask_list;
- int node_ver;
- u32 hash_seed;
- bool keep_flows;
-};
-
-static inline int ovs_flow_tbl_count(struct flow_table *table)
-{
- return table->count;
-}
-
-static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
-{
- return (table->count > table->n_buckets);
-}
-
-struct sw_flow *ovs_flow_lookup(struct flow_table *,
- const struct sw_flow_key *);
-struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
- struct sw_flow_match *match);
-
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
-struct flow_table *ovs_flow_tbl_alloc(int new_size);
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
-struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-
-void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
-void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
-
-struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
-extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
-int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask);
-int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key,
- const struct ovs_key_ipv4_tunnel *output);
-
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key, int key_end);
-
-struct sw_flow_mask {
- int ref_count;
- struct rcu_head rcu;
- struct list_head list;
- struct sw_flow_key_range range;
- struct sw_flow_key key;
-};
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
-struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
-void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
-void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
-void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
-struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
- const struct sw_flow_mask *);
-void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask);
#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
new file mode 100644
index 000000000000..2bc1bc1aca3b
--- /dev/null
+++ b/net/openvswitch/flow_netlink.c
@@ -0,0 +1,1630 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#include "flow_netlink.h"
+
+static void update_range__(struct sw_flow_match *match,
+ size_t offset, size_t size, bool is_mask)
+{
+ struct sw_flow_key_range *range = NULL;
+ size_t start = rounddown(offset, sizeof(long));
+ size_t end = roundup(offset + size, sizeof(long));
+
+ if (!is_mask)
+ range = &match->range;
+ else if (match->mask)
+ range = &match->mask->range;
+
+ if (!range)
+ return;
+
+ if (range->start == range->end) {
+ range->start = start;
+ range->end = end;
+ return;
+ }
+
+ if (range->start > start)
+ range->start = start;
+
+ if (range->end < end)
+ range->end = end;
+}
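
update_range__() widens the recorded match range to long-aligned boundaries so that the long-wise compare and mask loops never straddle a partially covered word. The rounding itself is easy to check in isolation (userspace, hypothetical struct):

	#include <stdio.h>
	#include <stddef.h>

	#define ROUND_DOWN(x, a)	((x) / (a) * (a))
	#define ROUND_UP(x, a)		((((x) + (a) - 1) / (a)) * (a))

	struct toy_key {
		unsigned int   in_port;
		unsigned short tci;
		unsigned char  proto;	/* deliberately mid-word */
	};

	int main(void)
	{
		size_t off = offsetof(struct toy_key, proto);
		size_t start = ROUND_DOWN(off, sizeof(long));
		size_t end = ROUND_UP(off + sizeof(unsigned char), sizeof(long));

		printf("field at %zu -> range [%zu, %zu)\n", off, start, end);
		return 0;
	}
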
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ (match)->mask->key.field = value; \
+ } else { \
+ (match)->key->field = value; \
+ } \
+ } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ len, is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ memcpy(&(match)->mask->key.field, value_p, len);\
+ } else { \
+ memcpy(&(match)->key->field, value_p, len); \
+ } \
+ } while (0)
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+ return range->end - range->start;
+}
+
+static bool match_validate(const struct sw_flow_match *match,
+ u64 key_attrs, u64 mask_attrs)
+{
+ u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+ u64 mask_allowed = key_attrs; /* At most allow all key attributes */
+
+ /* The following mask attributes are allowed only if they
+ * pass the validation tests. */
+ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+ | (1 << OVS_KEY_ATTR_IPV6)
+ | (1 << OVS_KEY_ATTR_TCP)
+ | (1 << OVS_KEY_ATTR_TCP_FLAGS)
+ | (1 << OVS_KEY_ATTR_UDP)
+ | (1 << OVS_KEY_ATTR_SCTP)
+ | (1 << OVS_KEY_ATTR_ICMP)
+ | (1 << OVS_KEY_ATTR_ICMPV6)
+ | (1 << OVS_KEY_ATTR_ARP)
+ | (1 << OVS_KEY_ATTR_ND));
+
+ /* Always allowed mask fields. */
+ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+ | (1 << OVS_KEY_ATTR_IN_PORT)
+ | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+ /* Check key attributes. */
+ if (match->key->eth.type == htons(ETH_P_ARP)
+ || match->key->eth.type == htons(ETH_P_RARP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ARP;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+ if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+ }
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMP) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+ }
+ }
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IPV6)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+ if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+ }
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMPV6) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+ if (match->key->ipv6.tp.src ==
+ htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ND;
+ if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+ }
+ }
+ }
+ }
+
+ if ((key_attrs & key_expected) != key_expected) {
+ /* Key attributes check failed. */
+ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+ key_attrs, key_expected);
+ return false;
+ }
+
+ if ((mask_attrs & mask_allowed) != mask_attrs) {
+ /* Mask attributes check failed. */
+ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+ mask_attrs, mask_allowed);
+ return false;
+ }
+
+ return true;
+}
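
Stripped of the per-protocol bookkeeping, match_validate() ends in two bitmask tests: every bit of key_expected must be present in key_attrs, and mask_attrs may not contain anything outside mask_allowed. In miniature:

	#include <stdio.h>
	#include <stdint.h>

	enum { ATTR_ETHERNET, ATTR_IPV4, ATTR_TCP };	/* toy attribute ids */

	static int validate(uint64_t key_attrs, uint64_t key_expected,
			    uint64_t mask_attrs, uint64_t mask_allowed)
	{
		if ((key_attrs & key_expected) != key_expected)
			return 0;	/* a required key attribute is missing */
		if ((mask_attrs & mask_allowed) != mask_attrs)
			return 0;	/* mask touches a disallowed field */
		return 1;
	}

	int main(void)
	{
		uint64_t expected = (1 << ATTR_ETHERNET) | (1 << ATTR_IPV4) |
				    (1 << ATTR_TCP);

		printf("%d\n", validate(expected, expected,
					1 << ATTR_IPV4, expected));	/* 1 */
		printf("%d\n", validate(1 << ATTR_ETHERNET, expected,
					0, expected));			/* 0 */
		return 0;
	}
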
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
+static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+ [OVS_KEY_ATTR_ENCAP] = -1,
+ [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+ [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+ [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
+ [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+ [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+ [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+ [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+ [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+ [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+ [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
+ [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+ [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
+ [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+ [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+ [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+ [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+ [OVS_KEY_ATTR_TUNNEL] = -1,
+};
+
+static bool is_all_zero(const u8 *fp, size_t size)
+{
+ int i;
+
+ if (!fp)
+ return false;
+
+ for (i = 0; i < size; i++)
+ if (fp[i])
+ return false;
+
+ return true;
+}
+
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[],
+ u64 *attrsp, bool nz)
+{
+ const struct nlattr *nla;
+ u64 attrs;
+ int rem;
+
+ attrs = *attrsp;
+ nla_for_each_nested(nla, attr, rem) {
+ u16 type = nla_type(nla);
+ int expected_len;
+
+ if (type > OVS_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+ type, OVS_KEY_ATTR_MAX);
+ return -EINVAL;
+ }
+
+ if (attrs & (1 << type)) {
+ OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+ return -EINVAL;
+ }
+
+ expected_len = ovs_key_lens[type];
+ if (nla_len(nla) != expected_len && expected_len != -1) {
+ OVS_NLERR("Key attribute has unexpected length (type=%d"
+ ", length=%d, expected=%d).\n", type,
+ nla_len(nla), expected_len);
+ return -EINVAL;
+ }
+
+ if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+ attrs |= 1 << type;
+ a[type] = nla;
+ }
+ }
+ if (rem) {
+ OVS_NLERR("Message has %d unknown bytes.\n", rem);
+ return -EINVAL;
+ }
+
+ *attrsp = attrs;
+ return 0;
+}
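
Note that this copy of the parser widens the local accumulator to u64, matching *attrsp (the flow.c version removed above still declared it u32). The presence bitmap it maintains turns duplicate detection into a single mask test:

	#include <stdio.h>
	#include <stdint.h>

	/* One bit per attribute type, as in __parse_flow_nlattrs(), so
	 * "seen this type before?" is a mask test, not a list walk. */
	int main(void)
	{
		uint64_t attrs = 0;
		int types[] = { 2, 5, 2 };	/* third entry duplicates */
		unsigned i;

		for (i = 0; i < sizeof(types) / sizeof(types[0]); i++) {
			if (attrs & (1ULL << types[i])) {
				printf("duplicate type %d\n", types[i]);
				continue;
			}
			attrs |= 1ULL << types[i];
		}
		printf("attrs = 0x%llx\n", (unsigned long long)attrs);
		return 0;
	}
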
+
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[], u64 *attrsp)
+{
+ return __parse_flow_nlattrs(attr, a, attrsp, true);
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[], u64 *attrsp)
+{
+ return __parse_flow_nlattrs(attr, a, attrsp, false);
+}
+
+static int ipv4_tun_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask)
+{
+ struct nlattr *a;
+ int rem;
+ bool ttl = false;
+ __be16 tun_flags = 0;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+ [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
+ [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
+ [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
+ [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
+ [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
+ [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
+ [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ };
+
+ if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+ type, OVS_TUNNEL_KEY_ATTR_MAX);
+ return -EINVAL;
+ }
+
+ if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+ " length (type=%d, length=%d, expected=%d).\n",
+ type, nla_len(a), ovs_tunnel_key_lens[type]);
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case OVS_TUNNEL_KEY_ATTR_ID:
+ SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+ nla_get_be64(a), is_mask);
+ tun_flags |= TUNNEL_KEY;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ nla_get_be32(a), is_mask);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ nla_get_be32(a), is_mask);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TOS:
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ nla_get_u8(a), is_mask);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TTL:
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ nla_get_u8(a), is_mask);
+ ttl = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+ tun_flags |= TUNNEL_DONT_FRAGMENT;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_CSUM:
+ tun_flags |= TUNNEL_CSUM;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+
+ if (rem > 0) {
+ OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+ return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if (!match->key->tun_key.ipv4_dst) {
+ OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+ return -EINVAL;
+ }
+
+ if (!ttl) {
+ OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int ipv4_tun_to_nlattr(struct sk_buff *skb,
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ const struct ovs_key_ipv4_tunnel *output)
+{
+ struct nlattr *nla;
+
+ nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (output->tun_flags & TUNNEL_KEY &&
+ nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
+ return -EMSGSIZE;
+ if (output->ipv4_src &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+ return -EMSGSIZE;
+ if (output->ipv4_dst &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+ return -EMSGSIZE;
+ if (output->ipv4_tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+ return -EMSGSIZE;
+ if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+ return -EMSGSIZE;
+ if ((output->tun_flags & TUNNEL_CSUM) &&
+ nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, nla);
+ return 0;
+}
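
ipv4_tun_to_nlattr() uses the standard netlink nesting pattern: nla_nest_start() reserves space for a container header, the tunnel sub-attributes are appended, and nla_nest_end() goes back and patches the container's length. A toy TLV encoder (not the kernel API) showing that patch-up step:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	/* Toy TLV: 2-byte length (covering the header), 2-byte type,
	 * then payload; loosely modeled on a netlink attribute. */
	static size_t tlv_put(uint8_t *buf, size_t off, uint16_t type,
			      const void *data, uint16_t len)
	{
		uint16_t total = 4 + len;

		memcpy(buf + off, &total, 2);
		memcpy(buf + off + 2, &type, 2);
		memcpy(buf + off + 4, data, len);
		return off + total;
	}

	int main(void)
	{
		uint8_t buf[64];
		size_t nest = 0;	/* container header goes here */
		size_t off = nest + 4;	/* skip it until "nest_end" */
		uint8_t ttl = 64;
		uint16_t nest_type = 1, nest_len;

		off = tlv_put(buf, off, 2 /* TTL */, &ttl, 1);

		nest_len = off - nest;	/* the nla_nest_end() step */
		memcpy(buf + nest, &nest_len, 2);
		memcpy(buf + nest + 2, &nest_type, 2);
		printf("container length = %u bytes\n", (unsigned)nest_len);
		return 0;
	}
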
+
+
+static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
+ const struct nlattr **a, bool is_mask)
+{
+ if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+ SW_FLOW_KEY_PUT(match, phy.priority,
+ nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+ }
+
+ if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+ u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+ if (is_mask)
+ in_port = 0xffffffff; /* Always exact match in_port. */
+ else if (in_port >= DP_MAX_PORTS)
+ return -EINVAL;
+
+ SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
+ }
+
+ if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+ uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+ SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+ if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
+ }
+ return 0;
+}
+
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+ const struct nlattr **a, bool is_mask)
+{
+ int err;
+ u64 orig_attrs = attrs;
+
+ err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+ if (err)
+ return err;
+
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+ const struct ovs_key_ethernet *eth_key;
+
+ eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+ SW_FLOW_KEY_MEMCPY(match, eth.src,
+ eth_key->eth_src, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, eth.dst,
+ eth_key->eth_dst, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
+ __be16 tci;
+
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ if (is_mask)
+ OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+ else
+ OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+ } else if (!is_mask)
+ SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+ __be16 eth_type;
+
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+ if (is_mask) {
+ /* Always exact match EtherType. */
+ eth_type = htons(0xffff);
+ } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+ ntohs(eth_type), ETH_P_802_3_MIN);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ const struct ovs_key_ipv4 *ipv4_key;
+
+ ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+ if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+ ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+ return -EINVAL;
+ }
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv4_key->ipv4_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv4_key->ipv4_tos, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv4_key->ipv4_ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv4_key->ipv4_frag, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ ipv4_key->ipv4_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ ipv4_key->ipv4_dst, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
+ const struct ovs_key_ipv6 *ipv6_key;
+
+ ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+ if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+ ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+ return -EINVAL;
+ }
+ SW_FLOW_KEY_PUT(match, ipv6.label,
+ ipv6_key->ipv6_label, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv6_key->ipv6_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv6_key->ipv6_tclass, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv6_key->ipv6_hlimit, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv6_key->ipv6_frag, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+ ipv6_key->ipv6_src,
+ sizeof(match->key->ipv6.addr.src),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+ ipv6_key->ipv6_dst,
+ sizeof(match->key->ipv6.addr.dst),
+ is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
+ const struct ovs_key_arp *arp_key;
+
+ arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+ if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+ OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+ arp_key->arp_op);
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ arp_key->arp_sip, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ arp_key->arp_tip, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ntohs(arp_key->arp_op), is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+ arp_key->arp_sha, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+ arp_key->arp_tha, ETH_ALEN, is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
+ const struct ovs_key_tcp *tcp_key;
+
+ tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
+ nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+ is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
+ nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+ is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
+ const struct ovs_key_udp *udp_key;
+
+ udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ udp_key->udp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ udp_key->udp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
+ const struct ovs_key_sctp *sctp_key;
+
+ sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
+ const struct ovs_key_icmp *icmp_key;
+
+ icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ htons(icmp_key->icmp_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ htons(icmp_key->icmp_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
+ const struct ovs_key_icmpv6 *icmpv6_key;
+
+ icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ htons(icmpv6_key->icmpv6_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ htons(icmpv6_key->icmpv6_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ND)) {
+ const struct ovs_key_nd *nd_key;
+
+ nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+ nd_key->nd_target,
+ sizeof(match->key->ipv6.nd.target),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+ nd_key->nd_sll, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+ nd_key->nd_tll, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ND);
+ }
+
+ if (attrs != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
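+/* Record 'range' in the mask and fill the bytes it covers in the mask key
+ * with 'val' (0xff marks an exact-match byte, 0x00 a don't-care byte).
+ */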
+static void sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val)
+{
+ u8 *m = (u8 *)&mask->key + range->start;
+
+ mask->range = *range;
+ memset(m, val, range_n_bytes(range));
+}
+
+/**
+ * ovs_nla_get_match - parses Netlink attributes into a flow key and
+ * mask. If 'mask' is NULL, the flow is treated as an exact-match
+ * flow. Otherwise, it is treated as a wildcarded flow, with the mask
+ * indicating which bits of the key must match exactly.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding a nested sequence of %OVS_KEY_ATTR_*
+ * Netlink attributes. The fields should be those of the packet that
+ * triggered the creation of this flow.
+ * @mask: Optional. Netlink attribute holding a nested sequence of
+ * %OVS_KEY_ATTR_* Netlink attributes that specifies the mask of the
+ * wildcarded flow.
+ */
+int ovs_nla_get_match(struct sw_flow_match *match,
+ const struct nlattr *key,
+ const struct nlattr *mask)
+{
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ const struct nlattr *encap;
+ u64 key_attrs = 0;
+ u64 mask_attrs = 0;
+ bool encap_valid = false;
+ int err;
+
+ err = parse_flow_nlattrs(key, a, &key_attrs);
+ if (err)
+ return err;
+
+ if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+ (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+ __be16 tci;
+
+ if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+			OVS_NLERR("Invalid VLAN frame.\n");
+ return -EINVAL;
+ }
+
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ encap_valid = true;
+
+ if (tci & htons(VLAN_TAG_PRESENT)) {
+ err = parse_flow_nlattrs(encap, a, &key_attrs);
+ if (err)
+ return err;
+ } else if (!tci) {
+ /* Corner case for truncated 802.1Q header. */
+ if (nla_len(encap)) {
+ OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+ return -EINVAL;
+ }
+ } else {
+ OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+ return -EINVAL;
+ }
+ }
+
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+ if (err)
+ return err;
+
+ if (mask) {
+ err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+ if (err)
+ return err;
+
+		if (mask_attrs & (1 << OVS_KEY_ATTR_ENCAP)) {
+ __be16 eth_type = 0;
+ __be16 tci = 0;
+
+ if (!encap_valid) {
+ OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+ return -EINVAL;
+ }
+
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ if (a[OVS_KEY_ATTR_ETHERTYPE])
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+ if (eth_type == htons(0xffff)) {
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+				encap = a[OVS_KEY_ATTR_ENCAP];
+				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+				if (err)
+					return err;
+			} else {
+ OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+ ntohs(eth_type));
+ return -EINVAL;
+ }
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+ return -EINVAL;
+ }
+ }
+
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+ if (err)
+ return err;
+ } else {
+ /* Populate exact match flow's key mask. */
+ if (match->mask)
+ sw_flow_mask_set(match->mask, &match->range, 0xff);
+ }
+
+ if (!match_validate(match, key_attrs, mask_attrs))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
+ * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but parses only enough
+ * of it to obtain the metadata, that is, the parts of the flow key that
+ * cannot be extracted from the packet itself.
+ */
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+ const struct nlattr *attr)
+{
+ struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ u64 attrs = 0;
+ int err;
+ struct sw_flow_match match;
+
+ flow->key.phy.in_port = DP_MAX_PORTS;
+ flow->key.phy.priority = 0;
+ flow->key.phy.skb_mark = 0;
+ memset(tun_key, 0, sizeof(flow->key.tun_key));
+
+ err = parse_flow_nlattrs(attr, a, &attrs);
+ if (err)
+ return -EINVAL;
+
+ memset(&match, 0, sizeof(match));
+ match.key = &flow->key;
+
+ err = metadata_from_nlattrs(&match, &attrs, a, false);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, struct sk_buff *skb)
+{
+ struct ovs_key_ethernet *eth_key;
+ struct nlattr *nla, *encap;
+ bool is_mask = (swkey != output);
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+ goto nla_put_failure;
+
+ if ((swkey->tun_key.ipv4_dst || is_mask) &&
+ ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+ goto nla_put_failure;
+
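+	/* The key stores in_port in 16 bits, but the netlink attribute is
+	 * 32 bits wide: an exact mask of 0xffff is widened to 0xffffffff,
+	 * and a key without an in_port (DP_MAX_PORTS) omits the attribute.
+	 */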
+ if (swkey->phy.in_port == DP_MAX_PORTS) {
+ if (is_mask && (output->phy.in_port == 0xffff))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+ goto nla_put_failure;
+ } else {
+ u16 upper_u16;
+ upper_u16 = !is_mask ? 0 : 0xffff;
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+ (upper_u16 << 16) | output->phy.in_port))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
+ goto nla_put_failure;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+ if (!nla)
+ goto nla_put_failure;
+
+ eth_key = nla_data(nla);
+ memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+ memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
+
+ if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+ __be16 eth_type;
+ eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+ nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+ goto nla_put_failure;
+ encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+ if (!swkey->eth.tci)
+ goto unencap;
+ } else
+ encap = NULL;
+
+ if (swkey->eth.type == htons(ETH_P_802_2)) {
+		/*
+		 * Ethertype 802.2 is represented in netlink by omitting the
+		 * OVS_KEY_ATTR_ETHERTYPE attribute from the flow key and by
+		 * an ethertype of 0xffff in the mask attribute. The ethertype
+		 * can also be wildcarded.
+		 */
+ if (is_mask && output->eth.type)
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+ output->eth.type))
+ goto nla_put_failure;
+ goto unencap;
+ }
+
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+ goto nla_put_failure;
+
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ struct ovs_key_ipv4 *ipv4_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv4_key = nla_data(nla);
+ ipv4_key->ipv4_src = output->ipv4.addr.src;
+ ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+ ipv4_key->ipv4_proto = output->ip.proto;
+ ipv4_key->ipv4_tos = output->ip.tos;
+ ipv4_key->ipv4_ttl = output->ip.ttl;
+ ipv4_key->ipv4_frag = output->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ struct ovs_key_ipv6 *ipv6_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv6_key = nla_data(nla);
+ memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
+ sizeof(ipv6_key->ipv6_src));
+ memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
+ sizeof(ipv6_key->ipv6_dst));
+ ipv6_key->ipv6_label = output->ipv6.label;
+ ipv6_key->ipv6_proto = output->ip.proto;
+ ipv6_key->ipv6_tclass = output->ip.tos;
+ ipv6_key->ipv6_hlimit = output->ip.ttl;
+ ipv6_key->ipv6_frag = output->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_ARP) ||
+ swkey->eth.type == htons(ETH_P_RARP)) {
+ struct ovs_key_arp *arp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+ if (!nla)
+ goto nla_put_failure;
+ arp_key = nla_data(nla);
+ memset(arp_key, 0, sizeof(struct ovs_key_arp));
+ arp_key->arp_sip = output->ipv4.addr.src;
+ arp_key->arp_tip = output->ipv4.addr.dst;
+ arp_key->arp_op = htons(output->ip.proto);
+ memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
+ memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+ }
+
+ if ((swkey->eth.type == htons(ETH_P_IP) ||
+ swkey->eth.type == htons(ETH_P_IPV6)) &&
+ swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+ if (swkey->ip.proto == IPPROTO_TCP) {
+ struct ovs_key_tcp *tcp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+ if (!nla)
+ goto nla_put_failure;
+ tcp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ tcp_key->tcp_src = output->ipv4.tp.src;
+ tcp_key->tcp_dst = output->ipv4.tp.dst;
+ if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+ output->ipv4.tp.flags))
+ goto nla_put_failure;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ tcp_key->tcp_src = output->ipv6.tp.src;
+ tcp_key->tcp_dst = output->ipv6.tp.dst;
+ if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+ output->ipv6.tp.flags))
+ goto nla_put_failure;
+ }
+ } else if (swkey->ip.proto == IPPROTO_UDP) {
+ struct ovs_key_udp *udp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+ if (!nla)
+ goto nla_put_failure;
+ udp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ udp_key->udp_src = output->ipv4.tp.src;
+ udp_key->udp_dst = output->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ udp_key->udp_src = output->ipv6.tp.src;
+ udp_key->udp_dst = output->ipv6.tp.dst;
+ }
+ } else if (swkey->ip.proto == IPPROTO_SCTP) {
+ struct ovs_key_sctp *sctp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+ if (!nla)
+ goto nla_put_failure;
+ sctp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ sctp_key->sctp_src = swkey->ipv4.tp.src;
+ sctp_key->sctp_dst = swkey->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ sctp_key->sctp_src = swkey->ipv6.tp.src;
+ sctp_key->sctp_dst = swkey->ipv6.tp.dst;
+ }
+ } else if (swkey->eth.type == htons(ETH_P_IP) &&
+ swkey->ip.proto == IPPROTO_ICMP) {
+ struct ovs_key_icmp *icmp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+ if (!nla)
+ goto nla_put_failure;
+ icmp_key = nla_data(nla);
+ icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
+ icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+ } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+ swkey->ip.proto == IPPROTO_ICMPV6) {
+ struct ovs_key_icmpv6 *icmpv6_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+ sizeof(*icmpv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ icmpv6_key = nla_data(nla);
+ icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+
+ if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ struct ovs_key_nd *nd_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+ if (!nla)
+ goto nla_put_failure;
+ nd_key = nla_data(nla);
+ memcpy(nd_key->nd_target, &output->ipv6.nd.target,
+ sizeof(nd_key->nd_target));
+ memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
+ memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
+ }
+ }
+ }
+
+unencap:
+ if (encap)
+ nla_nest_end(skb, encap);
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+#define MAX_ACTIONS_BUFSIZE (32 * 1024)
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+{
+ struct sw_flow_actions *sfa;
+
+ if (size > MAX_ACTIONS_BUFSIZE)
+ return ERR_PTR(-EINVAL);
+
+ sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
+ if (!sfa)
+ return ERR_PTR(-ENOMEM);
+
+ sfa->actions_len = 0;
+ return sfa;
+}
+
+/* RCU callback used by ovs_nla_free_flow_actions. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+ struct sw_flow_actions *sf_acts = container_of(rcu,
+ struct sw_flow_actions, rcu);
+ kfree(sf_acts);
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+}
+
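+/* Make room for 'attr_len' more bytes at the tail of the action buffer,
+ * doubling the allocation (capped at MAX_ACTIONS_BUFSIZE) once the slack
+ * that kmalloc actually provided is used up, and return the new tail.
+ */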
+static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+ int attr_len)
+{
+ struct sw_flow_actions *acts;
+ int new_acts_size;
+ int req_size = NLA_ALIGN(attr_len);
+ int next_offset = offsetof(struct sw_flow_actions, actions) +
+ (*sfa)->actions_len;
+
+ if (req_size <= (ksize(*sfa) - next_offset))
+ goto out;
+
+ new_acts_size = ksize(*sfa) * 2;
+
+ if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+ if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+ return ERR_PTR(-EMSGSIZE);
+ new_acts_size = MAX_ACTIONS_BUFSIZE;
+ }
+
+ acts = ovs_nla_alloc_flow_actions(new_acts_size);
+ if (IS_ERR(acts))
+ return (void *)acts;
+
+ memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
+ acts->actions_len = (*sfa)->actions_len;
+ kfree(*sfa);
+ *sfa = acts;
+
+out:
+ (*sfa)->actions_len += req_size;
+ return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
+}
+
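+/* Append a single attribute (header, payload and zeroed padding) to the
+ * action buffer.
+ */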
+static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+{
+ struct nlattr *a;
+
+ a = reserve_sfa_size(sfa, nla_attr_size(len));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ a->nla_type = attrtype;
+ a->nla_len = nla_attr_size(len);
+
+ if (data)
+ memcpy(nla_data(a), data, len);
+ memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+
+ return 0;
+}
+
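+/* Start a nested action attribute and return the offset of its header so
+ * that add_nested_action_end() can patch in the final length.
+ */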
+static inline int add_nested_action_start(struct sw_flow_actions **sfa,
+ int attrtype)
+{
+ int used = (*sfa)->actions_len;
+ int err;
+
+ err = add_action(sfa, attrtype, NULL, 0);
+ if (err)
+ return err;
+
+ return used;
+}
+
+static inline void add_nested_action_end(struct sw_flow_actions *sfa,
+ int st_offset)
+{
+ struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
+ st_offset);
+
+ a->nla_len = sfa->actions_len - st_offset;
+}
+
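+/* Validate a sample action and copy it into the action buffer, recursing
+ * into its nested action list with an incremented depth.
+ */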
+static int validate_and_copy_sample(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth,
+ struct sw_flow_actions **sfa)
+{
+ const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+ const struct nlattr *probability, *actions;
+ const struct nlattr *a;
+ int rem, start, err, st_acts;
+
+ memset(attrs, 0, sizeof(attrs));
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+ return -EINVAL;
+ attrs[type] = a;
+ }
+ if (rem)
+ return -EINVAL;
+
+ probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+ if (!probability || nla_len(probability) != sizeof(u32))
+ return -EINVAL;
+
+ actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+ if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+ return -EINVAL;
+
+ /* validation done, copy sample action. */
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+ if (start < 0)
+ return start;
+ err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+ nla_data(probability), sizeof(u32));
+ if (err)
+ return err;
+ st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+ if (st_acts < 0)
+ return st_acts;
+
+ err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+ if (err)
+ return err;
+
+ add_nested_action_end(*sfa, st_acts);
+ add_nested_action_end(*sfa, start);
+
+ return 0;
+}
+
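+/* A transport-port set action only makes sense when the flow key already
+ * carries non-zero transport ports for the matching address family.
+ */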
+static int validate_tp_port(const struct sw_flow_key *flow_key)
+{
+ if (flow_key->eth.type == htons(ETH_P_IP)) {
+ if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
+ return 0;
+ } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
+ if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct sw_flow_mask *mask)
+{
+ memset(match, 0, sizeof(*match));
+ match->key = key;
+ match->mask = mask;
+
+ memset(key, 0, sizeof(*key));
+
+ if (mask) {
+ memset(&mask->key, 0, sizeof(mask->key));
+ mask->range.start = mask->range.end = 0;
+ }
+}
+
+static int validate_and_copy_set_tun(const struct nlattr *attr,
+ struct sw_flow_actions **sfa)
+{
+ struct sw_flow_match match;
+ struct sw_flow_key key;
+ int err, start;
+
+ ovs_match_init(&match, &key, NULL);
+ err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+ if (err)
+ return err;
+
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
+ if (start < 0)
+ return start;
+
+ err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+ sizeof(match.key->tun_key));
+ add_nested_action_end(*sfa, start);
+
+ return err;
+}
+
+static int validate_set(const struct nlattr *a,
+ const struct sw_flow_key *flow_key,
+ struct sw_flow_actions **sfa,
+ bool *set_tun)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ int key_type = nla_type(ovs_key);
+
+	/* There can be only one key in an action */
+ if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+ return -EINVAL;
+
+ if (key_type > OVS_KEY_ATTR_MAX ||
+ (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+ ovs_key_lens[key_type] != -1))
+ return -EINVAL;
+
+ switch (key_type) {
+ const struct ovs_key_ipv4 *ipv4_key;
+ const struct ovs_key_ipv6 *ipv6_key;
+ int err;
+
+ case OVS_KEY_ATTR_PRIORITY:
+ case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_ETHERNET:
+ break;
+
+ case OVS_KEY_ATTR_TUNNEL:
+ *set_tun = true;
+ err = validate_and_copy_set_tun(a, sfa);
+ if (err)
+ return err;
+ break;
+
+ case OVS_KEY_ATTR_IPV4:
+ if (flow_key->eth.type != htons(ETH_P_IP))
+ return -EINVAL;
+
+ if (!flow_key->ip.proto)
+ return -EINVAL;
+
+ ipv4_key = nla_data(ovs_key);
+ if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+ return -EINVAL;
+
+ break;
+
+ case OVS_KEY_ATTR_IPV6:
+ if (flow_key->eth.type != htons(ETH_P_IPV6))
+ return -EINVAL;
+
+ if (!flow_key->ip.proto)
+ return -EINVAL;
+
+ ipv6_key = nla_data(ovs_key);
+ if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+ return -EINVAL;
+
+ if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+ return -EINVAL;
+
+ break;
+
+ case OVS_KEY_ATTR_TCP:
+ if (flow_key->ip.proto != IPPROTO_TCP)
+ return -EINVAL;
+
+ return validate_tp_port(flow_key);
+
+ case OVS_KEY_ATTR_UDP:
+ if (flow_key->ip.proto != IPPROTO_UDP)
+ return -EINVAL;
+
+ return validate_tp_port(flow_key);
+
+ case OVS_KEY_ATTR_SCTP:
+ if (flow_key->ip.proto != IPPROTO_SCTP)
+ return -EINVAL;
+
+ return validate_tp_port(flow_key);
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+ static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
+ [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+ [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+ };
+ struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+ int error;
+
+ error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+ attr, userspace_policy);
+ if (error)
+ return error;
+
+ if (!a[OVS_USERSPACE_ATTR_PID] ||
+ !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int copy_action(const struct nlattr *from,
+ struct sw_flow_actions **sfa)
+{
+ int totlen = NLA_ALIGN(from->nla_len);
+ struct nlattr *to;
+
+ to = reserve_sfa_size(sfa, from->nla_len);
+ if (IS_ERR(to))
+ return PTR_ERR(to);
+
+ memcpy(to, from, totlen);
+ return 0;
+}
+
+int ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth,
+ struct sw_flow_actions **sfa)
+{
+ const struct nlattr *a;
+ int rem, err;
+
+ if (depth >= SAMPLE_ACTION_DEPTH)
+ return -EOVERFLOW;
+
+ nla_for_each_nested(a, attr, rem) {
+ /* Expected argument lengths, (u32)-1 for variable length. */
+ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+ [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+ [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+ [OVS_ACTION_ATTR_POP_VLAN] = 0,
+ [OVS_ACTION_ATTR_SET] = (u32)-1,
+ [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+ };
+ const struct ovs_action_push_vlan *vlan;
+ int type = nla_type(a);
+ bool skip_copy;
+
+ if (type > OVS_ACTION_ATTR_MAX ||
+ (action_lens[type] != nla_len(a) &&
+ action_lens[type] != (u32)-1))
+ return -EINVAL;
+
+ skip_copy = false;
+ switch (type) {
+ case OVS_ACTION_ATTR_UNSPEC:
+ return -EINVAL;
+
+ case OVS_ACTION_ATTR_USERSPACE:
+ err = validate_userspace(a);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_OUTPUT:
+ if (nla_get_u32(a) >= DP_MAX_PORTS)
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_POP_VLAN:
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ vlan = nla_data(a);
+ if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+ return -EINVAL;
+ if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ err = validate_set(a, key, sfa, &skip_copy);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ err = validate_and_copy_sample(a, key, depth, sfa);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (!skip_copy) {
+ err = copy_action(a, sfa);
+ if (err)
+ return err;
+ }
+ }
+
+ if (rem > 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+ const struct nlattr *a;
+ struct nlattr *start;
+ int err = 0, rem;
+
+ start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
+ if (!start)
+ return -EMSGSIZE;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ struct nlattr *st_sample;
+
+ switch (type) {
+ case OVS_SAMPLE_ATTR_PROBABILITY:
+ if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
+ sizeof(u32), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ case OVS_SAMPLE_ATTR_ACTIONS:
+ st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+ if (!st_sample)
+ return -EMSGSIZE;
+ err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, st_sample);
+ break;
+ }
+ }
+
+ nla_nest_end(skb, start);
+ return err;
+}
+
+static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ int key_type = nla_type(ovs_key);
+ struct nlattr *start;
+ int err;
+
+ switch (key_type) {
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+ if (!start)
+ return -EMSGSIZE;
+
+ err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+ nla_data(ovs_key));
+ if (err)
+ return err;
+ nla_nest_end(skb, start);
+ break;
+ default:
+ if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
+ return -EMSGSIZE;
+ break;
+ }
+
+ return 0;
+}
+
+int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
+{
+ const struct nlattr *a;
+ int rem, err;
+
+ nla_for_each_attr(a, attr, len, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_ACTION_ATTR_SET:
+ err = set_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ err = sample_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+ default:
+ if (nla_put(skb, type, nla_len(a), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ }
+ }
+
+ return 0;
+}
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
new file mode 100644
index 000000000000..440151045d39
--- /dev/null
+++ b/net/openvswitch/flow_netlink.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_NETLINK_H
+#define FLOW_NETLINK_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key, struct sw_flow_mask *mask);
+
+int ovs_nla_put_flow(const struct sw_flow_key *,
+ const struct sw_flow_key *, struct sk_buff *);
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+ const struct nlattr *attr);
+int ovs_nla_get_match(struct sw_flow_match *match,
+ const struct nlattr *,
+ const struct nlattr *);
+
+int ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth,
+ struct sw_flow_actions **sfa);
+int ovs_nla_put_actions(const struct nlattr *attr,
+ int len, struct sk_buff *skb);
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
+void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+
+#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
new file mode 100644
index 000000000000..e42542706087
--- /dev/null
+++ b/net/openvswitch/flow_table.c
@@ -0,0 +1,592 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#define TBL_MIN_BUCKETS 1024
+#define REHASH_INTERVAL (10 * 60 * HZ)
+
+static struct kmem_cache *flow_cache;
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+ return range->end - range->start;
+}
+
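+/* Produce 'dst' by AND-ing 'src' with 'mask', one long word at a time,
+ * over the byte range that the mask covers.
+ */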
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask)
+{
+ const long *m = (long *)((u8 *)&mask->key + mask->range.start);
+ const long *s = (long *)((u8 *)src + mask->range.start);
+ long *d = (long *)((u8 *)dst + mask->range.start);
+ int i;
+
+	/* The memory outside of 'mask->range' is not set since
+	 * further operations on 'dst' only use contents within
+	 * 'mask->range'.
+	 */
+ for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+ *d++ = *s++ & *m++;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+ struct sw_flow *flow;
+
+ flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&flow->lock);
+ flow->sf_acts = NULL;
+ flow->mask = NULL;
+
+ return flow;
+}
+
+int ovs_flow_tbl_count(struct flow_table *table)
+{
+ return table->count;
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+ struct flex_array *buckets;
+ int i, err;
+
+ buckets = flex_array_alloc(sizeof(struct hlist_head),
+ n_buckets, GFP_KERNEL);
+ if (!buckets)
+ return NULL;
+
+ err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+ if (err) {
+ flex_array_free(buckets);
+ return NULL;
+ }
+
+ for (i = 0; i < n_buckets; i++)
+ INIT_HLIST_HEAD((struct hlist_head *)
+ flex_array_get(buckets, i));
+
+ return buckets;
+}
+
+static void flow_free(struct sw_flow *flow)
+{
+	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+ kmem_cache_free(flow_cache, flow);
+}
+
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+ struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+ flow_free(flow);
+}
+
+static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
+{
+ struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);
+
+ kfree(mask);
+}
+
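+/* Drop one reference to 'mask'; the final put unlinks it from the mask
+ * list and frees it, via RCU when 'deferred' is set.
+ */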
+static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
+{
+ if (!mask)
+ return;
+
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ if (deferred)
+ call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
+ else
+ kfree(mask);
+ }
+}
+
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
+{
+ if (!flow)
+ return;
+
+ flow_mask_del_ref(flow->mask, deferred);
+
+ if (deferred)
+ call_rcu(&flow->rcu, rcu_free_flow_callback);
+ else
+ flow_free(flow);
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+ flex_array_free(buckets);
+}
+
+static void __table_instance_destroy(struct table_instance *ti)
+{
+ int i;
+
+ if (ti->keep_flows)
+ goto skip_flows;
+
+ for (i = 0; i < ti->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head = flex_array_get(ti->buckets, i);
+ struct hlist_node *n;
+ int ver = ti->node_ver;
+
+ hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+ hlist_del(&flow->hash_node[ver]);
+ ovs_flow_free(flow, false);
+ }
+ }
+
+skip_flows:
+ free_buckets(ti->buckets);
+ kfree(ti);
+}
+
+static struct table_instance *table_instance_alloc(int new_size)
+{
+ struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+ if (!ti)
+ return NULL;
+
+ ti->buckets = alloc_buckets(new_size);
+
+ if (!ti->buckets) {
+ kfree(ti);
+ return NULL;
+ }
+ ti->n_buckets = new_size;
+ ti->node_ver = 0;
+ ti->keep_flows = false;
+ get_random_bytes(&ti->hash_seed, sizeof(u32));
+
+ return ti;
+}
+
+int ovs_flow_tbl_init(struct flow_table *table)
+{
+ struct table_instance *ti;
+
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
+
+ if (!ti)
+ return -ENOMEM;
+
+ rcu_assign_pointer(table->ti, ti);
+ INIT_LIST_HEAD(&table->mask_list);
+ table->last_rehash = jiffies;
+ table->count = 0;
+ return 0;
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+ struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
+
+ __table_instance_destroy(ti);
+}
+
+static void table_instance_destroy(struct table_instance *ti, bool deferred)
+{
+ if (!ti)
+ return;
+
+ if (deferred)
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+ else
+ __table_instance_destroy(ti);
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+ struct table_instance *ti = ovsl_dereference(table->ti);
+
+ table_instance_destroy(ti, false);
+}
+
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
+ u32 *bucket, u32 *last)
+{
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ int ver;
+ int i;
+
+ ver = ti->node_ver;
+ while (*bucket < ti->n_buckets) {
+ i = 0;
+ head = flex_array_get(ti->buckets, *bucket);
+ hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+ if (i < *last) {
+ i++;
+ continue;
+ }
+ *last = i + 1;
+ return flow;
+ }
+ (*bucket)++;
+ *last = 0;
+ }
+
+ return NULL;
+}
+
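+/* Mix the flow hash with the per-instance seed and map it to a bucket;
+ * n_buckets is always a power of two.
+ */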
+static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
+{
+ hash = jhash_1word(hash, ti->hash_seed);
+ return flex_array_get(ti->buckets,
+ (hash & (ti->n_buckets - 1)));
+}
+
+static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+{
+ struct hlist_head *head;
+
+ head = find_bucket(ti, flow->hash);
+ hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+}
+
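+/* Re-link every flow from 'old' into 'new' through the hash_node slot that
+ * 'old' is not using, so concurrent readers of 'old' stay undisturbed.
+ */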
+static void flow_table_copy_flows(struct table_instance *old,
+ struct table_instance *new)
+{
+ int old_ver;
+ int i;
+
+ old_ver = old->node_ver;
+ new->node_ver = !old_ver;
+
+ /* Insert in new table. */
+ for (i = 0; i < old->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head;
+
+ head = flex_array_get(old->buckets, i);
+
+ hlist_for_each_entry(flow, head, hash_node[old_ver])
+ table_instance_insert(new, flow);
+ }
+
+ old->keep_flows = true;
+}
+
+static struct table_instance *table_instance_rehash(struct table_instance *ti,
+ int n_buckets)
+{
+ struct table_instance *new_ti;
+
+ new_ti = table_instance_alloc(n_buckets);
+ if (!new_ti)
+ return NULL;
+
+ flow_table_copy_flows(ti, new_ti);
+
+ return new_ti;
+}
+
+int ovs_flow_tbl_flush(struct flow_table *flow_table)
+{
+ struct table_instance *old_ti;
+ struct table_instance *new_ti;
+
+ old_ti = ovsl_dereference(flow_table->ti);
+ new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!new_ti)
+ return -ENOMEM;
+
+ rcu_assign_pointer(flow_table->ti, new_ti);
+ flow_table->last_rehash = jiffies;
+ flow_table->count = 0;
+
+ table_instance_destroy(old_ti, true);
+ return 0;
+}
+
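+/* Hash the bytes in [key_start, key_end) of a flow key as u32 words. */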
+static u32 flow_hash(const struct sw_flow_key *key, int key_start,
+ int key_end)
+{
+ u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ int hash_u32s = (key_end - key_start) >> 2;
+
+	/* Make sure the number of hash bytes is a multiple of sizeof(u32). */
+ BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+ return jhash2(hash_key, hash_u32s, 0);
+}
+
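+/* Tunneled flows hash and compare from the very start of the key; other
+ * flows skip the tunnel key and start at 'phy', rounded down to a long
+ * boundary for the word-at-a-time helpers.
+ */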
+static int flow_key_start(const struct sw_flow_key *key)
+{
+ if (key->tun_key.ipv4_dst)
+ return 0;
+ else
+ return rounddown(offsetof(struct sw_flow_key, phy),
+ sizeof(long));
+}
+
+static bool cmp_key(const struct sw_flow_key *key1,
+ const struct sw_flow_key *key2,
+ int key_start, int key_end)
+{
+ const long *cp1 = (long *)((u8 *)key1 + key_start);
+ const long *cp2 = (long *)((u8 *)key2 + key_start);
+ long diffs = 0;
+ int i;
+
+ for (i = key_start; i < key_end; i += sizeof(long))
+ diffs |= *cp1++ ^ *cp2++;
+
+ return diffs == 0;
+}
+
+static bool flow_cmp_masked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key,
+ int key_start, int key_end)
+{
+ return cmp_key(&flow->key, key, key_start, key_end);
+}
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ struct sw_flow_match *match)
+{
+ struct sw_flow_key *key = match->key;
+ int key_start = flow_key_start(key);
+ int key_end = match->range.end;
+
+ return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+}
+
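+/* Look up 'unmasked' under a single mask: apply the mask, hash only the
+ * masked byte range, and compare candidates over that same range.
+ */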
+static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
+ const struct sw_flow_key *unmasked,
+ struct sw_flow_mask *mask)
+{
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ int key_start = mask->range.start;
+ int key_end = mask->range.end;
+ u32 hash;
+ struct sw_flow_key masked_key;
+
+ ovs_flow_mask_key(&masked_key, unmasked, mask);
+ hash = flow_hash(&masked_key, key_start, key_end);
+ head = find_bucket(ti, hash);
+ hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
+ if (flow->mask == mask && flow->hash == hash &&
+ flow_cmp_masked_key(flow, &masked_key,
+ key_start, key_end))
+ return flow;
+ }
+ return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit)
+{
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ *n_mask_hit = 0;
+ list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
+ (*n_mask_hit)++;
+ flow = masked_flow_lookup(ti, key, mask);
+ if (flow) /* Found */
+ return flow;
+ }
+ return NULL;
+}
+
+int ovs_flow_tbl_num_masks(const struct flow_table *table)
+{
+ struct sw_flow_mask *mask;
+ int num = 0;
+
+ list_for_each_entry(mask, &table->mask_list, list)
+ num++;
+
+ return num;
+}
+
+static struct table_instance *table_instance_expand(struct table_instance *ti)
+{
+ return table_instance_rehash(ti, ti->n_buckets * 2);
+}
+
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+ struct table_instance *ti = ovsl_dereference(table->ti);
+
+ BUG_ON(table->count == 0);
+ hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+ table->count--;
+}
+
+static struct sw_flow_mask *mask_alloc(void)
+{
+ struct sw_flow_mask *mask;
+
+ mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+ if (mask)
+ mask->ref_count = 0;
+
+ return mask;
+}
+
+static void mask_add_ref(struct sw_flow_mask *mask)
+{
+ mask->ref_count++;
+}
+
+static bool mask_equal(const struct sw_flow_mask *a,
+ const struct sw_flow_mask *b)
+{
+ u8 *a_ = (u8 *)&a->key + a->range.start;
+ u8 *b_ = (u8 *)&b->key + b->range.start;
+
+ return (a->range.end == b->range.end)
+ && (a->range.start == b->range.start)
+ && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
+ const struct sw_flow_mask *mask)
+{
+ struct list_head *ml;
+
+ list_for_each(ml, &tbl->mask_list) {
+ struct sw_flow_mask *m;
+ m = container_of(ml, struct sw_flow_mask, list);
+ if (mask_equal(mask, m))
+ return m;
+ }
+
+ return NULL;
+}
+
+/* Add 'new' to the mask list, reusing an existing identical mask when one
+ * is already present; the flow takes a reference on whichever mask is used.
+ */
+static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
+ struct sw_flow_mask *new)
+{
+ struct sw_flow_mask *mask;
+ mask = flow_mask_find(tbl, new);
+ if (!mask) {
+		/* Allocate a new mask if none exists. */
+ mask = mask_alloc();
+ if (!mask)
+ return -ENOMEM;
+ mask->key = new->key;
+ mask->range = new->range;
+ list_add_rcu(&mask->list, &tbl->mask_list);
+ }
+
+ mask_add_ref(mask);
+ flow->mask = mask;
+ return 0;
+}
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+ struct sw_flow_mask *mask)
+{
+ struct table_instance *new_ti = NULL;
+ struct table_instance *ti;
+ int err;
+
+ err = flow_mask_insert(table, flow, mask);
+ if (err)
+ return err;
+
+ flow->hash = flow_hash(&flow->key, flow->mask->range.start,
+ flow->mask->range.end);
+ ti = ovsl_dereference(table->ti);
+ table_instance_insert(ti, flow);
+ table->count++;
+
+ /* Expand table, if necessary, to make room. */
+ if (table->count > ti->n_buckets)
+ new_ti = table_instance_expand(ti);
+ else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
+ new_ti = table_instance_rehash(ti, ti->n_buckets);
+
+ if (new_ti) {
+ rcu_assign_pointer(table->ti, new_ti);
+ table_instance_destroy(ti, true);
+ table->last_rehash = jiffies;
+ }
+ return 0;
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+ BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
+ BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+ 0, NULL);
+ if (flow_cache == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+ kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
new file mode 100644
index 000000000000..fbe45d5ad07d
--- /dev/null
+++ b/net/openvswitch/flow_table.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_TABLE_H
+#define FLOW_TABLE_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+struct table_instance {
+ struct flex_array *buckets;
+ unsigned int n_buckets;
+ struct rcu_head rcu;
+ int node_ver;
+ u32 hash_seed;
+ bool keep_flows;
+};
+
+struct flow_table {
+ struct table_instance __rcu *ti;
+ struct list_head mask_list;
+ unsigned long last_rehash;
+ unsigned int count;
+};
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_free(struct sw_flow *, bool deferred);
+
+int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_count(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+int ovs_flow_tbl_flush(struct flow_table *flow_table);
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+ struct sw_flow_mask *mask);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+int ovs_flow_tbl_num_masks(const struct flow_table *table);
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
+ u32 *bucket, u32 *idx);
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
+ const struct sw_flow_key *,
+ u32 *n_mask_hit);
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ struct sw_flow_match *match);
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask);
+#endif /* flow_table.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c99dea543d64..a3d6951602db 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -24,8 +24,6 @@
#include <linux/if_tunnel.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
#include <linux/in_route.h>
#include <linux/inetdevice.h>
#include <linux/jhash.h>
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 98d3edbbc235..729c68763fe7 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -134,7 +134,7 @@ static void do_setup(struct net_device *netdev)
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+ NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
netdev->vlan_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 09d93c13cfd6..d21f77d875ba 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu)
ovs_vport_free(vport_from_priv(netdev_vport));
}
-static void netdev_destroy(struct vport *vport)
+void ovs_netdev_detach_dev(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- rtnl_lock();
+ ASSERT_RTNL();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
- netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
+ netdev_upper_dev_unlink(netdev_vport->dev,
+ netdev_master_upper_dev_get(netdev_vport->dev));
dev_set_promiscuity(netdev_vport->dev, -1);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ rtnl_lock();
+ if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
+ ovs_netdev_detach_dev(vport);
rtnl_unlock();
call_rcu(&netdev_vport->rcu, free_port_rcu);
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index dd298b5c5cdb..8df01c1127e5 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport)
}
const char *ovs_netdev_get_name(const struct vport *);
+void ovs_netdev_detach_dev(struct vport *);
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 56e22b74cf96..e797a50ac2be 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -29,7 +29,6 @@
#include <net/ip.h>
#include <net/udp.h>
#include <net/ip_tunnels.h>
-#include <net/udp.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/dsfield.h>
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 642ad42c416b..378c3a6acf84 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -51,10 +51,16 @@ static struct kmem_cache *rds_conn_slab;
static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
{
+ static u32 rds_hash_secret __read_mostly;
+
+ unsigned long hash;
+
+ net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
+
/* Pass NULL, don't need struct net for hash */
- unsigned long hash = inet_ehashfn(NULL,
- be32_to_cpu(laddr), 0,
- be32_to_cpu(faddr), 0);
+ hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
+ be32_to_cpu(faddr), 0,
+ rds_hash_secret);
return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index ec1d731ecff0..48f8ffc60f8f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -749,7 +749,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg);
-extern void __rds_put_mr_final(struct rds_mr *mr);
+void __rds_put_mr_final(struct rds_mr *mr);
static inline void rds_mr_put(struct rds_mr *mr)
{
if (atomic_dec_and_test(&mr->r_refcount))
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index a693aca2ae2e..5f43675ee1df 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -426,17 +426,16 @@ extern struct workqueue_struct *rxrpc_workqueue;
/*
* ar-accept.c
*/
-extern void rxrpc_accept_incoming_calls(struct work_struct *);
-extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
- unsigned long);
-extern int rxrpc_reject_call(struct rxrpc_sock *);
+void rxrpc_accept_incoming_calls(struct work_struct *);
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+int rxrpc_reject_call(struct rxrpc_sock *);
/*
* ar-ack.c
*/
-extern void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
-extern void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
-extern void rxrpc_process_call(struct work_struct *);
+void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
+void rxrpc_process_call(struct work_struct *);
/*
* ar-call.c
@@ -445,19 +444,18 @@ extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
-extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
- struct rxrpc_transport *,
- struct rxrpc_conn_bundle *,
- unsigned long, int, gfp_t);
-extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
- struct rxrpc_connection *,
- struct rxrpc_header *, gfp_t);
-extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
- unsigned long);
-extern void rxrpc_release_call(struct rxrpc_call *);
-extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-extern void __rxrpc_put_call(struct rxrpc_call *);
-extern void __exit rxrpc_destroy_all_calls(void);
+struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *,
+ unsigned long, int, gfp_t);
+struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
+ struct rxrpc_connection *,
+ struct rxrpc_header *, gfp_t);
+struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
+void rxrpc_release_call(struct rxrpc_call *);
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+void __rxrpc_put_call(struct rxrpc_call *);
+void __exit rxrpc_destroy_all_calls(void);
/*
* ar-connection.c
@@ -465,19 +463,16 @@ extern void __exit rxrpc_destroy_all_calls(void);
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
-extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
- struct rxrpc_transport *,
- struct key *,
- __be16, gfp_t);
-extern void rxrpc_put_bundle(struct rxrpc_transport *,
- struct rxrpc_conn_bundle *);
-extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
- struct rxrpc_conn_bundle *, struct rxrpc_call *,
- gfp_t);
-extern void rxrpc_put_connection(struct rxrpc_connection *);
-extern void __exit rxrpc_destroy_all_connections(void);
-extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
- struct rxrpc_header *);
+struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct key *, __be16, gfp_t);
+void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *);
+int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t);
+void rxrpc_put_connection(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_connections(void);
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
+ struct rxrpc_header *);
extern struct rxrpc_connection *
rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
gfp_t);
@@ -485,15 +480,15 @@ rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
/*
* ar-connevent.c
*/
-extern void rxrpc_process_connection(struct work_struct *);
-extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
-extern void rxrpc_reject_packets(struct work_struct *);
+void rxrpc_process_connection(struct work_struct *);
+void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
+void rxrpc_reject_packets(struct work_struct *);
/*
* ar-error.c
*/
-extern void rxrpc_UDP_error_report(struct sock *);
-extern void rxrpc_UDP_error_handler(struct work_struct *);
+void rxrpc_UDP_error_report(struct sock *);
+void rxrpc_UDP_error_handler(struct work_struct *);
/*
* ar-input.c
@@ -501,18 +496,17 @@ extern void rxrpc_UDP_error_handler(struct work_struct *);
extern unsigned long rxrpc_ack_timeout;
extern const char *rxrpc_pkts[];
-extern void rxrpc_data_ready(struct sock *, int);
-extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
- bool);
-extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
+void rxrpc_data_ready(struct sock *, int);
+int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
+void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
/*
* ar-local.c
*/
extern rwlock_t rxrpc_local_lock;
-extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
-extern void rxrpc_put_local(struct rxrpc_local *);
-extern void __exit rxrpc_destroy_all_locals(void);
+struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
+void rxrpc_put_local(struct rxrpc_local *);
+void __exit rxrpc_destroy_all_locals(void);
/*
* ar-key.c
@@ -520,31 +514,29 @@ extern void __exit rxrpc_destroy_all_locals(void);
extern struct key_type key_type_rxrpc;
extern struct key_type key_type_rxrpc_s;
-extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
-extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
-extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
- time_t, u32);
+int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
+int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
+ u32);
/*
* ar-output.c
*/
extern int rxrpc_resend_timeout;
-extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
-extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
- struct rxrpc_transport *, struct msghdr *,
- size_t);
-extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
- struct msghdr *, size_t);
+int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
+int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
+ struct rxrpc_transport *, struct msghdr *, size_t);
+int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *,
+ size_t);
/*
* ar-peer.c
*/
-extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
-extern void rxrpc_put_peer(struct rxrpc_peer *);
-extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
- __be32, __be16);
-extern void __exit rxrpc_destroy_all_peers(void);
+struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
+void rxrpc_put_peer(struct rxrpc_peer *);
+struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *, __be32, __be16);
+void __exit rxrpc_destroy_all_peers(void);
/*
* ar-proc.c
@@ -556,38 +548,36 @@ extern const struct file_operations rxrpc_connection_seq_fops;
/*
* ar-recvmsg.c
*/
-extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
-extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
- size_t, int);
+void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t,
+ int);
/*
* ar-security.c
*/
-extern int rxrpc_register_security(struct rxrpc_security *);
-extern void rxrpc_unregister_security(struct rxrpc_security *);
-extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
-extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
-extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
- size_t, void *);
-extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
- u32 *);
-extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
+int rxrpc_register_security(struct rxrpc_security *);
+void rxrpc_unregister_security(struct rxrpc_security *);
+int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *, size_t,
+ void *);
+int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *, u32 *);
+void rxrpc_clear_conn_security(struct rxrpc_connection *);
/*
* ar-skbuff.c
*/
-extern void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_packet_destructor(struct sk_buff *);
/*
* ar-transport.c
*/
-extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
- struct rxrpc_peer *,
- gfp_t);
-extern void rxrpc_put_transport(struct rxrpc_transport *);
-extern void __exit rxrpc_destroy_all_transports(void);
-extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
- struct rxrpc_peer *);
+struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
+ struct rxrpc_peer *, gfp_t);
+void rxrpc_put_transport(struct rxrpc_transport *);
+void __exit rxrpc_destroy_all_transports(void);
+struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
+ struct rxrpc_peer *);
/*
* debug tracing
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c03a32a0418e..ad1f1d819203 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -443,6 +443,16 @@ config NET_CLS_CGROUP
To compile this code as a module, choose M here: the
module will be called cls_cgroup.
+config NET_CLS_BPF
+ tristate "BPF-based classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ programmable BPF (JIT'ed) filters as an alternative to ematches.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_bpf.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e5f9abe9a5db..35fa47a494ab 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
+obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
new file mode 100644
index 000000000000..1002a8226281
--- /dev/null
+++ b/net/sched/cls_bpf.c
@@ -0,0 +1,385 @@
+/*
+ * Berkeley Packet Filter based traffic classifier
+ *
+ * Might be used to classify traffic through flexible, user-defined and
+ * possibly JIT-ed BPF filters for traffic control as an alternative to
+ * ematches.
+ *
+ * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_DESCRIPTION("TC BPF based classifier");
+
+struct cls_bpf_head {
+ struct list_head plist;
+ u32 hgen;
+};
+
+struct cls_bpf_prog {
+ struct sk_filter *filter;
+ struct sock_filter *bpf_ops;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct list_head link;
+ u32 handle;
+ u16 bpf_len;
+};
+
+static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
+ [TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
+ [TCA_BPF_OPS] = { .type = NLA_BINARY,
+ .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
+};
+
+static const struct tcf_ext_map bpf_ext_map = {
+ .action = TCA_BPF_ACT,
+ .police = TCA_BPF_POLICE,
+};
+
+static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+ int ret;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ int filter_res = SK_RUN_FILTER(prog->filter, skb);
+
+ if (filter_res == 0)
+ continue;
+
+ *res = prog->res;
+ if (filter_res != -1)
+ res->classid = filter_res;
+
+ ret = tcf_exts_exec(skb, &prog->exts, res);
+ if (ret < 0)
+ continue;
+
+ return ret;
+ }
+
+ return -1;
+}
+
+static int cls_bpf_init(struct tcf_proto *tp)
+{
+ struct cls_bpf_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD(&head->plist);
+ tp->root = head;
+
+ return 0;
+}
+
+static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+{
+ tcf_unbind_filter(tp, &prog->res);
+ tcf_exts_destroy(tp, &prog->exts);
+
+ sk_unattached_filter_destroy(prog->filter);
+
+ kfree(prog->bpf_ops);
+ kfree(prog);
+}
+
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (prog == todel) {
+ tcf_tree_lock(tp);
+ list_del(&prog->link);
+ tcf_tree_unlock(tp);
+
+ cls_bpf_delete_prog(tp, prog);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static void cls_bpf_destroy(struct tcf_proto *tp)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog, *tmp;
+
+ list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+ list_del(&prog->link);
+ cls_bpf_delete_prog(tp, prog);
+ }
+
+ kfree(head);
+}
+
+static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+ unsigned long ret = 0UL;
+
+ if (head == NULL)
+ return 0UL;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (prog->handle == handle) {
+ ret = (unsigned long) prog;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est)
+{
+ struct sock_filter *bpf_ops, *bpf_old;
+ struct tcf_exts exts;
+ struct sock_fprog tmp;
+ struct sk_filter *fp, *fp_old;
+ u16 bpf_size, bpf_len;
+ u32 classid;
+ int ret;
+
+ if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
+ return -EINVAL;
+
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
+ if (ret < 0)
+ return ret;
+
+ classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
+ if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+ ret = -EINVAL;
+ goto errout;
+ }
+
+ bpf_size = bpf_len * sizeof(*bpf_ops);
+ bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+ if (bpf_ops == NULL) {
+ ret = -ENOMEM;
+ goto errout;
+ }
+
+ memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
+
+ tmp.len = bpf_len;
+ tmp.filter = (struct sock_filter __user *) bpf_ops;
+
+ ret = sk_unattached_filter_create(&fp, &tmp);
+ if (ret)
+ goto errout_free;
+
+ tcf_tree_lock(tp);
+ fp_old = prog->filter;
+ bpf_old = prog->bpf_ops;
+
+ prog->bpf_len = bpf_len;
+ prog->bpf_ops = bpf_ops;
+ prog->filter = fp;
+ prog->res.classid = classid;
+ tcf_tree_unlock(tp);
+
+ tcf_bind_filter(tp, &prog->res, base);
+ tcf_exts_change(tp, &prog->exts, &exts);
+
+ if (fp_old)
+ sk_unattached_filter_destroy(fp_old);
+ if (bpf_old)
+ kfree(bpf_old);
+
+ return 0;
+
+errout_free:
+ kfree(bpf_ops);
+errout:
+ tcf_exts_destroy(tp, &exts);
+ return ret;
+}
+
+static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
+ struct cls_bpf_head *head)
+{
+ unsigned int i = 0x80000000;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && cls_bpf_get(tp, head->hgen));
+ if (i == 0)
+ pr_err("Insufficient number of handles\n");
+
+ return i;
+}
+
+static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
+ struct nlattr *tb[TCA_BPF_MAX + 1];
+ int ret;
+
+ if (tca[TCA_OPTIONS] == NULL)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
+ if (ret < 0)
+ return ret;
+
+ if (prog != NULL) {
+ if (handle && prog->handle != handle)
+ return -EINVAL;
+ return cls_bpf_modify_existing(net, tp, prog, base, tb,
+ tca[TCA_RATE]);
+ }
+
+ prog = kzalloc(sizeof(*prog), GFP_KERNEL);
+ if (prog == NULL)
+ return -ENOBUFS;
+
+ if (handle == 0)
+ prog->handle = cls_bpf_grab_new_handle(tp, head);
+ else
+ prog->handle = handle;
+ if (prog->handle == 0) {
+ ret = -EINVAL;
+ goto errout;
+ }
+
+ ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
+ if (ret < 0)
+ goto errout;
+
+ tcf_tree_lock(tp);
+ list_add(&prog->link, &head->plist);
+ tcf_tree_unlock(tp);
+
+ *arg = (unsigned long) prog;
+
+ return 0;
+errout:
+ if (*arg == 0UL && prog)
+ kfree(prog);
+
+ return ret;
+}
+
+static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *tm)
+{
+ struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+ struct nlattr *nest, *nla;
+
+ if (prog == NULL)
+ return skb->len;
+
+ tm->tcm_handle = prog->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
+ goto nla_put_failure;
+
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ goto nla_put_failure;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_bpf_head *head = tp->root;
+ struct cls_bpf_prog *prog;
+
+ list_for_each_entry(prog, &head->plist, link) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
+ .kind = "bpf",
+ .owner = THIS_MODULE,
+ .classify = cls_bpf_classify,
+ .init = cls_bpf_init,
+ .destroy = cls_bpf_destroy,
+ .get = cls_bpf_get,
+ .put = cls_bpf_put,
+ .change = cls_bpf_change,
+ .delete = cls_bpf_delete,
+ .walk = cls_bpf_walk,
+ .dump = cls_bpf_dump,
+};
+
+static int __init cls_bpf_init_mod(void)
+{
+ return register_tcf_proto_ops(&cls_bpf_ops);
+}
+
+static void __exit cls_bpf_exit_mod(void)
+{
+ unregister_tcf_proto_ops(&cls_bpf_ops);
+}
+
+module_init(cls_bpf_init_mod);
+module_exit(cls_bpf_exit_mod);
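
The contract a user-supplied program has to follow is the one cls_bpf_classify() implements above: a return value of 0 means "no match, fall through to the next prog on the list", -1 means "match, keep the classid configured via TCA_BPF_CLASSID", and any other value is taken as the classid itself. A minimal sketch of such a classic BPF program (illustrative only, not part of the patch; it assumes the standard BPF_STMT/BPF_JUMP macros and ancillary loads from linux/filter.h):

#include <linux/filter.h>
#include <linux/if_ether.h>

/* Match IPv4 frames and defer to the configured classid; anything
 * else returns 0 so cls_bpf_classify() moves on to the next prog.
 */
static const struct sock_filter cls_ipv4_prog[] = {
	/* A = skb->protocol (classic BPF ancillary load, host order) */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
		 SKF_AD_OFF + SKF_AD_PROTOCOL),
	/* if (A == ETH_P_IP) goto match; else goto no_match */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
	/* match: -1 selects the TCA_BPF_CLASSID default */
	BPF_STMT(BPF_RET | BPF_K, (u32) -1),
	/* no_match: 0 lets the next program run */
	BPF_STMT(BPF_RET | BPF_K, 0),
};

Userspace would hand such an array to the kernel as TCA_BPF_OPS, with TCA_BPF_OPS_LEN set to its element count; that is exactly what cls_bpf_modify_existing() copies out and feeds to sk_unattached_filter_create().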
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 1ac41d3de5c3..527aeb7a3ff0 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,7 +24,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
{
struct xt_set_info *set = data;
ip_set_id_t index;
- struct net *net = qdisc_dev(tp->q)->nd_net;
+ struct net *net = dev_net(qdisc_dev(tp->q));
if (data_len != sizeof(*set))
return -EINVAL;
@@ -46,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
- ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index);
+ ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index);
kfree((void *) em->data);
}
}
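
For context on the em_ipset change: net_device::nd_net only exists on CONFIG_NET_NS builds, so the open-coded ->nd_net dereference is not portable, and dev_net() hides the difference. A simplified sketch of the netdevice.h accessor (hypothetical name; the in-tree version goes through the read_pnet() helper, which behaves as below):

#include <linux/netdevice.h>

static inline struct net *dev_net_sketch(const struct net_device *dev)
{
#ifdef CONFIG_NET_NS
	return dev->nd_net;	/* per-device namespace pointer */
#else
	return &init_net;	/* single namespace: always init_net */
#endif
}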
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a9dfdda9ed1d..fdc041c57853 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ f->time_next_packet = 0ULL;
}
return f;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a6d788d45216..75c94e59a3bd 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -235,7 +235,6 @@ static bool loss_4state(struct netem_sched_data *q)
clg->state = 2;
else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
clg->state = 1;
- return true;
} else if (clg->a2 + clg->a3 < rnd) {
clg->state = 3;
return true;
@@ -358,6 +357,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
return PSCHED_NS2TICKS(ticks);
}
+static void tfifo_reset(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct rb_node *p;
+
+ while ((p = rb_first(&q->t_root))) {
+ struct sk_buff *skb = netem_rb_to_skb(p);
+
+ rb_erase(p, &q->t_root);
+ skb->next = NULL;
+ skb->prev = NULL;
+ kfree_skb(skb);
+ }
+}
+
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -520,6 +534,7 @@ static unsigned int netem_drop(struct Qdisc *sch)
skb->next = NULL;
skb->prev = NULL;
len = qdisc_pkt_len(skb);
+ sch->qstats.backlog -= len;
kfree_skb(skb);
}
}
@@ -609,6 +624,7 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset_queue(sch);
+ tfifo_reset(sch);
if (q->qdisc)
qdisc_reset(q->qdisc);
qdisc_watchdog_cancel(&q->watchdog);
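
Why tfifo_reset() (and the matching netem_drop() hunk) clears skb->next and skb->prev before freeing: this netem version keeps its time-sorted queue in an rbtree whose rb_node overlays the leading pointer fields of the sk_buff itself rather than using a dedicated member. A hedged reconstruction of the in-file helpers that reading rests on:

/* Hypothetical reconstruction for illustration -- the rb_node lives
 * in the skb's own next/prev/tstamp storage, so those fields hold
 * rbtree pointers until they are NULLed prior to kfree_skb().
 */
static inline struct rb_node *netem_rb_node(struct sk_buff *skb)
{
	return (struct rb_node *)&skb->next;
}

static inline struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
	return (struct sk_buff *)rb;
}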
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index cef509985192..c9b91cb1cb0d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -602,7 +602,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
/* Start a T3 timer here in case it wasn't running so
* that these migrated packets have a chance to get
- * retrnasmitted.
+ * retransmitted.
*/
if (!timer_pending(&active->T3_rtx_timer))
if (!mod_timer(&active->T3_rtx_timer,
@@ -665,7 +665,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* Set the path max_retrans. */
peer->pathmaxrxt = asoc->pathmaxrxt;
- /* And the partial failure retrnas threshold */
+ /* And the partial failure retrans threshold */
peer->pf_retrans = asoc->pf_retrans;
/* Initialize the peer's SACK delay timeout based on the
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 8c4fa5dec824..46b5977978a1 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -539,18 +539,14 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
for (i = 0; i < n_elt; i++) {
id = ntohs(hmacs->hmac_ids[i]);
- /* Check the id is in the supported range */
- if (id > SCTP_AUTH_HMAC_ID_MAX) {
- id = 0;
- continue;
- }
-
- /* See is we support the id. Supported IDs have name and
- * length fields set, so that we can allocated and use
+ /* Check the id is in the supported range. And
+ * see if we support the id. Supported IDs have name and
+ * length fields set, so that we can allocate and use
* them. We can safely just check for name, for without the
* name, we can't allocate the TFM.
*/
- if (!sctp_hmac_list[id].hmac_name) {
+ if (id > SCTP_AUTH_HMAC_ID_MAX ||
+ !sctp_hmac_list[id].hmac_name) {
id = 0;
continue;
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 7bd5ed4a8657..f2044fcb9dd1 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
max = asoc->frag_point;
 /* If the peer requested that we authenticate DATA chunks
- * we need to accound for bundling of the AUTH chunks along with
+ * we need to account for bundling of the AUTH chunks along with
* DATA.
*/
if (sctp_auth_send_cid(SCTP_CID_DATA, asoc)) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f6334aa19151..7567e6f1a920 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port));
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
- if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
+ if (!laddr->valid || laddr->state == SCTP_ADDR_DEL ||
+ (laddr->state != SCTP_ADDR_SRC &&
+ !asoc->src_out_of_asoc_ok))
continue;
/* Do not compare against v4 addrs */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0ac3a65daccb..e650978daf27 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -390,7 +390,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
__u8 has_data = 0;
struct dst_entry *dst = tp->dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
- __u32 cksum_buf_len = sizeof(struct sctphdr);
pr_debug("%s: packet:%p\n", __func__, packet);
@@ -493,7 +492,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
if (chunk == packet->auth)
auth = skb_tail_pointer(nskb);
- cksum_buf_len += chunk->skb->len;
memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len);
@@ -536,13 +534,9 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*/
if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) {
- __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
-
- /* 3) Put the resultant value into the checksum field in the
- * common header, and leave the rest of the bits unchanged.
- */
- sh->checksum = sctp_end_cksum(crc32);
+ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+ (dst_xfrm(dst) != NULL) || packet->ipfragok) {
+ sh->checksum = sctp_compute_cksum(nskb, 0);
} else {
/* no need to seed pseudo checksum for SCTP */
nskb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d244a23ab8d3..fe690320b1e4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1297,6 +1297,13 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
/* Turn an skb into a chunk.
* FIXME: Eventually move the structure directly inside the skb->cb[].
+ *
+ * sctpimpguide-05.txt Section 2.8.2
+ * M1) Each time a new DATA chunk is transmitted
+ * set the 'TSN.Missing.Report' count for that TSN to 0. The
+ * 'TSN.Missing.Report' count will be used to determine missing chunks
+ * and when to fast retransmit.
+ *
*/
struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
const struct sctp_association *asoc,
@@ -1314,29 +1321,9 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
INIT_LIST_HEAD(&retval->list);
retval->skb = skb;
retval->asoc = (struct sctp_association *)asoc;
- retval->has_tsn = 0;
- retval->has_ssn = 0;
- retval->rtt_in_progress = 0;
- retval->sent_at = 0;
retval->singleton = 1;
- retval->end_of_packet = 0;
- retval->ecn_ce_done = 0;
- retval->pdiscard = 0;
-
- /* sctpimpguide-05.txt Section 2.8.2
- * M1) Each time a new DATA chunk is transmitted
- * set the 'TSN.Missing.Report' count for that TSN to 0. The
- * 'TSN.Missing.Report' count will be used to determine missing chunks
- * and when to fast retransmit.
- */
- retval->tsn_missing_report = 0;
- retval->tsn_gap_acked = 0;
- retval->fast_retransmit = SCTP_CAN_FRTX;
- /* If this is a fragmented message, track all fragments
- * of the message (for SEND_FAILED).
- */
- retval->msg = NULL;
+ retval->fast_retransmit = SCTP_CAN_FRTX;
/* Polish the bead hole. */
INIT_LIST_HEAD(&retval->transmitted_list);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 666c66842799..1a6eef39ab2f 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
(!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
return;
- BUG_ON(asoc->peer.primary_path == NULL);
sctp_unhash_established(asoc);
sctp_association_free(asoc);
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 609c30c80816..3f9707a16d06 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -387,7 +387,7 @@ restart:
b_ptr = &tipc_bearers[bearer_id];
strcpy(b_ptr->name, name);
- res = m_ptr->enable_bearer(b_ptr);
+ res = m_ptr->enable_media(b_ptr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
@@ -420,23 +420,15 @@ exit:
}
/**
- * tipc_block_bearer - Block the bearer with the given name, and reset all its links
+ * tipc_block_bearer - Block the bearer, and reset all its links
*/
-int tipc_block_bearer(const char *name)
+int tipc_block_bearer(struct tipc_bearer *b_ptr)
{
- struct tipc_bearer *b_ptr = NULL;
struct tipc_link *l_ptr;
struct tipc_link *temp_l_ptr;
read_lock_bh(&tipc_net_lock);
- b_ptr = tipc_bearer_find(name);
- if (!b_ptr) {
- pr_warn("Attempt to block unknown bearer <%s>\n", name);
- read_unlock_bh(&tipc_net_lock);
- return -EINVAL;
- }
-
- pr_info("Blocking bearer <%s>\n", name);
+ pr_info("Blocking bearer <%s>\n", b_ptr->name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
@@ -465,7 +457,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
pr_info("Disabling bearer <%s>\n", b_ptr->name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
- b_ptr->media->disable_bearer(b_ptr);
+ b_ptr->media->disable_media(b_ptr);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
tipc_link_delete(l_ptr);
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 09c869adcfcf..e5e04be6fffa 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -75,8 +75,8 @@ struct tipc_bearer;
/**
* struct tipc_media - TIPC media information available to internal users
* @send_msg: routine which handles buffer transmission
- * @enable_bearer: routine which enables a bearer
- * @disable_bearer: routine which disables a bearer
+ * @enable_media: routine which enables a media
+ * @disable_media: routine which disables a media
* @addr2str: routine which converts media address to string
* @addr2msg: routine which converts media address to protocol message area
* @msg2addr: routine which converts media address from protocol message area
@@ -91,8 +91,8 @@ struct tipc_media {
int (*send_msg)(struct sk_buff *buf,
struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest);
- int (*enable_bearer)(struct tipc_bearer *b_ptr);
- void (*disable_bearer)(struct tipc_bearer *b_ptr);
+ int (*enable_media)(struct tipc_bearer *b_ptr);
+ void (*disable_media)(struct tipc_bearer *b_ptr);
int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
int (*msg2addr)(const struct tipc_bearer *b_ptr,
@@ -163,7 +163,7 @@ int tipc_register_media(struct tipc_media *m_ptr);
void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
-int tipc_block_bearer(const char *name);
+int tipc_block_bearer(struct tipc_bearer *b_ptr);
void tipc_continue(struct tipc_bearer *tb_ptr);
int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index be72f8cebc53..94895d4e86ab 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -90,21 +90,21 @@ extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
*/
-extern int tipc_core_start_net(unsigned long);
-extern int tipc_handler_start(void);
-extern void tipc_handler_stop(void);
-extern int tipc_netlink_start(void);
-extern void tipc_netlink_stop(void);
-extern int tipc_socket_init(void);
-extern void tipc_socket_stop(void);
-extern int tipc_sock_create_local(int type, struct socket **res);
-extern void tipc_sock_release_local(struct socket *sock);
-extern int tipc_sock_accept_local(struct socket *sock,
- struct socket **newsock, int flags);
+int tipc_core_start_net(unsigned long);
+int tipc_handler_start(void);
+void tipc_handler_stop(void);
+int tipc_netlink_start(void);
+void tipc_netlink_stop(void);
+int tipc_socket_init(void);
+void tipc_socket_stop(void);
+int tipc_sock_create_local(int type, struct socket **res);
+void tipc_sock_release_local(struct socket *sock);
+int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
+ int flags);
#ifdef CONFIG_SYSCTL
-extern int tipc_register_sysctl(void);
-extern void tipc_unregister_sysctl(void);
+int tipc_register_sysctl(void);
+void tipc_unregister_sysctl(void);
#else
#define tipc_register_sysctl() 0
#define tipc_unregister_sysctl()
@@ -201,6 +201,6 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
return (struct tipc_msg *)skb->data;
}
-extern struct sk_buff *tipc_buf_acquire(u32 size);
+struct sk_buff *tipc_buf_acquire(u32 size);
#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 40ea40cf6204..f80d59f5a161 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -2,7 +2,7 @@
* net/tipc/eth_media.c: Ethernet bearer support for TIPC
*
* Copyright (c) 2001-2007, Ericsson AB
- * Copyright (c) 2005-2008, 2011, Wind River Systems
+ * Copyright (c) 2005-2008, 2011-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,19 +37,19 @@
#include "core.h"
#include "bearer.h"
-#define MAX_ETH_BEARERS MAX_BEARERS
+#define MAX_ETH_MEDIA MAX_BEARERS
#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */
/**
- * struct eth_bearer - Ethernet bearer data structure
+ * struct eth_media - Ethernet bearer data structure
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Ethernet network device
* @tipc_packet_type: used in binding TIPC to Ethernet driver
* @setup: work item used when enabling bearer
* @cleanup: work item used when disabling bearer
*/
-struct eth_bearer {
+struct eth_media {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
@@ -58,7 +58,7 @@ struct eth_bearer {
};
static struct tipc_media eth_media_info;
-static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
+static struct eth_media eth_media_array[MAX_ETH_MEDIA];
static int eth_started;
static int recv_notification(struct notifier_block *nb, unsigned long evt,
@@ -100,7 +100,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
if (!clone)
return 0;
- dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
+ dev = ((struct eth_media *)(tb_ptr->usr_handle))->dev;
delta = dev->hard_header_len - skb_headroom(buf);
if ((delta > 0) &&
@@ -128,43 +128,43 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
static int recv_msg(struct sk_buff *buf, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
+ struct eth_media *eb_ptr = (struct eth_media *)pt->af_packet_priv;
if (!net_eq(dev_net(dev), &init_net)) {
kfree_skb(buf);
- return 0;
+ return NET_RX_DROP;
}
if (likely(eb_ptr->bearer)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
tipc_recv_msg(buf, eb_ptr->bearer);
- return 0;
+ return NET_RX_SUCCESS;
}
}
kfree_skb(buf);
- return 0;
+ return NET_RX_DROP;
}
/**
- * setup_bearer - setup association between Ethernet bearer and interface
+ * setup_media - setup association between Ethernet bearer and interface
*/
-static void setup_bearer(struct work_struct *work)
+static void setup_media(struct work_struct *work)
{
- struct eth_bearer *eb_ptr =
- container_of(work, struct eth_bearer, setup);
+ struct eth_media *eb_ptr =
+ container_of(work, struct eth_media, setup);
dev_add_pack(&eb_ptr->tipc_packet_type);
}
/**
- * enable_bearer - attach TIPC bearer to an Ethernet interface
+ * enable_media - attach TIPC bearer to an Ethernet interface
*/
-static int enable_bearer(struct tipc_bearer *tb_ptr)
+static int enable_media(struct tipc_bearer *tb_ptr)
{
struct net_device *dev;
- struct eth_bearer *eb_ptr = &eth_bearers[0];
- struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+ struct eth_media *eb_ptr = &eth_media_array[0];
+ struct eth_media *stop = &eth_media_array[MAX_ETH_MEDIA];
char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
int pending_dev = 0;
@@ -188,7 +188,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
eb_ptr->tipc_packet_type.func = recv_msg;
eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- INIT_WORK(&eb_ptr->setup, setup_bearer);
+ INIT_WORK(&eb_ptr->setup, setup_media);
schedule_work(&eb_ptr->setup);
/* Associate TIPC bearer with Ethernet bearer */
@@ -205,14 +205,14 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
}
/**
- * cleanup_bearer - break association between Ethernet bearer and interface
+ * cleanup_media - break association between Ethernet bearer and interface
*
* This routine must be invoked from a work queue because it can sleep.
*/
-static void cleanup_bearer(struct work_struct *work)
+static void cleanup_media(struct work_struct *work)
{
- struct eth_bearer *eb_ptr =
- container_of(work, struct eth_bearer, cleanup);
+ struct eth_media *eb_ptr =
+ container_of(work, struct eth_media, cleanup);
dev_remove_pack(&eb_ptr->tipc_packet_type);
dev_put(eb_ptr->dev);
@@ -220,18 +220,18 @@ static void cleanup_bearer(struct work_struct *work)
}
/**
- * disable_bearer - detach TIPC bearer from an Ethernet interface
+ * disable_media - detach TIPC bearer from an Ethernet interface
*
* Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
* then get worker thread to complete bearer cleanup. (Can't do cleanup
* here because cleanup code needs to sleep and caller holds spinlocks.)
*/
-static void disable_bearer(struct tipc_bearer *tb_ptr)
+static void disable_media(struct tipc_bearer *tb_ptr)
{
- struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle;
+ struct eth_media *eb_ptr = (struct eth_media *)tb_ptr->usr_handle;
eb_ptr->bearer = NULL;
- INIT_WORK(&eb_ptr->cleanup, cleanup_bearer);
+ INIT_WORK(&eb_ptr->cleanup, cleanup_media);
schedule_work(&eb_ptr->cleanup);
}
@@ -245,8 +245,8 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct eth_bearer *eb_ptr = &eth_bearers[0];
- struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+ struct eth_media *eb_ptr = &eth_media_array[0];
+ struct eth_media *stop = &eth_media_array[MAX_ETH_MEDIA];
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
@@ -265,17 +265,17 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
if (netif_carrier_ok(dev))
tipc_continue(eb_ptr->bearer);
else
- tipc_block_bearer(eb_ptr->bearer->name);
+ tipc_block_bearer(eb_ptr->bearer);
break;
case NETDEV_UP:
tipc_continue(eb_ptr->bearer);
break;
case NETDEV_DOWN:
- tipc_block_bearer(eb_ptr->bearer->name);
+ tipc_block_bearer(eb_ptr->bearer);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
- tipc_block_bearer(eb_ptr->bearer->name);
+ tipc_block_bearer(eb_ptr->bearer);
tipc_continue(eb_ptr->bearer);
break;
case NETDEV_UNREGISTER:
@@ -327,8 +327,8 @@ static int eth_msg2addr(const struct tipc_bearer *tb_ptr,
*/
static struct tipc_media eth_media_info = {
.send_msg = send_msg,
- .enable_bearer = enable_bearer,
- .disable_bearer = disable_bearer,
+ .enable_media = enable_media,
+ .disable_media = disable_media,
.addr2str = eth_addr2str,
.addr2msg = eth_addr2msg,
.msg2addr = eth_msg2addr,
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 9934a32bfa87..c13989297464 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,17 +42,17 @@
#include "core.h"
#include "bearer.h"
-#define MAX_IB_BEARERS MAX_BEARERS
+#define MAX_IB_MEDIA MAX_BEARERS
/**
- * struct ib_bearer - Infiniband bearer data structure
+ * struct ib_media - Infiniband media data structure
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Infiniband network device
* @tipc_packet_type: used in binding TIPC to Infiniband driver
* @cleanup: work item used when disabling bearer
*/
-struct ib_bearer {
+struct ib_media {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
@@ -61,7 +61,7 @@ struct ib_bearer {
};
static struct tipc_media ib_media_info;
-static struct ib_bearer ib_bearers[MAX_IB_BEARERS];
+static struct ib_media ib_media_array[MAX_IB_MEDIA];
static int ib_started;
/**
@@ -93,7 +93,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
if (!clone)
return 0;
- dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev;
+ dev = ((struct ib_media *)(tb_ptr->usr_handle))->dev;
delta = dev->hard_header_len - skb_headroom(buf);
if ((delta > 0) &&
@@ -121,43 +121,43 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
static int recv_msg(struct sk_buff *buf, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv;
+ struct ib_media *ib_ptr = (struct ib_media *)pt->af_packet_priv;
if (!net_eq(dev_net(dev), &init_net)) {
kfree_skb(buf);
- return 0;
+ return NET_RX_DROP;
}
if (likely(ib_ptr->bearer)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
tipc_recv_msg(buf, ib_ptr->bearer);
- return 0;
+ return NET_RX_SUCCESS;
}
}
kfree_skb(buf);
- return 0;
+ return NET_RX_DROP;
}
/**
* setup_bearer - setup association between InfiniBand bearer and interface
*/
-static void setup_bearer(struct work_struct *work)
+static void setup_media(struct work_struct *work)
{
- struct ib_bearer *ib_ptr =
- container_of(work, struct ib_bearer, setup);
+ struct ib_media *ib_ptr =
+ container_of(work, struct ib_media, setup);
dev_add_pack(&ib_ptr->tipc_packet_type);
}
/**
- * enable_bearer - attach TIPC bearer to an InfiniBand interface
+ * enable_media - attach TIPC bearer to an InfiniBand interface
*/
-static int enable_bearer(struct tipc_bearer *tb_ptr)
+static int enable_media(struct tipc_bearer *tb_ptr)
{
struct net_device *dev;
- struct ib_bearer *ib_ptr = &ib_bearers[0];
- struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
+ struct ib_media *ib_ptr = &ib_media_array[0];
+ struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA];
char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
int pending_dev = 0;
@@ -181,7 +181,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
ib_ptr->tipc_packet_type.func = recv_msg;
ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr;
INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list));
- INIT_WORK(&ib_ptr->setup, setup_bearer);
+ INIT_WORK(&ib_ptr->setup, setup_media);
schedule_work(&ib_ptr->setup);
/* Associate TIPC bearer with InfiniBand bearer */
@@ -204,8 +204,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
*/
static void cleanup_bearer(struct work_struct *work)
{
- struct ib_bearer *ib_ptr =
- container_of(work, struct ib_bearer, cleanup);
+ struct ib_media *ib_ptr =
+ container_of(work, struct ib_media, cleanup);
dev_remove_pack(&ib_ptr->tipc_packet_type);
dev_put(ib_ptr->dev);
@@ -213,15 +213,15 @@ static void cleanup_bearer(struct work_struct *work)
}
/**
- * disable_bearer - detach TIPC bearer from an InfiniBand interface
+ * disable_media - detach TIPC bearer from an InfiniBand interface
*
* Mark InfiniBand bearer as inactive so that incoming buffers are thrown away,
* then get worker thread to complete bearer cleanup. (Can't do cleanup
* here because cleanup code needs to sleep and caller holds spinlocks.)
*/
-static void disable_bearer(struct tipc_bearer *tb_ptr)
+static void disable_media(struct tipc_bearer *tb_ptr)
{
- struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle;
+ struct ib_media *ib_ptr = (struct ib_media *)tb_ptr->usr_handle;
ib_ptr->bearer = NULL;
INIT_WORK(&ib_ptr->cleanup, cleanup_bearer);
@@ -238,8 +238,8 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ib_bearer *ib_ptr = &ib_bearers[0];
- struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
+ struct ib_media *ib_ptr = &ib_media_array[0];
+ struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA];
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
@@ -258,17 +258,17 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
if (netif_carrier_ok(dev))
tipc_continue(ib_ptr->bearer);
else
- tipc_block_bearer(ib_ptr->bearer->name);
+ tipc_block_bearer(ib_ptr->bearer);
break;
case NETDEV_UP:
tipc_continue(ib_ptr->bearer);
break;
case NETDEV_DOWN:
- tipc_block_bearer(ib_ptr->bearer->name);
+ tipc_block_bearer(ib_ptr->bearer);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
- tipc_block_bearer(ib_ptr->bearer->name);
+ tipc_block_bearer(ib_ptr->bearer);
tipc_continue(ib_ptr->bearer);
break;
case NETDEV_UNREGISTER:
@@ -323,8 +323,8 @@ static int ib_msg2addr(const struct tipc_bearer *tb_ptr,
*/
static struct tipc_media ib_media_info = {
.send_msg = send_msg,
- .enable_bearer = enable_bearer,
- .disable_bearer = disable_bearer,
+ .enable_media = enable_media,
+ .disable_media = disable_media,
.addr2str = ib_addr2str,
.addr2msg = ib_addr2msg,
.msg2addr = ib_msg2addr,
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0cc3d9015c5d..54163f91b8ae 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -75,20 +75,6 @@ static const char *link_unk_evt = "Unknown link event ";
*/
#define START_CHANGEOVER 100000u
-/**
- * struct tipc_link_name - deconstructed link name
- * @addr_local: network address of node at this end
- * @if_local: name of interface at this end
- * @addr_peer: network address of node at far end
- * @if_peer: name of interface at far end
- */
-struct tipc_link_name {
- u32 addr_local;
- char if_local[TIPC_MAX_IF_NAME];
- u32 addr_peer;
- char if_peer[TIPC_MAX_IF_NAME];
-};
-
static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
struct sk_buff *buf);
static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf);
@@ -97,8 +83,7 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
static int link_send_sections_long(struct tipc_port *sender,
struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len,
- u32 destnode);
+ unsigned int len, u32 destnode);
static void link_state_event(struct tipc_link *l_ptr, u32 event);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
@@ -161,72 +146,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr)
}
/**
- * link_name_validate - validate & (optionally) deconstruct tipc_link name
- * @name: ptr to link name string
- * @name_parts: ptr to area for link name components (or NULL if not needed)
- *
- * Returns 1 if link name is valid, otherwise 0.
- */
-static int link_name_validate(const char *name,
- struct tipc_link_name *name_parts)
-{
- char name_copy[TIPC_MAX_LINK_NAME];
- char *addr_local;
- char *if_local;
- char *addr_peer;
- char *if_peer;
- char dummy;
- u32 z_local, c_local, n_local;
- u32 z_peer, c_peer, n_peer;
- u32 if_local_len;
- u32 if_peer_len;
-
- /* copy link name & ensure length is OK */
- name_copy[TIPC_MAX_LINK_NAME - 1] = 0;
- /* need above in case non-Posix strncpy() doesn't pad with nulls */
- strncpy(name_copy, name, TIPC_MAX_LINK_NAME);
- if (name_copy[TIPC_MAX_LINK_NAME - 1] != 0)
- return 0;
-
- /* ensure all component parts of link name are present */
- addr_local = name_copy;
- if_local = strchr(addr_local, ':');
- if (if_local == NULL)
- return 0;
- *(if_local++) = 0;
- addr_peer = strchr(if_local, '-');
- if (addr_peer == NULL)
- return 0;
- *(addr_peer++) = 0;
- if_local_len = addr_peer - if_local;
- if_peer = strchr(addr_peer, ':');
- if (if_peer == NULL)
- return 0;
- *(if_peer++) = 0;
- if_peer_len = strlen(if_peer) + 1;
-
- /* validate component parts of link name */
- if ((sscanf(addr_local, "%u.%u.%u%c",
- &z_local, &c_local, &n_local, &dummy) != 3) ||
- (sscanf(addr_peer, "%u.%u.%u%c",
- &z_peer, &c_peer, &n_peer, &dummy) != 3) ||
- (z_local > 255) || (c_local > 4095) || (n_local > 4095) ||
- (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) ||
- (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) ||
- (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME))
- return 0;
-
- /* return link name components, if necessary */
- if (name_parts) {
- name_parts->addr_local = tipc_addr(z_local, c_local, n_local);
- strcpy(name_parts->if_local, if_local);
- name_parts->addr_peer = tipc_addr(z_peer, c_peer, n_peer);
- strcpy(name_parts->if_peer, if_peer);
- }
- return 1;
-}
-
-/**
* link_timeout - handle expiration of link timer
* @l_ptr: pointer to link
*
@@ -1065,8 +984,7 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
*/
int tipc_link_send_sections_fast(struct tipc_port *sender,
struct iovec const *msg_sect,
- const u32 num_sect, unsigned int total_len,
- u32 destaddr)
+ unsigned int len, u32 destaddr)
{
struct tipc_msg *hdr = &sender->phdr;
struct tipc_link *l_ptr;
@@ -1080,8 +998,7 @@ again:
* Try building message using port's max_pkt hint.
* (Must not hold any locks while building message.)
*/
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len,
- sender->max_pkt, &buf);
+ res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf);
/* Exit if build request was invalid */
if (unlikely(res < 0))
return res;
@@ -1121,8 +1038,7 @@ exit:
if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
goto again;
- return link_send_sections_long(sender, msg_sect,
- num_sect, total_len,
+ return link_send_sections_long(sender, msg_sect, len,
destaddr);
}
tipc_node_unlock(node);
@@ -1133,8 +1049,8 @@ exit:
if (buf)
return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
if (res >= 0)
- return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
+ return tipc_port_reject_sections(sender, hdr, msg_sect,
+ len, TIPC_ERR_NO_NODE);
return res;
}
@@ -1154,18 +1070,17 @@ exit:
*/
static int link_send_sections_long(struct tipc_port *sender,
struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len,
- u32 destaddr)
+ unsigned int len, u32 destaddr)
{
struct tipc_link *l_ptr;
struct tipc_node *node;
struct tipc_msg *hdr = &sender->phdr;
- u32 dsz = total_len;
+ u32 dsz = len;
u32 max_pkt, fragm_sz, rest;
struct tipc_msg fragm_hdr;
struct sk_buff *buf, *buf_chain, *prev;
u32 fragm_crs, fragm_rest, hsz, sect_rest;
- const unchar *sect_crs;
+ const unchar __user *sect_crs;
int curr_sect;
u32 fragm_no;
int res = 0;
@@ -1207,7 +1122,7 @@ again:
if (!sect_rest) {
sect_rest = msg_sect[++curr_sect].iov_len;
- sect_crs = (const unchar *)msg_sect[curr_sect].iov_base;
+ sect_crs = msg_sect[curr_sect].iov_base;
}
if (sect_rest < fragm_rest)
@@ -1283,8 +1198,8 @@ reject:
buf = buf_chain->next;
kfree_skb(buf_chain);
}
- return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
+ return tipc_port_reject_sections(sender, hdr, msg_sect,
+ len, TIPC_ERR_NO_NODE);
}
/* Append chain of fragments to send queue & send them */
@@ -1592,15 +1507,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Ensure bearer is still enabled */
if (unlikely(!b_ptr->active))
- goto cont;
+ goto discard;
/* Ensure message is well-formed */
if (unlikely(!link_recv_buf_validate(buf)))
- goto cont;
+ goto discard;
/* Ensure message data is a single contiguous unit */
if (unlikely(skb_linearize(buf)))
- goto cont;
+ goto discard;
/* Handle arrival of a non-unicast link message */
msg = buf_msg(buf);
@@ -1616,20 +1531,18 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Discard unicast link messages destined for another node */
if (unlikely(!msg_short(msg) &&
(msg_destnode(msg) != tipc_own_addr)))
- goto cont;
+ goto discard;
/* Locate neighboring node that sent message */
n_ptr = tipc_node_find(msg_prevnode(msg));
if (unlikely(!n_ptr))
- goto cont;
+ goto discard;
tipc_node_lock(n_ptr);
/* Locate unicast link endpoint that should handle message */
l_ptr = n_ptr->links[b_ptr->identity];
- if (unlikely(!l_ptr)) {
- tipc_node_unlock(n_ptr);
- goto cont;
- }
+ if (unlikely(!l_ptr))
+ goto unlock_discard;
/* Verify that communication with node is currently allowed */
if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
@@ -1639,10 +1552,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
!msg_redundant_link(msg))
n_ptr->block_setup &= ~WAIT_PEER_DOWN;
- if (n_ptr->block_setup) {
- tipc_node_unlock(n_ptr);
- goto cont;
- }
+ if (n_ptr->block_setup)
+ goto unlock_discard;
/* Validate message sequence number info */
seq_no = msg_seqno(msg);
@@ -1678,98 +1589,97 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Now (finally!) process the incoming message */
protocol_check:
- if (likely(link_working_working(l_ptr))) {
- if (likely(seq_no == mod(l_ptr->next_in_no))) {
- l_ptr->next_in_no++;
- if (unlikely(l_ptr->oldest_deferred_in))
- head = link_insert_deferred_queue(l_ptr,
- head);
-deliver:
- if (likely(msg_isdata(msg))) {
- tipc_node_unlock(n_ptr);
- tipc_port_recv_msg(buf);
- continue;
- }
- switch (msg_user(msg)) {
- int ret;
- case MSG_BUNDLER:
- l_ptr->stats.recv_bundles++;
- l_ptr->stats.recv_bundled +=
- msg_msgcnt(msg);
- tipc_node_unlock(n_ptr);
- tipc_link_recv_bundle(buf);
- continue;
- case NAME_DISTRIBUTOR:
- n_ptr->bclink.recv_permitted = true;
- tipc_node_unlock(n_ptr);
- tipc_named_recv(buf);
- continue;
- case BCAST_PROTOCOL:
- tipc_link_recv_sync(n_ptr, buf);
- tipc_node_unlock(n_ptr);
- continue;
- case CONN_MANAGER:
- tipc_node_unlock(n_ptr);
- tipc_port_recv_proto_msg(buf);
- continue;
- case MSG_FRAGMENTER:
- l_ptr->stats.recv_fragments++;
- ret = tipc_link_recv_fragment(
- &l_ptr->defragm_buf,
- &buf, &msg);
- if (ret == 1) {
- l_ptr->stats.recv_fragmented++;
- goto deliver;
- }
- if (ret == -1)
- l_ptr->next_in_no--;
- break;
- case CHANGEOVER_PROTOCOL:
- type = msg_type(msg);
- if (link_recv_changeover_msg(&l_ptr,
- &buf)) {
- msg = buf_msg(buf);
- seq_no = msg_seqno(msg);
- if (type == ORIGINAL_MSG)
- goto deliver;
- goto protocol_check;
- }
- break;
- default:
- kfree_skb(buf);
- buf = NULL;
- break;
- }
+ if (unlikely(!link_working_working(l_ptr))) {
+ if (msg_user(msg) == LINK_PROTOCOL) {
+ link_recv_proto_msg(l_ptr, buf);
+ head = link_insert_deferred_queue(l_ptr, head);
tipc_node_unlock(n_ptr);
- tipc_net_route_msg(buf);
continue;
}
+
+ /* Traffic message. Conditionally activate link */
+ link_state_event(l_ptr, TRAFFIC_MSG_EVT);
+
+ if (link_working_working(l_ptr)) {
+ /* Re-insert buffer in front of queue */
+ buf->next = head;
+ head = buf;
+ tipc_node_unlock(n_ptr);
+ continue;
+ }
+ goto unlock_discard;
+ }
+
+ /* Link is now in state WORKING_WORKING */
+ if (unlikely(seq_no != mod(l_ptr->next_in_no))) {
link_handle_out_of_seq_msg(l_ptr, buf);
head = link_insert_deferred_queue(l_ptr, head);
tipc_node_unlock(n_ptr);
continue;
}
-
- /* Link is not in state WORKING_WORKING */
- if (msg_user(msg) == LINK_PROTOCOL) {
- link_recv_proto_msg(l_ptr, buf);
+ l_ptr->next_in_no++;
+ if (unlikely(l_ptr->oldest_deferred_in))
head = link_insert_deferred_queue(l_ptr, head);
+deliver:
+ if (likely(msg_isdata(msg))) {
tipc_node_unlock(n_ptr);
+ tipc_port_recv_msg(buf);
continue;
}
-
- /* Traffic message. Conditionally activate link */
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
-
- if (link_working_working(l_ptr)) {
- /* Re-insert buffer in front of queue */
- buf->next = head;
- head = buf;
+ switch (msg_user(msg)) {
+ int ret;
+ case MSG_BUNDLER:
+ l_ptr->stats.recv_bundles++;
+ l_ptr->stats.recv_bundled += msg_msgcnt(msg);
+ tipc_node_unlock(n_ptr);
+ tipc_link_recv_bundle(buf);
+ continue;
+ case NAME_DISTRIBUTOR:
+ n_ptr->bclink.recv_permitted = true;
tipc_node_unlock(n_ptr);
+ tipc_named_recv(buf);
continue;
+ case BCAST_PROTOCOL:
+ tipc_link_recv_sync(n_ptr, buf);
+ tipc_node_unlock(n_ptr);
+ continue;
+ case CONN_MANAGER:
+ tipc_node_unlock(n_ptr);
+ tipc_port_recv_proto_msg(buf);
+ continue;
+ case MSG_FRAGMENTER:
+ l_ptr->stats.recv_fragments++;
+ ret = tipc_link_recv_fragment(&l_ptr->defragm_buf,
+ &buf, &msg);
+ if (ret == 1) {
+ l_ptr->stats.recv_fragmented++;
+ goto deliver;
+ }
+ if (ret == -1)
+ l_ptr->next_in_no--;
+ break;
+ case CHANGEOVER_PROTOCOL:
+ type = msg_type(msg);
+ if (link_recv_changeover_msg(&l_ptr, &buf)) {
+ msg = buf_msg(buf);
+ seq_no = msg_seqno(msg);
+ if (type == ORIGINAL_MSG)
+ goto deliver;
+ goto protocol_check;
+ }
+ break;
+ default:
+ kfree_skb(buf);
+ buf = NULL;
+ break;
}
tipc_node_unlock(n_ptr);
-cont:
+ tipc_net_route_msg(buf);
+ continue;
+unlock_discard:
+
+ tipc_node_unlock(n_ptr);
+discard:
kfree_skb(buf);
}
read_unlock_bh(&tipc_net_lock);
@@ -2585,25 +2495,21 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
static struct tipc_link *link_find_link(const char *name,
struct tipc_node **node)
{
- struct tipc_link_name link_name_parts;
- struct tipc_bearer *b_ptr;
struct tipc_link *l_ptr;
+ struct tipc_node *n_ptr;
+ int i;
- if (!link_name_validate(name, &link_name_parts))
- return NULL;
-
- b_ptr = tipc_bearer_find_interface(link_name_parts.if_local);
- if (!b_ptr)
- return NULL;
-
- *node = tipc_node_find(link_name_parts.addr_peer);
- if (!*node)
- return NULL;
-
- l_ptr = (*node)->links[b_ptr->identity];
- if (!l_ptr || strcmp(l_ptr->name, name))
- return NULL;
-
+ list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ for (i = 0; i < MAX_BEARERS; i++) {
+ l_ptr = n_ptr->links[i];
+ if (l_ptr && !strcmp(l_ptr->name, name))
+ goto found;
+ }
+ }
+ l_ptr = NULL;
+ n_ptr = NULL;
+found:
+ *node = n_ptr;
return l_ptr;
}
@@ -2646,6 +2552,7 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
struct tipc_link *l_ptr;
struct tipc_bearer *b_ptr;
struct tipc_media *m_ptr;
+ int res = 0;
l_ptr = link_find_link(name, &node);
if (l_ptr) {
@@ -2668,9 +2575,12 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
case TIPC_CMD_SET_LINK_WINDOW:
tipc_link_set_queue_limits(l_ptr, new_value);
break;
+ default:
+ res = -EINVAL;
+ break;
}
tipc_node_unlock(node);
- return 0;
+ return res;
}
b_ptr = tipc_bearer_find(name);
@@ -2678,15 +2588,18 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
switch (cmd) {
case TIPC_CMD_SET_LINK_TOL:
b_ptr->tolerance = new_value;
- return 0;
+ break;
case TIPC_CMD_SET_LINK_PRI:
b_ptr->priority = new_value;
- return 0;
+ break;
case TIPC_CMD_SET_LINK_WINDOW:
b_ptr->window = new_value;
- return 0;
+ break;
+ default:
+ res = -EINVAL;
+ break;
}
- return -EINVAL;
+ return res;
}
m_ptr = tipc_media_find(name);
@@ -2695,15 +2608,18 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
switch (cmd) {
case TIPC_CMD_SET_LINK_TOL:
m_ptr->tolerance = new_value;
- return 0;
+ break;
case TIPC_CMD_SET_LINK_PRI:
m_ptr->priority = new_value;
- return 0;
+ break;
case TIPC_CMD_SET_LINK_WINDOW:
m_ptr->window = new_value;
- return 0;
+ break;
+ default:
+ res = -EINVAL;
+ break;
}
- return -EINVAL;
+ return res;
}
struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space,
diff --git a/net/tipc/link.h b/net/tipc/link.h
index c048ed1cbd76..55cf8554a08b 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -227,9 +227,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
int tipc_link_send_sections_fast(struct tipc_port *sender,
struct iovec const *msg_sect,
- const u32 num_sect,
- unsigned int total_len,
- u32 destnode);
+ unsigned int len, u32 destnode);
void tipc_link_recv_bundle(struct sk_buff *buf);
int tipc_link_recv_fragment(struct sk_buff **pending,
struct sk_buff **fb,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ced60e2fc4f7..e525f8ce1dee 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -73,13 +73,13 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
* Returns message data size or errno
*/
int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len, int max_size,
- struct sk_buff **buf)
+ unsigned int len, int max_size, struct sk_buff **buf)
{
- int dsz, sz, hsz, pos, res, cnt;
+ int dsz, sz, hsz;
+ unsigned char *to;
- dsz = total_len;
- pos = hsz = msg_hdr_sz(hdr);
+ dsz = len;
+ hsz = msg_hdr_sz(hdr);
sz = hsz + dsz;
msg_set_size(hdr, sz);
if (unlikely(sz > max_size)) {
@@ -91,16 +91,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
if (!(*buf))
return -ENOMEM;
skb_copy_to_linear_data(*buf, hdr, hsz);
- for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
- skb_copy_to_linear_data_offset(*buf, pos,
- msg_sect[cnt].iov_base,
- msg_sect[cnt].iov_len);
- pos += msg_sect[cnt].iov_len;
+ to = (*buf)->data + hsz;
+ if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) {
+ kfree_skb(*buf);
+ *buf = NULL;
+ return -EFAULT;
}
- if (likely(res))
- return dsz;
-
- kfree_skb(*buf);
- *buf = NULL;
- return -EFAULT;
+ return dsz;
}
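
The loop-to-single-call rewrite in tipc_msg_build() leans on the fact that memcpy_fromiovecend() takes an explicit byte offset and never advances the iovec, so no per-section cursor is needed. A minimal sketch of the resulting pattern, assuming the net/core/iovec.c helper's usual signature (int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len), returning 0 on success or -EFAULT):

#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/uio.h>

/* Copy an entire user iovec into one linear kernel buffer, as the
 * rewritten tipc_msg_build() does for the message body.
 */
static int copy_msg_body(unsigned char *to, struct iovec const *msg_sect,
			 unsigned int len)
{
	/* offset 0: start at the first byte of the first section;
	 * the iovec array is only read here, never consumed.
	 */
	if (len && memcpy_fromiovecend(to, msg_sect, 0, len))
		return -EFAULT;
	return 0;
}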
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5e4ccf5c27df..559b73a9bf35 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -722,6 +722,5 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m);
void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
u32 destnode);
int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len, int max_size,
- struct sk_buff **buf);
+ unsigned int len, int max_size, struct sk_buff **buf);
#endif
diff --git a/net/tipc/port.c b/net/tipc/port.c
index b3ed2fcab4fb..c081a7632302 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -90,8 +90,7 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg)
* tipc_multicast - send a multicast message to local and remote destinations
*/
int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
- u32 num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
+ struct iovec const *msg_sect, unsigned int len)
{
struct tipc_msg *hdr;
struct sk_buff *buf;
@@ -114,8 +113,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
msg_set_namelower(hdr, seq->lower);
msg_set_nameupper(hdr, seq->upper);
msg_set_hdr_sz(hdr, MCAST_H_SIZE);
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
- &buf);
+ res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf);
if (unlikely(!buf))
return res;
@@ -436,14 +434,13 @@ exit:
}
int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, u32 num_sect,
- unsigned int total_len, int err)
+ struct iovec const *msg_sect, unsigned int len,
+ int err)
{
struct sk_buff *buf;
int res;
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
- &buf);
+ res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf);
if (!buf)
return res;
@@ -918,15 +915,14 @@ int tipc_port_recv_msg(struct sk_buff *buf)
* tipc_port_recv_sections(): Concatenate and deliver sectioned
* message for this node.
*/
-static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect,
+static int tipc_port_recv_sections(struct tipc_port *sender,
struct iovec const *msg_sect,
- unsigned int total_len)
+ unsigned int len)
{
struct sk_buff *buf;
int res;
- res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len,
- MAX_MSG_SIZE, &buf);
+ res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf);
if (likely(buf))
tipc_port_recv_msg(buf);
return res;
@@ -935,8 +931,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se
/**
* tipc_send - send message sections on connection
*/
-int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
+int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
{
struct tipc_port *p_ptr;
u32 destnode;
@@ -950,11 +945,10 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
if (!tipc_port_congested(p_ptr)) {
destnode = port_peernode(p_ptr);
if (likely(!in_own_node(destnode)))
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
- total_len, destnode);
+ res = tipc_link_send_sections_fast(p_ptr, msg_sect,
+ len, destnode);
else
- res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
- total_len);
+ res = tipc_port_recv_sections(p_ptr, msg_sect, len);
if (likely(res != -ELINKCONG)) {
p_ptr->congested = 0;
@@ -965,7 +959,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
}
if (port_unreliable(p_ptr)) {
p_ptr->congested = 0;
- return total_len;
+ return len;
}
return -ELINKCONG;
}
@@ -974,8 +968,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
* tipc_send2name - send message sections to port name
*/
int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
+ struct iovec const *msg_sect, unsigned int len)
{
struct tipc_port *p_ptr;
struct tipc_msg *msg;
@@ -999,36 +992,32 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
if (likely(destport || destnode)) {
if (likely(in_own_node(destnode)))
- res = tipc_port_recv_sections(p_ptr, num_sect,
- msg_sect, total_len);
+ res = tipc_port_recv_sections(p_ptr, msg_sect, len);
else if (tipc_own_addr)
res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- num_sect, total_len,
- destnode);
+ len, destnode);
else
res = tipc_port_reject_sections(p_ptr, msg, msg_sect,
- num_sect, total_len,
- TIPC_ERR_NO_NODE);
+ len, TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
if (port_unreliable(p_ptr)) {
- return total_len;
+ return len;
}
return -ELINKCONG;
}
- return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NAME);
+ return tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
+ TIPC_ERR_NO_NAME);
}
/**
* tipc_send2port - send message sections to port identity
*/
int tipc_send2port(u32 ref, struct tipc_portid const *dest,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
+ struct iovec const *msg_sect, unsigned int len)
{
struct tipc_port *p_ptr;
struct tipc_msg *msg;
@@ -1046,21 +1035,20 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
msg_set_hdr_sz(msg, BASIC_H_SIZE);
if (in_own_node(dest->node))
- res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
- total_len);
+ res = tipc_port_recv_sections(p_ptr, msg_sect, len);
else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
- total_len, dest->node);
+ res = tipc_link_send_sections_fast(p_ptr, msg_sect, len,
+ dest->node);
else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
+ res = tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
+ TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
if (port_unreliable(p_ptr)) {
- return total_len;
+ return len;
}
return -ELINKCONG;
}
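Note: throughout the TIPC send path this series drops the per-call section count (num_sect) and the redundant total_len pair; the message-build code walks the iovec itself, so callers pass only the iovec and the total byte length. A hedged sketch of a post-patch caller ('port_ref', 'payload' and 'payload_len' are hypothetical names for illustration only):

	struct iovec iov = {
		.iov_base = payload,
		.iov_len  = payload_len,
	};
	int rc = tipc_send(port_ref, &iov, payload_len);
	if (rc < 0 && rc != -ELINKCONG)
		pr_err("tipc_send failed: %d\n", rc);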
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 5a7026b9c345..912253597343 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -151,24 +151,20 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg);
* TIPC messaging routines
*/
int tipc_port_recv_msg(struct sk_buff *buf);
-int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
+int tipc_send(u32 portref, struct iovec const *msg_sect, unsigned int len);
int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
+ struct iovec const *msg_sect, unsigned int len);
int tipc_send2port(u32 portref, struct tipc_portid const *dest,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
+ struct iovec const *msg_sect, unsigned int len);
int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
- unsigned int section_count, struct iovec const *msg,
- unsigned int total_len);
+ struct iovec const *msg, unsigned int len);
int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, u32 num_sect,
- unsigned int total_len, int err);
+ struct iovec const *msg_sect, unsigned int len,
+ int err);
struct sk_buff *tipc_port_get_ports(void);
void tipc_port_recv_proto_msg(struct sk_buff *buf);
void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 6cc7ddd2fb7c..3906527259d1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -338,7 +338,7 @@ static int release(struct socket *sock)
buf = __skb_dequeue(&sk->sk_receive_queue);
if (buf == NULL)
break;
- if (TIPC_SKB_CB(buf)->handle != 0)
+ if (TIPC_SKB_CB(buf)->handle != NULL)
kfree_skb(buf);
else {
if ((sock->state == SS_CONNECTING) ||
@@ -622,13 +622,11 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = tipc_send2name(tport->ref,
&dest->addr.name.name,
dest->addr.name.domain,
- m->msg_iovlen,
m->msg_iov,
total_len);
} else if (dest->addrtype == TIPC_ADDR_ID) {
res = tipc_send2port(tport->ref,
&dest->addr.id,
- m->msg_iovlen,
m->msg_iov,
total_len);
} else if (dest->addrtype == TIPC_ADDR_MCAST) {
@@ -641,7 +639,6 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
break;
res = tipc_multicast(tport->ref,
&dest->addr.nameseq,
- m->msg_iovlen,
m->msg_iov,
total_len);
}
@@ -707,8 +704,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
break;
}
- res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov,
- total_len);
+ res = tipc_send(tport->ref, m->msg_iov, total_len);
if (likely(res != -ELINKCONG))
break;
if (timeout_val <= 0L) {
@@ -1368,7 +1364,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
return TIPC_ERR_OVERLOAD;
/* Enqueue message */
- TIPC_SKB_CB(buf)->handle = 0;
+ TIPC_SKB_CB(buf)->handle = NULL;
__skb_queue_tail(&sk->sk_receive_queue, buf);
skb_set_owner_r(buf, sk);
@@ -1691,7 +1687,7 @@ restart:
/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
buf = __skb_dequeue(&sk->sk_receive_queue);
if (buf) {
- if (TIPC_SKB_CB(buf)->handle != 0) {
+ if (TIPC_SKB_CB(buf)->handle != NULL) {
kfree_skb(buf);
goto restart;
}
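Note: TIPC_SKB_CB(buf)->handle is a pointer field, so the 0 -> NULL changes in this file are purely cosmetic; they silence sparse's "Using plain integer as NULL pointer" warning without altering behavior. A standalone illustration:

#include <stddef.h>

struct cb { void *handle; };

int main(void)
{
	struct cb cb = { .handle = NULL };	/* preferred over '= 0' */
	return cb.handle != NULL;		/* pointer-typed test, as in the patch */
}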
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86de99ad2976..c1f403bed683 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
return 0;
}
+static void unix_sock_inherit_flags(const struct socket *old,
+ struct socket *new)
+{
+ if (test_bit(SOCK_PASSCRED, &old->flags))
+ set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSSEC, &old->flags))
+ set_bit(SOCK_PASSSEC, &new->flags);
+}
+
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
+ unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
unix_state_unlock(tsk);
return 0;
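Note: before this hunk, SOCK_PASSCRED/SOCK_PASSSEC set on an AF_UNIX listener did not propagate to sockets returned by accept(), so credential/security ancillary data for the earliest messages could be missed until user space set the option again on the new fd. A hedged user-space sketch of the behavior the helper enables (error handling omitted):

#include <stddef.h>
#include <sys/socket.h>

/* With the patch, the accepted socket inherits SO_PASSCRED from the
 * listener, so SCM_CREDENTIALS arrive from the very first recvmsg().
 */
int accept_with_creds(int listen_fd)
{
	int on = 1;

	setsockopt(listen_fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
	return accept(listen_fd, NULL, NULL);
}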
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index 1e743d214856..5dcd9c067bf0 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -63,11 +63,11 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state)
{
wimax_dev->state = state;
}
-extern void __wimax_state_change(struct wimax_dev *, enum wimax_st);
+void __wimax_state_change(struct wimax_dev *, enum wimax_st);
#ifdef CONFIG_DEBUG_FS
-extern int wimax_debugfs_add(struct wimax_dev *);
-extern void wimax_debugfs_rm(struct wimax_dev *);
+int wimax_debugfs_add(struct wimax_dev *);
+void wimax_debugfs_rm(struct wimax_dev *);
#else
static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev)
{
@@ -76,13 +76,13 @@ static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev)
static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
#endif
-extern void wimax_id_table_add(struct wimax_dev *);
-extern struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int);
-extern void wimax_id_table_rm(struct wimax_dev *);
-extern void wimax_id_table_release(void);
+void wimax_id_table_add(struct wimax_dev *);
+struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int);
+void wimax_id_table_rm(struct wimax_dev *);
+void wimax_id_table_release(void);
-extern int wimax_rfkill_add(struct wimax_dev *);
-extern void wimax_rfkill_rm(struct wimax_dev *);
+int wimax_rfkill_add(struct wimax_dev *);
+void wimax_rfkill_rm(struct wimax_dev *);
extern struct genl_family wimax_gnl_family;
extern struct genl_multicast_group wimax_gnl_mcg;
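Note: 'extern' is implicit on function declarations, so stripping it from these prototypes (here and in the core.h, sysfs.h and xfrm_hash.h hunks below) is a style-only cleanup with no effect on the generated objects; the keyword stays on the two variable declarations, where it is required. The two forms are equivalent:

extern int wimax_rfkill_add(struct wimax_dev *);	/* old style */
int wimax_rfkill_add(struct wimax_dev *);		/* new style, same linkage */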
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 50f6195c8b70..16f3c3a7b2c1 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -328,6 +328,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2,
width);
}
+EXPORT_SYMBOL(cfg80211_chandef_dfs_required);
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index fe8d4f2be49b..aff959e5a1b3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -958,8 +958,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_PRE_UP:
if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
return notifier_from_errno(-EOPNOTSUPP);
- if (rfkill_blocked(rdev->rfkill))
- return notifier_from_errno(-ERFKILL);
ret = cfg80211_can_add_interface(rdev, wdev->iftype);
if (ret)
return notifier_from_errno(ret);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 9ad43c619c54..af10e59af2d8 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -234,10 +234,10 @@ struct cfg80211_beacon_registration {
};
/* free object */
-extern void cfg80211_dev_free(struct cfg80211_registered_device *rdev);
+void cfg80211_dev_free(struct cfg80211_registered_device *rdev);
-extern int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
- char *newname);
+int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
+ char *newname);
void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
@@ -382,15 +382,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
enum cfg80211_chan_mode chanmode,
u8 radar_detect);
-/**
- * cfg80211_chandef_dfs_required - checks if radar detection is required
- * @wiphy: the wiphy to validate against
- * @chandef: the channel definition to check
- * Return: 1 if radar detection is required, 0 if it is not, < 0 on error
- */
-int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
- const struct cfg80211_chan_def *c);
-
void cfg80211_set_dfs_state(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
enum nl80211_dfs_state dfs_state);
@@ -411,6 +402,9 @@ static inline int
cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype)
{
+ if (rfkill_blocked(rdev->rfkill))
+ return -ERFKILL;
+
return cfg80211_can_change_interface(rdev, NULL, iftype);
}
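Note: the rfkill test removed from the NETDEV_PRE_UP notifier (core.c hunk above) reappears inside cfg80211_can_add_interface(), so every caller of the helper now observes rfkill, not just the netdev bring-up path; the notifier still rejects, since it calls the helper immediately afterwards. A sketch of the consolidated flow:

/* Consolidated gate (sketch of the resulting call graph):
 *
 *   NETDEV_PRE_UP notifier ----.
 *                               +--> cfg80211_can_add_interface()
 *   other interface-add paths -'          |
 *                                         +-- rfkill_blocked() ? -ERFKILL
 *                                         +-- cfg80211_can_change_interface()
 */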
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 90d050036624..454157717efa 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -47,17 +47,19 @@ static int ht_print_chan(struct ieee80211_channel *chan,
return 0;
if (chan->flags & IEEE80211_CHAN_DISABLED)
- return snprintf(buf + offset,
- buf_size - offset,
- "%d Disabled\n",
- chan->center_freq);
-
- return snprintf(buf + offset,
- buf_size - offset,
- "%d HT40 %c%c\n",
- chan->center_freq,
- (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) ? ' ' : '-',
- (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) ? ' ' : '+');
+ return scnprintf(buf + offset,
+ buf_size - offset,
+ "%d Disabled\n",
+ chan->center_freq);
+
+ return scnprintf(buf + offset,
+ buf_size - offset,
+ "%d HT40 %c%c\n",
+ chan->center_freq,
+ (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) ?
+ ' ' : '-',
+ (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) ?
+ ' ' : '+');
}
static ssize_t ht40allow_map_read(struct file *file,
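Note: snprintf() returns the length the output *would* have taken, so accumulating its return value into an offset can march the write pointer past the buffer once truncation occurs; scnprintf() returns the bytes actually stored (excluding the NUL), which is what an accumulator needs. A standalone user-space demonstration of the failure mode (the kernel's vsnprintf has the same return semantics):

#include <stdio.h>

int main(void)
{
	char buf[8];
	size_t offset = 0;

	/* "hello, world!" needs 13 bytes but buf holds 8:
	 * snprintf() still returns 13.
	 */
	offset += (size_t)snprintf(buf, sizeof(buf), "hello, world!");

	/* offset (13) now exceeds sizeof(buf) (8); a subsequent
	 * snprintf(buf + offset, sizeof(buf) - offset, ...) would write
	 * out of bounds. scnprintf() would have returned 7 here.
	 */
	printf("offset = %zu, buffer = %zu bytes\n", offset, sizeof(buf));
	return 0;
}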
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index 9392f8cbb901..42ed274e81f4 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -46,6 +46,12 @@ BEGIN {
sub(/:/, "", country)
printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
printf "\t.alpha2 = \"%s\",\n", country
+ if ($NF ~ /DFS-ETSI/)
+ printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
+ else if ($NF ~ /DFS-FCC/)
+ printf "\t.dfs_region = NL80211_DFS_FCC,\n"
+ else if ($NF ~ /DFS-JP/)
+ printf "\t.dfs_region = NL80211_DFS_JP,\n"
printf "\t.reg_rules = {\n"
active = 1
regdb = regdb "\t&regdom_" country ",\n"
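Note: $NF is the last field of the "country" line, so a db.txt entry ending in a DFS-region token now emits the matching .dfs_region initializer; lines without a token keep the zero-initialized default (NL80211_DFS_UNSET). Illustrative output for a hypothetical "country DE: DFS-ETSI" input line (alpha2 chosen only for the example):

static const struct ieee80211_regdomain regdom_DE = {
	.alpha2 = "DE",
	.dfs_region = NL80211_DFS_ETSI,
	.reg_rules = {
		/* ...rules emitted exactly as before... */
	},
};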
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 626dc3b5fd8d..cbbef88a8ebd 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5591,6 +5591,9 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (err)
return err;
+ if (netif_carrier_ok(dev))
+ return -EBUSY;
+
if (wdev->cac_started)
return -EBUSY;
@@ -5634,15 +5637,26 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
u8 radar_detect_width = 0;
int err;
+ bool need_new_beacon = false;
if (!rdev->ops->channel_switch ||
!(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
return -EOPNOTSUPP;
- /* may add IBSS support later */
- if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
- dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+ switch (dev->ieee80211_ptr->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ need_new_beacon = true;
+
+ /* useless if AP is not running */
+ if (!wdev->beacon_interval)
+ return -EINVAL;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ break;
+ default:
return -EOPNOTSUPP;
+ }
memset(&params, 0, sizeof(params));
@@ -5651,15 +5665,16 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
/* only important for AP, IBSS and mesh create IEs internally */
- if (!info->attrs[NL80211_ATTR_CSA_IES])
- return -EINVAL;
-
- /* useless if AP is not running */
- if (!wdev->beacon_interval)
+ if (need_new_beacon &&
+ (!info->attrs[NL80211_ATTR_CSA_IES] ||
+ !info->attrs[NL80211_ATTR_CSA_C_OFF_BEACON]))
return -EINVAL;
params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]);
+ if (!need_new_beacon)
+ goto skip_beacons;
+
err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
if (err)
return err;
@@ -5699,6 +5714,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+skip_beacons:
err = nl80211_parse_chandef(rdev, info, &params.chandef);
if (err)
return err;
@@ -5706,12 +5722,17 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
return -EINVAL;
- err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
- if (err < 0) {
- return err;
- } else if (err) {
- radar_detect_width = BIT(params.chandef.width);
- params.radar_required = true;
+ /* DFS channels are only supported for AP/P2P GO ... for now. */
+ if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) {
+ err = cfg80211_chandef_dfs_required(wdev->wiphy,
+ &params.chandef);
+ if (err < 0) {
+ return err;
+ } else if (err) {
+ radar_detect_width = BIT(params.chandef.width);
+ params.radar_required = true;
+ }
}
err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
@@ -10740,7 +10761,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev_lock(wdev);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
- wdev->iftype != NL80211_IFTYPE_P2P_GO))
+ wdev->iftype != NL80211_IFTYPE_P2P_GO &&
+ wdev->iftype != NL80211_IFTYPE_ADHOC))
goto out;
wdev->channel = chandef->chan;
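Note: taken together, the nl80211 hunks (a) refuse to start radar detection while the carrier is up, and (b) extend CSA channel switching to IBSS: AP/P2P-GO interfaces must be beaconing and must supply CSA IEs plus a beacon counter offset, while ADHOC bypasses beacon parsing entirely and is, for now, denied DFS channels. A simplified (not verbatim) sketch of the resulting validation order in nl80211_channel_switch():

/* switch (iftype):
 *   AP, P2P_GO: require wdev->beacon_interval, CSA IEs and
 *               NL80211_ATTR_CSA_C_OFF_BEACON; parse beacon templates
 *   ADHOC:      goto skip_beacons (no beacon templates to update)
 *   default:    return -EOPNOTSUPP
 * then: parse chandef, cfg80211_reg_can_beacon()
 * then: AP/P2P_GO only: cfg80211_chandef_dfs_required() ->
 *       params.radar_required
 */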
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 7d604c06c3dc..a271c27fac77 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init(
struct ieee80211_radiotap_header *radiotap_header,
int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
{
+ /* check the radiotap header can actually be present */
+ if (max_length < sizeof(struct ieee80211_radiotap_header))
+ return -EINVAL;
+
/* Linux only supports version 0 radiotap format */
if (radiotap_header->it_version)
return -EINVAL;
@@ -131,7 +135,8 @@ int ieee80211_radiotap_iterator_init(
*/
if ((unsigned long)iterator->_arg -
- (unsigned long)iterator->_rtheader >
+ (unsigned long)iterator->_rtheader +
+ sizeof(uint32_t) >
(unsigned long)iterator->_max_length)
return -EINVAL;
}
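Note: two bounds fixes in the iterator: the fixed radiotap header must fit inside the buffer before it_version is read, and the walk over chained it_present words must check that the whole 32-bit word about to be read is in bounds, not merely its first byte. A standalone sketch of the guard pattern (struct layout illustrative; the real header is 8 bytes):

#include <stdint.h>
#include <stddef.h>

struct hdr { uint8_t version, pad; uint16_t len; uint32_t present; };

int iterator_init(const uint8_t *buf, size_t buflen)
{
	size_t off = sizeof(struct hdr);

	if (buflen < sizeof(struct hdr))	/* header itself must fit */
		return -1;

	/* Before reading the next 32-bit bitmap word, require the
	 * entire word to be in bounds (off + 4 <= buflen), matching the
	 * '+ sizeof(uint32_t)' added above.
	 */
	if (off + sizeof(uint32_t) > buflen)
		return -1;

	return 0;
}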
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index de06d5d1287f..d62cb1e91475 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -172,11 +172,21 @@ static const struct ieee80211_regdomain world_regdom = {
NL80211_RRF_NO_IBSS |
NL80211_RRF_NO_OFDM),
/* IEEE 802.11a, channel 36..48 */
- REG_RULE(5180-10, 5240+10, 80, 6, 20,
+ REG_RULE(5180-10, 5240+10, 160, 6, 20,
NL80211_RRF_PASSIVE_SCAN |
NL80211_RRF_NO_IBSS),
- /* NB: 5260 MHz - 5700 MHz requires DFS */
+ /* IEEE 802.11a, channel 52..64 - DFS required */
+ REG_RULE(5260-10, 5320+10, 160, 6, 20,
+ NL80211_RRF_PASSIVE_SCAN |
+ NL80211_RRF_NO_IBSS |
+ NL80211_RRF_DFS),
+
+ /* IEEE 802.11a, channel 100..144 - DFS required */
+ REG_RULE(5500-10, 5720+10, 160, 6, 20,
+ NL80211_RRF_PASSIVE_SCAN |
+ NL80211_RRF_NO_IBSS |
+ NL80211_RRF_DFS),
/* IEEE 802.11a, channel 149..165 */
REG_RULE(5745-10, 5825+10, 80, 6, 20,
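Note: the built-in world regdomain now spells out the 5 GHz DFS ranges (channels 52..64 and 100..144) as passive-scan, no-IBSS, DFS rules instead of a bare comment, and raises the permitted bandwidth on channels 36..48 to 160 MHz. For reference, a rough sketch of what a REG_RULE() of this shape expands to (field names from struct ieee80211_reg_rule; not the verbatim macro):

/* REG_RULE(5260-10, 5320+10, 160, 6, 20, flags) roughly expands to: */
struct ieee80211_reg_rule rule = {
	.freq_range = {
		.start_freq_khz    = 5250 * 1000,	/* -10 MHz edge padding */
		.end_freq_khz      = 5330 * 1000,	/* +10 MHz edge padding */
		.max_bandwidth_khz = 160 * 1000,
	},
	.power_rule = {
		.max_antenna_gain  = 6 * 100,		/* dBi -> mBi */
		.max_eirp          = 20 * 100,		/* dBm -> mBm */
	},
	.flags = NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS |
		 NL80211_RRF_DFS,
};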
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
index 65acbebd3711..b533ed71daff 100644
--- a/net/wireless/sysfs.h
+++ b/net/wireless/sysfs.h
@@ -1,8 +1,8 @@
#ifndef __WIRELESS_SYSFS_H
#define __WIRELESS_SYSFS_H
-extern int wiphy_sysfs_init(void);
-extern void wiphy_sysfs_exit(void);
+int wiphy_sysfs_init(void);
+void wiphy_sysfs_exit(void);
extern struct class ieee80211_class;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ce090c1c5e4f..3c8be6104ba4 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -10,6 +10,7 @@
#include <net/cfg80211.h>
#include <net/ip.h>
#include <net/dsfield.h>
+#include <linux/if_vlan.h>
#include "core.h"
#include "rdev-ops.h"
@@ -691,6 +692,7 @@ EXPORT_SYMBOL(ieee80211_amsdu_to_8023s);
unsigned int cfg80211_classify8021d(struct sk_buff *skb)
{
unsigned int dscp;
+ unsigned char vlan_priority;
/* skb->priority values from 256->263 are magic values to
* directly indicate a specific 802.1d priority. This is used
@@ -700,6 +702,13 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb)
if (skb->priority >= 256 && skb->priority <= 263)
return skb->priority - 256;
+ if (vlan_tx_tag_present(skb)) {
+ vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK)
+ >> VLAN_PRIO_SHIFT;
+ if (vlan_priority > 0)
+ return vlan_priority;
+ }
+
switch (skb->protocol) {
case htons(ETH_P_IP):
dscp = ipv4_get_dsfield(ip_hdr(skb)) & 0xfc;
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index c959312c45e3..e2fa133f9fba 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -16,8 +16,8 @@ config X25
if you want that) and the lower level data link layer protocol LAPB
(say Y to "LAPB Data Link Driver" below if you want that).
- You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
- <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
+ You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and
+ <http://docwiki.cisco.com/wiki/X.25>.
Information about X.25 for Linux is contained in the files
<file:Documentation/networking/x25.txt> and
<file:Documentation/networking/x25-iface.txt>.
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 716502ada53b..0622d319e1f2 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -130,7 +130,7 @@ static inline unsigned int __addr_hash(const xfrm_address_t *daddr,
return h & hmask;
}
-extern struct hlist_head *xfrm_hash_alloc(unsigned int sz);
-extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
+struct hlist_head *xfrm_hash_alloc(unsigned int sz);
+void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
#endif /* _XFRM_HASH_H */
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 2906d520eea7..ccfdc7115a83 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
const int plen = skb->len;
int dlen = IPCOMP_SCRATCH_SIZE;
u8 *start = skb->data;
- const int cpu = get_cpu();
- u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+ struct crypto_comp *tfm;
+ u8 *scratch;
int err;
local_bh_disable();
+ scratch = *this_cpu_ptr(ipcomp_scratches);
+ tfm = *this_cpu_ptr(ipcd->tfms);
err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
- local_bh_enable();
if (err)
goto out;
@@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
}
memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
- put_cpu();
+ local_bh_enable();
pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
return 0;
out:
- put_cpu();
+ local_bh_enable();
return err;
}
@@ -220,8 +220,8 @@ static void ipcomp_free_scratches(void)
static void * __percpu *ipcomp_alloc_scratches(void)
{
- int i;
void * __percpu *scratches;
+ int i;
if (ipcomp_scratch_users++)
return ipcomp_scratches;
@@ -233,7 +233,9 @@ static void * __percpu *ipcomp_alloc_scratches(void)
ipcomp_scratches = scratches;
for_each_possible_cpu(i) {
- void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+ void *scratch;
+
+ scratch = vmalloc_node(IPCOMP_SCRATCH_SIZE, cpu_to_node(i));
if (!scratch)
return NULL;
*per_cpu_ptr(scratches, i) = scratch;
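Note: get_cpu() only disables preemption, yet the shared per-CPU scratch buffers are also written from softirq context, so a BH arriving between crypto_comp_compress() and the memcpy() could previously clobber the scratch data; holding local_bh_disable() across the copy-out closes that window, and this_cpu_ptr() drops the explicit cpu index. The allocation change (vmalloc_node with cpu_to_node) simply places each CPU's scratch on its own NUMA node. The access discipline the patch converges on, as a sketch:

/* 'ipcomp_scratches' stands for any per-CPU buffer that BH-context
 * code can also write.
 */
local_bh_disable();			/* excludes softirqs and preemption */
scratch = *this_cpu_ptr(ipcomp_scratches);
/* ...produce into scratch, then copy the result out of it... */
local_bh_enable();			/* only after the copy-out */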
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ed38d5d81f9e..9a91f7431c41 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -334,7 +334,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
atomic_inc(&policy->genid);
- del_timer(&policy->polq.hold_timer);
+ if (del_timer(&policy->polq.hold_timer))
+ xfrm_pol_put(policy);
xfrm_queue_purge(&policy->polq.hold_queue);
if (del_timer(&policy->timer))
@@ -589,7 +590,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice_init(&pq->hold_queue, &list);
- del_timer(&pq->hold_timer);
+ if (del_timer(&pq->hold_timer))
+ xfrm_pol_put(old);
spin_unlock_bh(&pq->hold_queue.lock);
if (skb_queue_empty(&list))
@@ -600,7 +602,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice(&list, &pq->hold_queue);
pq->timeout = XFRM_QUEUE_TMO_MIN;
- mod_timer(&pq->hold_timer, jiffies);
+ if (!mod_timer(&pq->hold_timer, jiffies))
+ xfrm_pol_hold(new);
spin_unlock_bh(&pq->hold_queue.lock);
}
@@ -1769,6 +1772,10 @@ static void xfrm_policy_queue_process(unsigned long arg)
spin_lock(&pq->hold_queue.lock);
skb = skb_peek(&pq->hold_queue);
+ if (!skb) {
+ spin_unlock(&pq->hold_queue.lock);
+ goto out;
+ }
dst = skb_dst(skb);
sk = skb->sk;
xfrm_decode_session(skb, &fl, dst->ops->family);
@@ -1787,8 +1794,9 @@ static void xfrm_policy_queue_process(unsigned long arg)
goto purge_queue;
pq->timeout = pq->timeout << 1;
- mod_timer(&pq->hold_timer, jiffies + pq->timeout);
- return;
+ if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
+ xfrm_pol_hold(pol);
+ goto out;
}
dst_release(dst);
@@ -1819,11 +1827,14 @@ static void xfrm_policy_queue_process(unsigned long arg)
err = dst_output(skb);
}
+out:
+ xfrm_pol_put(pol);
return;
purge_queue:
pq->timeout = 0;
xfrm_queue_purge(&pq->hold_queue);
+ xfrm_pol_put(pol);
}
static int xdst_queue_output(struct sk_buff *skb)
@@ -1831,7 +1842,15 @@ static int xdst_queue_output(struct sk_buff *skb)
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
- struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;
+ struct xfrm_policy *pol = xdst->pols[0];
+ struct xfrm_policy_queue *pq = &pol->polq;
+ const struct sk_buff *fclone = skb + 1;
+
+ if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+ fclone->fclone == SKB_FCLONE_CLONE)) {
+ kfree_skb(skb);
+ return 0;
+ }
if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
kfree_skb(skb);
@@ -1850,10 +1869,12 @@ static int xdst_queue_output(struct sk_buff *skb)
if (del_timer(&pq->hold_timer)) {
if (time_before(pq->hold_timer.expires, sched_next))
sched_next = pq->hold_timer.expires;
+ xfrm_pol_put(pol);
}
__skb_queue_tail(&pq->hold_queue, skb);
- mod_timer(&pq->hold_timer, sched_next);
+ if (!mod_timer(&pq->hold_timer, sched_next))
+ xfrm_pol_hold(pol);
spin_unlock_bh(&pq->hold_queue.lock);
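Note: the hold_queue timer now pins the policy with a reference, and every arm/disarm site in the four hunks above follows the same rule; the handler takes a compensating hold when it re-arms itself and always drops one reference on exit through the new 'out:' path. The generic form (obj_hold/obj_put stand in for xfrm_pol_hold/xfrm_pol_put):

if (del_timer(&obj->timer))		/* nonzero: a pending timer was cancelled */
	obj_put(obj);			/* drop the reference the timer held */

if (!mod_timer(&obj->timer, expires))	/* zero: the timer was inactive */
	obj_hold(obj);			/* the newly armed timer takes a ref */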
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8dafe6d3c6e4..dab57daae408 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -61,9 +61,9 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (x->replay_maxdiff &&
- (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
- (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
+ if (!x->replay_maxdiff ||
+ ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
+ (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) {
if (x->xflags & XFRM_TIME_DEFER)
event = XFRM_REPLAY_TIMEOUT;
else
@@ -129,8 +129,7 @@ static int xfrm_replay_check(struct xfrm_state *x,
return 0;
diff = x->replay.seq - seq;
- if (diff >= min_t(unsigned int, x->props.replay_window,
- sizeof(x->replay.bitmap) * 8)) {
+ if (diff >= x->props.replay_window) {
x->stats.replay_window++;
goto err;
}
@@ -302,9 +301,10 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (x->replay_maxdiff &&
- (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
- (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) {
+ if (!x->replay_maxdiff ||
+ ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
+ (replay_esn->oseq - preplay_esn->oseq
+ < x->replay_maxdiff))) {
if (x->xflags & XFRM_TIME_DEFER)
event = XFRM_REPLAY_TIMEOUT;
else
@@ -353,28 +353,30 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (!x->replay_maxdiff)
- break;
-
- if (replay_esn->seq_hi == preplay_esn->seq_hi)
- seq_diff = replay_esn->seq - preplay_esn->seq;
- else
- seq_diff = ~preplay_esn->seq + replay_esn->seq + 1;
-
- if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
- oseq_diff = replay_esn->oseq - preplay_esn->oseq;
- else
- oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1;
-
- if (seq_diff < x->replay_maxdiff &&
- oseq_diff < x->replay_maxdiff) {
+ if (x->replay_maxdiff) {
+ if (replay_esn->seq_hi == preplay_esn->seq_hi)
+ seq_diff = replay_esn->seq - preplay_esn->seq;
+ else
+ seq_diff = ~preplay_esn->seq + replay_esn->seq
+ + 1;
- if (x->xflags & XFRM_TIME_DEFER)
- event = XFRM_REPLAY_TIMEOUT;
+ if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
+ oseq_diff = replay_esn->oseq
+ - preplay_esn->oseq;
else
- return;
+ oseq_diff = ~preplay_esn->oseq
+ + replay_esn->oseq + 1;
+
+ if (seq_diff >= x->replay_maxdiff ||
+ oseq_diff >= x->replay_maxdiff)
+ break;
}
+ if (x->xflags & XFRM_TIME_DEFER)
+ event = XFRM_REPLAY_TIMEOUT;
+ else
+ return;
+
break;
case XFRM_REPLAY_TIMEOUT:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d6e7f98fbfbf..68c2f357a183 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -815,7 +815,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
xfrm_state_look_at(pol, x, fl, encap_family,
&best, &acquire_in_progress, &error);
}
- if (best)
+ if (best || acquire_in_progress)
goto found;
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
@@ -824,7 +824,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
- xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3f565e495ac6..f964d4c00ffb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -446,7 +446,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
memcpy(&x->sel, &p->sel, sizeof(x->sel));
memcpy(&x->lft, &p->lft, sizeof(x->lft));
x->props.mode = p->mode;
- x->props.replay_window = p->replay_window;
+ x->props.replay_window = min_t(unsigned int, p->replay_window,
+ sizeof(x->replay.bitmap) * 8);
x->props.reqid = p->reqid;
x->props.family = p->family;
memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
@@ -1856,7 +1857,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
if (x->km.state != XFRM_STATE_VALID)
goto out;
- err = xfrm_replay_verify_len(x->replay_esn, rp);
+ err = xfrm_replay_verify_len(x->replay_esn, re);
if (err)
goto out;
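Note: two independent fixes close out the series. copy_from_user_state() now clamps the user-supplied replay_window to the bit capacity of the legacy bitmap, which is what allows the earlier xfrm_replay_check() hunk to drop its per-packet min_t() re-clamp; and xfrm_new_ae() validates the length of the ESN attribute it actually received (re, i.e. XFRMA_REPLAY_ESN_VAL) instead of the unrelated rp. A sketch of the clamp's purpose (array size illustrative; the legacy state keeps a fixed-width bitmap):

/* Untrusted input must never imply more window bits than the bitmap
 * can represent, or the per-packet check would index past it.
 */
u32 bitmap[1];					/* fixed capacity, illustrative */
unsigned int window = min_t(unsigned int,
			    user_replay_window,	/* from netlink, untrusted */
			    sizeof(bitmap) * 8);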