Diffstat (limited to 'drivers/net/wireguard/queueing.h')
-rw-r--r--   drivers/net/wireguard/queueing.h   106
1 files changed, 51 insertions, 55 deletions
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index c58df439dbbe..79b6d70de236 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -11,17 +11,19 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ip_tunnels.h>
 
 struct wg_device;
 struct wg_peer;
 struct multicore_worker;
 struct crypt_queue;
+struct prev_queue;
 struct sk_buff;
 
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-			 bool multicore, unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+			 unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
 
@@ -65,25 +67,9 @@ struct packet_cb {
 #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
 #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
 
-/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
-{
-	if (skb_network_header(skb) >= skb->head &&
-	    (skb_network_header(skb) + sizeof(struct iphdr)) <=
-		    skb_tail_pointer(skb) &&
-	    ip_hdr(skb)->version == 4)
-		return htons(ETH_P_IP);
-	if (skb_network_header(skb) >= skb->head &&
-	    (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
-		    skb_tail_pointer(skb) &&
-	    ipv6_hdr(skb)->version == 6)
-		return htons(ETH_P_IPV6);
-	return 0;
-}
-
 static inline bool wg_check_packet_protocol(struct sk_buff *skb)
 {
-	__be16 real_protocol = wg_examine_packet_protocol(skb);
+	__be16 real_protocol = ip_tunnel_parse_protocol(skb);
 	return real_protocol && skb->protocol == real_protocol;
 }
 
@@ -93,9 +79,7 @@ static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
 	u8 sw_hash = skb->sw_hash;
 	u32 hash = skb->hash;
 	skb_scrub_packet(skb, true);
-	memset(&skb->headers_start, 0,
-	       offsetof(struct sk_buff, headers_end) -
-		       offsetof(struct sk_buff, headers_start));
+	memset(&skb->headers, 0, sizeof(skb->headers));
 	if (encapsulating) {
 		skb->l4_hash = l4_hash;
 		skb->sw_hash = sw_hash;
@@ -120,39 +104,54 @@ static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
 
 static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
 {
-	unsigned int cpu = *stored_cpu, cpu_index, i;
+	unsigned int cpu = *stored_cpu;
+
+	while (unlikely(cpu >= nr_cpu_ids || !cpu_online(cpu)))
+		cpu = *stored_cpu = cpumask_nth(id % num_online_cpus(), cpu_online_mask);
 
-	if (unlikely(cpu == nr_cpumask_bits ||
-		     !cpumask_test_cpu(cpu, cpu_online_mask))) {
-		cpu_index = id % cpumask_weight(cpu_online_mask);
-		cpu = cpumask_first(cpu_online_mask);
-		for (i = 0; i < cpu_index; ++i)
-			cpu = cpumask_next(cpu, cpu_online_mask);
-		*stored_cpu = cpu;
-	}
 	return cpu;
 }
 
-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	WRITE_ONCE(*last_cpu, cpu);
 	return cpu;
 }
 
+void wg_prev_queue_init(struct prev_queue *queue);
+
+/* Multi producer */
+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
+
+/* Single consumer */
+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
+
+/* Single consumer */
+static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
+{
+	if (queue->peeked)
+		return queue->peeked;
+	queue->peeked = wg_prev_queue_dequeue(queue);
+	return queue->peeked;
+}
+
+/* Single consumer */
+static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+{
+	queue->peeked = NULL;
+}
+
 static inline int wg_queue_enqueue_per_device_and_peer(
-	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;
 
@@ -160,21 +159,20 @@ static inline int wg_queue_enqueue_per_device_and_peer(
 	/* We first queue this up for the peer ingestion, but the consumer
 	 * will wait for the state to change to CRYPTED or DEAD before.
 	 */
-	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+	if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
 		return -ENOSPC;
+
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
 	return 0;
 }
 
-static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
-					     struct sk_buff *skb,
-					     enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
@@ -182,14 +180,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
 	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
 
 	atomic_set_release(&PACKET_CB(skb)->state, state);
-	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
-					       peer->internal_id),
-		      peer->device->packet_crypt_wq, &queue->work);
+	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
+		      peer->device->packet_crypt_wq, &peer->transmit_packet_work);
 	wg_peer_put(peer);
 }
 
-static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
-						  enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
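
As a reading aid for the peek/drop API introduced above: wg_prev_queue_peek() hands the single consumer the head element without committing to it, and wg_prev_queue_drop_peeked() is what finally consumes it, so an element that is not yet ready simply stays at the head for the next pass. The following stand-alone user-space sketch models that pattern; prev_queue_model, the model_*() helpers, and the fixed slots[] array are invented for illustration and are not part of the kernel code.

/*
 * Illustrative user-space model, NOT kernel code: mirrors the single-consumer
 * peek/drop pattern of wg_prev_queue_peek()/wg_prev_queue_drop_peeked().
 */
#include <stdio.h>
#include <stddef.h>

struct item {
	int ready;	/* stands in for the packet's crypt state */
	int value;
};

struct prev_queue_model {
	struct item *slots[8];	/* toy backing store, non-wrapping */
	size_t head, tail;
	struct item *peeked;	/* handed out by peek but not yet dropped */
};

static struct item *model_dequeue(struct prev_queue_model *q)
{
	if (q->head == q->tail)
		return NULL;
	return q->slots[q->head++];
}

static struct item *model_peek(struct prev_queue_model *q)
{
	if (q->peeked)
		return q->peeked;
	q->peeked = model_dequeue(q);
	return q->peeked;
}

static void model_drop_peeked(struct prev_queue_model *q)
{
	q->peeked = NULL;
}

int main(void)
{
	struct item a = { .ready = 1, .value = 10 };
	struct item b = { .ready = 0, .value = 20 };
	struct prev_queue_model q = { .slots = { &a, &b }, .tail = 2 };
	struct item *it;

	/* Consumer loop: handle ready items in order and stop at the first
	 * one that is not ready; it stays peeked and is retried later.
	 */
	while ((it = model_peek(&q)) != NULL && it->ready) {
		model_drop_peeked(&q);
		printf("processed %d\n", it->value);
	}
	return 0;
}

This is the shape the per-peer workers rely on: peek a packet, check its state in PACKET_CB(skb), and only drop it from the queue once it has actually been handled.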

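Similarly, the reworked wg_cpumask_next_online() is just a shared round-robin cursor over the online CPUs, updated with READ_ONCE/WRITE_ONCE instead of a lock. The sketch below is a stand-alone user-space model of that idea; NSLOTS, online[] and next_online() are invented names, and the "online mask" is a plain array rather than a real cpumask.

/*
 * Illustrative user-space model, NOT kernel code: advance a shared cursor to
 * the next online slot, wrapping to the first one at the end. Concurrent
 * callers may occasionally pick the same slot because the cursor is read and
 * written without a lock; as the comment in the diff says, that is harmless.
 */
#include <stdio.h>
#include <stdatomic.h>

#define NSLOTS 8
static const int online[NSLOTS] = { 1, 1, 0, 1, 0, 0, 1, 1 };	/* toy online mask */

static int next_online(_Atomic int *last)
{
	int cpu = atomic_load_explicit(last, memory_order_relaxed);
	int i;

	/* Scan the slots after 'cpu', wrapping around, and remember the pick. */
	for (i = 1; i <= NSLOTS; i++) {
		int candidate = (cpu + i) % NSLOTS;

		if (online[candidate]) {
			atomic_store_explicit(last, candidate, memory_order_relaxed);
			return candidate;
		}
	}
	return 0;	/* unreachable while at least one slot is online */
}

int main(void)
{
	_Atomic int last = NSLOTS - 1;	/* so the first pick wraps to slot 0 */
	int n;

	for (n = 0; n < 6; n++)
		printf("picked slot %d\n", next_online(&last));
	return 0;
}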