diff options
Diffstat (limited to 'include')
112 files changed, 2358 insertions, 1694 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 88a51b242adc..669d96d074ad 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -225,24 +225,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, \ - &__unused_flags); \ + NULL, NULL); \ __ret; \ }) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, \ - &__unused_flags); \ + t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bdb5298735ce..a7080c86fa76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/percpu-refcount.h> #include <linux/bpfptr.h> +#include <linux/btf.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -88,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -147,14 +149,48 @@ struct bpf_map_ops { bpf_callback_t callback_fn, void *callback_ctx, u64 flags); - /* BTF name and id of struct allocated by map_alloc */ - const char * const map_btf_name; + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; /* bpf_iter info used to open a seq_file */ const struct bpf_iter_seq_info *iter_seq_info; }; +enum { + /* Support at most 8 pointers in a BPF map value */ + BPF_MAP_VALUE_OFF_MAX = 8, + BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + + 1 + /* for bpf_spin_lock */ + 1, /* for bpf_timer */ +}; + +enum bpf_kptr_type { + BPF_KPTR_UNREF, + BPF_KPTR_REF, +}; + +struct bpf_map_value_off_desc { + u32 offset; + enum bpf_kptr_type type; + struct { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; + } kptr; +}; + +struct bpf_map_value_off { + u32 nr_off; + struct bpf_map_value_off_desc off[]; +}; + +struct bpf_map_off_arr { + u32 cnt; + u32 field_off[BPF_MAP_OFF_ARR_MAX]; + u8 field_sz[BPF_MAP_OFF_ARR_MAX]; +}; + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -171,6 +207,7 @@ struct bpf_map { u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + struct bpf_map_value_off *kptr_off_tab; int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; @@ -182,10 +219,7 @@ struct bpf_map { struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - bool bypass_spec_v1; - bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 14 bytes hole */ - + struct bpf_map_off_arr *off_arr; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -205,6 +239,8 @@ struct bpf_map { bool jited; bool xdp_has_frags; } owner; + bool bypass_spec_v1; + bool frozen; /* write-once; write-protected by freeze_mutex */ }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -217,43 +253,44 @@ static inline bool map_value_has_timer(const struct bpf_map *map) return map->timer_off >= 0; } +static inline bool map_value_has_kptrs(const struct bpf_map *map) +{ + return !IS_ERR_OR_NULL(map->kptr_off_tab); +} + static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); + if (unlikely(map_value_has_kptrs(map))) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + for (i = 0; i < tab->nr_off; i++) + *(u64 *)(dst + tab->off[i].offset) = 0; + } } /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + u32 curr_off = 0; + int i; - if (unlikely(map_value_has_spin_lock(map))) { - s_off = map->spin_lock_off; - s_sz = sizeof(struct bpf_spin_lock); - } - if (unlikely(map_value_has_timer(map))) { - t_off = map->timer_off; - t_sz = sizeof(struct bpf_timer); + if (likely(!map->off_arr)) { + memcpy(dst, src, map->value_size); + return; } - if (unlikely(s_sz || t_sz)) { - if (s_off < t_off || !s_sz) { - swap(s_off, t_off); - swap(s_sz, t_sz); - } - memcpy(dst, src, t_off); - memcpy(dst + t_off + t_sz, - src + t_off + t_sz, - s_off - t_off - t_sz); - memcpy(dst + s_off + s_sz, - src + s_off + s_sz, - map->value_size - s_off - s_sz); - } else { - memcpy(dst, src, map->value_size); + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memcpy(dst + curr_off, src + curr_off, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; } + memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); @@ -342,9 +379,31 @@ enum bpf_type_flag { */ MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_PERCPU, + /* Indicates that the argument will be released. */ + OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS), + + /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark + * unreferenced and referenced kptr loaded from map value using a load + * instruction, so that they can only be dereferenced but not escape the + * BPF program into the kernel (i.e. cannot be passed as arguments to + * kfunc or bpf helpers). + */ + PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + + MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_FLAG_MAX, + __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -361,16 +420,11 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ - ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - /* the following constraints used to prototype bpf_memcmp() and other - * functions that access data on eBPF program stack + /* Used to prototype bpf_memcmp() and other functions that access data + * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, - * helper function must fill all bytes or clear - * them in error case. - */ ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -391,6 +445,8 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -400,6 +456,11 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + /* pointer to memory does not need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -427,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -672,15 +734,17 @@ struct btf_func_model { #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + * bytes on x86. */ -#define BPF_MAX_TRAMP_PROGS 38 +#define BPF_MAX_TRAMP_LINKS 38 -struct bpf_tramp_progs { - struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; - int nr_progs; +struct bpf_tramp_links { + struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; + int nr_links; }; +struct bpf_tramp_run_ctx; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -704,13 +768,14 @@ struct bpf_tramp_progs { struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -803,9 +868,10 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( { return bpf_func(ctx, insnsi); } + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -856,12 +922,12 @@ int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; @@ -960,7 +1026,6 @@ struct bpf_prog_aux { bool tail_call_reachable; bool xdp_has_frags; bool use_bpf_prog_pack; - struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1047,6 +1112,19 @@ struct bpf_link_ops { struct bpf_link_info *info); }; +struct bpf_tramp_link { + struct bpf_link link; + struct hlist_node tramp_hlist; + u64 cookie; +}; + +struct bpf_tracing_link { + struct bpf_tramp_link link; + enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1084,8 +1162,8 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) @@ -1221,7 +1299,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, /* an array of programs to be executed under rcu_lock. * * Typical usage: - * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run); + * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run); * * the structure returned by bpf_prog_array_alloc() should be populated * with program pointers and the last pointer must be NULL. @@ -1290,6 +1368,12 @@ struct bpf_trace_run_ctx { u64 bpf_cookie; }; +struct bpf_tramp_run_ctx { + struct bpf_run_ctx run_ctx; + u64 bpf_cookie; + struct bpf_run_ctx *saved_run_ctx; +}; + static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) { struct bpf_run_ctx *old_ctx = NULL; @@ -1315,83 +1399,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline int -BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval, u32 *ret_flags) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - u32 func_ret; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - func_ret = run_prog(prog, ctx); - if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - *(ret_flags) |= (func_ret >> 1); - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - -static __always_inline int -BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - static __always_inline u32 -BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); + if (unlikely(!array)) - goto out; + return ret; + + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { @@ -1400,50 +1423,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, item++; } bpf_reset_run_ctx(old_run_ctx); -out: - rcu_read_unlock(); migrate_enable(); return ret; } -/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs - * so BPF programs can request cwr for TCP packets. - * - * Current cgroup skb programs can only return 0 or 1 (0 to drop the - * packet. This macro changes the behavior so the low order bit - * indicates whether the packet should be dropped (0) or not (1) - * and the next bit is a congestion notification bit. This could be - * used by TCP to call tcp_enter_cwr() - * - * Hence, new allowed return values of CGROUP EGRESS BPF programs are: - * 0: drop packet - * 1: keep packet - * 2: drop packet and cn - * 3: keep packet and cn - * - * This macro then converts it to one of the NET_XMIT or an error - * code that is then interpreted as drop packet (and no cn): - * 0: NET_XMIT_SUCCESS skb should be transmitted - * 1: NET_XMIT_DROP skb should be dropped and cn - * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -err skb should be dropped - */ -#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ - ({ \ - u32 _flags = 0; \ - bool _cn; \ - u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ - _cn = _flags & BPF_RET_SET_CN; \ - if (_ret && !IS_ERR_VALUE((long)_ret)) \ - _ret = -EFAULT; \ - if (!_ret) \ - _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ - else \ - _ret = (_cn ? NET_XMIT_DROP : _ret); \ - _ret; \ - }) - #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -1497,6 +1480,12 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); +void bpf_map_free_kptr_off_tab(struct bpf_map *map); +struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); +bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); +void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); + struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -1590,6 +1579,7 @@ void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); +struct bpf_link *bpf_link_get_curr_or_next(u32 *id); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -1793,7 +1783,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, - const struct btf *need_btf, u32 need_type_id); + const struct btf *need_btf, u32 need_type_id, + bool strict); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, @@ -2206,6 +2197,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; @@ -2243,12 +2235,16 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2268,6 +2264,7 @@ extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2381,6 +2378,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); void *bpf_arch_text_copy(void *dst, void *src, size_t len); +int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); @@ -2391,4 +2389,33 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 493e63258497..7ea18d4da84b 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -143,9 +143,9 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_omem); + bool uncharge_omem, bool use_trace_rcu); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3e24ad0c4b3c..2b9112b80171 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -141,3 +141,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3a9d2d7cc6b7..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -88,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -174,9 +188,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; @@ -523,8 +543,7 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type, - bool is_release_func); + enum bpf_arg_type arg_type); int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, diff --git a/include/linux/btf.h b/include/linux/btf.h index 36bc09b8e890..2611cea2c2b6 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,7 @@ enum btf_kfunc_type { BTF_KFUNC_TYPE_ACQUIRE, BTF_KFUNC_TYPE_RELEASE, BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_KPTR_ACQUIRE, BTF_KFUNC_TYPE_MAX, }; @@ -35,11 +36,19 @@ struct btf_kfunc_id_set { struct btf_id_set *acquire_set; struct btf_id_set *release_set; struct btf_id_set *ret_null_set; + struct btf_id_set *kptr_acquire_set; }; struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; }; }; +struct btf_id_dtor_kfunc { + u32 btf_id; + u32 kfunc_btf_id; +}; + +typedef void (*btf_dtor_kfunc_t)(void *); + extern const struct file_operations btf_fops; void btf_get(struct btf *btf); @@ -123,6 +132,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); +struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, + const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, @@ -344,6 +355,9 @@ bool btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_type type, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); +int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, + struct module *owner); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -367,6 +381,15 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { return 0; } +static inline s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) +{ + return -ENOENT; +} +static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, + u32 add_cnt, struct module *owner) +{ + return 0; +} #endif #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index bc5d9cc34e4c..335a19092368 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -178,7 +178,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c2ea47f30046..e22dc03c850e 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -17,7 +17,6 @@ #include <linux/can.h> #include <linux/can/bittiming.h> #include <linux/can/error.h> -#include <linux/can/led.h> #include <linux/can/length.h> #include <linux/can/netlink.h> #include <linux/can/skb.h> @@ -85,15 +84,6 @@ struct can_priv { int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); - -#ifdef CONFIG_CAN_LEDS - struct led_trigger *tx_led_trig; - char tx_led_trig_name[CAN_LED_NAME_SZ]; - struct led_trigger *rx_led_trig; - char rx_led_trig_name[CAN_LED_NAME_SZ]; - struct led_trigger *rxtx_led_trig; - char rxtx_led_trig_name[CAN_LED_NAME_SZ]; -#endif }; static inline bool can_tdc_is_enabled(const struct can_priv *priv) diff --git a/include/linux/can/led.h b/include/linux/can/led.h deleted file mode 100644 index 7c3cfd798c56..000000000000 --- a/include/linux/can/led.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com> - */ - -#ifndef _CAN_LED_H -#define _CAN_LED_H - -#include <linux/if.h> -#include <linux/leds.h> -#include <linux/netdevice.h> - -enum can_led_event { - CAN_LED_EVENT_OPEN, - CAN_LED_EVENT_STOP, - CAN_LED_EVENT_TX, - CAN_LED_EVENT_RX, -}; - -#ifdef CONFIG_CAN_LEDS - -/* keep space for interface name + "-tx"/"-rx"/"-rxtx" - * suffix and null terminator - */ -#define CAN_LED_NAME_SZ (IFNAMSIZ + 6) - -void can_led_event(struct net_device *netdev, enum can_led_event event); -void devm_can_led_init(struct net_device *netdev); -int __init can_led_notifier_init(void); -void __exit can_led_notifier_exit(void); - -#else - -static inline void can_led_event(struct net_device *netdev, - enum can_led_event event) -{ -} -static inline void devm_can_led_init(struct net_device *netdev) -{ -} -static inline int can_led_notifier_init(void) -{ - return 0; -} -static inline void can_led_notifier_exit(void) -{ -} - -#endif - -#endif /* !_CAN_LED_H */ diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index c11477620403..c205c51d79c9 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -42,8 +42,8 @@ int can_rx_offload_add_manual(struct net_device *dev, int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); -int can_rx_offload_queue_sorted(struct can_rx_offload *offload, - struct sk_buff *skb, u32 timestamp); +int can_rx_offload_queue_timestamp(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp, unsigned int *frame_len_ptr); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4af58459a1e7..99dc7bfbcd3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,11 +71,13 @@ enum { * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers + * @tx_push: The flag of tx push mode * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u8 tx_push; u32 cqe_size; }; @@ -83,10 +85,12 @@ struct kernel_ethtool_ringparam { * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size + * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 295637a66c46..3b401fa0f374 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -52,6 +52,22 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strncpy __builtin_strncpy #endif +/** + * unsafe_memcpy - memcpy implementation with no FORTIFY bounds checking + * + * @dst: Destination memory address to write to + * @src: Source memory address to read from + * @bytes: How many bytes to write to @dst from @src + * @justification: Free-form text or comment describing why the use is needed + * + * This should be used for corner cases where the compiler cannot do the + * right thing, or during transitions between APIs, etc. It should be used + * very rarely, and includes a place for justification detailing where bounds + * checking has happened, and why existing solutions cannot be employed. + */ +#define unsafe_memcpy(dst, src, bytes, justification) \ + __underlying_memcpy(dst, src, bytes) + /* * Clang's use of __builtin_object_size() within inlines needs hinting via * __pass_object_size(). The preference is to only ever use type 1 (member diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..820500430eae 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,6 +303,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct ftrace_regs *fregs); + +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs); #else /* !CONFIG_FUNCTION_TRACER */ /* * (un)register_ftrace_function must be a macro since the ops parameter @@ -313,6 +315,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } +static inline int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FUNCTION_TRACER */ struct ftrace_func_entry { diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9055cb380ee2..db0f4fcfdaf4 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -79,8 +79,9 @@ extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); -extern void icmpv6_param_prob(struct sk_buff *skb, - u8 code, int pos); +extern void icmpv6_param_prob_reason(struct sk_buff *skb, + u8 code, int pos, + enum skb_drop_reason reason); struct flowi6; struct in6_addr; @@ -91,6 +92,12 @@ extern void icmpv6_flow_init(struct sock *sk, const struct in6_addr *daddr, int oif); +static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmpv6_param_prob_reason(skb, code, pos, + SKB_DROP_REASON_NOT_SPECIFIED); +} + static inline bool icmpv6_is_err(int type) { switch (type) { diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 95c831162212..f1f9412b6ac6 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -134,18 +134,46 @@ enum { * a successful transmission. */ IEEE802154_SUCCESS = 0x0, - + /* The requested operation failed. */ + IEEE802154_MAC_ERROR = 0x1, + /* The requested operation has been cancelled. */ + IEEE802154_CANCELLED = 0x2, + /* + * Device is ready to poll the coordinator for data in a non beacon + * enabled PAN. + */ + IEEE802154_READY_FOR_POLL = 0x3, + /* Wrong frame counter. */ + IEEE802154_COUNTER_ERROR = 0xdb, + /* + * The frame does not conforms to the incoming key usage policy checking + * procedure. + */ + IEEE802154_IMPROPER_KEY_TYPE = 0xdc, + /* + * The frame does not conforms to the incoming security level usage + * policy checking procedure. + */ + IEEE802154_IMPROPER_SECURITY_LEVEL = 0xdd, + /* Secured frame received with an empty Frame Version field. */ + IEEE802154_UNSUPPORTED_LEGACY = 0xde, + /* + * A secured frame is received or must be sent but security is not + * enabled in the device. Or, the Auxiliary Security Header has security + * level of zero in it. + */ + IEEE802154_UNSUPPORTED_SECURITY = 0xdf, /* The beacon was lost following a synchronization request. */ - IEEE802154_BEACON_LOSS = 0xe0, + IEEE802154_BEACON_LOST = 0xe0, /* * A transmission could not take place due to activity on the * channel, i.e., the CSMA-CA mechanism has failed. */ - IEEE802154_CHNL_ACCESS_FAIL = 0xe1, + IEEE802154_CHANNEL_ACCESS_FAILURE = 0xe1, /* The GTS request has been denied by the PAN coordinator. */ - IEEE802154_DENINED = 0xe2, + IEEE802154_DENIED = 0xe2, /* The attempt to disable the transceiver has failed. */ - IEEE802154_DISABLE_TRX_FAIL = 0xe3, + IEEE802154_DISABLE_TRX_FAILURE = 0xe3, /* * The received frame induces a failed security check according to * the security suite. @@ -185,9 +213,9 @@ enum { * A PAN identifier conflict has been detected and communicated to the * PAN coordinator. */ - IEEE802154_PANID_CONFLICT = 0xee, + IEEE802154_PAN_ID_CONFLICT = 0xee, /* A coordinator realignment command has been received. */ - IEEE802154_REALIGMENT = 0xef, + IEEE802154_REALIGNMENT = 0xef, /* The transaction has expired and its information discarded. */ IEEE802154_TRANSACTION_EXPIRED = 0xf0, /* There is no capacity to store the transaction. */ @@ -203,12 +231,49 @@ enum { * A SET/GET request was issued with the identifier of a PIB attribute * that is not supported. */ - IEEE802154_UNSUPPORTED_ATTR = 0xf4, + IEEE802154_UNSUPPORTED_ATTRIBUTE = 0xf4, + /* Missing source or destination address or address mode. */ + IEEE802154_INVALID_ADDRESS = 0xf5, + /* + * MLME asked to turn the receiver on, but the on time duration is too + * big compared to the macBeaconOrder. + */ + IEEE802154_ON_TIME_TOO_LONG = 0xf6, + /* + * MLME asaked to turn the receiver on, but the request was delayed for + * too long before getting processed. + */ + IEEE802154_PAST_TIME = 0xf7, + /* + * The StartTime parameter is nonzero, and the MLME is not currently + * tracking the beacon of the coordinator through which it is + * associated. + */ + IEEE802154_TRACKING_OFF = 0xf8, + /* + * The index inside the hierarchical values in PIBAttribute is out of + * range. + */ + IEEE802154_INVALID_INDEX = 0xf9, + /* + * The number of PAN descriptors discovered during a scan has been + * reached. + */ + IEEE802154_LIMIT_REACHED = 0xfa, + /* + * The PIBAttribute parameter specifies an attribute that is a read-only + * attribute. + */ + IEEE802154_READ_ONLY = 0xfb, /* * A request to perform a scan operation failed because the MLME was * in the process of performing a previously initiated scan operation. */ IEEE802154_SCAN_IN_PROGRESS = 0xfc, + /* The outgoing superframe overlaps the incoming superframe. */ + IEEE802154_SUPERFRAME_OVERLAP = 0xfd, + /* Any other error situation. */ + IEEE802154_SYSTEM_ERROR = 0xff, }; /* frame control handling */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 16870f86c74d..38c8203d52cb 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -61,6 +61,7 @@ struct ipv6_devconf { __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; + __s32 accept_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; @@ -144,6 +145,7 @@ struct inet6_skb_parm { #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 +#define IP6SKB_FAKEJUMBO 512 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -339,8 +341,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return (struct raw6_sock *)sk; } -#define __ipv6_only_sock(sk) (sk->sk_ipv6only) -#define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) +#define ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) @@ -357,7 +358,6 @@ static inline int inet_v6_ipv6only(const struct sock *sk) return ipv6_only_sock(sk); } #else -#define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ce1bd2fbf23e..ad39636e0c3f 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -65,11 +65,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) return ptr; } +#ifdef CONFIG_KALLSYMS int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); @@ -163,6 +163,11 @@ static inline bool kallsyms_show_value(const struct cred *cred) return false; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), void *data) +{ + return -EOPNOTSUPP; +} #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) diff --git a/include/linux/list.h b/include/linux/list.h index dd6c2041d09c..57e8b559cdf6 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -564,6 +564,19 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((pos)->member.next, typeof(*(pos)), member) /** + * list_next_entry_circular - get the next element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the last element (return the first element). + * Note, that list is expected to be not empty. + */ +#define list_next_entry_circular(pos, head, member) \ + (list_is_last(&(pos)->member, head) ? \ + list_first_entry(head, typeof(*(pos)), member) : list_next_entry(pos, member)) + +/** * list_prev_entry - get the prev element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. @@ -572,6 +585,19 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((pos)->member.prev, typeof(*(pos)), member) /** + * list_prev_entry_circular - get the prev element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the first element (return the last element). + * Note, that list is expected to be not empty. + */ +#define list_prev_entry_circular(pos, head, member) \ + (list_is_first(&(pos)->member, head) ? \ + list_last_entry(head, typeof(*(pos)), member) : list_prev_entry(pos, member)) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. @@ -580,6 +606,16 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + +/** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ecac96d52e01..00177567cfef 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -340,6 +340,76 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, advertising, lpa & MDIO_AN_10GBT_STAT_LP10G); } +/** + * mii_t1_adv_l_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [15:0] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [15:0] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_l_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_CAP); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_ASYM); +} + +/** + * mii_t1_adv_m_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [31:16] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [31:16] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_B10L); +} + +/** + * linkmode_adv_to_mii_t1_adv_l_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [15:0] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_l_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_CAP; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_ASYM; + + return result; +} + +/** + * linkmode_adv_to_mii_t1_adv_m_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [31:16] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_B10L; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h index a18c1539a152..0c706085c205 100644 --- a/include/linux/mfd/idt8a340_reg.h +++ b/include/linux/mfd/idt8a340_reg.h @@ -407,7 +407,7 @@ #define TOD_READ_PRIMARY_0 0xcc40 #define TOD_READ_PRIMARY_0_V520 0xcc50 /* 8-bit subns, 32-bit ns, 48-bit seconds */ -#define TOD_READ_PRIMARY 0x0000 +#define TOD_READ_PRIMARY_BASE 0x0000 /* Counter increments after TOD write is completed */ #define TOD_READ_PRIMARY_COUNTER 0x000b /* Read trigger configuration */ @@ -424,6 +424,16 @@ #define TOD_READ_SECONDARY_0 0xcc90 #define TOD_READ_SECONDARY_0_V520 0xcca0 +/* 8-bit subns, 32-bit ns, 48-bit seconds */ +#define TOD_READ_SECONDARY_BASE 0x0000 +/* Counter increments after TOD write is completed */ +#define TOD_READ_SECONDARY_COUNTER 0x000b +/* Read trigger configuration */ +#define TOD_READ_SECONDARY_SEL_CFG_0 0x000c +/* Read trigger selection */ +#define TOD_READ_SECONDARY_CMD 0x000e +#define TOD_READ_SECONDARY_CMD_V520 0x000f + #define TOD_READ_SECONDARY_1 0xcca0 #define TOD_READ_SECONDARY_1_V520 0xccb0 #define TOD_READ_SECONDARY_2 0xccb0 diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h deleted file mode 100644 index dacf69516002..000000000000 --- a/include/linux/mlx5/accel.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_H__ -#define __MLX5_ACCEL_H__ - -#include <linux/mlx5/driver.h> - -enum mlx5_accel_esp_aes_gcm_keymat_iv_algo { - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ, -}; - -enum mlx5_accel_esp_flags { - MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */ - MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0, - MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1, - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2, -}; - -enum mlx5_accel_esp_action { - MLX5_ACCEL_ESP_ACTION_DECRYPT, - MLX5_ACCEL_ESP_ACTION_ENCRYPT, -}; - -enum mlx5_accel_esp_keymats { - MLX5_ACCEL_ESP_KEYMAT_AES_NONE, - MLX5_ACCEL_ESP_KEYMAT_AES_GCM, -}; - -enum mlx5_accel_esp_replay { - MLX5_ACCEL_ESP_REPLAY_NONE, - MLX5_ACCEL_ESP_REPLAY_BMP, -}; - -struct aes_gcm_keymat { - u64 seq_iv; - enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo; - - u32 salt; - u32 icv_len; - - u32 key_len; - u32 aes_key[256 / 32]; -}; - -struct mlx5_accel_esp_xfrm_attrs { - enum mlx5_accel_esp_action action; - u32 esn; - __be32 spi; - u32 seq; - u32 tfc_pad; - u32 flags; - u32 sa_handle; - enum mlx5_accel_esp_replay replay_type; - union { - struct { - u32 size; - - } bmp; - } replay; - enum mlx5_accel_esp_keymats keymat_type; - union { - struct aes_gcm_keymat aes_gcm; - } keymat; - - union { - __be32 a4; - __be32 a6[4]; - } saddr; - - union { - __be32 a4; - __be32 a6[4]; - } daddr; - - u8 is_ipv6; -}; - -struct mlx5_accel_esp_xfrm { - struct mlx5_core_dev *mdev; - struct mlx5_accel_esp_xfrm_attrs attrs; -}; - -enum { - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0, -}; - -enum mlx5_accel_ipsec_cap { - MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, - MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, - MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, - MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, - MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6, - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, -}; - -#ifdef CONFIG_MLX5_ACCEL - -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); - -struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); -void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); -int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs); - -#else - -static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } - -static inline struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) { return ERR_PTR(-EOPNOTSUPP); } -static inline void -mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} -static inline int -mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } - -#endif /* CONFIG_MLX5_ACCEL */ -#endif /* __MLX5_ACCEL_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9424503eb8d3..b064bc278f52 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -84,7 +84,7 @@ enum mlx5_sqp_t { }; enum { - MLX5_MAX_PORTS = 2, + MLX5_MAX_PORTS = 4, }; enum { @@ -272,6 +272,8 @@ struct mlx5_cmd_stats { u32 last_failed_errno; /* last bad status returned by FW */ u8 last_failed_mbox_status; + /* last command failed syndrome returned by FW */ + u32 last_failed_syndrome; struct dentry *root; /* protect command average calculations */ spinlock_t lock; @@ -558,6 +560,7 @@ struct mlx5_debugfs_entries { struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; struct dentry *pages_debugfs; + struct dentry *lag_debugfs; }; struct mlx5_ft_pool; @@ -632,6 +635,7 @@ enum mlx5_device_state { enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = BIT(0), + MLX5_BREAK_FW_WAIT = BIT(1), }; enum mlx5_pci_status { @@ -778,9 +782,6 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif -#ifdef CONFIG_MLX5_ACCEL - const struct mlx5_accel_ipsec_ops *ipsec_ops; -#endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; @@ -1052,9 +1053,14 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); + +static inline int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); +} + void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); @@ -1144,6 +1150,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int num_counters, size_t *offsets); struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e3bfed68b08a..8135713b0d2d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,18 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_NONE, + MLX5_FLOW_DESTINATION_TYPE_VPORT, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + MLX5_FLOW_DESTINATION_TYPE_TIR, + MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER, + MLX5_FLOW_DESTINATION_TYPE_UPLINK, + MLX5_FLOW_DESTINATION_TYPE_PORT, + MLX5_FLOW_DESTINATION_TYPE_COUNTER, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, +}; + enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7d2d0ba82144..78b3d3465dd7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1359,7 +1359,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; - u8 reserved_at_21[0x1]; + u8 create_lag_when_not_master_up[0x1]; u8 dtor[0x1]; u8 event_on_vhca_state_teardown_request[0x1]; u8 event_on_vhca_state_in_use[0x1]; @@ -1806,16 +1806,12 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x740]; }; -enum mlx5_flow_destination_type { - MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, - MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, - MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, - MLX5_FLOW_DESTINATION_TYPE_UPLINK = 0x8, - - MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, - MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101, +enum mlx5_ifc_flow_destination_type { + MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_IFC_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, + MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK = 0x8, }; enum mlx5_flow_table_miss_action { @@ -10820,7 +10816,8 @@ struct mlx5_ifc_dcbx_param_bits { enum { MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0, - MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT = 1, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW = 2, }; struct mlx5_ifc_lagc_bits { @@ -11383,8 +11380,6 @@ enum { enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, - MLX5_IPSEC_OBJECT_ICV_LEN_12B, - MLX5_IPSEC_OBJECT_ICV_LEN_8B, }; struct mlx5_ifc_ipsec_obj_bits { diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 07d77323f78a..45c7c0d67635 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -54,7 +54,6 @@ enum { enum { MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3, }; struct mlx5_ifc_fpga_shell_caps_bits { @@ -387,89 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_tls_extended_cap_bits { - u8 aes_gcm_128[0x1]; - u8 aes_gcm_256[0x1]; - u8 reserved_at_2[0x1e]; - u8 reserved_at_20[0x20]; - u8 context_capacity_total[0x20]; - u8 context_capacity_rx[0x20]; - u8 context_capacity_tx[0x20]; - u8 reserved_at_a0[0x10]; - u8 tls_counter_size[0x10]; - u8 tls_counters_addr_low[0x20]; - u8 tls_counters_addr_high[0x20]; - u8 rx[0x1]; - u8 tx[0x1]; - u8 tls_v12[0x1]; - u8 tls_v13[0x1]; - u8 lro[0x1]; - u8 ipv6[0x1]; - u8 reserved_at_106[0x1a]; -}; - -struct mlx5_ifc_ipsec_extended_cap_bits { - u8 encapsulation[0x20]; - - u8 reserved_0[0x12]; - u8 v2_command[0x1]; - u8 udp_encap[0x1]; - u8 rx_no_trailer[0x1]; - u8 ipv4_fragment[0x1]; - u8 ipv6[0x1]; - u8 esn[0x1]; - u8 lso[0x1]; - u8 transport_and_tunnel_mode[0x1]; - u8 tunnel_mode[0x1]; - u8 transport_mode[0x1]; - u8 ah_esp[0x1]; - u8 esp[0x1]; - u8 ah[0x1]; - u8 ipv4_options[0x1]; - - u8 auth_alg[0x20]; - - u8 enc_alg[0x20]; - - u8 sa_cap[0x20]; - - u8 reserved_1[0x10]; - u8 number_of_ipsec_counters[0x10]; - - u8 ipsec_counters_addr_low[0x20]; - u8 ipsec_counters_addr_high[0x20]; -}; - -struct mlx5_ifc_ipsec_counters_bits { - u8 dec_in_packets[0x40]; - - u8 dec_out_packets[0x40]; - - u8 dec_bypass_packets[0x40]; - - u8 enc_in_packets[0x40]; - - u8 enc_out_packets[0x40]; - - u8 enc_bypass_packets[0x40]; - - u8 drop_dec_packets[0x40]; - - u8 failed_auth_dec_packets[0x40]; - - u8 drop_enc_packets[0x40]; - - u8 success_add_sa[0x40]; - - u8 fail_add_sa[0x40]; - - u8 success_delete_sa[0x40]; - - u8 fail_delete_sa[0x40]; - - u8 dropped_cmd[0x40]; -}; - enum { MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1, MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2, @@ -486,131 +402,4 @@ struct mlx5_ifc_fpga_qp_error_event_bits { u8 reserved_at_c0[0x8]; u8 fpga_qpn[0x18]; }; -enum mlx5_ifc_fpga_ipsec_response_syndrome { - MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, - MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, - MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2, - MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, -}; - -struct mlx5_ifc_fpga_ipsec_cmd_resp { - __be32 syndrome; - union { - __be32 sw_sa_handle; - __be32 flags; - }; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_cmd_opcode { - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1, - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4, - MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5, -}; - -enum mlx5_ifc_fpga_ipsec_cap { - MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), -}; - -struct mlx5_ifc_fpga_ipsec_cmd_cap { - __be32 cmd; - __be32 flags; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_sa_flags { - MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0), - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1), - MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), - MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), - MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), - MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5), - MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6), - MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7), -}; - -enum mlx5_ifc_fpga_ipsec_sa_enc_mode { - MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3, -}; - -struct mlx5_ifc_fpga_ipsec_sa_v1 { - __be32 cmd; - u8 key_enc[32]; - u8 key_auth[32]; - __be32 sip[4]; - __be32 dip[4]; - union { - struct { - __be32 reserved; - u8 salt_iv[8]; - __be32 salt; - } __packed gcm; - struct { - u8 salt[16]; - } __packed cbc; - }; - __be32 spi; - __be32 sw_sa_handle; - __be16 tfclen; - u8 enc_mode; - u8 reserved1[2]; - u8 flags; - u8 reserved2[2]; -}; - -struct mlx5_ifc_fpga_ipsec_sa { - struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1; - __be16 udp_sp; - __be16 udp_dp; - u8 reserved1[4]; - __be32 esn; - __be16 vid; /* only 12 bits, rest is reserved */ - __be16 reserved2; -} __packed; - -enum fpga_tls_cmds { - CMD_SETUP_STREAM = 0x1001, - CMD_TEARDOWN_STREAM = 0x1002, - CMD_RESYNC_RX = 0x1003, -}; - -#define MLX5_TLS_1_2 (0) - -#define MLX5_TLS_ALG_AES_GCM_128 (0) -#define MLX5_TLS_ALG_AES_GCM_256 (1) - -struct mlx5_ifc_tls_cmd_bits { - u8 command_type[0x20]; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 tls_version[0x2]; - u8 reserved[0x1c]; - u8 swid[0x20]; - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 tls_rcd_sn[0x40]; - u8 tcp_sn[0x20]; - u8 tls_implicit_iv[0x20]; - u8 tls_xor_iv[0x40]; - u8 encryption_key[0x100]; - u8 alg[4]; - u8 reserved2[0x1c]; - u8 reserved3[0x4a0]; -}; - -struct mlx5_ifc_tls_resp_bits { - u8 syndrome[0x20]; - u8 stream_id[0x20]; - u8 reserved[0x40]; -}; - -#define MLX5_TLS_COMMAND_SIZE (0x100) - #endif /* MLX5_IFC_FPGA_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 28a928b0684b..e96ee1e348cb 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,7 +141,7 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ MLX5_GET(reg, out, field)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f736c020cde2..f615a66c89e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -50,6 +50,7 @@ #include <linux/hashtable.h> #include <linux/rbtree.h> #include <net/net_trackers.h> +#include <net/net_debug.h> struct netpoll_info; struct device; @@ -59,7 +60,8 @@ struct dsa_port; struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; - +struct netdev_name_node; +struct sd_flow_limit; struct sfp_bus; /* 802.11 specific */ struct wireless_dev; @@ -202,6 +204,7 @@ struct net_device_core_stats { unsigned long rx_dropped; unsigned long tx_dropped; unsigned long rx_nohandler; + unsigned long rx_otherhost_dropped; } __aligned(4 * sizeof(unsigned long)); #include <linux/cache.h> @@ -862,6 +865,7 @@ enum net_device_path_type { DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, + DEV_PATH_MTK_WDMA, }; struct net_device_path { @@ -887,6 +891,12 @@ struct net_device_path { int port; u16 proto; } dsa; + struct { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; + } mtk_wdma; }; }; @@ -1013,16 +1023,6 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; -struct netdev_name_node { - struct hlist_node hlist; - struct list_head list; - struct net_device *dev; - const char *name; -}; - -int netdev_name_node_alt_create(struct net_device *dev, const char *name); -int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); - struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; @@ -1261,6 +1261,10 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. + * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, + * u16 vid, + * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) @@ -1353,6 +1357,12 @@ struct netdev_net_notifier { * The caller must be under RCU read context. * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); * Get the forwarding path to reach the real device from the HW destination address + * ktime_t (*ndo_get_tstamp)(struct net_device *dev, + * const struct skb_shared_hwtstamps *hwtstamps, + * bool cycles); + * Get hardware timestamp based on normal/adjustable time or free running + * cycle counter. This function is required if physical clock supports a + * free running cycle counter. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1510,7 +1520,12 @@ struct net_device_ops { struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid); + u16 vid, struct netlink_ext_ack *extack); + int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + u16 vid, + struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -1570,6 +1585,9 @@ struct net_device_ops { struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); + ktime_t (*ndo_get_tstamp)(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles); }; /** @@ -1909,8 +1927,10 @@ enum netdev_ml_priv_type { * @rtnl_link_ops: Rtnl_link_ops * * @gso_max_size: Maximum size of generic segmentation offload + * @tso_max_size: Device (as in HW) limit on the max TSO request size * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO + * @tso_max_segs: Device (as in HW) limit on the max TSO segment count * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -2099,6 +2119,8 @@ struct net_device { /* Protocol-specific pointers */ + struct in_device __rcu *ip_ptr; + struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2111,16 +2133,18 @@ struct net_device { #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif - struct in_device __rcu *ip_ptr; #if IS_ENABLED(CONFIG_DECNET) struct dn_dev __rcu *dn_ptr; #endif - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif +#if IS_ENABLED(CONFIG_CFG80211) struct wireless_dev *ieee80211_ptr; +#endif +#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) struct wpan_dev *ieee802154_ptr; +#endif #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif @@ -2141,7 +2165,11 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; -#define GRO_MAX_SIZE 65536 +#define GRO_LEGACY_MAX_SIZE 65536u +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2252,10 +2280,20 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ -#define GSO_MAX_SIZE 65536 +#define GSO_MAX_SEGS 65535u +#define GSO_LEGACY_MAX_SIZE 65536u +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) + unsigned int gso_max_size; -#define GSO_MAX_SEGS 65535 +#define TSO_LEGACY_MAX_SIZE 65536 +#define TSO_MAX_SIZE UINT_MAX + unsigned int tso_max_size; u16 gso_max_segs; +#define TSO_MAX_SEGS U16_MAX + u16 tso_max_segs; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; @@ -2491,37 +2529,53 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define NAPI_POLL_WEIGHT 64 +void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); + /** - * netif_napi_add - initialize a NAPI context - * @dev: network device - * @napi: NAPI context - * @poll: polling function - * @weight: default weight + * netif_napi_add() - initialize a NAPI context + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @weight: default weight * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions. */ -void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight); +static inline void +netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + netif_napi_add_weight(dev, napi, poll, weight); +} + +static inline void +netif_napi_add_tx_weight(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); + netif_napi_add_weight(dev, napi, poll, weight); +} + +#define netif_tx_napi_add netif_napi_add_tx_weight /** - * netif_tx_napi_add - initialize a NAPI context - * @dev: network device - * @napi: NAPI context - * @poll: polling function - * @weight: default weight + * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only + * @dev: network device + * @napi: NAPI context + * @poll: polling function * * This variant of netif_napi_add() should be used from drivers using NAPI * to exclusively poll a TX queue. * This will avoid we add it into napi_hash[], thus polluting this hash table. */ -static inline void netif_tx_napi_add(struct net_device *dev, +static inline void netif_napi_add_tx(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) + int (*poll)(struct napi_struct *, int)) { - set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); - netif_napi_add(dev, napi, poll, weight); + netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } /** @@ -2932,10 +2986,20 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -int dev_queue_xmit(struct sk_buff *skb); -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return __dev_queue_xmit(skb, NULL); +} + +static inline int dev_queue_xmit_accel(struct sk_buff *skb, + struct net_device *sb_dev) +{ + return __dev_queue_xmit(skb, sb_dev); +} + static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { int ret; @@ -2968,7 +3032,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); @@ -3027,19 +3090,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -#ifdef CONFIG_NET_FLOW_LIMIT -#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ -struct sd_flow_limit { - u64 count; - unsigned int num_buckets; - unsigned int history_head; - u16 history[FLOW_LIMIT_HISTORY]; - u8 buckets[]; -}; - -extern int netdev_flow_limit_table_len; -#endif /* CONFIG_NET_FLOW_LIMIT */ - /* * Incoming packets are placed on per-CPU queues */ @@ -3067,6 +3117,9 @@ struct softnet_data { struct { u16 recursion; u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, @@ -3084,6 +3137,12 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + /* Another possibly contended cache line */ + spinlock_t defer_lock ____cacheline_aligned_in_smp; + int defer_count; + int defer_ipi_scheduled; + struct sk_buff *defer_list; + call_single_data_t defer_csd; }; static inline void input_queue_head_incr(struct softnet_data *sd) @@ -3763,7 +3822,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); -int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -3775,13 +3833,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, return __dev_change_net_namespace(dev, net, pat, 0); } int __dev_set_mtu(struct net_device *, int); -int dev_validate_mtu(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); -int dev_change_tx_queue_len(struct net_device *, unsigned long); -void dev_set_group(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, @@ -3789,24 +3841,13 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_change_carrier(struct net_device *, bool new_carrier); -int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_item_id *ppid); -int dev_get_phys_port_name(struct net_device *dev, - char *name, size_t len); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -3868,6 +3909,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) +DEV_CORE_STATS_INC(rx_otherhost_dropped) static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -3888,12 +3930,6 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); -extern int netdev_budget; -extern unsigned int netdev_budget_usecs; - -/* Called by rtnetlink.c:rtnl_unlock() */ -void netdev_run_todo(void); - static inline void __dev_put(struct net_device *dev) { if (dev) { @@ -4010,10 +4046,7 @@ static inline void dev_replace_track(struct net_device *odev, * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. */ - -void linkwatch_init_dev(struct net_device *dev); void linkwatch_fire_event(struct net_device *dev); -void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4459,9 +4492,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -void dev_addr_flush(struct net_device *dev); -int dev_addr_init(struct net_device *dev); -void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); @@ -4551,7 +4581,6 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); -void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); @@ -4569,11 +4598,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; -extern int netdev_tstamp_prequeue; -extern int netdev_unregister_timeout_secs; -extern int weight_p; -extern int dev_weight_rx_bias; -extern int dev_weight_tx_bias; extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; @@ -4761,11 +4785,17 @@ static inline void netdev_rx_csum_fault(struct net_device *dev, void net_enable_timestamp(void); void net_disable_timestamp(void); -#ifdef CONFIG_PROC_FS -int __init dev_proc_init(void); -#else -#define dev_proc_init() 0 -#endif +static inline ktime_t netdev_get_tstamp(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_tstamp) + return ops->ndo_get_tstamp(dev, hwtstamps, cycles); + + return hwtstamps->hwtstamp; +} static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, @@ -4802,8 +4832,6 @@ extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -void linkwatch_run_queue(void); - static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { @@ -4889,26 +4917,10 @@ static inline bool netif_needs_gso(struct sk_buff *skb, (skb->ip_summed != CHECKSUM_UNNECESSARY))); } -static inline void netif_set_gso_max_size(struct net_device *dev, - unsigned int size) -{ - /* dev->gso_max_size is read locklessly from sk_setup_caps() */ - WRITE_ONCE(dev->gso_max_size, size); -} - -static inline void netif_set_gso_max_segs(struct net_device *dev, - unsigned int segs) -{ - /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ - WRITE_ONCE(dev->gso_max_segs, segs); -} - -static inline void netif_set_gro_max_size(struct net_device *dev, - unsigned int size) -{ - /* This pairs with the READ_ONCE() in skb_gro_receive() */ - WRITE_ONCE(dev->gro_max_size, size); -} +void netif_set_tso_max_size(struct net_device *dev, unsigned int size); +void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); +void netif_inherit_tso_max(struct net_device *to, + const struct net_device *from); static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, @@ -5074,81 +5086,9 @@ static inline const char *netdev_reg_state(const struct net_device *dev) return " (unknown)"; } -__printf(3, 4) __cold -void netdev_printk(const char *level, const struct net_device *dev, - const char *format, ...); -__printf(2, 3) __cold -void netdev_emerg(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_alert(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_crit(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_err(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_warn(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_notice(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_info(const struct net_device *dev, const char *format, ...); - -#define netdev_level_once(level, dev, fmt, ...) \ -do { \ - static bool __section(".data.once") __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ - } \ -} while (0) - -#define netdev_emerg_once(dev, fmt, ...) \ - netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) -#define netdev_alert_once(dev, fmt, ...) \ - netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) -#define netdev_crit_once(dev, fmt, ...) \ - netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) -#define netdev_err_once(dev, fmt, ...) \ - netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) -#define netdev_warn_once(dev, fmt, ...) \ - netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) -#define netdev_notice_once(dev, fmt, ...) \ - netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) -#define netdev_info_once(dev, fmt, ...) \ - netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) - #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) -#if defined(CONFIG_DYNAMIC_DEBUG) || \ - (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) -#define netdev_dbg(__dev, format, args...) \ -do { \ - dynamic_netdev_dbg(__dev, format, ##args); \ -} while (0) -#elif defined(DEBUG) -#define netdev_dbg(__dev, format, args...) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args) -#else -#define netdev_dbg(__dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args); \ -}) -#endif - -#if defined(VERBOSE_DEBUG) -#define netdev_vdbg netdev_dbg -#else - -#define netdev_vdbg(dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - /* * netdev_WARN() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the @@ -5162,74 +5102,6 @@ do { \ WARN_ONCE(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) -/* netif printk helpers, similar to netdev_printk */ - -#define netif_printk(priv, type, level, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_printk(level, (dev), fmt, ##args); \ -} while (0) - -#define netif_level(level, priv, type, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_##level(dev, fmt, ##args); \ -} while (0) - -#define netif_emerg(priv, type, dev, fmt, args...) \ - netif_level(emerg, priv, type, dev, fmt, ##args) -#define netif_alert(priv, type, dev, fmt, args...) \ - netif_level(alert, priv, type, dev, fmt, ##args) -#define netif_crit(priv, type, dev, fmt, args...) \ - netif_level(crit, priv, type, dev, fmt, ##args) -#define netif_err(priv, type, dev, fmt, args...) \ - netif_level(err, priv, type, dev, fmt, ##args) -#define netif_warn(priv, type, dev, fmt, args...) \ - netif_level(warn, priv, type, dev, fmt, ##args) -#define netif_notice(priv, type, dev, fmt, args...) \ - netif_level(notice, priv, type, dev, fmt, ##args) -#define netif_info(priv, type, dev, fmt, args...) \ - netif_level(info, priv, type, dev, fmt, ##args) - -#if defined(CONFIG_DYNAMIC_DEBUG) || \ - (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) -#define netif_dbg(priv, type, netdev, format, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - dynamic_netdev_dbg(netdev, format, ##args); \ -} while (0) -#elif defined(DEBUG) -#define netif_dbg(priv, type, dev, format, args...) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) -#else -#define netif_dbg(priv, type, dev, format, args...) \ -({ \ - if (0) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - -/* if @cond then downgrade to debug, else print at @level */ -#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ - do { \ - if (cond) \ - netif_dbg(priv, type, netdev, fmt, ##args); \ - else \ - netif_ ## level(priv, type, netdev, fmt, ##args); \ - } while (0) - -#if defined(VERBOSE_DEBUG) -#define netif_vdbg netif_dbg -#else -#define netif_vdbg(priv, type, dev, format, args...) \ -({ \ - if (0) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. diff --git a/include/linux/phy.h b/include/linux/phy.h index 36ca2b5c2253..508f1149665b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -65,7 +65,7 @@ extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[2]; +extern const int phy_basic_t1_features_array[3]; extern const int phy_gbit_features_array[2]; extern const int phy_10gbit_features_array[1]; @@ -570,6 +570,7 @@ struct macsec_ops; * @autoneg_complete: Flag auto negotiation of the link has completed * @mdix: Current crossover * @mdix_ctrl: User setting of crossover + * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics @@ -698,6 +699,8 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; + int pma_extable; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); @@ -1611,11 +1614,14 @@ int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); +int genphy_c45_pma_baset1_setup_master_slave(struct phy_device *phydev); int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); +int genphy_c45_baset1_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); int genphy_c45_loopback(struct phy_device *phydev, bool enable); int genphy_c45_pma_resume(struct phy_device *phydev); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 223781622b33..6d06896fc20d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -160,11 +160,6 @@ struct phylink_mac_ops { * clearing unsupported speeds and duplex settings. The port modes * should not be cleared; phylink_set_port_modes() will help with this. * - * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX - * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode - * based on @state->advertising and/or @state->speed and update - * @state->interface accordingly. See phylink_helper_basex_speed(). - * * When @config->supported_interfaces has been set, phylink will iterate * over the supported interfaces to determine the full capability of the * MAC. The validation function must not print errors if @state->interface @@ -579,7 +574,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 554454cb8693..92b44161408e 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -108,6 +108,32 @@ struct ptp_system_timestamp { * @settime64: Set the current time on the hardware clock. * parameter ts: Time value to set. * + * @getcycles64: Reads the current free running cycle counter from the hardware + * clock. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @gettime64 or @gettimex64 will be used as default + * implementation. + * parameter ts: Holds the result. + * + * @getcyclesx64: Reads the current free running cycle counter from the + * hardware clock and optionally also the system clock. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @gettimex64 will be used as default implementation if + * available. + * parameter ts: Holds the PHC timestamp. + * parameter sts: If not NULL, it holds a pair of timestamps + * from the system clock. The first reading is made right before + * reading the lowest bits of the PHC timestamp and the second + * reading immediately follows that. + * + * @getcrosscycles: Reads the current free running cycle counter from the + * hardware clock and system clock simultaneously. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @getcrosststamp will be used as default implementation if + * available. + * parameter cts: Contains timestamp (device,system) pair, + * where system time is realtime and monotonic. + * * @enable: Request driver to enable or disable an ancillary feature. * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. @@ -155,6 +181,11 @@ struct ptp_clock_info { int (*getcrosststamp)(struct ptp_clock_info *ptp, struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); + int (*getcycles64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*getcyclesx64)(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts); + int (*getcrosscycles)(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, @@ -321,6 +352,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp) static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } +static inline int ptp_find_pin_unlocked(struct ptp_clock *ptp, + enum ptp_pin_function func, + unsigned int chan) +{ return -1; } static inline int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) { return -EOPNOTSUPP; } @@ -349,17 +384,16 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); /** * ptp_convert_timestamp() - convert timestamp to a ptp vclock time * - * @hwtstamps: skb_shared_hwtstamps structure pointer + * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * * Returns converted timestamp, or 0 on error. */ -ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, - int vclock_index); +ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } -static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, +static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) { return 0; } diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 16752eca5cbd..90e3045b2dcb 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -76,7 +76,7 @@ void qed_fcoe_set_pf_params(struct qed_dev *cdev, * @fill_dev_info: fills FCoE specific information * @param cdev * @param info - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @register_ops: register FCoE operations * @param cdev * @param ops - specified using qed_iscsi_cb_ops @@ -96,7 +96,7 @@ void qed_fcoe_set_pf_params(struct qed_dev *cdev, * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * return 0 on sucesss, otherwise error value. + * return 0 on success, otherwise error value. * @release_conn: release a previously acquired fcoe connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index 494cdc3cd840..fbf7973ae9ba 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -133,7 +133,7 @@ struct qed_iscsi_cb_ops { * @fill_dev_info: fills iSCSI specific information * @param cdev * @param info - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @register_ops: register iscsi operations * @param cdev * @param ops - specified using qed_iscsi_cb_ops @@ -152,7 +152,7 @@ struct qed_iscsi_cb_ops { * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @release_conn: release a previously acquired iscsi connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h index 1d51df347560..bbfbfba51f37 100644 --- a/include/linux/qed/qed_nvmetcp_if.h +++ b/include/linux/qed/qed_nvmetcp_if.h @@ -132,7 +132,7 @@ struct nvmetcp_task_params { * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @release_conn: release a previously acquired nvmetcp connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_nvmetcp_ip_services_if.h b/include/linux/qed/qed_nvmetcp_ip_services_if.h deleted file mode 100644 index 3604aee53796..000000000000 --- a/include/linux/qed/qed_nvmetcp_ip_services_if.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ -/* - * Copyright 2021 Marvell. All rights reserved. - */ - -#ifndef _QED_IP_SERVICES_IF_H -#define _QED_IP_SERVICES_IF_H - -#include <linux/types.h> -#include <net/route.h> -#include <net/ip6_route.h> -#include <linux/inetdevice.h> - -int qed_route_ipv4(struct sockaddr_storage *local_addr, - struct sockaddr_storage *remote_addr, - struct sockaddr *hardware_address, - struct net_device **ndev); -int qed_route_ipv6(struct sockaddr_storage *local_addr, - struct sockaddr_storage *remote_addr, - struct sockaddr *hardware_address, - struct net_device **ndev); -void qed_vlan_get_ndev(struct net_device **ndev, u16 *vlan_id); -struct pci_dev *qed_validate_ndev(struct net_device *ndev); -void qed_return_tcp_port(struct socket *sock); -int qed_fetch_tcp_port(struct sockaddr_storage local_ip_addr, - struct socket **sock, u16 *port); -__be16 qed_get_in_port(struct sockaddr_storage *sa); - -#endif /* _QED_IP_SERVICES_IF_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7f970b16da3a..ae2c6a3cec5d 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -100,6 +100,7 @@ void net_dec_ingress_queue(void); #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); +void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 60820ab511d2..bd023dd38ae6 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -277,6 +277,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head, extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); +extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); + /* * Helpers for iteration over hlist_head-s in seq_files */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a30cae8b0a5..da96f0d3e753 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -42,99 +42,114 @@ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif +#include <net/net_debug.h> -/* The interface for checksum offload between the stack and networking drivers +/** + * DOC: skb checksums + * + * The interface for checksum offload between the stack and networking drivers * is as follows... * - * A. IP checksum related features + * IP checksum related features + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Drivers advertise checksum offload capabilities in the features of a device. * From the stack's point of view these are capabilities offered by the driver. * A driver typically only advertises features that it is capable of offloading * to its device. * - * The checksum related features are: - * - * NETIF_F_HW_CSUM - The driver (or its device) is able to compute one - * IP (one's complement) checksum for any combination - * of protocols or protocol layering. The checksum is - * computed and set in a packet per the CHECKSUM_PARTIAL - * interface (see below). - * - * NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain - * TCP or UDP packets over IPv4. These are specifically - * unencapsulated packets of the form IPv4|TCP or - * IPv4|UDP where the Protocol field in the IPv4 header - * is TCP or UDP. The IPv4 header may contain IP options. - * This feature cannot be set in features for a device - * with NETIF_F_HW_CSUM also set. This feature is being - * DEPRECATED (see below). - * - * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain - * TCP or UDP packets over IPv6. These are specifically - * unencapsulated packets of the form IPv6|TCP or - * IPv6|UDP where the Next Header field in the IPv6 - * header is either TCP or UDP. IPv6 extension headers - * are not supported with this feature. This feature - * cannot be set in features for a device with - * NETIF_F_HW_CSUM also set. This feature is being - * DEPRECATED (see below). - * - * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. - * This flag is only used to disable the RX checksum - * feature for a device. The stack will accept receive - * checksum indication in packets received on a device - * regardless of whether NETIF_F_RXCSUM is set. - * - * B. Checksumming of received packets by device. Indication of checksum - * verification is set in skb->ip_summed. Possible values are: - * - * CHECKSUM_NONE: + * .. flat-table:: Checksum related device features + * :widths: 1 10 + * + * * - %NETIF_F_HW_CSUM + * - The driver (or its device) is able to compute one + * IP (one's complement) checksum for any combination + * of protocols or protocol layering. The checksum is + * computed and set in a packet per the CHECKSUM_PARTIAL + * interface (see below). + * + * * - %NETIF_F_IP_CSUM + * - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv4. These are specifically + * unencapsulated packets of the form IPv4|TCP or + * IPv4|UDP where the Protocol field in the IPv4 header + * is TCP or UDP. The IPv4 header may contain IP options. + * This feature cannot be set in features for a device + * with NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * * - %NETIF_F_IPV6_CSUM + * - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv6. These are specifically + * unencapsulated packets of the form IPv6|TCP or + * IPv6|UDP where the Next Header field in the IPv6 + * header is either TCP or UDP. IPv6 extension headers + * are not supported with this feature. This feature + * cannot be set in features for a device with + * NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * * - %NETIF_F_RXCSUM + * - Driver (device) performs receive checksum offload. + * This flag is only used to disable the RX checksum + * feature for a device. The stack will accept receive + * checksum indication in packets received on a device + * regardless of whether NETIF_F_RXCSUM is set. + * + * Checksumming of received packets by device + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Indication of checksum verification is set in &sk_buff.ip_summed. + * Possible values are: + * + * - %CHECKSUM_NONE * * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * - * CHECKSUM_UNNECESSARY: + * - %CHECKSUM_UNNECESSARY * * The hardware you're dealing with doesn't calculate the full checksum - * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY - * if their checksums are okay. skb->csum is still undefined in this case + * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums + * for specific protocols. For such packets it will set %CHECKSUM_UNNECESSARY + * if their checksums are okay. &sk_buff.csum is still undefined in this case * though. A driver or device must never modify the checksum field in the * packet even if checksum is verified. * - * CHECKSUM_UNNECESSARY is applicable to following protocols: - * TCP: IPv6 and IPv4. - * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * %CHECKSUM_UNNECESSARY is applicable to following protocols: + * + * - TCP: IPv6 and IPv4. + * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a * zero UDP checksum for either IPv4 or IPv6, the networking stack * may perform further validation in this case. - * GRE: only if the checksum is present in the header. - * SCTP: indicates the CRC in SCTP header has been validated. - * FCOE: indicates the CRC in FC frame has been validated. + * - GRE: only if the checksum is present in the header. + * - SCTP: indicates the CRC in SCTP header has been validated. + * - FCOE: indicates the CRC in FC frame has been validated. * - * skb->csum_level indicates the number of consecutive checksums found in - * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * &sk_buff.csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), - * GRE (checksum flag is set) and TCP, skb->csum_level would be set to + * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to * two. If the device were only able to verify the UDP checksum and not * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * - * CHECKSUM_COMPLETE: + * - %CHECKSUM_COMPLETE * * This is the most generic way. The device supplied checksum of the _whole_ - * packet as seen by netif_rx() and fills in skb->csum. This means the + * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: + * * - Even if device supports only some protocols, but is able to produce * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * - * CHECKSUM_PARTIAL: + * - %CHECKSUM_PARTIAL * * A checksum is set up to be offloaded to a device as described in the * output description for CHECKSUM_PARTIAL. This may occur on a packet @@ -146,14 +161,18 @@ * packet that are after the checksum being offloaded are not considered to * be verified. * - * C. Checksumming on transmit for non-GSO. The stack requests checksum offload - * in the skb->ip_summed for a packet. Values are: + * Checksumming on transmit for non-GSO + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - * CHECKSUM_PARTIAL: + * The stack requests checksum offload in the &sk_buff.ip_summed for a packet. + * Values are: + * + * - %CHECKSUM_PARTIAL * * The driver is required to checksum the packet as seen by hard_start_xmit() - * from skb->csum_start up to the end, and to record/write the checksum at - * offset skb->csum_start + skb->csum_offset. A driver may verify that the + * from &sk_buff.csum_start up to the end, and to record/write the checksum at + * offset &sk_buff.csum_start + &sk_buff.csum_offset. + * A driver may verify that the * csum_start and csum_offset values are valid values given the length and * offset of the packet, but it should not attempt to validate that the * checksum refers to a legitimate transport layer checksum -- it is the @@ -165,55 +184,66 @@ * checksum calculation to the device, or call skb_checksum_help (in the case * that the device does not support offload for a particular checksum). * - * NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of - * NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate + * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of + * %NETIF_F_HW_CSUM. New devices should use %NETIF_F_HW_CSUM to indicate * checksum offload capability. - * skb_csum_hwoffload_help() can be called to resolve CHECKSUM_PARTIAL based + * skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based * on network device checksumming capabilities: if a packet does not match - * them, skb_checksum_help or skb_crc32c_help (depending on the value of - * csum_not_inet, see item D.) is called to resolve the checksum. + * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of + * &sk_buff.csum_not_inet, see :ref:`crc`) + * is called to resolve the checksum. * - * CHECKSUM_NONE: + * - %CHECKSUM_NONE * * The skb was already checksummed by the protocol, or a checksum is not * required. * - * CHECKSUM_UNNECESSARY: + * - %CHECKSUM_UNNECESSARY * * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * - * CHECKSUM_COMPLETE: + * - %CHECKSUM_COMPLETE + * * Not used in checksum output. If a driver observes a packet with this value - * set in skbuff, it should treat the packet as if CHECKSUM_NONE were set. - * - * D. Non-IP checksum (CRC) offloads - * - * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of - * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set csum_start and csum_offset accordingly, set ip_summed to - * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in - * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. - * A driver that supports both IP checksum offload and SCTP CRC32c offload - * must verify which offload is configured for a packet by testing the - * value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve - * CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. - * - * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of - * offloading the FCOE CRC in a packet. To perform this offload the stack - * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. Note that there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports - * both IP checksum offload and FCOE CRC offload must verify which offload - * is configured for a packet, presumably by inspecting packet headers. - * - * E. Checksumming on output with GSO. - * - * In the case of a GSO packet (skb_is_gso(skb) is true), checksum offload + * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set. + * + * .. _crc: + * + * Non-IP checksum (CRC) offloads + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * .. flat-table:: + * :widths: 1 10 + * + * * - %NETIF_F_SCTP_CRC + * - This feature indicates that a device is capable of + * offloading the SCTP CRC in a packet. To perform this offload the stack + * will set csum_start and csum_offset accordingly, set ip_summed to + * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication + * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c. + * A driver that supports both IP checksum offload and SCTP CRC32c offload + * must verify which offload is configured for a packet by testing the + * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to + * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. + * + * * - %NETIF_F_FCOE_CRC + * - This feature indicates that a device is capable of offloading the FCOE + * CRC in a packet. To perform this offload the stack will set ip_summed + * to %CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note that there is no indication in the skbuff that the + * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports + * both IP checksum offload and FCOE CRC offload must verify which offload + * is configured for a packet, presumably by inspecting packet headers. + * + * Checksumming on output with GSO + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * In the case of a GSO packet (skb_is_gso() is true), checksum offload * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the - * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as + * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded - * with GSO then ip_summed is CHECKSUM_PARTIAL, and both csum_start and + * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and * csum_offset are set to refer to the outermost checksum being offloaded * (two offloaded checksums are possible with UDP encapsulation). */ @@ -381,6 +411,19 @@ enum skb_drop_reason { * the ofo queue, corresponding to * LINUX_MIB_TCPOFOMERGE */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ + SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ + SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB @@ -408,11 +451,9 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle - * the skb. For an etner packet, - * this means that L3 protocol is - * not supported - */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation * error */ @@ -444,9 +485,36 @@ enum skb_drop_reason { SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented * at tun/tap, e.g., check_filter() */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ SKB_DROP_REASON_MAX, }; +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -551,8 +619,10 @@ static inline bool skb_frag_must_loop(struct page *p) /** * struct skb_shared_hwtstamps - hardware time stamps - * @hwtstamp: hardware time stamp transformed into duration - * since arbitrary point in time + * @hwtstamp: hardware time stamp transformed into duration + * since arbitrary point in time + * @netdev_data: address/cookie of network device driver used as + * reference to actual hardware time stamp * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. @@ -564,7 +634,10 @@ static inline bool skb_frag_must_loop(struct page *p) * &skb_shared_info. Use skb_hwtstamps() to get a pointer. */ struct skb_shared_hwtstamps { - ktime_t hwtstamp; + union { + ktime_t hwtstamp; + void *netdev_data; + }; }; /* Definitions for tx_flags in struct skb_shared_info */ @@ -578,16 +651,24 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, + /* generate hardware time stamp based on cycles if supported */ + SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3, + /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, + /* determine hardware time stamp based on time or cycles */ + SKBTX_HW_TSTAMP_NETDEV = 1 << 5, + /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) -#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) +#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ + SKBTX_HW_TSTAMP_USE_CYCLES | \ + SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ enum { @@ -647,20 +728,6 @@ struct ubuf_info { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); -struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size); -struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); - -void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); - -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); -int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, - struct msghdr *msg, int len, - struct ubuf_info *uarg); - /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -691,16 +758,32 @@ struct skb_shared_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; -/* We divide dataref into two halves. The higher 16 bits hold references - * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. A clone of a headerless skb holds the length of - * the header in skb->hdr_len. - * - * All users must obey the rule that the skb->data reference count must be - * greater than or equal to the payload reference count. - * - * Holding a reference to the payload part means that the user does not - * care about modifications to the header part of skb->data. +/** + * DOC: dataref and headerless skbs + * + * Transport layers send out clones of payload skbs they hold for + * retransmissions. To allow lower layers of the stack to prepend their headers + * we split &skb_shared_info.dataref into two halves. + * The lower 16 bits count the overall number of references. + * The higher 16 bits indicate how many of the references are payload-only. + * skb_header_cloned() checks if skb is allowed to add / write the headers. + * + * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr + * (via __skb_header_release()). Any clone created from marked skb will get + * &sk_buff.hdr_len populated with the available headroom. + * If there's the only clone in existence it's able to modify the headroom + * at will. The sequence of calls inside the transport layer is:: + * + * <alloc skb> + * skb_reserve() + * __skb_header_release() + * skb_clone() + * // send the clone down the stack + * + * This is not a very generic construct and it depends on the transport layers + * doing the right thing. In practice there's usually only one payload-only skb. + * Having multiple payload-only skbs with different lengths of hdr_len is not + * possible. The payload-only skbs should never leave their owner. */ #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) @@ -765,6 +848,46 @@ typedef unsigned char *sk_buff_data_t; #endif /** + * DOC: Basic sk_buff geometry + * + * struct sk_buff itself is a metadata structure and does not hold any packet + * data. All the data is held in associated buffers. + * + * &sk_buff.head points to the main "head" buffer. The head buffer is divided + * into two parts: + * + * - data buffer, containing headers and sometimes payload; + * this is the part of the skb operated on by the common helpers + * such as skb_put() or skb_pull(); + * - shared info (struct skb_shared_info) which holds an array of pointers + * to read-only data in the (page, offset, length) format. + * + * Optionally &skb_shared_info.frag_list may point to another skb. + * + * Basic diagram may look like this:: + * + * --------------- + * | sk_buff | + * --------------- + * ,--------------------------- + head + * / ,----------------- + data + * / / ,----------- + tail + * | | | , + end + * | | | | + * v v v v + * ----------------------------------------------- + * | headroom | data | tailroom | skb_shared_info | + * ----------------------------------------------- + * + [page frag] + * + [page frag] + * + [page frag] + * + [page frag] --------- + * + frag_list --> | sk_buff | + * --------- + * + */ + +/** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list @@ -851,6 +974,7 @@ typedef unsigned char *sk_buff_data_t; * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS + * @alloc_cpu: CPU which did the skb allocation. * @secmark: security marking * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available @@ -1043,6 +1167,7 @@ struct sk_buff { unsigned int sender_cpu; }; #endif + u16 alloc_cpu; #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif @@ -1284,6 +1409,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); +void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); @@ -1639,6 +1765,27 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); + +void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); + +void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success); + +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, + struct msghdr *msg, int len) +{ + return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); +} + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) @@ -1922,8 +2069,10 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) } /** - * __skb_header_release - release reference to header - * @skb: buffer to operate on + * __skb_header_release() - allow clones to use the headroom + * @skb: buffer to operate on + * + * See "DOC: dataref and headerless skbs". */ static inline void __skb_header_release(struct sk_buff *skb) { @@ -2752,6 +2901,7 @@ static inline bool skb_transport_header_was_set(const struct sk_buff *skb) static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->head + skb->transport_header; } @@ -3836,8 +3986,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, - int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, @@ -3886,7 +4035,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); -int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); @@ -4895,9 +5044,7 @@ static inline void skb_forward_csum(struct sk_buff *skb) */ static inline void skb_checksum_none_assert(const struct sk_buff *skb) { -#ifdef DEBUG - BUG_ON(skb->ip_summed != CHECKSUM_NONE); -#endif + DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE); } bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h new file mode 100644 index 000000000000..7e00cca06709 --- /dev/null +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -0,0 +1,131 @@ +#ifndef __MTK_WED_H +#define __MTK_WED_H + +#include <linux/kernel.h> +#include <linux/rcupdate.h> +#include <linux/regmap.h> +#include <linux/pci.h> + +#define MTK_WED_TX_QUEUES 2 + +struct mtk_wed_hw; +struct mtk_wdma_desc; + +struct mtk_wed_ring { + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + int size; + + u32 reg_base; + void __iomem *wpdma; +}; + +struct mtk_wed_device { +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + const struct mtk_wed_ops *ops; + struct device *dev; + struct mtk_wed_hw *hw; + bool init_done, running; + int wdma_idx; + int irq; + + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring txfree_ring; + struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + + struct { + int size; + void **pages; + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + } buf_ring; + + /* filled by driver: */ + struct { + struct pci_dev *pci_dev; + + u32 wpdma_phys; + + u16 token_start; + unsigned int nbuf; + + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); + int (*offload_enable)(struct mtk_wed_device *wed); + void (*offload_disable)(struct mtk_wed_device *wed); + } wlan; +#endif +}; + +struct mtk_wed_ops { + int (*attach)(struct mtk_wed_device *dev); + int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*txfree_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); + void (*detach)(struct mtk_wed_device *dev); + + void (*stop)(struct mtk_wed_device *dev); + void (*start)(struct mtk_wed_device *dev, u32 irq_mask); + void (*reset_dma)(struct mtk_wed_device *dev); + + u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg); + void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val); + + u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask); + void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); +}; + +extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; + +static inline int +mtk_wed_device_attach(struct mtk_wed_device *dev) +{ + int ret = -ENODEV; + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + rcu_read_lock(); + dev->ops = rcu_dereference(mtk_soc_wed_ops); + if (dev->ops) + ret = dev->ops->attach(dev); + else + rcu_read_unlock(); + + if (ret) + dev->ops = NULL; +#endif + + return ret; +} + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +#define mtk_wed_device_active(_dev) !!(_dev)->ops +#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) +#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ + (_dev)->ops->txfree_ring_setup(_dev, _regs) +#define mtk_wed_device_reg_read(_dev, _reg) \ + (_dev)->ops->reg_read(_dev, _reg) +#define mtk_wed_device_reg_write(_dev, _reg, _val) \ + (_dev)->ops->reg_write(_dev, _reg, _val) +#define mtk_wed_device_irq_get(_dev, _mask) \ + (_dev)->ops->irq_get(_dev, _mask) +#define mtk_wed_device_irq_set_mask(_dev, _mask) \ + (_dev)->ops->irq_set_mask(_dev, _mask) +#else +static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) +{ + return false; +} +#define mtk_wed_device_detach(_dev) do {} while (0) +#define mtk_wed_device_start(_dev, _mask) do {} while (0) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_reg_read(_dev, _reg) 0 +#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) +#define mtk_wed_device_irq_get(_dev, _mask) 0 +#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#endif + +#endif diff --git a/include/linux/socket.h b/include/linux/socket.h index 6f85f5d957ef..12085c9a8544 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -50,6 +50,9 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ + + int msg_inq; /* output, data left in socket */ + struct iov_iter msg_iter; /* data */ /* @@ -62,8 +65,9 @@ struct msghdr { void __user *msg_control_user; }; bool msg_control_is_user : 1; - __kernel_size_t msg_controllen; /* ancillary data buffer length */ + bool msg_get_inq : 1;/* return INQ after receive */ unsigned int msg_flags; /* flags on received message */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; diff --git a/include/linux/string.h b/include/linux/string.h index b6572aeca2f5..61ec7e4f6311 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -252,6 +252,10 @@ static inline const char *kbasename(const char *path) #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) #include <linux/fortify-string.h> #endif +#ifndef unsafe_memcpy +#define unsafe_memcpy(dst, src, bytes, justification) \ + memcpy(dst, src, bytes) +#endif void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6353d6db69b2..80263f7cdb77 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -38,10 +38,10 @@ struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ -#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) -#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) -#define SYSCTL_ONE ((void *)&sysctl_vals[2]) -#define SYSCTL_TWO ((void *)&sysctl_vals[3]) +#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) +#define SYSCTL_ONE ((void *)&sysctl_vals[1]) +#define SYSCTL_TWO ((void *)&sysctl_vals[2]) +#define SYSCTL_THREE ((void *)&sysctl_vals[3]) #define SYSCTL_FOUR ((void *)&sysctl_vals[4]) #define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) #define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) @@ -51,6 +51,7 @@ struct ctl_dir; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ #define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10]) +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[11]) extern const int sysctl_vals[]; diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 809bccd08455..cc42db51bbba 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -197,6 +197,7 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ /* Flags for driver_info::data */ #define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ +#define RNDIS_DRIVER_DATA_DST_MAC_FIXUP 2 /* device ignores configured MAC address */ extern void rndis_status(struct usbnet *dev, struct urb *urb); extern int diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 8336e86ce606..1b4d72d5e891 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -214,6 +214,7 @@ extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); +extern int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb); /* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ #define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ diff --git a/include/net/act_api.h b/include/net/act_api.h index 3049cb69c025..9cf6870b526e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -134,7 +134,8 @@ struct tc_action_ops { (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); int (*offload_act_setup)(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind); + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack); }; struct tc_action_net { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 69ef31cea582..fe7935be7dc4 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -265,6 +265,15 @@ enum { * runtime suspend, because event filtering takes place there. */ HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, + + /* + * When this quirk is set, disables the use of + * HCI_OP_ENHANCED_SETUP_SYNC_CONN command to setup SCO connections. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, }; /* HCI device flags */ @@ -616,6 +625,7 @@ enum { #define EIR_SSP_RAND_R192 0x0F /* Simple Pairing Randomizer R-192 */ #define EIR_DEVICE_ID 0x10 /* device ID */ #define EIR_APPEARANCE 0x19 /* Device appearance */ +#define EIR_SERVICE_DATA 0x16 /* Service Data */ #define EIR_LE_BDADDR 0x1B /* LE Bluetooth device address */ #define EIR_LE_ROLE 0x1C /* LE role */ #define EIR_SSP_HASH_C256 0x1D /* Simple Pairing Hash C-256 */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 62d7b81b1cb7..5a52a2018b56 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1495,8 +1495,12 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) -/* Use enhanced synchronous connection if command is supported */ -#define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) +/* Use enhanced synchronous connection if command is supported and its quirk + * has not been set. + */ +#define enhanced_sync_conn_capable(dev) \ + (((dev)->commands[29] & 0x08) && \ + !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 68713388b617..6d02e12e4702 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1183,6 +1183,9 @@ struct cfg80211_mbssid_elems { * Token (measurement type 11) * @lci_len: LCI data length * @civicloc_len: Civic location data length + * @he_bss_color: BSS Color settings + * @he_bss_color_valid: indicates whether bss color + * attribute is present in beacon data or not. */ struct cfg80211_beacon_data { const u8 *head, *tail; @@ -1202,6 +1205,8 @@ struct cfg80211_beacon_data { size_t probe_resp_len; size_t lci_len; size_t civicloc_len; + struct cfg80211_he_bss_color he_bss_color; + bool he_bss_color_valid; }; struct mac_address { @@ -1292,7 +1297,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @sae_h2e_required: stations must support direct H2E technique in SAE * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings - * @he_bss_color: BSS Color settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters @@ -1326,7 +1330,6 @@ struct cfg80211_ap_settings { bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; - struct cfg80211_he_bss_color he_bss_color; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; @@ -2735,6 +2738,7 @@ struct cfg80211_auth_request { * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). * @ASSOC_REQ_DISABLE_HE: Disable HE + * @ASSOC_REQ_DISABLE_EHT: Disable EHT */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -2742,6 +2746,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), ASSOC_REQ_DISABLE_HE = BIT(4), + ASSOC_REQ_DISABLE_EHT = BIT(5), }; /** @@ -5549,8 +5554,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @conn_owner_nlportid: (private) connection owner socket port ID * @disconnect_wk: (private) auto-disconnect work * @disconnect_bssid: (private) the BSSID to use for auto-disconnect - * @ibss_fixed: (private) IBSS is using fixed BSSID - * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID @@ -5599,9 +5602,6 @@ struct wireless_dev { struct cfg80211_chan_def preset_chandef; struct cfg80211_chan_def chandef; - bool ibss_fixed; - bool ibss_dfs_possible; - bool ps; int ps_timeout; @@ -8006,7 +8006,9 @@ int cfg80211_register_netdevice(struct net_device *dev); */ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { +#if IS_ENABLED(CONFIG_CFG80211) cfg80211_unregister_wdev(dev->ieee80211_ptr); +#endif } /** diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 833672d6fbe4..d8d8719315fd 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -203,8 +203,8 @@ struct wpan_phy { /* PHY depended MAC PIB values */ - /* 802.15.4 acronym: Tdsym in usec */ - u8 symbol_duration; + /* 802.15.4 acronym: Tdsym in nsec */ + u32 symbol_duration; /* lifs and sifs periods timing */ u16 lifs_period; u16 sifs_period; @@ -373,6 +373,7 @@ struct wpan_dev { #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) +#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) static inline int wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, @@ -383,6 +384,7 @@ wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len); } +#endif struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size); @@ -415,4 +417,6 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) return dev_name(&phy->dev); } +void ieee802154_configure_durations(struct wpan_phy *phy); + #endif /* __NET_CFG802154_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index a30180c0988a..2a2a2a0c93f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -22,6 +22,7 @@ #include <linux/firmware.h> struct devlink; +struct devlink_linecard; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". @@ -135,6 +136,7 @@ struct devlink_port { struct mutex reporters_lock; /* Protects reporter_list */ struct devlink_rate *devlink_rate; + struct devlink_linecard *linecard; }; struct devlink_port_new_attrs { @@ -148,6 +150,40 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; +/** + * struct devlink_linecard_ops - Linecard operations + * @provision: callback to provision the linecard slot with certain + * type of linecard. As a result of this operation, + * driver is expected to eventually (could be after + * the function call returns) call one of: + * devlink_linecard_provision_set() + * devlink_linecard_provision_fail() + * @unprovision: callback to unprovision the linecard slot. As a result + * of this operation, driver is expected to eventually + * (could be after the function call returns) call + * devlink_linecard_provision_clear() + * devlink_linecard_provision_fail() + * @same_provision: callback to ask the driver if linecard is already + * provisioned in the same way user asks this linecard to be + * provisioned. + * @types_count: callback to get number of supported types + * @types_get: callback to get next type in list + */ +struct devlink_linecard_ops { + int (*provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv, + struct netlink_ext_ack *extack); + int (*unprovision)(struct devlink_linecard *linecard, void *priv, + struct netlink_ext_ack *extack); + bool (*same_provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv); + unsigned int (*types_count)(struct devlink_linecard *linecard, + void *priv); + void (*types_get)(struct devlink_linecard *linecard, + void *priv, unsigned int index, const char **type, + const void **type_priv); +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -1536,6 +1572,18 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); void devlink_rate_nodes_destroy(struct devlink *devlink); +void devlink_port_linecard_set(struct devlink_port *devlink_port, + struct devlink_linecard *linecard); +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv); +void devlink_linecard_destroy(struct devlink_linecard *linecard); +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char *type); +void devlink_linecard_provision_clear(struct devlink_linecard *linecard); +void devlink_linecard_provision_fail(struct devlink_linecard *linecard); +void devlink_linecard_activate(struct devlink_linecard *linecard); +void devlink_linecard_deactivate(struct devlink_linecard *linecard); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index 934958fda962..14f07275852b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -579,6 +579,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) +#define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \ + dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ + if (dsa_port_is_cpu((_dp))) + static inline u32 dsa_user_ports(struct dsa_switch *ds) { struct dsa_port *dp; @@ -590,6 +594,17 @@ static inline u32 dsa_user_ports(struct dsa_switch *ds) return mask; } +static inline u32 dsa_cpu_ports(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + u32 mask = 0; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) + mask |= BIT(cpu_dp->index); + + return mask; +} + /* Return the local port used to reach an arbitrary switch device */ static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device) { @@ -792,7 +807,7 @@ struct dsa_switch_ops { enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); - int (*change_tag_protocol)(struct dsa_switch *ds, int port, + int (*change_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); /* * Method for switch drivers to connect to the tagging protocol driver @@ -967,6 +982,8 @@ struct dsa_switch_ops { int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); + void (*port_set_host_flood)(struct dsa_switch *ds, int port, + bool uc, bool mc); /* * VLAN support @@ -1239,12 +1256,6 @@ struct dsa_switch_driver { struct net_device *dsa_dev_to_net_device(struct device *dev); -typedef int dsa_fdb_walk_cb_t(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db); - -int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); -int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb); bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9f65f1bfbd24..a4c6057c7097 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -253,6 +253,14 @@ struct flow_dissector_key_hash { u32 hash; }; +/** + * struct flow_dissector_key_num_of_vlans: + * @num_of_vlans: num_of_vlans value + */ +struct flow_dissector_key_num_of_vlans { + u8 num_of_vlans; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -282,6 +290,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 4cfdef6ca4f6..c8490729b4ae 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 81b965953036..f259e1ae14ba 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -103,15 +103,25 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const int dif); int inet6_hash(struct sock *sk); -#endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) +static inline bool inet6_match(struct net *net, const struct sock *sk, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + const __portpair ports, + const int dif, const int sdif) +{ + int bound_dev_if; + + if (!net_eq(sock_net(sk), net) || + sk->sk_family != AF_INET6 || + sk->sk_portpair != ports || + !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return false; + + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + return bound_dev_if == dif || bound_dev_if == sdif; +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 3908296d103f..077cd730ce2f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -25,6 +25,7 @@ #undef INET_CSK_CLEAR_TIMERS struct inet_bind_bucket; +struct inet_bind2_bucket; struct tcp_congestion_ops; /* @@ -57,6 +58,7 @@ struct inet_connection_sock_af_ops { * * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node + * @icsk_bind2_hash: Bind node in the bhash2 table * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout @@ -66,7 +68,6 @@ struct inet_connection_sock_af_ops { * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data * @icsk_clean_acked Clean acked data hook - * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -84,6 +85,7 @@ struct inet_connection_sock { struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; + struct inet_bind2_bucket *icsk_bind2_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; @@ -96,7 +98,6 @@ struct inet_connection_sock { const struct tcp_ulp_ops *icsk_ulp_ops; void __rcu *icsk_ulp_data; void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); - struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:5, icsk_ca_initialized:1, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 98e1ec1a14f0..a0887b70967b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -90,11 +90,32 @@ struct inet_bind_bucket { struct hlist_head owners; }; +struct inet_bind2_bucket { + possible_net_t ib_net; + int l3mdev; + unsigned short port; + union { +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr v6_rcv_saddr; +#endif + __be32 rcv_saddr; + }; + /* Node in the inet2_bind_hashbucket chain */ + struct hlist_node node; + /* List of sockets hashed to this bucket */ + struct hlist_head owners; +}; + static inline struct net *ib_net(struct inet_bind_bucket *ib) { return read_pnet(&ib->ib_net); } +static inline struct net *ib2_net(struct inet_bind2_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, head) \ hlist_for_each_entry(tb, head, node) @@ -103,6 +124,15 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* This is synchronized using the inet_bind_hashbucket's spinlock. + * Instead of having separate spinlocks, the inet_bind2_hashbucket can share + * the inet_bind_hashbucket's given that in every case where the bhash2 table + * is useful, a lookup in the bhash table also occurs. + */ +struct inet_bind2_hashbucket { + struct hlist_head chain; +}; + /* Sockets can be hashed in established or listening table. * We must use different 'nulls' end-of-chain value for all hash buckets : * A socket might transition from ESTABLISH to LISTEN state without @@ -111,11 +141,7 @@ struct inet_bind_hashbucket { #define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - unsigned int count; - union { - struct hlist_head head; - struct hlist_nulls_head nulls_head; - }; + struct hlist_nulls_head nulls_head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -138,37 +164,19 @@ struct inet_hashinfo { */ struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; + /* The 2nd binding table hashed by port and address. + * This is used primarily for expediting the resolution of bind + * conflicts. + */ + struct kmem_cache *bind2_bucket_cachep; + struct inet_bind2_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * might be often dirty. - */ - /* All sockets in TCP_LISTEN state will be in listening_hash. - * This is the only table where wildcard'd TCP sockets can - * exist. listening_hash is only hashed by local port number. - * If lhash2 is initialized, the same socket will also be hashed - * to lhash2 by port and address. - */ - struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] - ____cacheline_aligned_in_smp; }; -#define inet_lhash2_for_each_icsk_continue(__icsk) \ - hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk(__icsk, list) \ - hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ - hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) - static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) { @@ -221,32 +229,50 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, - const u32 bhash_size) +static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb, + struct net *net, + const unsigned short port, + int l3mdev) { - return (lport + net_hash_mix(net)) & (bhash_size - 1); + return net_eq(ib_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev; } -void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum); +struct inet_bind2_bucket * +inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, + struct inet_bind2_hashbucket *head, + const unsigned short port, int l3mdev, + const struct sock *sk); -/* These can have wildcards, don't try too hard. */ -static inline u32 inet_lhashfn(const struct net *net, const unsigned short num) -{ - return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1); -} +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, + struct inet_bind2_bucket *tb); + +struct inet_bind2_bucket * +inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, + const unsigned short port, int l3mdev, + struct sock *sk, + struct inet_bind2_hashbucket **head); -static inline int inet_sk_listen_hashfn(const struct sock *sk) +bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, + struct net *net, + const unsigned short port, + int l3mdev, + const struct sock *sk); + +static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, + const u32 bhash_size) { - return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num); + return (lport + net_hash_mix(net)) & (bhash_size - 1); } +void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2, const unsigned short snum); + /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); -void inet_hashinfo_init(struct inet_hashinfo *h); void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, @@ -295,7 +321,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif -#if (BITS_PER_LONG == 64) #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ @@ -307,24 +332,22 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_addrpair == (__cookie)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#else /* 32-bit arch */ -#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const int __name __deprecated __attribute__((unused)) - -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_daddr == (__saddr)) && \ - ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#endif /* 64-bit arch */ + +static inline bool inet_match(struct net *net, const struct sock *sk, + const __addrpair cookie, const __portpair ports, + int dif, int sdif) +{ + int bound_dev_if; + + if (!net_eq(sock_net(sk), net) || + sk->sk_portpair != ports || + sk->sk_addrpair != cookie) + return false; + + /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */ + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + return bound_dev_if == dif || bound_dev_if == sdif; +} /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 234d70ae5f4c..c1b5dcd6597c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -116,14 +116,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) static inline int inet_request_bound_dev_if(const struct sock *sk, struct sk_buff *skb) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif - return sk->sk_bound_dev_if; + return bound_dev_if; } static inline int inet_sk_bound_l3mdev(const struct sock *sk) diff --git a/include/net/ip.h b/include/net/ip.h index 0161137914cf..26fffda78cca 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -94,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->sockc.mark = inet->sk.sk_mark; ipcm->sockc.tsflags = inet->sk.sk_tsflags; - ipcm->oif = inet->sk.sk_bound_dev_if; + ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; } diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6a82bcb8813b..a378eff827c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -212,7 +212,7 @@ struct fib_rt_info { u32 tb_id; __be32 dst; int dst_len; - u8 tos; + dscp_t dscp; u8 type; u8 offload:1, trap:1, @@ -225,7 +225,7 @@ struct fib_entry_notifier_info { u32 dst; int dst_len; struct fib_info *fi; - u8 tos; + dscp_t dscp; u8 type; u32 tb_id; }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 213612f1680c..5b38bf1a586b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -151,6 +151,17 @@ struct frag_hdr { __be32 identification; }; +/* + * Jumbo payload option, as described in RFC 2675 2. + */ +struct hop_jumbo_hdr { + u8 nexthdr; + u8 hdrlen; + u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ + u8 tlv_len; /* 4 */ + __be32 jumbo_payload_len; +}; + #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -456,6 +467,39 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head. + * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. + */ +static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + const struct hop_jumbo_hdr *jhdr; + const struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) + return 0; + + if (skb->protocol != htons(ETH_P_IPV6)) + return 0; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return 0; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return 0; + + jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || + jhdr->nexthdr != IPPROTO_TCP) + return 0; + return jhdr->nexthdr; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 382ebb862ea8..ebadb2103968 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -514,7 +514,6 @@ struct ieee80211_fils_discovery { * to that BSS) that can change during the lifetime of the BSS. * * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE - * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK * @uora_exists: is the UORA element advertised by AP * @ack_enabled: indicates support to receive a multi-TID that solicits either * ACK, BACK or both @@ -1144,20 +1143,41 @@ ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info) return info->tx_time_est << 2; } +/*** + * struct ieee80211_rate_status - mrr stage for status path + * + * This struct is used in struct ieee80211_tx_status to provide drivers a + * dynamic way to report about used rates and power levels per packet. + * + * @rate_idx The actual used rate. + * @try_count How often the rate was tried. + * @tx_power_idx An idx into the ieee80211_hw->tx_power_levels list of the + * corresponding wifi hardware. The idx shall point to the power level + * that was used when sending the packet. + */ +struct ieee80211_rate_status { + struct rate_info rate_idx; + u8 try_count; + u8 tx_power_idx; +}; + /** * struct ieee80211_tx_status - extended tx status info for rate control * * @sta: Station that the packet was transmitted for * @info: Basic tx status information * @skb: Packet skb (can be NULL if not provided by the driver) - * @rate: The TX rate that was used when sending the packet + * @rates: Mrr stages that were used when sending the packet + * @n_rates: Number of mrr stages (count of instances for @rates) * @free_list: list where processed skbs are stored to be free'd by the driver */ struct ieee80211_tx_status { struct ieee80211_sta *sta; struct ieee80211_tx_info *info; struct sk_buff *skb; - struct rate_info *rate; + struct ieee80211_rate_status *rates; + u8 n_rates; + struct list_head *free_list; }; @@ -1201,9 +1221,9 @@ static inline struct ieee80211_rx_status *IEEE80211_SKB_RXCB(struct sk_buff *skb * in the TX status but the rate control information (it does clear * the count since you need to fill that in anyway). * - * NOTE: You can only use this function if you do NOT use - * info->driver_data! Use info->rate_driver_data - * instead if you need only the less space that allows. + * NOTE: While the rates array is kept intact, this will wipe all of the + * driver_data fields in info, so it's up to the driver to restore + * any fields it needs after calling this helper. */ static inline void ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) @@ -1701,7 +1721,7 @@ enum ieee80211_offload_flags { * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed * at runtime, mac80211 will never touch this field - * @offloaad_flags: hardware offload capabilities/flags for this interface. + * @offload_flags: hardware offload capabilities/flags for this interface. * These are initialized by mac80211 before calling .add_interface, * .change_interface or .update_vif_offload and updated by the driver * within these ops, based on supported features or runtime change @@ -2056,6 +2076,45 @@ struct ieee80211_sta_txpwr { enum nl80211_tx_power_setting type; }; +#define MAX_STA_LINKS 15 + +/** + * struct ieee80211_link_sta - station Link specific info + * All link specific info for a STA link for a non MLD STA(single) + * or a MLD STA(multiple entries) are stored here. + * + * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr + * in ieee80211_sta. For MLO Link STA this addr can be same or different + * from addr in ieee80211_sta (representing MLD STA addr) + * @supp_rates: Bitmap of supported rates + * @ht_cap: HT capabilities of this STA; restricted to our own capabilities + * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @he_cap: HE capabilities of this STA + * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities + * @eht_cap: EHT capabilities of this STA + * @bandwidth: current bandwidth the station can receive with + * @rx_nss: in HT/VHT, the maximum number of spatial streams the + * station can receive at the moment, changed by operating mode + * notifications and capabilities. The value is only valid after + * the station moves to associated state. + * @txpwr: the station tx power configuration + * + */ +struct ieee80211_link_sta { + u8 addr[ETH_ALEN]; + + u32 supp_rates[NUM_NL80211_BANDS]; + struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; + struct ieee80211_sta_he_cap he_cap; + struct ieee80211_he_6ghz_capa he_6ghz_capa; + struct ieee80211_sta_eht_cap eht_cap; + + u8 rx_nss; + enum ieee80211_sta_rx_bandwidth bandwidth; + struct ieee80211_sta_txpwr txpwr; +}; + /** * struct ieee80211_sta - station table entry * @@ -2065,15 +2124,11 @@ struct ieee80211_sta_txpwr { * either be protected by rcu_read_lock() explicitly or implicitly, * or you must take good care to not use such a pointer after a * call to your sta_remove callback that removed it. + * This also represents the MLD STA in case of MLO association + * and holds pointers to various link STA's * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP - * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA; restricted to our own capabilities - * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities - * @he_cap: HE capabilities of this STA - * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities - * @eht_cap: EHT capabilities of this STA * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU * that this station is allowed to transmit to us. * Can be modified by driver. @@ -2085,11 +2140,6 @@ struct ieee80211_sta_txpwr { * if wme is supported. The bits order is like in * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. - * @bandwidth: current bandwidth the station can receive with - * @rx_nss: in HT/VHT, the maximum number of spatial streams the - * station can receive at the moment, changed by operating mode - * notifications and capabilities. The value is only valid after - * the station moves to associated state. * @smps_mode: current SMPS mode (off, static or dynamic) * @rates: rate control selection table * @tdls: indicates whether the STA is a TDLS peer @@ -2102,25 +2152,28 @@ struct ieee80211_sta_txpwr { * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID - * @txpwr: the station tx power configuration * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames + * @multi_link_sta: Identifies if this sta is a MLD STA + * @deflink: This holds the default link STA information, for non MLO STA all link + * specific STA information is accessed through @deflink or through + * link[0] which points to address of @deflink. For MLO Link STA + * the first added link STA will point to deflink. + * @link: reference to Link Sta entries. For Non MLO STA, except 1st link, + * i.e link[0] all links would be assigned to NULL by default and + * would access link information via @deflink or link[0]. For MLO + * STA, first link STA being added will point its link pointer to + * @deflink address and remaining would be allocated and the address + * would be assigned to link[link_id] where link_id is the id assigned + * by the AP. */ struct ieee80211_sta { - u32 supp_rates[NUM_NL80211_BANDS]; u8 addr[ETH_ALEN]; u16 aid; - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_sta_vht_cap vht_cap; - struct ieee80211_sta_he_cap he_cap; - struct ieee80211_he_6ghz_capa he_6ghz_capa; - struct ieee80211_sta_eht_cap eht_cap; u16 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; - u8 rx_nss; - enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_rates __rcu *rates; bool tdls; @@ -2147,10 +2200,13 @@ struct ieee80211_sta { bool support_p2p_ps; u16 max_rc_amsdu_len; u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; - struct ieee80211_sta_txpwr txpwr; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; + bool multi_link_sta; + struct ieee80211_link_sta deflink; + struct ieee80211_link_sta *link[MAX_STA_LINKS]; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -2434,6 +2490,9 @@ struct ieee80211_txq { * usage and 802.11 frames with %RX_FLAG_ONLY_MONITOR set for monitor to * the stack. * + * @IEEE80211_HW_DETECTS_COLOR_COLLISION: HW/driver has support for BSS color + * collision detection and doesn't need it in software. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2489,6 +2548,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD, IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP, + IEEE80211_HW_DETECTS_COLOR_COLLISION, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2618,6 +2678,12 @@ enum ieee80211_hw_flags { * refilling deficit of each TXQ. * * @max_mtu: the max mtu could be set. + * + * @tx_power_levels: a list of power levels supported by the wifi hardware. + * The power levels can be specified either as integer or fractions. + * The power level at idx 0 shall be the maximum positive power level. + * + * @max_txpwr_levels_idx: the maximum valid idx of 'tx_power_levels' list. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2656,6 +2722,8 @@ struct ieee80211_hw { u8 tx_sk_pacing_shift; u8 weight_multiplier; u32 max_mtu; + const s8 *tx_power_levels; + u8 max_txpwr_levels_idx; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -6367,7 +6435,7 @@ static inline int rate_supported(struct ieee80211_sta *sta, enum nl80211_band band, int index) { - return (sta == NULL || sta->supp_rates[band] & BIT(index)); + return (sta == NULL || sta->deflink.supp_rates[band] & BIT(index)); } static inline s8 diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 2c3bbc6645ba..bdac0ddbdcdb 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -498,4 +498,23 @@ void ieee802154_stop_queue(struct ieee802154_hw *hw); void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb, bool ifs_handling); +/** + * ieee802154_xmit_error - offloaded frame transmission failed + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). + * @skb: buffer for transmission + * @reason: error code + */ +void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb, + int reason); + +/** + * ieee802154_xmit_hw_error - frame could not be offloaded to the transmitter + * because of a hardware error (bus error, timeout, etc) + * + * @hw: pointer as obtained from ieee802154_alloc_hw(). + * @skb: buffer for transmission + */ +void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb); + #endif /* NET_MAC802154_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 0a3b0fb04a3b..4d761ad530c9 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -35,7 +35,8 @@ struct mptcp_ext { frozen:1, reset_transient:1; u8 reset_reason:4, - csum_reqd:1; + csum_reqd:1, + infinite_map:1; }; #define MPTCP_RM_IDS_MAX 8 @@ -124,7 +125,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts); bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, struct mptcp_out_options *opts); void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info); @@ -283,4 +284,10 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/net/net_debug.h b/include/net/net_debug.h new file mode 100644 index 000000000000..1e74684cbbdb --- /dev/null +++ b/include/net/net_debug.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NET_DEBUG_H +#define _LINUX_NET_DEBUG_H + +#include <linux/bug.h> +#include <linux/kern_levels.h> + +struct net_device; + +__printf(3, 4) __cold +void netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...); +__printf(2, 3) __cold +void netdev_emerg(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_alert(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_crit(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_err(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_warn(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_notice(const struct net_device *dev, const char *format, ...); +__printf(2, 3) __cold +void netdev_info(const struct net_device *dev, const char *format, ...); + +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __section(".data.once") __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_netdev_dbg(__dev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ +}) +#endif + +#if defined(VERBOSE_DEBUG) +#define netdev_vdbg netdev_dbg +#else + +#define netdev_vdbg(dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* netif printk helpers, similar to netdev_printk */ + +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) + +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define netif_dbg(priv, type, netdev, format, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + dynamic_netdev_dbg(netdev, format, ##args); \ +} while (0) +#elif defined(DEBUG) +#define netif_dbg(priv, type, dev, format, args...) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) +#else +#define netif_dbg(priv, type, dev, format, args...) \ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + +/* if @cond then downgrade to debug, else print at @level */ +#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ + do { \ + if (cond) \ + netif_dbg(priv, type, netdev, fmt, ##args); \ + else \ + netif_ ## level(priv, type, netdev, fmt, ##args); \ + } while (0) + +#if defined(VERBOSE_DEBUG) +#define netif_vdbg netif_dbg +#else +#define netif_vdbg(priv, type, dev, format, args...) \ +({ \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ + 0; \ +}) +#endif + + +#if defined(CONFIG_DEBUG_NET) +#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#else +#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#endif + +#endif /* _LINUX_NET_DEBUG_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b08b70989d2c..a32be8aa7ed2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -43,6 +43,12 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + spinlock_t dying_lock; + struct hlist_nulls_head dying_list; +}; + struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; @@ -58,8 +64,7 @@ struct nf_conntrack_net { struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct delayed_work ecache_dwork; - struct netns_ct *ct_net; + struct nf_conntrack_net_ecache ecache; #endif }; @@ -96,7 +101,6 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) @@ -232,13 +236,16 @@ static inline bool nf_ct_kill(struct nf_conn *ct) return nf_ct_delete(ct, 0, 0); } -/* Set all unconfirmed conntrack as dying */ -void nf_ct_unconfirmed_destroy(struct net *); +struct nf_ct_iter_data { + struct net *net; + void *data; + u32 portid; + int report; +}; /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup_net(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data), + const struct nf_ct_iter_data *iter_data); /* also set unconfirmed conntracks as dying. Only use in module exit path. */ void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 13807ea94cd2..6406cfee34c2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) if (ct) { if (!nf_ct_is_confirmed(ct)) ret = __nf_conntrack_confirm(skb); - if (likely(ret == NF_ACCEPT)) + if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) nf_ct_deliver_cached_events(ct); } return ret; diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 9645b47fa7e4..e227d997fc71 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -10,6 +10,7 @@ struct nf_conncount_data; struct nf_conncount_list { spinlock_t list_lock; + u32 last_gc; /* jiffies at most recent gc */ struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ }; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 6c4c490a3e34..0c1dac318e02 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,7 +14,6 @@ #include <net/netfilter/nf_conntrack_extend.h> enum nf_ct_ecache_state { - NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; @@ -23,7 +22,6 @@ struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - enum nf_ct_ecache_state state:8;/* ecache state */ u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; @@ -38,28 +36,12 @@ nf_ct_ecache_find(const struct nf_conn *ct) #endif } -static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +static inline bool nf_ct_ecache_exist(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *e; - - if (!ctmask && !expmask && net->ct.sysctl_events) { - ctmask = ~0; - expmask = ~0; - } - if (!ctmask && !expmask) - return NULL; - - e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); - if (e) { - e->ctmask = ctmask; - e->expmask = expmask; - } - return e; + return nf_ct_ext_exist(ct, NF_CT_EXT_ECACHE); #else - return NULL; + return false; #endif } @@ -91,6 +73,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct); int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, u32 portid, int report); +bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp); #else static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) @@ -105,6 +88,10 @@ static inline int nf_conntrack_eventmask_report(unsigned int eventmask, return 0; } +static inline bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) +{ + return false; +} #endif static inline void @@ -130,30 +117,20 @@ nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, u32 portid, int report) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return nf_conntrack_eventmask_report(1 << event, ct, portid, report); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); #endif + return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - const struct net *net = nf_ct_net(ct); - - if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) - return 0; - - return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); -#else - return 0; + if (nf_ct_ecache_exist(ct)) + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); #endif + return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -166,6 +143,8 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); + static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return net->ct.ecache_dwork_pending; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 96635ad2acc7..0b247248b032 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -34,21 +34,11 @@ enum nf_ct_ext_id { NF_CT_EXT_NUM, }; -#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help -#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat -#define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj -#define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct -#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp -#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout -#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels -#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy -#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext - /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; + unsigned int gen_id; char data[] __aligned(8); }; @@ -62,17 +52,28 @@ static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); } -static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) +void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); + +static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) { - if (!nf_ct_ext_exist(ct, id)) + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, id)) return NULL; + if (unlikely(ext->gen_id)) + return __nf_ct_ext_find(ext, id); + return (void *)ct->ext + ct->ext->offset[id]; } -#define nf_ct_ext_find(ext, id) \ - ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) /* Add this type, returns pointer to data or NULL. */ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +/* ext genid. if ext->id != ext_genid, extensions cannot be used + * anymore unless conntrack has CONFIRMED bit set. + */ +extern atomic_t nf_conntrack_ext_genid; +void nf_ct_ext_bump_genid(void); + #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 3c23298e68ca..66bab6c60d12 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -17,10 +17,18 @@ struct nf_conn_labels { unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; +/* Can't use nf_ct_ext_find(), flow dissector cannot use symbols + * exported by nf_conntrack module. + */ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); + struct nf_ct_ext *ext = ct->ext; + + if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS)) + return NULL; + + return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS]; #else return NULL; #endif diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 3ea94f6f3844..fea258983d23 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -17,14 +17,6 @@ struct nf_ct_timeout { char data[]; }; -struct ctnl_timeout { - struct list_head head; - struct rcu_head rcu_head; - refcount_t refcnt; - char name[CTNL_TIMEOUT_NAME_MAX]; - struct nf_ct_timeout timeout; -}; - struct nf_conn_timeout { struct nf_ct_timeout __rcu *timeout; }; diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h index 9051c3a0c8e7..7c669792fb9c 100644 --- a/include/net/netfilter/nf_reject.h +++ b/include/net/netfilter/nf_reject.h @@ -5,12 +5,28 @@ #include <linux/types.h> #include <uapi/linux/in.h> -static inline bool nf_reject_verify_csum(__u8 proto) +static inline bool nf_reject_verify_csum(struct sk_buff *skb, int dataoff, + __u8 proto) { /* Skip protocols that don't use 16-bit one's complement checksum * of the entire payload. */ switch (proto) { + /* Protocols with optional checksums. */ + case IPPROTO_UDP: { + const struct udphdr *udp_hdr; + struct udphdr _udp_hdr; + + udp_hdr = skb_header_pointer(skb, dataoff, + sizeof(_udp_hdr), + &_udp_hdr); + if (!udp_hdr || udp_hdr->check) + return true; + + return false; + } + case IPPROTO_GRE: + /* Protocols with other integrity checks. */ case IPPROTO_AH: case IPPROTO_ESP: @@ -19,9 +35,6 @@ static inline bool nf_reject_verify_csum(__u8 proto) /* Protocols with partial checksums. */ case IPPROTO_UDPLITE: case IPPROTO_DCCP: - - /* Protocols with optional checksums. */ - case IPPROTO_GRE: return false; } return true; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0294f3d473af..0677cd3de034 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -93,14 +93,9 @@ struct nf_ip_net { #endif }; -struct ct_pcpu { - spinlock_t lock; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; -}; - struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS + bool ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ @@ -110,7 +105,6 @@ struct netns_ct { u8 sysctl_tstamp; u8 sysctl_checksum; - struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ea5fb70e5101..813c93499f20 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -117,6 +117,10 @@ struct page_pool_stats { struct page_pool_recycle_stats recycle_stats; }; +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + /* * Drivers that wish to harvest page pool stats and report them to users * (perhaps via ethtool, debugfs, or another mechanism) can allocate a @@ -124,6 +128,23 @@ struct page_pool_stats { */ bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats); +#else + +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} + #endif struct page_pool { diff --git a/include/net/ping.h b/include/net/ping.h index 2fe78874318c..e4ff3911cbf5 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -bool ping_rcv(struct sk_buff *skb); +enum skb_drop_reason ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a3b57a93228a..8cf001aed858 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -547,10 +547,12 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, + struct netlink_ext_ack *extack); void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]); + struct tc_action *actions[], + struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); diff --git a/include/net/route.h b/include/net/route.h index 25404fc2b483..991a3985712d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,19 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +static inline __u8 ip_sock_rt_scope(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_LOCALROUTE)) + return RT_SCOPE_LINK; + + return RT_SCOPE_UNIVERSE; +} + +static inline __u8 ip_sock_rt_tos(const struct sock *sk) +{ + return RT_TOS(inet_sk(sk)->tos); +} + struct ip_tunnel_info; struct fib_nh; struct fib_info; @@ -289,39 +302,38 @@ static inline char rt_tos2priority(u8 tos) * ip_route_newports() calls. */ -static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, - u32 tos, int oif, u8 protocol, +static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk) + const struct sock *sk) { __u8 flow_flags = 0; if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport, - sk->sk_uid); + flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + ip_sock_rt_scope(sk), protocol, flow_flags, dst, + src, dport, sport, sk->sk_uid); } -static inline struct rtable *ip_route_connect(struct flowi4 *fl4, - __be32 dst, __be32 src, u32 tos, - int oif, u8 protocol, +static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; - ip_route_connect_init(fl4, dst, src, tos, oif, protocol, - sport, dport, sk); + ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); - flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); + flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr, + fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 9f48733bfd21..bf8bb3357825 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { - RTNL_FLAG_DOIT_UNLOCKED = 1, + RTNL_FLAG_DOIT_UNLOCKED = BIT(0), + RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), }; +enum rtnl_kinds { + RTNL_KIND_NEW, + RTNL_KIND_DEL, + RTNL_KIND_GET, + RTNL_KIND_SET +}; +#define RTNL_KIND_MASK 0x3 + +static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) +{ + return msgtype & RTNL_KIND_MASK; +} + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bf3716fe83e0..a04999ee99b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,7 +103,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); -struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); diff --git a/include/net/sock.h b/include/net/sock.h index c4b91fc19b9c..c585ef6565d9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned - * address on 64bit arches : cf INET_MATCH() - */ union { __addrpair skc_addrpair; struct { @@ -292,7 +289,6 @@ struct sk_filter; * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held - * @defer_list: head of llist storing skbs to be freed * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, @@ -352,6 +348,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -417,7 +414,6 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; - struct llist_head defer_list; #define sk_rmem_alloc sk_backlog.rmem_alloc @@ -542,6 +538,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -822,6 +819,16 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } +static inline void __sk_del_bind2_node(struct sock *sk) +{ + __hlist_del(&sk->sk_bind2_node); +} + +static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head(&sk->sk_bind2_node, list); +} + #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -839,6 +846,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_bhash2(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset @@ -895,6 +904,7 @@ enum sock_flags { SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ + SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -1202,8 +1212,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len); + size_t len, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, @@ -1825,11 +1834,17 @@ int sock_getsockopt(struct socket *sock, int level, int op, char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, - int noblock, int *errcode); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order); + +static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk, + unsigned long size, + int noblock, int *errcode) +{ + return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); +} + void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); @@ -2392,7 +2407,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); + +static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return sock_queue_rcv_skb_reason(sk, skb, NULL); +} int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); struct sk_buff *sock_dequeue_err_skb(struct sock *sk); @@ -2643,20 +2665,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) -static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { -#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ - (1UL << SOCK_RCVTSTAMP)) +#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ + (1UL << SOCK_RCVTSTAMP) | \ + (1UL << SOCK_RCVMARK)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) - __sock_recv_ts_and_drops(msg, sk, skb); + if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) @@ -2866,13 +2889,14 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val) */ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); int mdif; - if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + if (!bound_dev_if || bound_dev_if == dif) return true; mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); - if (mdif && mdif == sk->sk_bound_dev_if) + if (mdif && mdif == bound_dev_if) return true; return false; diff --git a/include/net/strparser.h b/include/net/strparser.h index 732b7097d78e..a191486eb1e4 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -70,6 +70,10 @@ struct sk_skb_cb { * when dst_reg == src_reg. */ u64 temp_reg; + struct tls_msg { + u8 control; + u8 decrypted; + } tls; }; static inline struct strp_msg *strp_msg(struct sk_buff *skb) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index eb8f01c819e6..832efd40e023 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } +static inline bool is_tcf_gact_continue(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false); +} + +static inline bool is_tcf_gact_reclassify(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false); +} + +static inline bool is_tcf_gact_pipe(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_PIPE, false); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 00bfee70609e..dc1079f28e13 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -17,6 +17,7 @@ struct tcf_skbedit_params { u32 mark; u32 mask; u16 queue_mapping; + u16 mapping_mod; u16 ptype; struct rcu_head rcu; }; @@ -94,4 +95,16 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +/* Return true iff action is queue_mapping */ +static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); +} + +/* Return true iff action is inheritdsfield */ +static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD); +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index cc1295037533..1e99f5c61f84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -407,7 +407,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); @@ -1142,15 +1142,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } -static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ca_ops->set_state) - icsk->icsk_ca_ops->set_state(sk, ca_state); - icsk->icsk_ca_state = ca_state; -} - static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1159,6 +1150,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_cong.c */ +void tcp_set_ca_state(struct sock *sk, const u8 ca_state); + /* From tcp_rate.c */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, @@ -1215,9 +1209,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) +{ + return tp->snd_cwnd; +} + +static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) +{ + WARN_ON_ONCE((int)val <= 0); + tp->snd_cwnd = val; +} + static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { - return tp->snd_cwnd < tp->snd_ssthresh; + return tcp_snd_cwnd(tp) < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) @@ -1243,8 +1248,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, - ((tp->snd_cwnd >> 1) + - (tp->snd_cwnd >> 2))); + ((tcp_snd_cwnd(tp) >> 1) + + (tcp_snd_cwnd(tp) >> 2))); } /* Use define here intentionally to get WARN_ON location shown at the caller */ @@ -1286,7 +1291,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) - return tp->snd_cwnd < 2 * tp->max_packets_out; + return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; return tp->is_cwnd_limited; } @@ -1378,18 +1383,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); -#ifdef CONFIG_INET -void __sk_defer_free_flush(struct sock *sk); - -static inline void sk_defer_free_flush(struct sock *sk) -{ - if (llist_empty(&sk->defer_list)) - return; - __sk_defer_free_flush(sk); -} -#else -static inline void sk_defer_free_flush(struct sock *sk) {} -#endif int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); diff --git a/include/net/tls.h b/include/net/tls.h index b6968a5b5538..8017f1703447 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -64,6 +64,7 @@ #define TLS_AAD_SPACE_SIZE 13 #define MAX_IV_SIZE 16 +#define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 /* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes. @@ -117,11 +118,6 @@ struct tls_rec { u8 aead_req_ctx[]; }; -struct tls_msg { - struct strp_msg rxm; - u8 control; -}; - struct tx_work { struct delayed_work work; struct sock *sk; @@ -152,13 +148,10 @@ struct tls_sw_context_rx { void (*saved_data_ready)(struct sock *sk); struct sk_buff *recv_pkt; - u8 control; u8 async_capable:1; - u8 decrypted:1; atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; - bool async_notify; }; struct tls_record_info { @@ -245,6 +238,7 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; + u8 zerocopy_sendfile:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -378,7 +372,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len); + int flags, int *addr_len); bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, @@ -411,7 +405,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { - return (struct tls_msg *)strp_msg(skb); + struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb; + + return &scb->tls; } static inline bool tls_is_partially_sent_record(struct tls_context *ctx) diff --git a/include/net/udp.h b/include/net/udp.h index f1c2a88c9005..b83a00330566 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -250,14 +250,14 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, + int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_udp(sk, flags, noblock, &off, err); + return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d2efddce65d4..c39d910d4b45 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -126,13 +126,17 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; -struct xfrm_state_offload { +enum { + XFRM_DEV_OFFLOAD_IN = 1, + XFRM_DEV_OFFLOAD_OUT, +}; + +struct xfrm_dev_offload { struct net_device *dev; netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; - unsigned int num_exthdrs; - u8 flags; + u8 dir : 2; }; struct xfrm_mode { @@ -247,7 +251,7 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct hrtimer mtimer; - struct xfrm_state_offload xso; + struct xfrm_dev_offload xso; /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -1006,7 +1010,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 -#define XFRM_ESP_NO_TRAILER 64 +/* 64 is free */ #define XFRM_DEV_RESUME 128 #define XFRM_XMIT 256 @@ -1878,7 +1882,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); @@ -1904,7 +1908,7 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) static inline void xfrm_dev_state_delete(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; if (xso->dev) xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); @@ -1912,7 +1916,7 @@ static inline void xfrm_dev_state_delete(struct xfrm_state *x) static inline void xfrm_dev_state_free(struct xfrm_state *x) { - struct xfrm_state_offload *xso = &x->xso; + struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 69d883f7fb41..11ee4eaf84bd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2497,15 +2497,7 @@ struct ib_device_ops { struct ib_flow_attr *flow_attr, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); - struct ib_flow_action *(*create_flow_action_esp)( - struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*destroy_flow_action)(struct ib_flow_action *action); - int (*modify_flow_action_esp)( - struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); int (*get_vf_config)(struct ib_device *device, int vf, u32 port, diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 9b4e6c78d0f4..5f88385a7748 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -105,6 +105,11 @@ #define REG_RESERVED_ADDR 0xffffffff #define REG_RESERVED(reg) REG(reg, REG_RESERVED_ADDR) +#define for_each_stat(ocelot, stat) \ + for ((stat) = (ocelot)->stats_layout; \ + ((stat)->name[0] != '\0'); \ + (stat)++) + enum ocelot_target { ANA = 1, QS, @@ -538,6 +543,8 @@ struct ocelot_stat_layout { char name[ETH_GSTRING_LEN]; }; +#define OCELOT_STAT_END { .name = "" } + struct ocelot_stats_region { struct list_head node; u32 offset; @@ -647,34 +654,41 @@ struct ocelot_mirror { int to; }; +struct ocelot_port; + struct ocelot_port { struct ocelot *ocelot; struct regmap *target; - bool vlan_aware; + struct net_device *bond; + struct net_device *bridge; + + struct ocelot_port *dsa_8021q_cpu; + /* VLAN that untagged frames are classified to, on ingress */ const struct ocelot_bridge_vlan *pvid_vlan; + phy_interface_t phy_mode; + unsigned int ptp_skbs_in_flight; - u8 ptp_cmd; struct sk_buff_head tx_skbs; + + u16 mrp_ring_id; + + u8 ptp_cmd; u8 ts_id; - phy_interface_t phy_mode; + u8 index; - u8 *xmit_template; + u8 stp_state; + bool vlan_aware; bool is_dsa_8021q_cpu; bool learn_ena; - struct net_device *bond; bool lag_tx_active; - u16 mrp_ring_id; - - struct net_device *bridge; int bridge_num; - u8 stp_state; int speed; }; @@ -855,8 +869,9 @@ void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); void ocelot_deinit_port(struct ocelot *ocelot, int port); -void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port); -void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port); +void ocelot_port_assign_dsa_8021q_cpu(struct ocelot *ocelot, int port, int cpu); +void ocelot_port_unassign_dsa_8021q_cpu(struct ocelot *ocelot, int port); +u32 ocelot_port_assigned_dsa_8021q_cpu_mask(struct ocelot *ocelot, int port); /* DSA callbacks */ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data); @@ -868,9 +883,7 @@ void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, struct netlink_ext_ack *extack); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state); -u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot); u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port); -void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining); int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, @@ -991,6 +1004,9 @@ int ocelot_mact_learn_streamdata(struct ocelot *ocelot, int dst_idx, enum macaccess_entry_type type, int sfid, int ssid); +int ocelot_migrate_mdbs(struct ocelot *ocelot, unsigned long from_mask, + unsigned long to_mask); + int ocelot_vcap_policer_add(struct ocelot *ocelot, u32 pol_ix, struct ocelot_policer *pol); int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix); diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index de26c992f821..c601a4598b0d 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -11,7 +11,7 @@ /* Cookie definitions for private VCAP filters installed by the driver. * Must be unique per VCAP block. */ -#define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index f8e28e686c65..563e48617374 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -84,6 +84,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __field(u8, reset_transient) __field(u8, reset_reason) __field(u8, csum_reqd) + __field(u8, infinite_map) ), TP_fast_assign( @@ -102,9 +103,10 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->reset_transient = mpext->reset_transient; __entry->reset_reason = mpext->reset_reason; __entry->csum_reqd = mpext->csum_reqd; + __entry->infinite_map = mpext->infinite_map; ), - TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u", + TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u infinite_map=%u", __entry->data_ack, __entry->data_seq, __entry->subflow_seq, __entry->data_len, __entry->csum, __entry->use_map, @@ -112,7 +114,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, __entry->use_ack, __entry->ack64, __entry->mpc_map, __entry->frozen, __entry->reset_transient, __entry->reset_reason, - __entry->csum_reqd) + __entry->csum_reqd, __entry->infinite_map) ); DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag, diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 1c714336b863..d20bf4aa0204 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -14,215 +14,6 @@ #include <linux/errqueue.h> /* - * Define enums for tracing information. - * - * These should all be kept sorted, making it easier to match the string - * mapping tables further on. - */ -#ifndef __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY -#define __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY - -enum rxrpc_skb_trace { - rxrpc_skb_cleaned, - rxrpc_skb_freed, - rxrpc_skb_got, - rxrpc_skb_lost, - rxrpc_skb_new, - rxrpc_skb_purged, - rxrpc_skb_received, - rxrpc_skb_rotated, - rxrpc_skb_seen, - rxrpc_skb_unshared, - rxrpc_skb_unshared_nomem, -}; - -enum rxrpc_local_trace { - rxrpc_local_got, - rxrpc_local_new, - rxrpc_local_processing, - rxrpc_local_put, - rxrpc_local_queued, -}; - -enum rxrpc_peer_trace { - rxrpc_peer_got, - rxrpc_peer_new, - rxrpc_peer_processing, - rxrpc_peer_put, -}; - -enum rxrpc_conn_trace { - rxrpc_conn_got, - rxrpc_conn_new_client, - rxrpc_conn_new_service, - rxrpc_conn_put_client, - rxrpc_conn_put_service, - rxrpc_conn_queued, - rxrpc_conn_reap_service, - rxrpc_conn_seen, -}; - -enum rxrpc_client_trace { - rxrpc_client_activate_chans, - rxrpc_client_alloc, - rxrpc_client_chan_activate, - rxrpc_client_chan_disconnect, - rxrpc_client_chan_pass, - rxrpc_client_chan_wait_failed, - rxrpc_client_cleanup, - rxrpc_client_discard, - rxrpc_client_duplicate, - rxrpc_client_exposed, - rxrpc_client_replace, - rxrpc_client_to_active, - rxrpc_client_to_idle, -}; - -enum rxrpc_call_trace { - rxrpc_call_connected, - rxrpc_call_error, - rxrpc_call_got, - rxrpc_call_got_kernel, - rxrpc_call_got_timer, - rxrpc_call_got_userid, - rxrpc_call_new_client, - rxrpc_call_new_service, - rxrpc_call_put, - rxrpc_call_put_kernel, - rxrpc_call_put_noqueue, - rxrpc_call_put_notimer, - rxrpc_call_put_timer, - rxrpc_call_put_userid, - rxrpc_call_queued, - rxrpc_call_queued_ref, - rxrpc_call_release, - rxrpc_call_seen, -}; - -enum rxrpc_transmit_trace { - rxrpc_transmit_await_reply, - rxrpc_transmit_end, - rxrpc_transmit_queue, - rxrpc_transmit_queue_last, - rxrpc_transmit_rotate, - rxrpc_transmit_rotate_last, - rxrpc_transmit_wait, -}; - -enum rxrpc_receive_trace { - rxrpc_receive_end, - rxrpc_receive_front, - rxrpc_receive_incoming, - rxrpc_receive_queue, - rxrpc_receive_queue_last, - rxrpc_receive_rotate, -}; - -enum rxrpc_recvmsg_trace { - rxrpc_recvmsg_cont, - rxrpc_recvmsg_data_return, - rxrpc_recvmsg_dequeue, - rxrpc_recvmsg_enter, - rxrpc_recvmsg_full, - rxrpc_recvmsg_hole, - rxrpc_recvmsg_next, - rxrpc_recvmsg_requeue, - rxrpc_recvmsg_return, - rxrpc_recvmsg_terminal, - rxrpc_recvmsg_to_be_accepted, - rxrpc_recvmsg_wait, -}; - -enum rxrpc_rtt_tx_trace { - rxrpc_rtt_tx_cancel, - rxrpc_rtt_tx_data, - rxrpc_rtt_tx_no_slot, - rxrpc_rtt_tx_ping, -}; - -enum rxrpc_rtt_rx_trace { - rxrpc_rtt_rx_cancel, - rxrpc_rtt_rx_lost, - rxrpc_rtt_rx_obsolete, - rxrpc_rtt_rx_ping_response, - rxrpc_rtt_rx_requested_ack, -}; - -enum rxrpc_timer_trace { - rxrpc_timer_begin, - rxrpc_timer_exp_ack, - rxrpc_timer_exp_hard, - rxrpc_timer_exp_idle, - rxrpc_timer_exp_keepalive, - rxrpc_timer_exp_lost_ack, - rxrpc_timer_exp_normal, - rxrpc_timer_exp_ping, - rxrpc_timer_exp_resend, - rxrpc_timer_expired, - rxrpc_timer_init_for_reply, - rxrpc_timer_init_for_send_reply, - rxrpc_timer_restart, - rxrpc_timer_set_for_ack, - rxrpc_timer_set_for_hard, - rxrpc_timer_set_for_idle, - rxrpc_timer_set_for_keepalive, - rxrpc_timer_set_for_lost_ack, - rxrpc_timer_set_for_normal, - rxrpc_timer_set_for_ping, - rxrpc_timer_set_for_resend, - rxrpc_timer_set_for_send, -}; - -enum rxrpc_propose_ack_trace { - rxrpc_propose_ack_client_tx_end, - rxrpc_propose_ack_input_data, - rxrpc_propose_ack_ping_for_check_life, - rxrpc_propose_ack_ping_for_keepalive, - rxrpc_propose_ack_ping_for_lost_ack, - rxrpc_propose_ack_ping_for_lost_reply, - rxrpc_propose_ack_ping_for_params, - rxrpc_propose_ack_processing_op, - rxrpc_propose_ack_respond_to_ack, - rxrpc_propose_ack_respond_to_ping, - rxrpc_propose_ack_retry_tx, - rxrpc_propose_ack_rotate_rx, - rxrpc_propose_ack_terminal_ack, -}; - -enum rxrpc_propose_ack_outcome { - rxrpc_propose_ack_subsume, - rxrpc_propose_ack_update, - rxrpc_propose_ack_use, -}; - -enum rxrpc_congest_change { - rxrpc_cong_begin_retransmission, - rxrpc_cong_cleared_nacks, - rxrpc_cong_new_low_nack, - rxrpc_cong_no_change, - rxrpc_cong_progress, - rxrpc_cong_retransmit_again, - rxrpc_cong_rtt_window_end, - rxrpc_cong_saw_nack, -}; - -enum rxrpc_tx_point { - rxrpc_tx_point_call_abort, - rxrpc_tx_point_call_ack, - rxrpc_tx_point_call_data_frag, - rxrpc_tx_point_call_data_nofrag, - rxrpc_tx_point_call_final_resend, - rxrpc_tx_point_conn_abort, - rxrpc_tx_point_rxkad_challenge, - rxrpc_tx_point_rxkad_response, - rxrpc_tx_point_reject, - rxrpc_tx_point_version_keepalive, - rxrpc_tx_point_version_reply, -}; - -#endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ - -/* * Declare tracing information enums and their string mappings for display. */ #define rxrpc_skb_traces \ @@ -452,6 +243,36 @@ enum rxrpc_tx_point { E_(rxrpc_tx_point_version_reply, "VerReply") /* + * Generate enums for tracing information. + */ +#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#undef EM +#undef E_ +#define EM(a, b) a, +#define E_(a, b) a + +enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte); +enum rxrpc_client_trace { rxrpc_client_traces } __mode(byte); +enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte); +enum rxrpc_conn_trace { rxrpc_conn_traces } __mode(byte); +enum rxrpc_local_trace { rxrpc_local_traces } __mode(byte); +enum rxrpc_peer_trace { rxrpc_peer_traces } __mode(byte); +enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte); +enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); +enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); +enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte); +enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte); +enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); +enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); +enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); +enum rxrpc_transmit_trace { rxrpc_transmit_traces } __mode(byte); +enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); + +#endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ + +/* * Export enum symbols via userspace. */ #undef EM @@ -459,21 +280,21 @@ enum rxrpc_tx_point { #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); -rxrpc_skb_traces; -rxrpc_local_traces; -rxrpc_conn_traces; -rxrpc_client_traces; rxrpc_call_traces; -rxrpc_transmit_traces; +rxrpc_client_traces; +rxrpc_congest_changes; +rxrpc_congest_modes; +rxrpc_conn_traces; +rxrpc_local_traces; +rxrpc_propose_ack_outcomes; +rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; -rxrpc_rtt_tx_traces; rxrpc_rtt_rx_traces; +rxrpc_rtt_tx_traces; +rxrpc_skb_traces; rxrpc_timer_traces; -rxrpc_propose_ack_traces; -rxrpc_propose_ack_outcomes; -rxrpc_congest_modes; -rxrpc_congest_changes; +rxrpc_transmit_traces; rxrpc_tx_points; /* @@ -583,7 +404,7 @@ TRACE_EVENT(rxrpc_client, TP_fast_assign( __entry->conn = conn ? conn->debug_id : 0; __entry->channel = channel; - __entry->usage = conn ? atomic_read(&conn->usage) : -2; + __entry->usage = conn ? refcount_read(&conn->ref) : -2; __entry->op = op; __entry->cid = conn ? conn->proto.cid : 0; ), @@ -1574,6 +1395,8 @@ TRACE_EVENT(rxrpc_rx_discard_ack, __entry->call_ackr_prev) ); +#undef EM +#undef E_ #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index e1670e1e4934..a477bf907498 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -37,6 +37,20 @@ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ + EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ + EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ + EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ + TCP_INVALID_SEQUENCE) \ + EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ + EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ + EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ + EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ + EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ + EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ + EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ + TCP_ACK_UNSENT_DATA) \ + EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ + TCP_OFO_QUEUE_PRUNE) \ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ BPF_CGROUP_EGRESS) \ @@ -50,7 +64,7 @@ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ + EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ @@ -61,6 +75,11 @@ EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ + EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ + EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ + EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ + EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ + EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 521059d8dc0a..901b440238d5 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe, __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; - __entry->snd_cwnd = tp->snd_cwnd; + __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); @@ -371,6 +371,51 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_ARGS(skb) ); +TRACE_EVENT(tcp_cong_state_set, + + TP_PROTO(struct sock *sk, const u8 ca_state), + + TP_ARGS(sk, ca_state), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + __field(__u8, cong_state) + ), + + TP_fast_assign( + struct inet_sock *inet = inet_sk(sk); + __be32 *p32; + + __entry->skaddr = sk; + + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->cong_state = ca_state; + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6, + __entry->cong_state) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 467ca2f28760..638230899e98 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -130,6 +130,8 @@ #define SO_TXREHASH 74 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h deleted file mode 100644 index 5135027b93c1..000000000000 --- a/include/uapi/linux/atm_zatm.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* atm_zatm.h - Driver-specific declarations of the ZATM driver (for use by - driver-specific utilities) */ - -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef LINUX_ATM_ZATM_H -#define LINUX_ATM_ZATM_H - -/* - * Note: non-kernel programs including this file must also include - * sys/types.h for struct timeval - */ - -#include <linux/atmapi.h> -#include <linux/atmioc.h> - -#define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) - /* get pool statistics */ -#define ZATM_GETPOOLZ _IOW('a',ATMIOC_SARPRV+2,struct atmif_sioc) - /* get statistics and zero */ -#define ZATM_SETPOOL _IOW('a',ATMIOC_SARPRV+3,struct atmif_sioc) - /* set pool parameters */ - -struct zatm_pool_info { - int ref_count; /* free buffer pool usage counters */ - int low_water,high_water; /* refill parameters */ - int rqa_count,rqu_count; /* queue condition counters */ - int offset,next_off; /* alignment optimizations: offset */ - int next_cnt,next_thres; /* repetition counter and threshold */ -}; - -struct zatm_pool_req { - int pool_num; /* pool number */ - struct zatm_pool_info info; /* actual information */ -}; - -#define ZATM_OAM_POOL 0 /* free buffer pool for OAM cells */ -#define ZATM_AAL0_POOL 1 /* free buffer pool for AAL0 cells */ -#define ZATM_AAL5_POOL_BASE 2 /* first AAL5 free buffer pool */ -#define ZATM_LAST_POOL ZATM_AAL5_POOL_BASE+10 /* max. 64 kB */ - -#define ZATM_TIMER_HISTORY_SIZE 16 /* number of timer adjustments to - record; must be 2^n */ - -#endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d14b10b85e51..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; @@ -1489,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; @@ -5143,6 +5153,102 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5445,16 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5592,6 +5708,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -6486,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 590f8aea2b6d..439c982f7e81 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -124,18 +124,19 @@ struct can_isotp_ll_options { /* flags for isotp behaviour */ -#define CAN_ISOTP_LISTEN_MODE 0x001 /* listen only (do not send FC) */ -#define CAN_ISOTP_EXTEND_ADDR 0x002 /* enable extended addressing */ -#define CAN_ISOTP_TX_PADDING 0x004 /* enable CAN frame padding tx path */ -#define CAN_ISOTP_RX_PADDING 0x008 /* enable CAN frame padding rx path */ -#define CAN_ISOTP_CHK_PAD_LEN 0x010 /* check received CAN frame padding */ -#define CAN_ISOTP_CHK_PAD_DATA 0x020 /* check received CAN frame padding */ -#define CAN_ISOTP_HALF_DUPLEX 0x040 /* half duplex error state handling */ -#define CAN_ISOTP_FORCE_TXSTMIN 0x080 /* ignore stmin from received FC */ -#define CAN_ISOTP_FORCE_RXSTMIN 0x100 /* ignore CFs depending on rx stmin */ -#define CAN_ISOTP_RX_EXT_ADDR 0x200 /* different rx extended addressing */ -#define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ -#define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_LISTEN_MODE 0x0001 /* listen only (do not send FC) */ +#define CAN_ISOTP_EXTEND_ADDR 0x0002 /* enable extended addressing */ +#define CAN_ISOTP_TX_PADDING 0x0004 /* enable CAN frame padding tx path */ +#define CAN_ISOTP_RX_PADDING 0x0008 /* enable CAN frame padding rx path */ +#define CAN_ISOTP_CHK_PAD_LEN 0x0010 /* check received CAN frame padding */ +#define CAN_ISOTP_CHK_PAD_DATA 0x0020 /* check received CAN frame padding */ +#define CAN_ISOTP_HALF_DUPLEX 0x0040 /* half duplex error state handling */ +#define CAN_ISOTP_FORCE_TXSTMIN 0x0080 /* ignore stmin from received FC */ +#define CAN_ISOTP_FORCE_RXSTMIN 0x0100 /* ignore CFs depending on rx stmin */ +#define CAN_ISOTP_RX_EXT_ADDR 0x0200 /* different rx extended addressing */ +#define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */ +#define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */ /* protocol machine default values */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b897b80770f6..b3d40a5d72ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -131,6 +131,11 @@ enum devlink_command { DEVLINK_CMD_RATE_NEW, DEVLINK_CMD_RATE_DEL, + DEVLINK_CMD_LINECARD_GET, /* can dump */ + DEVLINK_CMD_LINECARD_SET, + DEVLINK_CMD_LINECARD_NEW, + DEVLINK_CMD_LINECARD_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -338,6 +343,19 @@ enum devlink_reload_limit { #define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1) +enum devlink_linecard_state { + DEVLINK_LINECARD_STATE_UNSPEC, + DEVLINK_LINECARD_STATE_UNPROVISIONED, + DEVLINK_LINECARD_STATE_UNPROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING_FAILED, + DEVLINK_LINECARD_STATE_PROVISIONED, + DEVLINK_LINECARD_STATE_ACTIVE, + + __DEVLINK_LINECARD_STATE_MAX, + DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -553,6 +571,11 @@ enum devlink_attr { DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ + DEVLINK_ATTR_LINECARD_INDEX, /* u32 */ + DEVLINK_ATTR_LINECARD_STATE, /* u8 */ + DEVLINK_ATTR_LINECARD_TYPE, /* string */ + DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7bc4b8def12c..e0f0ee9bc89e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1691,6 +1691,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, + ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 979850221b8d..d2fb4f7be61b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -338,6 +338,7 @@ enum { ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc284c048e69..5f58dcfe2787 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,8 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; }; /* Subset of link stats useful for in-HW collection. Meaning of the fields is as @@ -363,6 +368,8 @@ enum { IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, __IFLA_MAX }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d4178dace0bf..549ddeaf788b 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,6 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, + DEVCONF_ACCEPT_UNSOLICITED_NA, DEVCONF_MAX }; diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c54e6eae5366..75b7257a51e1 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -67,6 +67,19 @@ #define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */ #define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */ #define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */ +#define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ +#define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ +#define MDIO_PMA_PMD_BT1 18 /* BASE-T1 PMA/PMD extended ability */ +#define MDIO_AN_T1_CTRL 512 /* BASE-T1 AN control */ +#define MDIO_AN_T1_STAT 513 /* BASE-T1 AN status */ +#define MDIO_AN_T1_ADV_L 514 /* BASE-T1 AN advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_M 515 /* BASE-T1 AN advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_H 516 /* BASE-T1 AN advertisement register [47:32] */ +#define MDIO_AN_T1_LP_L 517 /* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_M 518 /* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_H 519 /* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_PMA_PMD_BT1_CTRL 2100 /* BASE-T1 PMA/PMD control register */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -159,6 +172,7 @@ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ #define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ #define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ +#define MDIO_PMA_CTRL2_BASET1 0x003D /* BASE-T1 type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -212,6 +226,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_BT1 0x0800 /* BASE-T1 ability */ #define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ @@ -268,6 +283,66 @@ #define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */ #define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */ +/* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_CTRL_LB_EN 0x0001 /* Enable loopback mode */ +#define MDIO_PMA_10T1L_CTRL_EEE_EN 0x0400 /* Enable EEE mode */ +#define MDIO_PMA_10T1L_CTRL_LOW_POWER 0x0800 /* Low-power mode */ +#define MDIO_PMA_10T1L_CTRL_2V4_EN 0x1000 /* Enable 2.4 Vpp operating mode */ +#define MDIO_PMA_10T1L_CTRL_TX_DIS 0x4000 /* Transmit disable */ +#define MDIO_PMA_10T1L_CTRL_PMA_RST 0x8000 /* MA reset */ + +/* 10BASE-T1L PMA status register. */ +#define MDIO_PMA_10T1L_STAT_LINK 0x0001 /* PMA receive link up */ +#define MDIO_PMA_10T1L_STAT_FAULT 0x0002 /* Fault condition detected */ +#define MDIO_PMA_10T1L_STAT_POLARITY 0x0004 /* Receive polarity is reversed */ +#define MDIO_PMA_10T1L_STAT_RECV_FAULT 0x0200 /* Able to detect fault on receive path */ +#define MDIO_PMA_10T1L_STAT_EEE 0x0400 /* PHY has EEE ability */ +#define MDIO_PMA_10T1L_STAT_LOW_POWER 0x0800 /* PMA has low-power ability */ +#define MDIO_PMA_10T1L_STAT_2V4_ABLE 0x1000 /* PHY has 2.4 Vpp operating mode ability */ +#define MDIO_PMA_10T1L_STAT_LB_ABLE 0x2000 /* PHY has loopback ability */ + +/* 10BASE-T1L PCS control register. */ +#define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ +#define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ + +/* BASE-T1 PMA/PMD extended ability register. */ +#define MDIO_PMA_PMD_BT1_B10L_ABLE 0x0004 /* 10BASE-T1L Ability */ + +/* BASE-T1 auto-negotiation advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_L_PAUSE_CAP ADVERTISE_PAUSE_CAP +#define MDIO_AN_T1_ADV_L_PAUSE_ASYM ADVERTISE_PAUSE_ASYM +#define MDIO_AN_T1_ADV_L_FORCE_MS 0x1000 /* Force Master/slave Configuration */ +#define MDIO_AN_T1_ADV_L_REMOTE_FAULT ADVERTISE_RFAULT +#define MDIO_AN_T1_ADV_L_ACK ADVERTISE_LPACK +#define MDIO_AN_T1_ADV_L_NEXT_PAGE_REQ ADVERTISE_NPAGE + +/* BASE-T1 auto-negotiation advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_M_B10L 0x4000 /* device is compatible with 10BASE-T1L */ +#define MDIO_AN_T1_ADV_M_MST 0x0010 /* advertise master preference */ + +/* BASE-T1 auto-negotiation advertisement register [47:32] */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level Transmit Request */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level Transmit Ability */ + +/* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_L_PAUSE_CAP LPA_PAUSE_CAP +#define MDIO_AN_T1_LP_L_PAUSE_ASYM LPA_PAUSE_ASYM +#define MDIO_AN_T1_LP_L_FORCE_MS 0x1000 /* LP Force Master/slave Configuration */ +#define MDIO_AN_T1_LP_L_REMOTE_FAULT LPA_RFAULT +#define MDIO_AN_T1_LP_L_ACK LPA_LPACK +#define MDIO_AN_T1_LP_L_NEXT_PAGE_REQ LPA_NPAGE + +/* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_M_MST 0x0010 /* LP master preference */ +#define MDIO_AN_T1_LP_M_B10L 0x4000 /* LP is compatible with 10BASE-T1L */ + +/* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_AN_T1_LP_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level LP Transmit Request */ +#define MDIO_AN_T1_LP_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level LP Transmit Ability */ + +/* BASE-T1 PMA/PMD control register */ +#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST 0x4000 /* MASTER-SLAVE config value */ + /* EEE Supported/Advertisement/LP Advertisement registers. * * EEE capability Register (3.20), Advertisement (7.60) and diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 9690efedb5fa..921963589904 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -55,6 +55,9 @@ enum { MPTCP_PM_ATTR_ADDR, /* nested address */ MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */ MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ + MPTCP_PM_ATTR_TOKEN, /* u32 */ + MPTCP_PM_ATTR_LOC_ID, /* u8 */ + MPTCP_PM_ATTR_ADDR_REMOTE, /* nested address */ __MPTCP_PM_ATTR_MAX }; @@ -93,6 +96,10 @@ enum { MPTCP_PM_CMD_SET_LIMITS, MPTCP_PM_CMD_GET_LIMITS, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_CMD_ANNOUNCE, + MPTCP_PM_CMD_REMOVE, + MPTCP_PM_CMD_SUBFLOW_CREATE, + MPTCP_PM_CMD_SUBFLOW_DESTROY, __MPTCP_PM_CMD_AFTER_LAST }; @@ -188,6 +195,7 @@ enum mptcp_event_attr { MPTCP_ATTR_IF_IDX, /* s32 */ MPTCP_ATTR_RESET_REASON,/* u32 */ MPTCP_ATTR_RESET_FLAGS, /* u32 */ + MPTCP_ATTR_SERVER_SIDE, /* u8 */ __MPTCP_ATTR_AFTER_LAST }; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index db05fb55055e..39c565e460c7 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -32,6 +32,8 @@ enum { NDA_NH_ID, NDA_FDB_EXT_ATTRS, NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, __NDA_MAX }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 4c0cde075c27..855dffb4c1c3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -72,6 +72,7 @@ struct nlmsghdr { /* Modifiers to DELETE request */ #define NLM_F_NONREC 0x100 /* Do not delete recursively */ +#define NLM_F_BULK 0x200 /* Delete multiple objects */ /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0568a79097b8..d9490e3062a7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3175,6 +3175,8 @@ enum nl80211_attrs { NL80211_ATTR_EHT_CAPABILITY, + NL80211_ATTR_DISABLE_EHT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 404f97fb239c..9a2ee1e39fad 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -587,6 +587,8 @@ enum { TCA_FLOWER_KEY_HASH, /* u32 */ TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + TCA_FLOWER_KEY_NUM_OF_VLANS, /* u8 */ + __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 800e93377218..6cb6101208d0 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -29,6 +29,7 @@ #define SKBEDIT_F_PTYPE 0x8 #define SKBEDIT_F_MASK 0x10 #define SKBEDIT_F_INHERITDSFIELD 0x20 +#define SKBEDIT_F_TXQ_SKBHASH 0x40 struct tc_skbedit { tc_gen; @@ -45,6 +46,7 @@ enum { TCA_SKBEDIT_PTYPE, TCA_SKBEDIT_MASK, TCA_SKBEDIT_FLAGS, + TCA_SKBEDIT_QUEUE_MAPPING_MAX, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4dfc05651c98..c00adf2fe868 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -43,10 +43,6 @@ #include <linux/tipc.h> #include <asm/byteorder.h> -#ifndef __KERNEL__ -#include <arpa/inet.h> /* for ntohs etc. */ -#endif - /* * Configuration * @@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space) */ return (space >= TLV_SPACE(0)) && - (ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && - (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { - return ntohs(tlv->tlv_len); + return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { - tlv->tlv_len = htons(len); + tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { - return (ntohs(tlv->tlv_type) == type); + return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { - tlv->tlv_type = htons(type); + tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) @@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; - tlv_ptr->tlv_type = htons(type); - tlv_ptr->tlv_len = htons(tlv_len); + tlv_ptr->tlv_type = __cpu_to_be16(type); + tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); @@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { - __u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len)); + __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; @@ -404,9 +400,9 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; - tcm_hdr->tcm_len = htonl(msg_len); - tcm_hdr->tcm_type = htons(cmd); - tcm_hdr->tcm_flags = htons(flags); + tcm_hdr->tcm_len = __cpu_to_be32(msg_len); + tcm_hdr->tcm_type = __cpu_to_be16(cmd); + tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 5f38be0ec0f3..ac39328eabe7 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -39,6 +39,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ +#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -160,6 +161,7 @@ enum { TLS_INFO_CIPHER, TLS_INFO_TXCONF, TLS_INFO_RXCONF, + TLS_INFO_ZC_SENDFILE, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) |