Diffstat (limited to 'include')
94 files changed, 3483 insertions, 700 deletions
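The first file below, include/linux/avf/virtchnl.h, claims opcode 44 for VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, adds the VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC capability bit, and carves an rxdid byte out of the old pad1 field in struct virtchnl_rxq_info, so a VF can discover which flexible RX descriptor formats the PF supports and select one per queue. A minimal sketch of the VF side of that negotiation, assuming the capability was granted: the vf_adapter structure and vf_send_pf_msg() helper are hypothetical stand-ins for driver plumbing, only the virtchnl types and opcode come from this header.

#include <linux/errno.h>
#include <linux/avf/virtchnl.h>

/* Hypothetical adapter state - a real driver (e.g. iavf) has its own. */
struct vf_adapter {
	struct virtchnl_vf_resource *vf_res;	/* from VIRTCHNL_OP_GET_VF_RESOURCES */
	u64 supported_rxdids;
};

/* Illustrative mailbox send helper, not a real kernel API. */
int vf_send_pf_msg(struct vf_adapter *adapter, enum virtchnl_ops op,
		   u8 *msg, u16 len);

/* Ask the PF which flexible RX descriptor IDs it supports. The opcode
 * carries no request payload; the validator hunk below leaves valid_len
 * at zero for it.
 */
static int vf_query_supported_rxdids(struct vf_adapter *adapter)
{
	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC))
		return -EOPNOTSUPP;	/* PF did not offer flex descriptors */

	return vf_send_pf_msg(adapter, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS,
			      NULL, 0);
}

/* The PF answers with struct virtchnl_supported_rxdids; stash the bitmap,
 * then later program one chosen ID into virtchnl_rxq_info::rxdid per queue.
 */
static void vf_handle_supported_rxdids(struct vf_adapter *adapter,
				       const struct virtchnl_supported_rxdids *msg)
{
	adapter->supported_rxdids = msg->supported_rxdids;
}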
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2ce27e8e4f19..d91af50ac58d 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,7 +136,8 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, - /* opcode 34 - 44 are reserved */ + /* opcode 34 - 43 are reserved */ + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS = 44, VIRTCHNL_OP_ADD_RSS_CFG = 45, VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, @@ -263,6 +264,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM BIT(22) #define VIRTCHNL_VF_OFFLOAD_ADQ BIT(23) #define VIRTCHNL_VF_OFFLOAD_USO BIT(25) +#define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) @@ -318,7 +320,9 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u32 pad1; + u8 pad0; + u8 rxdid; + u8 pad1[2]; u64 dma_ring_addr; enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ u32 pad2; @@ -970,6 +974,10 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); +struct virtchnl_supported_rxdids { + u64 supported_rxdids; +}; + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -1499,6 +1507,8 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + break; case VIRTCHNL_OP_ADD_RSS_CFG: case VIRTCHNL_OP_DEL_RSS_CFG: valid_len = sizeof(struct virtchnl_rss_cfg); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c1bd1bd10506..c577e4c7c84b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -165,35 +165,43 @@ struct bpf_map_ops { }; enum { - /* Support at most 8 pointers in a BPF map value */ - BPF_MAP_VALUE_OFF_MAX = 8, - BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + - 1 + /* for bpf_spin_lock */ - 1, /* for bpf_timer */ + /* Support at most 8 pointers in a BTF type */ + BTF_FIELDS_MAX = 10, + BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX, }; -enum bpf_kptr_type { - BPF_KPTR_UNREF, - BPF_KPTR_REF, +enum btf_field_type { + BPF_SPIN_LOCK = (1 << 0), + BPF_TIMER = (1 << 1), + BPF_KPTR_UNREF = (1 << 2), + BPF_KPTR_REF = (1 << 3), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, }; -struct bpf_map_value_off_desc { +struct btf_field_kptr { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; +}; + +struct btf_field { u32 offset; - enum bpf_kptr_type type; - struct { - struct btf *btf; - struct module *module; - btf_dtor_kfunc_t dtor; - u32 btf_id; - } kptr; + enum btf_field_type type; + union { + struct btf_field_kptr kptr; + }; }; -struct bpf_map_value_off { - u32 nr_off; - struct bpf_map_value_off_desc off[]; +struct btf_record { + u32 cnt; + u32 field_mask; + int spin_lock_off; + int timer_off; + struct btf_field fields[]; }; -struct bpf_map_off_arr { +struct btf_field_offs { u32 cnt; u32 field_off[BPF_MAP_OFF_ARR_MAX]; u8 field_sz[BPF_MAP_OFF_ARR_MAX]; @@ -214,10 +222,8 @@ struct bpf_map { u32 max_entries; u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; - int spin_lock_off; /* >=0 valid offset, <0 error */ - struct bpf_map_value_off *kptr_off_tab; - int timer_off; /* >=0 valid offset, <0 error 
*/ u32 id; + struct btf_record *record; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; @@ -227,7 +233,7 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct bpf_map_off_arr *off_arr; + struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -251,33 +257,70 @@ struct bpf_map { bool frozen; /* write-once; write-protected by freeze_mutex */ }; -static inline bool map_value_has_spin_lock(const struct bpf_map *map) +static inline const char *btf_field_type_name(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + return "bpf_spin_lock"; + case BPF_TIMER: + return "bpf_timer"; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return "kptr"; + default: + WARN_ON_ONCE(1); + return "unknown"; + } +} + +static inline u32 btf_field_type_size(enum btf_field_type type) { - return map->spin_lock_off >= 0; + switch (type) { + case BPF_SPIN_LOCK: + return sizeof(struct bpf_spin_lock); + case BPF_TIMER: + return sizeof(struct bpf_timer); + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return sizeof(u64); + default: + WARN_ON_ONCE(1); + return 0; + } } -static inline bool map_value_has_timer(const struct bpf_map *map) +static inline u32 btf_field_type_align(enum btf_field_type type) { - return map->timer_off >= 0; + switch (type) { + case BPF_SPIN_LOCK: + return __alignof__(struct bpf_spin_lock); + case BPF_TIMER: + return __alignof__(struct bpf_timer); + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return __alignof__(u64); + default: + WARN_ON_ONCE(1); + return 0; + } } -static inline bool map_value_has_kptrs(const struct bpf_map *map) +static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { - return !IS_ERR_OR_NULL(map->kptr_off_tab); + if (IS_ERR_OR_NULL(rec)) + return false; + return rec->field_mask & type; } static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - if (unlikely(map_value_has_spin_lock(map))) - memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); - if (unlikely(map_value_has_timer(map))) - memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); - if (unlikely(map_value_has_kptrs(map))) { - struct bpf_map_value_off *tab = map->kptr_off_tab; + if (!IS_ERR_OR_NULL(map->record)) { + struct btf_field *fields = map->record->fields; + u32 cnt = map->record->cnt; int i; - for (i = 0; i < tab->nr_off; i++) - *(u64 *)(dst + tab->off[i].offset) = 0; + for (i = 0; i < cnt; i++) + memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type)); } } @@ -298,55 +341,64 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. 
*/ -static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) +static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, + void *dst, void *src, u32 size, + bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { + if (likely(!foffs)) { if (long_memcpy) - bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + bpf_long_memcpy(dst, src, round_up(size, 8)); else - memcpy(dst, src, map->value_size); + memcpy(dst, src, size); return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; + u32 sz = next_off - curr_off; - memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off = next_off + map->off_arr->field_sz[i]; + memcpy(dst + curr_off, src + curr_off, sz); + curr_off = next_off + foffs->field_sz[i]; } - memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); + memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, false); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, true); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); } -static inline void zero_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { - memset(dst, 0, map->value_size); + if (likely(!foffs)) { + memset(dst, 0, size); return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; + u32 sz = next_off - curr_off; - memset(dst + curr_off, 0, next_off - curr_off); - curr_off = next_off + map->off_arr->field_sz[i]; + memset(dst + curr_off, 0, sz); + curr_off = next_off + foffs->field_sz[i]; } - memset(dst + curr_off, 0, map->value_size - curr_off); + memset(dst + curr_off, 0, size - curr_off); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_memzero(map->field_offs, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, @@ -855,22 +907,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call); -/* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); +u64 notrace 
__bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); +typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); +bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); struct bpf_ksym { unsigned long start; @@ -1721,11 +1769,14 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); -struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); -void bpf_map_free_kptr_off_tab(struct bpf_map *map); -struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); -bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); -void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); +struct btf_field *btf_record_find(const struct btf_record *rec, + u32 offset, enum btf_field_type type); +void btf_record_free(struct btf_record *rec); +void bpf_map_free_record(struct bpf_map *map); +struct btf_record *btf_record_dup(const struct btf_record *rec); +bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); +void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_fields(const struct btf_record *rec, void *obj); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); @@ -2075,6 +2126,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, @@ -2329,6 +2381,10 @@ static inline bool has_current_bpf_ctx(void) static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } + +static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2553,7 +2609,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; @@ -2567,6 +2625,8 @@ extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; +extern const struct bpf_func_proto 
bpf_cgrp_storage_get_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 7ea18d4da84b..6d37a40cd90e 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = { \ .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \ } -u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); -void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, - u16 idx); - /* Helper functions for bpf_local_storage */ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); -struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); +struct bpf_map * +bpf_local_storage_map_alloc(union bpf_attr *attr, + struct bpf_local_storage_cache *cache); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, +bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); + +void bpf_local_storage_map_free(struct bpf_map *map, + struct bpf_local_storage_cache *cache, int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct bpf_map *map, @@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_omem, bool use_trace_rcu); - void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2c6a4f2562a7..d4ee3ccd3753 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e1e6965f407..1a32baa78ce2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -642,10 +642,23 @@ static inline u32 type_flag(u32 type) } /* only use after check_attach_btf_id() */ -static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_EXT ? 
prog->aux->dst_prog->type : prog->type; } +static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) +{ + switch (resolve_prog_type(prog)) { + case BPF_PROG_TYPE_TRACING: + return prog->expected_attach_type != BPF_TRACE_ITER; + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + return false; + default: + return true; + } +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index f9aababc5d78..d80345fa566b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -163,8 +163,9 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); -struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, - const struct btf_type *t); +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, + u32 field_mask, u32 value_size); +struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, @@ -288,6 +289,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; } +static inline bool btf_type_is_volatile(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE; +} + static inline bool btf_type_is_func(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 2aea877d644f..c9744efd202f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -265,5 +265,6 @@ MAX_BTF_TRACING_TYPE, }; extern u32 btf_tracing_ids[]; +extern u32 bpf_cgroup_btf_id[]; #endif diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6e01f10f0d88..8a0d5466c7be 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -507,6 +507,10 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *bpf_cgrp_storage; +#endif + /* All ancestors including self */ struct cgroup *ancestors[]; }; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f61447913db9..c8bc85a87b1e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -69,6 +69,7 @@ enum cpuhp_state { CPUHP_X86_APB_DEAD, CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, + CPUHP_IBMVNIC_DEAD, CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 99dc7bfbcd3c..5c51c7fda32a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -125,6 +125,20 @@ struct ethtool_link_ext_state_info { }; }; +struct ethtool_link_ext_stats { + /* Custom Linux statistic for PHY level link down events. + * In a simpler world it should be equal to netdev->carrier_down_count + * unfortunately netdev also counts local reconfigurations which don't + * actually take the physical link down, not to mention NC-SI which, + * if present, keeps the link up regardless of host state. + * This statistic counts when PHY _actually_ went down, or lost link. 
+ * + * Note that we need u64 for ethtool_stats_init() and comparisons + * to ETHTOOL_STAT_NOT_SET, but only u32 is exposed to the user. + */ + u64 link_down_events; +}; + /** * ethtool_rxfh_indir_default - get default value for RX flow hash indirection * @index: Index in RX flow hash indirection table @@ -481,6 +495,7 @@ struct ethtool_module_power_mode_params { * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_link_ext_stats: Read extra link-related counters. * @get_eeprom_len: Read range of EEPROM addresses for validation of * @get_eeprom and @set_eeprom requests. * Returns 0 if device does not support EEPROM access. @@ -652,6 +667,8 @@ struct ethtool_ops { u32 (*get_link)(struct net_device *); int (*get_link_ext_state)(struct net_device *, struct ethtool_link_ext_state_info *); + void (*get_link_ext_stats)(struct net_device *dev, + struct ethtool_link_ext_stats *stats); int (*get_eeprom_len)(struct net_device *); int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 79690938d9a2..6252f02f38b7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4573,18 +4573,17 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - common += sizeof(struct ieee80211_mle_basic_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PREQ: - common += sizeof(struct ieee80211_mle_preq_common_info); + case IEEE80211_ML_CONTROL_TYPE_TDLS: + /* + * The length is the first octet pointed by mle->variable so no + * need to add anything + */ break; case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; return common; - case IEEE80211_ML_CONTROL_TYPE_TDLS: - common += sizeof(struct ieee80211_mle_tdls_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) common += ETH_ALEN; @@ -4594,7 +4593,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) return 0; } - return common + mle->variable[0]; + return sizeof(*mle) + common + mle->variable[0]; } /** @@ -4602,7 +4601,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) * @data: pointer to the element data * @len: length of the containing element */ -static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) +static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u8 fixed = sizeof(*mle); @@ -4667,6 +4666,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) enum ieee80211_mle_subelems { IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, + IEEE80211_MLE_SUBELEM_FRAGMENT = 254, }; #define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f @@ -4685,6 +4685,46 @@ struct ieee80211_mle_per_sta_profile { u8 variable[]; } __packed; +/** + * ieee80211_mle_sta_prof_size_ok - validate multi-link element sta profile size + * @data: pointer to the sub element data + * @len: length of the containing sub element + */ +static inline bool ieee80211_mle_sta_prof_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; + u16 control; + u8 fixed = sizeof(*prof); + u8 info_len = 1; + + if (len < fixed) + return 
false; + + control = le16_to_cpu(prof->control); + + if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) + info_len += 6; + if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) + info_len += 8; + if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT) + info_len += 1; + + if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && + control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) { + if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) + info_len += 2; + else + info_len += 1; + } + + return prof->sta_info_len >= info_len && + fixed + prof->sta_info_len <= len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index f1f9412b6ac6..0303eb84d596 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -276,6 +276,30 @@ enum { IEEE802154_SYSTEM_ERROR = 0xff, }; +/** + * enum ieee802154_filtering_level - Filtering levels applicable to a PHY + * + * @IEEE802154_FILTERING_NONE: No filtering at all, what is received is + * forwarded to the softMAC + * @IEEE802154_FILTERING_1_FCS: First filtering level, frames with an invalid + * FCS should be dropped + * @IEEE802154_FILTERING_2_PROMISCUOUS: Second filtering level, promiscuous + * mode as described in the spec, identical in terms of filtering to the + * level one on PHY side, but at the MAC level the frame should be + * forwarded to the upper layer directly + * @IEEE802154_FILTERING_3_SCAN: Third filtering level, scan related, where + * only beacons must be processed, all remaining traffic gets dropped + * @IEEE802154_FILTERING_4_FRAME_FIELDS: Fourth filtering level actually + * enforcing the validity of the content of the frame with various checks + */ +enum ieee802154_filtering_level { + IEEE802154_FILTERING_NONE, + IEEE802154_FILTERING_1_FCS, + IEEE802154_FILTERING_2_PROMISCUOUS, + IEEE802154_FILTERING_3_SCAN, + IEEE802154_FILTERING_4_FRAME_FIELDS, +}; + /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index d62ef428e3aa..1668ac4d7adc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -59,6 +59,7 @@ struct br_ip_list { #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) #define BR_PORT_LOCKED BIT(21) +#define BR_PORT_MAB BIT(22) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e00c4ee81ff7..6864b89ef868 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -76,7 +76,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) +#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) @@ -471,7 +471,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { - skb->vlan_present = 0; + skb->vlan_all = 0; } /** @@ -483,9 +483,7 @@ static inline void 
__vlan_hwaccel_clear_tag(struct sk_buff *skb) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { - dst->vlan_present = src->vlan_present; - dst->vlan_proto = src->vlan_proto; - dst->vlan_tci = src->vlan_tci; + dst->vlan_all = src->vlan_all; } /* @@ -519,7 +517,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, { skb->vlan_proto = vlan_proto; skb->vlan_tci = vlan_tci; - skb->vlan_present = 1; } /** diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 00177567cfef..f7fbbf3069e7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -488,6 +488,19 @@ static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); } +static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad, + u16 regnum) +{ + return mdiobus_c45_read(mdiodev->bus, mdiodev->addr, devad, regnum); +} + +static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad, + u16 regnum, u16 val) +{ + return mdiobus_c45_write(mdiodev->bus, mdiodev->addr, devad, regnum, + val); +} + int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1ff91cb79ded..eb3fac30488b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -882,6 +882,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } +static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) +{ + /* num_of_mini_cqes is zero based */ + return get_cqe_opcode(cqe) + 1; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; diff --git a/include/linux/module.h b/include/linux/module.h index ec61fb53979a..35876e89eb93 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -879,8 +879,17 @@ static inline bool module_sig_ok(struct module *module) } #endif /* CONFIG_MODULE_SIG */ +#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); +#else +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 3682ae75c7aa..145169be2ed8 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -8,6 +8,7 @@ #include <linux/mbus.h> #include <linux/if_ether.h> +#include <linux/phy.h> #define MV643XX_ETH_SHARED_NAME "mv643xx_eth" #define MV643XX_ETH_NAME "mv643xx_eth_port" @@ -59,6 +60,7 @@ struct mv643xx_eth_platform_data { */ int speed; int duplex; + phy_interface_t interface; /* * How many RX/TX queues to use. 
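Among the hunks above, include/linux/mdio.h gains mdiodev_c45_read() and mdiodev_c45_write(), thin wrappers that pull the bus and address out of a struct mdio_device so Clause 45 callers no longer pass mdiodev->bus and mdiodev->addr by hand. A short usage sketch, using the standard Clause 45 ID registers from uapi/linux/mdio.h; the function itself is illustrative rather than taken from any driver.

#include <linux/mdio.h>

/* Read the PMA/PMD device identifier through the new wrappers rather
 * than spelling out mdiobus_c45_read(mdiodev->bus, mdiodev->addr, ...).
 */
static int example_read_pma_id(struct mdio_device *mdiodev, u32 *phy_id)
{
	int id1, id2;

	id1 = mdiodev_c45_read(mdiodev, MDIO_MMD_PMAPMD, MDIO_DEVID1);
	if (id1 < 0)
		return id1;

	id2 = mdiodev_c45_read(mdiodev, MDIO_MMD_PMAPMD, MDIO_DEVID2);
	if (id2 < 0)
		return id2;

	*phy_id = (id1 << 16) | id2;
	return 0;
}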
diff --git a/include/linux/net.h b/include/linux/net.h index 18d942bbdf6e..b73ad8e3c212 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -42,6 +42,7 @@ struct net; #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 #define SOCK_SUPPORT_ZC 5 +#define SOCK_CUSTOM_SOCKOPT 6 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eddf8ee270e7..23b3903b0678 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -171,31 +171,38 @@ static inline bool dev_xmit_complete(int rc) * (unsigned long) so they can be read and written atomically. */ +#define NET_DEV_STAT(FIELD) \ + union { \ + unsigned long FIELD; \ + atomic_long_t __##FIELD; \ + } + struct net_device_stats { - unsigned long rx_packets; - unsigned long tx_packets; - unsigned long rx_bytes; - unsigned long tx_bytes; - unsigned long rx_errors; - unsigned long tx_errors; - unsigned long rx_dropped; - unsigned long tx_dropped; - unsigned long multicast; - unsigned long collisions; - unsigned long rx_length_errors; - unsigned long rx_over_errors; - unsigned long rx_crc_errors; - unsigned long rx_frame_errors; - unsigned long rx_fifo_errors; - unsigned long rx_missed_errors; - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - unsigned long rx_compressed; - unsigned long tx_compressed; + NET_DEV_STAT(rx_packets); + NET_DEV_STAT(tx_packets); + NET_DEV_STAT(rx_bytes); + NET_DEV_STAT(tx_bytes); + NET_DEV_STAT(rx_errors); + NET_DEV_STAT(tx_errors); + NET_DEV_STAT(rx_dropped); + NET_DEV_STAT(tx_dropped); + NET_DEV_STAT(multicast); + NET_DEV_STAT(collisions); + NET_DEV_STAT(rx_length_errors); + NET_DEV_STAT(rx_over_errors); + NET_DEV_STAT(rx_crc_errors); + NET_DEV_STAT(rx_frame_errors); + NET_DEV_STAT(rx_fifo_errors); + NET_DEV_STAT(rx_missed_errors); + NET_DEV_STAT(tx_aborted_errors); + NET_DEV_STAT(tx_carrier_errors); + NET_DEV_STAT(tx_fifo_errors); + NET_DEV_STAT(tx_heartbeat_errors); + NET_DEV_STAT(tx_window_errors); + NET_DEV_STAT(rx_compressed); + NET_DEV_STAT(tx_compressed); }; +#undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. @@ -1366,10 +1373,6 @@ struct netdev_net_notifier { * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. - * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); - * Get devlink port instance associated with a given netdev. - * Called with a reference on the netdevice and devlink locks only, - * rtnl_lock is not held. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. 
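The NET_DEV_STAT() change above rebuilds every member of struct net_device_stats as an anonymous union of the original unsigned long field and a same-sized atomic_long_t alias, so existing readers of e.g. dev->stats.rx_dropped compile unchanged while lockless writers can update through the __-prefixed member. The intended write side is the DEV_STATS_INC()/DEV_STATS_ADD() pair added near the end of this header's diff; a small sketch, with the drop-accounting path purely illustrative:

#include <linux/netdevice.h>

/* Account drops on a path that holds no common lock: the helpers expand
 * to atomic_long_inc()/atomic_long_add() on the __rx_dropped alias, so
 * concurrent writers cannot lose updates the way a plain ++ could.
 */
static void example_rx_drop(struct net_device *dev, unsigned int n)
{
	if (n == 1)
		DEV_STATS_INC(dev, rx_dropped);
	else
		DEV_STATS_ADD(dev, rx_dropped, n);
}

/* Readers are untouched: the union member aliases the same storage. */
static unsigned long example_read_drops(const struct net_device *dev)
{
	return dev->stats.rx_dropped;
}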
@@ -1600,7 +1603,6 @@ struct net_device_ops { struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); - struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); @@ -1655,7 +1657,6 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device - * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN @@ -1691,7 +1692,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, - IFF_LIVE_RENAME_OK = 1<<30, + /* was IFF_LIVE_RENAME_OK */ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; @@ -1726,7 +1727,6 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /* Specifies the type of the struct net_device::ml_priv pointer */ @@ -1999,6 +1999,11 @@ enum netdev_ml_priv_type { * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * + * @devlink_port: Pointer to related devlink port structure. + * Assigned by a driver before netdev registration using + * SET_NETDEV_DEVLINK_PORT macro. This pointer is static + * during the time netdevice is registered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2349,9 +2354,22 @@ struct net_device { netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; + + struct devlink_port *devlink_port; }; #define to_net_dev(d) container_of(d, struct net_device, dev) +/* + * Driver should use this to assign devlink port instance to a netdevice + * before it registers the netdevice. Therefore devlink_port is static + * during the netdev lifetime after it is registered. 
+ */ +#define SET_NETDEV_DEVLINK_PORT(dev, port) \ +({ \ + WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ + ((dev)->devlink_port = (port)); \ +}) + static inline bool netif_elide_gro(const struct net_device *dev) { if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) @@ -2785,6 +2803,7 @@ enum netdev_cmd { NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, + NETDEV_PRE_UNINIT, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, @@ -2814,6 +2833,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, + struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); @@ -3855,8 +3876,6 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); -void __dev_notify_flags(struct net_device *, unsigned int old_flags, - unsigned int gchanges); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -5101,11 +5120,6 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } -static inline bool netdev_unregistering(const struct net_device *dev) -{ - return dev->reg_state == NETREG_UNREGISTERING; -} - static inline const char *netdev_reg_state(const struct net_device *dev) { switch (dev->reg_state) { @@ -5164,4 +5178,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; +/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ +#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) +#define DEV_STATS_ADD(DEV, FIELD, VAL) \ + atomic_long_add((VAL), &(DEV)->stats.__##FIELD) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d51e041d2242..d81bde5a5844 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -64,6 +64,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) /* this can be increased when necessary - don't expose to userland */ #define NETLINK_MAX_COOKIE_LEN 20 +#define NETLINK_MAX_FMTMSG_LEN 80 /** * struct netlink_ext_ack - netlink extended ACK report struct @@ -75,6 +76,8 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * @miss_nest: nest missing an attribute (%NULL if missing top level attr) * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length + * @_msg_buf: output buffer for formatted message strings - don't access + * directly, use %NL_SET_ERR_MSG_FMT */ struct netlink_ext_ack { const char *_msg; @@ -84,13 +87,13 @@ struct netlink_ext_ack { u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; + char _msg_buf[NETLINK_MAX_FMTMSG_LEN]; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. - * Currently string formatting is not supported (due - * to the lack of an output buffer.) 
+ * If string formatting is needed use NL_SET_ERR_MSG_FMT. */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ @@ -102,9 +105,31 @@ struct netlink_ext_ack { __extack->_msg = __msg; \ } while (0) +/* We splice fmt with %s at each end even in the snprintf so that both calls + * can use the same string constant, avoiding its duplication in .ro + */ +#define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (!__extack) \ + break; \ + if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ + "%s" fmt "%s", "", ##args, "") >= \ + NETLINK_MAX_FMTMSG_LEN) \ + net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ + ##args, "\n"); \ + \ + do_trace_netlink_extack(__extack->_msg_buf); \ + \ + __extack->_msg = __extack->_msg_buf; \ +} while (0) + #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) +#define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ + NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) + #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ diff --git a/include/linux/phy.h b/include/linux/phy.h index ddf66198f751..9a3752c0c444 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -600,6 +600,7 @@ struct macsec_ops; * @psec: Pointer to Power Sourcing Equipment control struct * @lock: Mutex for serialization access to PHY * @state_queue: Work queue for state machine + * @link_down_events: Number of times link was lost * @shared: Pointer to private data shared by phys in one package * @priv: Pointer to driver private data * @@ -723,6 +724,8 @@ struct phy_device { int pma_extable; + unsigned int link_down_events; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 3f01ac8017e0..c492c26202b5 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -207,6 +207,11 @@ struct phylink_mac_ops { * * If the @state->interface mode is not supported, then the @supported * mask must be cleared. + * + * This member is optional; if not set, the generic validator will be + * used making use of @config->mac_capabilities and + * @config->supported_interfaces to determine which link modes are + * supported. */ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); @@ -558,6 +563,9 @@ void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); unsigned long phylink_get_capabilities(phy_interface_t interface, unsigned long mac_capabilities, int rate_matching); +void phylink_validate_mask_caps(unsigned long *supported, + struct phylink_link_state *state, + unsigned long caps); void phylink_generic_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); @@ -613,6 +621,30 @@ int phylink_speed_up(struct phylink *pl); void phylink_set_port_modes(unsigned long *bits); +/** + * phylink_get_link_timer_ns - return the PCS link timer value + * @interface: link &typedef phy_interface_t mode + * + * Return the PCS link timer setting in nanoseconds for the PHY @interface + * mode, or -EINVAL if not appropriate. 
+ */ +static inline int phylink_get_link_timer_ns(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_USXGMII: + return 1600000; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return 10000000; + + default: + return -EINVAL; + } +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 81d6e4ec2294..0260f5ea98fe 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,8 +208,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {} static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } #define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;}) +#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;}) #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;}) #define proc_create_net_single(name, mode, parent, show, data) ({NULL;}) +#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;}) static inline struct pid *tgid_pidfd_to_pid(const struct file *file) { diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 92b44161408e..fdffa6a98d79 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -45,6 +45,8 @@ struct system_device_crosststamp; /** * struct ptp_system_timestamp - system time corresponding to a PHC timestamp + * @pre_ts: system timestamp before capturing PHC + * @post_ts: system timestamp after capturing PHC */ struct ptp_system_timestamp { struct timespec64 pre_ts; @@ -75,12 +77,6 @@ struct ptp_system_timestamp { * nominal frequency in parts per million, but with a * 16 bit binary fractional field. * - * @adjfreq: Adjusts the frequency of the hardware clock. - * This method is deprecated. New drivers should implement - * the @adjfine method instead. - * parameter delta: Desired frequency offset from nominal frequency - * in parts per billion - * * @adjphase: Adjusts the phase offset of the hardware clock. * parameter delta: Desired change in nanoseconds. * @@ -172,7 +168,6 @@ struct ptp_clock_info { int pps; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); - int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); @@ -246,6 +241,52 @@ static inline long scaled_ppm_to_ppb(long ppm) return (long)ppb; } +/** + * diff_by_scaled_ppm - Calculate difference using scaled ppm + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million to adjust by + * @diff: on return, the absolute value of calculated diff + * + * Calculate the difference to adjust the base increment using scaled parts + * per million. + * + * Use mul_u64_u64_div_u64 to perform the difference calculation in avoid + * possible overflow. 
+ * + * Returns: true if scaled_ppm is negative, false otherwise + */ +static inline bool diff_by_scaled_ppm(u64 base, long scaled_ppm, u64 *diff) +{ + bool negative = false; + + if (scaled_ppm < 0) { + negative = true; + scaled_ppm = -scaled_ppm; + } + + *diff = mul_u64_u64_div_u64(base, (u64)scaled_ppm, 1000000ULL << 16); + + return negative; +} + +/** + * adjust_by_scaled_ppm - Adjust a base increment by scaled parts per million + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million frequency adjustment + * + * Helper function which calculates a new increment value based on the + * requested scaled parts per million adjustment. + */ +static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) +{ + u64 diff; + + if (diff_by_scaled_ppm(base, scaled_ppm, &diff)) + return base - diff; + + return base + diff; +} + #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) /** @@ -316,6 +357,11 @@ int ptp_find_pin(struct ptp_clock *ptp, * should most likely call ptp_find_pin() directly from their * ptp_clock_info::enable() method. * +* @ptp: The clock obtained from ptp_clock_register(). +* @func: One of the ptp_pin_function enumerated values. +* @chan: The particular functional channel to find. +* Return: Pin index in the range of zero to ptp_clock_caps.n_pins - 1, +* or -1 if the auxiliary function cannot be found. */ int ptp_find_pin_unlocked(struct ptp_clock *ptp, diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..8822f06e4b40 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -241,6 +241,18 @@ static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** + * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? + * + * As an accident of implementation, an RCU Tasks Trace grace period also + * acts as an RCU grace period. However, this could change at any time. + * Code relying on this accident must call this function to verify that + * this accident is still happening. + * + * You have been warned! 
+ */ +static inline bool rcu_trace_implies_rcu_gp(void) { return true; } + +/** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ae2c6a3cec5d..92ad75549e9c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -12,21 +12,22 @@ extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, - u32 group, struct nlmsghdr *nlh, gfp_t flags); + u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, + u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex); + int new_ifindex, u32 portid, u32 seq); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, - gfp_t flags); + gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ diff --git a/include/linux/sfp.h b/include/linux/sfp.h index d1f343853b6c..52b98f9666a2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -332,39 +332,33 @@ enum { /* SFP EEPROM registers */ enum { - SFP_PHYS_ID = 0x00, - SFP_PHYS_EXT_ID = 0x01, - SFP_CONNECTOR = 0x02, - SFP_COMPLIANCE = 0x03, - SFP_ENCODING = 0x0b, - SFP_BR_NOMINAL = 0x0c, - SFP_RATE_ID = 0x0d, - SFP_LINK_LEN_SM_KM = 0x0e, - SFP_LINK_LEN_SM_100M = 0x0f, - SFP_LINK_LEN_50UM_OM2_10M = 0x10, - SFP_LINK_LEN_62_5UM_OM1_10M = 0x11, - SFP_LINK_LEN_COPPER_1M = 0x12, - SFP_LINK_LEN_50UM_OM4_10M = 0x12, - SFP_LINK_LEN_50UM_OM3_10M = 0x13, - SFP_VENDOR_NAME = 0x14, - SFP_VENDOR_OUI = 0x25, - SFP_VENDOR_PN = 0x28, - SFP_VENDOR_REV = 0x38, - SFP_OPTICAL_WAVELENGTH_MSB = 0x3c, - SFP_OPTICAL_WAVELENGTH_LSB = 0x3d, - SFP_CABLE_SPEC = 0x3c, - SFP_CC_BASE = 0x3f, - SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */ - SFP_BR_MAX = 0x42, - SFP_BR_MIN = 0x43, - SFP_VENDOR_SN = 0x44, - SFP_DATECODE = 0x54, - SFP_DIAGMON = 0x5c, - SFP_ENHOPTS = 0x5d, - SFP_SFF8472_COMPLIANCE = 0x5e, - SFP_CC_EXT = 0x5f, + SFP_PHYS_ID = 0, + SFP_PHYS_EXT_ID = 1, SFP_PHYS_EXT_ID_SFP = 0x04, + + SFP_CONNECTOR = 2, + SFP_COMPLIANCE = 3, + SFP_ENCODING = 11, + SFP_BR_NOMINAL = 12, + SFP_RATE_ID = 13, + SFP_LINK_LEN_SM_KM = 14, + SFP_LINK_LEN_SM_100M = 15, + SFP_LINK_LEN_50UM_OM2_10M = 16, + SFP_LINK_LEN_62_5UM_OM1_10M = 17, + SFP_LINK_LEN_COPPER_1M = 18, + SFP_LINK_LEN_50UM_OM4_10M = 18, + SFP_LINK_LEN_50UM_OM3_10M = 19, + SFP_VENDOR_NAME = 20, + SFP_VENDOR_OUI = 37, + SFP_VENDOR_PN = 40, + SFP_VENDOR_REV = 56, + SFP_OPTICAL_WAVELENGTH_MSB = 60, + SFP_OPTICAL_WAVELENGTH_LSB = 61, + SFP_CABLE_SPEC = 60, + SFP_CC_BASE = 63, + + SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */ SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), 
SFP_OPTIONS_RETIMER = BIT(11), @@ -378,11 +372,20 @@ enum { SFP_OPTIONS_TX_FAULT = BIT(3), SFP_OPTIONS_LOS_INVERTED = BIT(2), SFP_OPTIONS_LOS_NORMAL = BIT(1), + + SFP_BR_MAX = 66, + SFP_BR_MIN = 67, + SFP_VENDOR_SN = 68, + SFP_DATECODE = 84, + + SFP_DIAGMON = 92, SFP_DIAGMON_DDM = BIT(6), SFP_DIAGMON_INT_CAL = BIT(5), SFP_DIAGMON_EXT_CAL = BIT(4), SFP_DIAGMON_RXPWR_AVG = BIT(3), SFP_DIAGMON_ADDRMODE = BIT(2), + + SFP_ENHOPTS = 93, SFP_ENHOPTS_ALARMWARN = BIT(7), SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6), SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5), @@ -390,6 +393,8 @@ enum { SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3), SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2), SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1), + + SFP_SFF8472_COMPLIANCE = 94, SFP_SFF8472_COMPLIANCE_NONE = 0x00, SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01, SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02, @@ -399,68 +404,70 @@ enum { SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06, SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07, SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08, + + SFP_CC_EXT = 95, }; /* SFP Diagnostics */ enum { /* Alarm and warnings stored MSB at lower address then LSB */ - SFP_TEMP_HIGH_ALARM = 0x00, - SFP_TEMP_LOW_ALARM = 0x02, - SFP_TEMP_HIGH_WARN = 0x04, - SFP_TEMP_LOW_WARN = 0x06, - SFP_VOLT_HIGH_ALARM = 0x08, - SFP_VOLT_LOW_ALARM = 0x0a, - SFP_VOLT_HIGH_WARN = 0x0c, - SFP_VOLT_LOW_WARN = 0x0e, - SFP_BIAS_HIGH_ALARM = 0x10, - SFP_BIAS_LOW_ALARM = 0x12, - SFP_BIAS_HIGH_WARN = 0x14, - SFP_BIAS_LOW_WARN = 0x16, - SFP_TXPWR_HIGH_ALARM = 0x18, - SFP_TXPWR_LOW_ALARM = 0x1a, - SFP_TXPWR_HIGH_WARN = 0x1c, - SFP_TXPWR_LOW_WARN = 0x1e, - SFP_RXPWR_HIGH_ALARM = 0x20, - SFP_RXPWR_LOW_ALARM = 0x22, - SFP_RXPWR_HIGH_WARN = 0x24, - SFP_RXPWR_LOW_WARN = 0x26, - SFP_LASER_TEMP_HIGH_ALARM = 0x28, - SFP_LASER_TEMP_LOW_ALARM = 0x2a, - SFP_LASER_TEMP_HIGH_WARN = 0x2c, - SFP_LASER_TEMP_LOW_WARN = 0x2e, - SFP_TEC_CUR_HIGH_ALARM = 0x30, - SFP_TEC_CUR_LOW_ALARM = 0x32, - SFP_TEC_CUR_HIGH_WARN = 0x34, - SFP_TEC_CUR_LOW_WARN = 0x36, - SFP_CAL_RXPWR4 = 0x38, - SFP_CAL_RXPWR3 = 0x3c, - SFP_CAL_RXPWR2 = 0x40, - SFP_CAL_RXPWR1 = 0x44, - SFP_CAL_RXPWR0 = 0x48, - SFP_CAL_TXI_SLOPE = 0x4c, - SFP_CAL_TXI_OFFSET = 0x4e, - SFP_CAL_TXPWR_SLOPE = 0x50, - SFP_CAL_TXPWR_OFFSET = 0x52, - SFP_CAL_T_SLOPE = 0x54, - SFP_CAL_T_OFFSET = 0x56, - SFP_CAL_V_SLOPE = 0x58, - SFP_CAL_V_OFFSET = 0x5a, - SFP_CHKSUM = 0x5f, - - SFP_TEMP = 0x60, - SFP_VCC = 0x62, - SFP_TX_BIAS = 0x64, - SFP_TX_POWER = 0x66, - SFP_RX_POWER = 0x68, - SFP_LASER_TEMP = 0x6a, - SFP_TEC_CUR = 0x6c, - - SFP_STATUS = 0x6e, + SFP_TEMP_HIGH_ALARM = 0, + SFP_TEMP_LOW_ALARM = 2, + SFP_TEMP_HIGH_WARN = 4, + SFP_TEMP_LOW_WARN = 6, + SFP_VOLT_HIGH_ALARM = 8, + SFP_VOLT_LOW_ALARM = 10, + SFP_VOLT_HIGH_WARN = 12, + SFP_VOLT_LOW_WARN = 14, + SFP_BIAS_HIGH_ALARM = 16, + SFP_BIAS_LOW_ALARM = 18, + SFP_BIAS_HIGH_WARN = 20, + SFP_BIAS_LOW_WARN = 22, + SFP_TXPWR_HIGH_ALARM = 24, + SFP_TXPWR_LOW_ALARM = 26, + SFP_TXPWR_HIGH_WARN = 28, + SFP_TXPWR_LOW_WARN = 30, + SFP_RXPWR_HIGH_ALARM = 32, + SFP_RXPWR_LOW_ALARM = 34, + SFP_RXPWR_HIGH_WARN = 36, + SFP_RXPWR_LOW_WARN = 38, + SFP_LASER_TEMP_HIGH_ALARM = 40, + SFP_LASER_TEMP_LOW_ALARM = 42, + SFP_LASER_TEMP_HIGH_WARN = 44, + SFP_LASER_TEMP_LOW_WARN = 46, + SFP_TEC_CUR_HIGH_ALARM = 48, + SFP_TEC_CUR_LOW_ALARM = 50, + SFP_TEC_CUR_HIGH_WARN = 52, + SFP_TEC_CUR_LOW_WARN = 54, + SFP_CAL_RXPWR4 = 56, + SFP_CAL_RXPWR3 = 60, + SFP_CAL_RXPWR2 = 64, + SFP_CAL_RXPWR1 = 68, + SFP_CAL_RXPWR0 = 72, + SFP_CAL_TXI_SLOPE = 76, + SFP_CAL_TXI_OFFSET = 78, + SFP_CAL_TXPWR_SLOPE = 80, + SFP_CAL_TXPWR_OFFSET = 82, + SFP_CAL_T_SLOPE = 84, + 
SFP_CAL_T_OFFSET = 86, + SFP_CAL_V_SLOPE = 88, + SFP_CAL_V_OFFSET = 90, + SFP_CHKSUM = 95, + + SFP_TEMP = 96, + SFP_VCC = 98, + SFP_TX_BIAS = 100, + SFP_TX_POWER = 102, + SFP_RX_POWER = 104, + SFP_LASER_TEMP = 106, + SFP_TEC_CUR = 108, + + SFP_STATUS = 110, SFP_STATUS_TX_DISABLE = BIT(7), SFP_STATUS_TX_DISABLE_FORCE = BIT(6), SFP_STATUS_TX_FAULT = BIT(2), SFP_STATUS_RX_LOS = BIT(1), - SFP_ALARM0 = 0x70, + SFP_ALARM0 = 112, SFP_ALARM0_TEMP_HIGH = BIT(7), SFP_ALARM0_TEMP_LOW = BIT(6), SFP_ALARM0_VCC_HIGH = BIT(5), @@ -470,11 +477,11 @@ enum { SFP_ALARM0_TXPWR_HIGH = BIT(1), SFP_ALARM0_TXPWR_LOW = BIT(0), - SFP_ALARM1 = 0x71, + SFP_ALARM1 = 113, SFP_ALARM1_RXPWR_HIGH = BIT(7), SFP_ALARM1_RXPWR_LOW = BIT(6), - SFP_WARN0 = 0x74, + SFP_WARN0 = 116, SFP_WARN0_TEMP_HIGH = BIT(7), SFP_WARN0_TEMP_LOW = BIT(6), SFP_WARN0_VCC_HIGH = BIT(5), @@ -484,13 +491,15 @@ enum { SFP_WARN0_TXPWR_HIGH = BIT(1), SFP_WARN0_TXPWR_LOW = BIT(0), - SFP_WARN1 = 0x75, + SFP_WARN1 = 117, SFP_WARN1_RXPWR_HIGH = BIT(7), SFP_WARN1_RXPWR_LOW = BIT(6), - SFP_EXT_STATUS = 0x76, - SFP_VSL = 0x78, - SFP_PAGE = 0x7f, + SFP_EXT_STATUS = 118, + SFP_EXT_STATUS_PWRLVL_SELECT = BIT(0), + + SFP_VSL = 120, + SFP_PAGE = 127, }; struct fwnode_handle; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7be5bb4c94b6..4e464a27adaf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -818,7 +818,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff - * @vlan_present: VLAN tag is present + * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -951,7 +951,7 @@ struct sk_buff { /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ + __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; @@ -966,7 +966,6 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; - __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; @@ -999,8 +998,13 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; - __be16 vlan_proto; - __u16 vlan_tci; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; @@ -1059,15 +1063,13 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time +/* if you move tc_at_ingress or mono_delivery_time * around, you also must adapt these constants. 
*/ #ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 #define TC_AT_INGRESS_MASK (1 << 0) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else -#define PKT_VLAN_PRESENT_BIT 0 #define TC_AT_INGRESS_MASK (1 << 7) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif @@ -5050,12 +5052,5 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } #endif -static inline bool skb_pp_recycle(struct sk_buff *skb, void *data) -{ - if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) - return false; - return page_pool_return_skb_page(virt_to_page(data)); -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/smc911x.h b/include/linux/smc911x.h deleted file mode 100644 index 8cace8189e74..000000000000 --- a/include/linux/smc911x.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SMC911X_H__ -#define __SMC911X_H__ - -#define SMC911X_USE_16BIT (1 << 0) -#define SMC911X_USE_32BIT (1 << 1) - -struct smc911x_platdata { - unsigned long flags; - unsigned long irq_flags; /* IRQF_... */ - int irq_polarity; -}; - -#endif /* __SMC911X_H__ */ diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 4450c8b7a1cb..8294978f4bca 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -5,27 +5,76 @@ #include <linux/rcupdate.h> #include <linux/regmap.h> #include <linux/pci.h> +#include <linux/skbuff.h> #define MTK_WED_TX_QUEUES 2 +#define MTK_WED_RX_QUEUES 2 + +#define WED_WO_STA_REC 0x6 struct mtk_wed_hw; struct mtk_wdma_desc; +enum mtk_wed_wo_cmd { + MTK_WED_WO_CMD_WED_CFG, + MTK_WED_WO_CMD_WED_RX_STAT, + MTK_WED_WO_CMD_RRO_SER, + MTK_WED_WO_CMD_DBG_INFO, + MTK_WED_WO_CMD_DEV_INFO, + MTK_WED_WO_CMD_BSS_INFO, + MTK_WED_WO_CMD_STA_REC, + MTK_WED_WO_CMD_DEV_INFO_DUMP, + MTK_WED_WO_CMD_BSS_INFO_DUMP, + MTK_WED_WO_CMD_STA_REC_DUMP, + MTK_WED_WO_CMD_BA_INFO_DUMP, + MTK_WED_WO_CMD_FBCMD_Q_DUMP, + MTK_WED_WO_CMD_FW_LOG_CTRL, + MTK_WED_WO_CMD_LOG_FLUSH, + MTK_WED_WO_CMD_CHANGE_STATE, + MTK_WED_WO_CMD_CPU_STATS_ENABLE, + MTK_WED_WO_CMD_CPU_STATS_DUMP, + MTK_WED_WO_CMD_EXCEPTION_INIT, + MTK_WED_WO_CMD_PROF_CTRL, + MTK_WED_WO_CMD_STA_BA_DUMP, + MTK_WED_WO_CMD_BA_CTRL_DUMP, + MTK_WED_WO_CMD_RXCNT_CTRL, + MTK_WED_WO_CMD_RXCNT_INFO, + MTK_WED_WO_CMD_SET_CAP, + MTK_WED_WO_CMD_CCIF_RING_DUMP, + MTK_WED_WO_CMD_WED_END +}; + +struct mtk_rxbm_desc { + __le32 buf0; + __le32 token; +} __packed __aligned(4); + enum mtk_wed_bus_tye { MTK_WED_BUS_PCIE, MTK_WED_BUS_AXI, }; +#define MTK_WED_RING_CONFIGURED BIT(0) struct mtk_wed_ring { struct mtk_wdma_desc *desc; dma_addr_t desc_phys; u32 desc_size; int size; + u32 flags; u32 reg_base; void __iomem *wpdma; }; +struct mtk_wed_wo_rx_stats { + __le16 wlan_idx; + __le16 tid; + __le32 rx_pkt_cnt; + __le32 rx_byte_cnt; + __le32 rx_err_cnt; + __le32 rx_drop_cnt; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -34,17 +83,33 @@ struct mtk_wed_device { bool init_done, running; int wdma_idx; int irq; + u8 version; struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES]; struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; struct { int size; void **pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; - } buf_ring; + } tx_buf_ring; + + struct { + int size; + struct page_frag_cache rx_page; + struct mtk_rxbm_desc *desc; + dma_addr_t desc_phys; + } rx_buf_ring; + + struct { + struct 
mtk_wed_ring ring; + dma_addr_t miod_phys; + dma_addr_t fdbk_phys; + } rro; /* filled by driver: */ struct { @@ -53,22 +118,36 @@ struct mtk_wed_device { struct pci_dev *pci_dev; }; enum mtk_wed_bus_tye bus_type; + void __iomem *base; + u32 phy_base; u32 wpdma_phys; u32 wpdma_int; u32 wpdma_mask; u32 wpdma_tx; u32 wpdma_txfree; + u32 wpdma_rx_glo; + u32 wpdma_rx; + + bool wcid_512; u16 token_start; unsigned int nbuf; + unsigned int rx_nbuf; + unsigned int rx_npkt; + unsigned int rx_size; u8 tx_tbit[MTK_WED_TX_QUEUES]; + u8 rx_tbit[MTK_WED_RX_QUEUES]; u8 txfree_tbit; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); + u32 (*init_rx_buf)(struct mtk_wed_device *wed, int size); + void (*release_rx_buf)(struct mtk_wed_device *wed); + void (*update_wo_rx_stats)(struct mtk_wed_device *wed, + struct mtk_wed_wo_rx_stats *stats); } wlan; #endif }; @@ -77,9 +156,15 @@ struct mtk_wed_ops { int (*attach)(struct mtk_wed_device *dev); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs); + int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); int (*txfree_ring_setup)(struct mtk_wed_device *dev, void __iomem *regs); + int (*msg_update)(struct mtk_wed_device *dev, int cmd_id, + void *data, int len); void (*detach)(struct mtk_wed_device *dev); + void (*ppe_check)(struct mtk_wed_device *dev, struct sk_buff *skb, + u32 reason, u32 hash); void (*stop)(struct mtk_wed_device *dev); void (*start)(struct mtk_wed_device *dev, u32 irq_mask); @@ -114,6 +199,16 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } +static inline bool +mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version != 1; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -130,6 +225,12 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) (_dev)->ops->irq_get(_dev, _mask) #define mtk_wed_device_irq_set_mask(_dev, _mask) \ (_dev)->ops->irq_set_mask(_dev, _mask) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \ + (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ + (_dev)->ops->msg_update(_dev, _id, _msg, _len) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -143,6 +244,9 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) #define mtk_wed_device_irq_get(_dev, _mask) 0 #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV #endif #endif diff --git a/include/linux/socket.h b/include/linux/socket.h index de3701a2a212..13c3a237b9c9 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + 
DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 41b1da621a45..ca7f05a130d2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -423,6 +423,7 @@ struct tcp_sock { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; + u32 plb_rehash; /* PLB-triggered rehash attempts */ u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG * while socket was owned by user. */ diff --git a/include/linux/udp.h b/include/linux/udp.h index e96da4157d04..a2892e151644 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } +#define UDP_HTABLE_SIZE_MIN_PERNET 128 #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { @@ -70,7 +72,8 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); - void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); @@ -87,6 +90,9 @@ struct udp_sock { /* This field is dirtied by udp_recvmsg() */ int forward_deficit; + + /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ + int forward_threshold; }; #define UDP_MAX_SEGMENTS (1 << 6UL) diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 5ce2acf444fb..24d76500b1cc 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -15,6 +15,7 @@ * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol + * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -26,6 +27,7 @@ enum wwan_port_type { WWAN_PORT_QMI, WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, + WWAN_PORT_XMMRPC, /* Add new port types above this line */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 61f2ceb3939e..c94ea1a306e0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -67,6 +67,7 @@ struct tc_action { #define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1)) #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) +#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. 
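Editor's note: the struct sockaddr change above replaces the fixed sa_data[14] with a union of the old 14-byte field and a flexible array member, so code that legitimately stores longer protocol addresses can index past byte 13 without tripping compile-time or fortify bounds checks, while sizeof(struct sockaddr) stays the same. A minimal userspace sketch of the pattern follows, assuming GCC/Clang (the zero-sized anchor struct is a GNU extension); struct sockaddr_sketch and its __empty_sa_data member are illustrative stand-ins for what the kernel's DECLARE_FLEX_ARRAY() helper roughly expands to, not the kernel definitions themselves.

#include <stdio.h>
#include <stddef.h>

struct sockaddr_sketch {
	unsigned short sa_family;		/* stand-in for sa_family_t */
	union {
		char sa_data_min[14];		/* legacy fixed-size view */
		struct {
			struct { } __empty_sa_data; /* zero-sized anchor: lets a  */
			char sa_data[];		    /* flex array live in a union */
		};
	};
};

int main(void)
{
	/* Two views of the same storage: the struct size is unchanged,
	 * but sa_data carries no fixed bound for the compiler to enforce. */
	printf("size=%zu sa_data offset=%zu\n",
	       sizeof(struct sockaddr_sketch),
	       offsetof(struct sockaddr_sketch, sa_data));
	return 0;
}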
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index 191c36afa1f4..9dc082b2d543 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -156,8 +156,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave); void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); -int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); -int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); struct slave *bond_xmit_alb_slave_get(struct bonding *bond, struct sk_buff *skb); struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, diff --git a/include/net/bonding.h b/include/net/bonding.h index e999f851738b..ea36ab7f9e72 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -92,8 +92,6 @@ #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX) - #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; @@ -280,8 +278,6 @@ struct bond_vlan_tag { unsigned short vlan_id; }; -bool bond_sk_check(struct bonding *bond); - /** * Returns NULL if the net_device does not belong to any of the bond's slaves * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e09ff87146c1..11a370e64143 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2105,6 +2105,7 @@ struct mpath_info { * * Used to change BSS parameters (mainly for AP mode). * + * @link_id: link_id or -1 for non-MLD * @use_cts_prot: Whether to use CTS protection * (0 = no, 1 = yes, -1 = do not change) * @use_short_preamble: Whether the use of short preambles is allowed @@ -2122,6 +2123,7 @@ struct mpath_info { * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { + int link_id; int use_cts_prot; int use_short_preamble; int use_short_slot_time; @@ -6933,6 +6935,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @ap_mld_addr: AP MLD address (in case of MLO) * @links: per-link information indexed by link ID, use links[0] for * non-MLO connections + * @links.status: Set this (along with a BSS pointer) for links that + * were rejected by the AP. */ struct cfg80211_rx_assoc_resp { const u8 *buf; @@ -6944,6 +6948,7 @@ struct cfg80211_rx_assoc_resp { struct { const u8 *addr; struct cfg80211_bss *bss; + u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; @@ -7454,6 +7459,9 @@ struct cfg80211_fils_resp_params { * if the bss is expired during the connection, esp. for those drivers * implementing connect op. Only one parameter among @bssid and @bss needs * to be specified. 
+ * @links.status: per-link status code, to report a status code that's not + * %WLAN_STATUS_SUCCESS for a given link, it must also be in the + * @valid_links bitmap and may have a BSS pointer (which is then released) */ struct cfg80211_connect_resp_params { int status; @@ -7470,6 +7478,7 @@ struct cfg80211_connect_resp_params { const u8 *addr; const u8 *bssid; struct cfg80211_bss *bss; + u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; @@ -7674,6 +7683,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * * @dev: network device * @bssid: the BSSID of the AP + * @td_bitmap: transition disable policy + * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags * * This function should be called by a driver that supports 4 way handshake @@ -7684,7 +7695,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * indicate the 802.11 association. */ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, - gfp_t gfp); + const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** * cfg80211_disconnected - notify cfg80211 that connection was dropped diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d8d8719315fd..e1481f9cf049 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -11,7 +11,7 @@ #include <linux/ieee802154.h> #include <linux/netdevice.h> -#include <linux/mutex.h> +#include <linux/spinlock.h> #include <linux/bug.h> #include <net/nl802154.h> @@ -166,11 +166,14 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) * level setting. * @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode * setting. + * @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was + * temporarily stopped. */ enum wpan_phy_flags { WPAN_PHY_FLAG_TXPOWER = BIT(1), WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), WPAN_PHY_FLAG_CCA_MODE = BIT(3), + WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4), }; struct wpan_phy { @@ -182,7 +185,7 @@ struct wpan_phy { */ const void *privid; - u32 flags; + unsigned long flags; /* * This is a PIB according to 802.15.4-2011. @@ -214,6 +217,17 @@ struct wpan_phy { /* the network namespace this phy lives in currently */ possible_net_t _net; + /* Transmission monitoring and control */ + spinlock_t queue_lock; + atomic_t ongoing_txs; + atomic_t hold_txs; + wait_queue_head_t sync_txq; + + /* Current filtering level on reception. + * Only allowed to be changed if phy is not operational. 
+ */ + enum ieee802154_filtering_level filtering; + char priv[] __aligned(NETDEV_ALIGN); }; @@ -365,8 +379,6 @@ struct wpan_dev { bool lbt; - bool promiscuous_mode; - /* fallback for acknowledgment bit setting */ bool ackreq; }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 2b2d86fb3131..8841ab6c2de7 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -109,6 +109,10 @@ struct dcbnl_rtnl_ops { /* buffer settings */ int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *); int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *); + + /* apptrust */ + int (*dcbnl_setapptrust)(struct net_device *, u8 *, int); + int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *); }; #endif /* __NET_DCBNL_H__ */ diff --git a/include/net/devlink.h b/include/net/devlink.h index ba6b8b094943..611a23a3deb2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -121,12 +121,21 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; unsigned int index; - spinlock_t type_lock; /* Protects type and type_dev - * pointer consistency. + spinlock_t type_lock; /* Protects type and type_eth/ib + * structures consistency. */ enum devlink_port_type type; enum devlink_port_type desired_type; - void *type_dev; + union { + struct { + struct net_device *netdev; + int ifindex; + char ifname[IFNAMSIZ]; + } type_eth; + struct { + struct ib_device *ibdev; + } type_ib; + }; struct devlink_port_attrs attrs; u8 attrs_set:1, switch_port:1, @@ -885,6 +894,8 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, + DEVLINK_TRAP_GENERIC_ID_EAPOL, + DEVLINK_TRAP_GENERIC_ID_LOCKED_PORT, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -921,6 +932,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE, DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP, DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_EAPOL, /* Add new generic trap group IDs above */ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, @@ -1112,6 +1124,10 @@ enum devlink_trap_group_generic_id { "blackhole_nexthop" #define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ "dmac_filter" +#define DEVLINK_TRAP_GENERIC_NAME_EAPOL \ + "eapol" +#define DEVLINK_TRAP_GENERIC_NAME_LOCKED_PORT \ + "locked_port" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -1165,6 +1181,8 @@ enum devlink_trap_group_generic_id { "acl_trap" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \ "parser_error_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EAPOL \ + "eapol" #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \ _metadata_cap) \ @@ -1575,8 +1593,7 @@ int devlink_port_register(struct devlink *devlink, unsigned int port_index); void devl_port_unregister(struct devlink_port *devlink_port); void devlink_port_unregister(struct devlink_port *devlink_port); -void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev); +void devlink_port_type_eth_set(struct devlink_port *devlink_port); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); @@ -1865,6 +1882,9 @@ int devlink_compat_phys_port_name_get(struct net_device *dev, int devlink_compat_switch_id_get(struct net_device *dev, struct netdev_phys_item_id *ppid); +int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); 
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); + #else static inline struct devlink *devlink_try_get(struct devlink *devlink) @@ -1901,6 +1921,17 @@ devlink_compat_switch_id_get(struct net_device *dev, return -EOPNOTSUPP; } +static inline int +devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) +{ + return 0; +} + +static inline size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dropreason.h b/include/net/dropreason.h index c1cbcdbaf149..70539288f995 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -68,6 +68,9 @@ FN(IP_INADDRERRORS) \ FN(IP_INNOROUTES) \ FN(PKT_TOO_BIG) \ + FN(DUP_FRAG) \ + FN(FRAG_REASM_TIMEOUT) \ + FN(FRAG_TOO_FAR) \ FNe(MAX) /** @@ -80,6 +83,8 @@ enum skb_drop_reason { * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) */ SKB_NOT_DROPPED_YET = 0, + /** @SKB_CONSUMED: packet has been consumed */ + SKB_CONSUMED, /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ SKB_DROP_REASON_NOT_SPECIFIED, /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ @@ -298,6 +303,15 @@ enum skb_drop_reason { * MTU) */ SKB_DROP_REASON_PKT_TOO_BIG, + /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */ + SKB_DROP_REASON_DUP_FRAG, + /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */ + SKB_DROP_REASON_FRAG_REASM_TIMEOUT, + /** + * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far. + * (/proc/sys/net/ipv4/ipfrag_max_dist) + */ + SKB_DROP_REASON_FRAG_TOO_FAR, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' diff --git a/include/net/dsa.h b/include/net/dsa.h index ee369670e20e..dde364688739 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -880,9 +880,6 @@ struct dsa_switch_ops { */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); - void (*phylink_validate)(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state); struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); diff --git a/include/net/dst.h b/include/net/dst.h index 00b479ce6b99..d67fda89cd0f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -356,9 +356,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { - /* TODO : stats should be SMP safe */ - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, skb->len); __skb_tunnel_rx(skb, dev, net); } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index e343f9f8363e..0400a0ac8a29 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -32,6 +32,10 @@ struct flow_match_vlan { struct flow_dissector_key_vlan *key, *mask; }; +struct flow_match_arp { + struct flow_dissector_key_arp *key, *mask; +}; + struct flow_match_ipv4_addrs { struct flow_dissector_key_ipv4_addrs *key, *mask; }; @@ -98,6 +102,8 @@ void flow_rule_match_vlan(const struct flow_rule *rule, struct flow_match_vlan *out); void flow_rule_match_cvlan(const struct flow_rule *rule, struct flow_match_vlan *out); +void flow_rule_match_arp(const struct flow_rule *rule, + struct flow_match_arp *out); void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, struct 
flow_match_ipv4_addrs *out); void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, @@ -155,6 +161,7 @@ enum flow_action_id { FLOW_ACTION_MARK, FLOW_ACTION_PTYPE, FLOW_ACTION_PRIORITY, + FLOW_ACTION_RX_QUEUE_MAPPING, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, @@ -247,6 +254,7 @@ struct flow_action_entry { u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ u16 ptype; /* FLOW_ACTION_PTYPE */ + u16 rx_queue; /* FLOW_ACTION_RX_QUEUE_MAPPING */ u32 priority; /* FLOW_ACTION_PRIORITY */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9f97f73615b6..d21210709f84 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -18,12 +18,11 @@ struct genl_multicast_group { u8 flags; }; -struct genl_ops; +struct genl_split_ops; struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -43,12 +42,13 @@ struct genl_info; * @resv_start_op: first operation for which reserved fields of the header * can be validated and policies are required (see below); * new families should leave this field at zero - * @mcgrp_offset: starting number of multicast group IDs in this family - * (private) * @ops: the operations supported by this family * @n_ops: number of operations supported by this family * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family + * @split_ops: the split do/dump form of operation definition + * @n_split_ops: number of entries in @split_ops, not that with split do/dump + * ops the number of entries is not the same as number of commands * * Attribute policies (the combination of @policy and @maxattr fields) * can be attached at the family level or at the operation level. @@ -58,29 +58,35 @@ struct genl_info; * if policy is not provided core will reject all TLV attributes. 
*/ struct genl_family { - int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; - unsigned int mcgrp_offset; /* private */ u8 netnsok:1; u8 parallel_ops:1; u8 n_ops; u8 n_small_ops; + u8 n_split_ops; u8 n_mcgrps; u8 resv_start_op; const struct nla_policy *policy; - int (*pre_doit)(const struct genl_ops *ops, + int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); - void (*post_doit)(const struct genl_ops *ops, + void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); const struct genl_ops * ops; const struct genl_small_ops *small_ops; + const struct genl_split_ops *split_ops; const struct genl_multicast_group *mcgrps; struct module *module; + +/* private: internal use only */ + /* protocol family identifier */ + int id; + /* starting number of multicast group IDs in this family */ + unsigned int mcgrp_offset; }; /** @@ -182,6 +188,58 @@ struct genl_ops { }; /** + * struct genl_split_ops - generic netlink operations (do/dump split version) + * @cmd: command identifier + * @internal_flags: flags used by the family + * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @validate: validation flags from enum genl_validate_flags + * @policy: netlink policy (takes precedence over family policy) + * @maxattr: maximum number of attributes supported + * + * Do callbacks: + * @pre_doit: called before an operation's @doit callback, it may + * do additional, common, filtering and return an error + * @doit: standard command callback + * @post_doit: called after an operation's @doit callback, it may + * undo operations done by pre_doit, for example release locks + * + * Dump callbacks: + * @start: start callback for dumps + * @dumpit: callback for dumpers + * @done: completion callback for dumps + * + * Do callbacks can be used if %GENL_CMD_CAP_DO is set in @flags. + * Dump callbacks can be used if %GENL_CMD_CAP_DUMP is set in @flags. + * Exactly one of those flags must be set. 
+ */ +struct genl_split_ops { + union { + struct { + int (*pre_doit)(const struct genl_split_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + int (*doit)(struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(const struct genl_split_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + }; + struct { + int (*start)(struct netlink_callback *cb); + int (*dumpit)(struct sk_buff *skb, + struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + }; + }; + const struct nla_policy *policy; + unsigned int maxattr; + u8 cmd; + u8 internal_flags; + u8 flags; + u8 validate; +}; + +/** * struct genl_dumpit_info - info that is available during dumpit op call * @family: generic netlink family - for internal genl code usage * @op: generic netlink ops - for internal genl code usage @@ -189,7 +247,7 @@ struct genl_ops { */ struct genl_dumpit_info { const struct genl_family *family; - struct genl_ops op; + struct genl_split_ops op; struct nlattr **attrs; }; diff --git a/include/net/geneve.h b/include/net/geneve.h index bced0b1d9fe4..5c96827a487e 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -59,7 +59,7 @@ struct genevehdr { __be16 proto_type; u8 vni[3]; u8 rsvd2; - struct geneve_opt options[]; + u8 options[]; }; static inline bool netif_is_geneve(const struct net_device *dev) diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 03b64bf876a4..4c33a20ea57f 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -85,6 +85,14 @@ struct ieee802154_hdr_fc { #endif }; +enum ieee802154_frame_version { + IEEE802154_2003_STD, + IEEE802154_2006_STD, + IEEE802154_STD, + IEEE802154_RESERVED_STD, + IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD, +}; + struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0b0876610553..b23ddec3cd5c 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -7,6 +7,7 @@ #include <linux/in6.h> #include <linux/rbtree_types.h> #include <linux/refcount.h> +#include <net/dropreason.h> /* Per netns frag queues directory */ struct fqdir { @@ -34,12 +35,14 @@ struct fqdir { * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable + * @INET_FRAG_DROP: if skbs must be dropped (instead of being consumed) */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), INET_FRAG_HASH_DEAD = BIT(3), + INET_FRAG_DROP = BIT(4), }; struct frag_v4_compare_key { @@ -139,7 +142,8 @@ void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. 
*/ -unsigned int inet_frag_rbtree_purge(struct rb_root *root); +unsigned int inet_frag_rbtree_purge(struct rb_root *root, + enum skb_drop_reason reason); static inline void inet_frag_put(struct inet_frag_queue *q) { diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 5052c66e22d2..7321ffe3a108 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -76,6 +76,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; + fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q); dev = dev_get_by_index_rcu(net, fq->iif); @@ -101,7 +102,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); - kfree_skb(head); + kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); goto out_rcu_unlock; out: diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac2bad57933f..721c450a9ccd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -89,15 +89,13 @@ /** * DOC: mac80211 software tx queueing * - * mac80211 provides an optional intermediate queueing implementation designed - * to allow the driver to keep hardware queues short and provide some fairness - * between different stations/interfaces. - * In this model, the driver pulls data frames from the mac80211 queue instead - * of letting mac80211 push them via drv_tx(). - * Other frames (e.g. control or management) are still pushed using drv_tx(). + * mac80211 uses an intermediate queueing implementation, designed to allow the + * driver to keep hardware queues short and to provide some fairness between + * different stations/interfaces. * - * Drivers indicate that they use this model by implementing the .wake_tx_queue - * driver operation. + * Drivers must provide the .wake_tx_queue driver operation by either + * linking it to ieee80211_handle_wake_tx_queue() or implementing a custom + * handler. * * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with * another per-sta for non-data/non-mgmt and bufferable management frames, and @@ -106,9 +104,12 @@ * The driver is expected to initialize its private per-queue data for stations * and interfaces in the .add_interface and .sta_add ops. * - * The driver can't access the queue directly. To dequeue a frame from a - * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a - * queue, it calls the .wake_tx_queue driver op. + * The driver can't access the internal TX queues (iTXQs) directly. + * Whenever mac80211 adds a new frame to a queue, it calls the .wake_tx_queue + * driver op. + * Drivers implementing a custom .wake_tx_queue op can get them by calling + * ieee80211_tx_dequeue(). Drivers using ieee80211_handle_wake_tx_queue() will + * simply get the individual frames pushed via the .tx driver operation. * * Drivers can optionally delegate responsibility for scheduling queues to * mac80211, to take advantage of airtime fairness accounting. In this case, to @@ -1826,7 +1827,7 @@ struct ieee80211_vif_cfg { * for this interface. * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). - * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) + * @txq: the multicast data TX queue * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, * protected by fq->lock. 
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see @@ -1915,6 +1916,10 @@ static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) rcu_dereference_protected((vif)->link_conf[link_id], \ lockdep_vif_mutex_held(vif)) +#define link_conf_dereference_check(vif, link_id) \ + rcu_dereference_check((vif)->link_conf[link_id], \ + lockdep_vif_mutex_held(vif)) + /** * enum ieee80211_key_flags - key flags * @@ -2176,6 +2181,7 @@ struct ieee80211_sta_aggregates { * All link specific info for a STA link for a non MLD STA(single) * or a MLD STA(multiple entries) are stored here. * + * @sta: reference to owning STA * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr * in ieee80211_sta. For MLO Link STA this addr can be same or different * from addr in ieee80211_sta (representing MLD STA addr) @@ -2196,6 +2202,8 @@ struct ieee80211_sta_aggregates { * */ struct ieee80211_link_sta { + struct ieee80211_sta *sta; + u8 addr[ETH_ALEN]; u8 link_id; enum ieee80211_smps_mode smps_mode; @@ -2252,8 +2260,8 @@ struct ieee80211_link_sta { * For non MLO STA it will point to the deflink data. For MLO STA * ieee80211_sta_recalc_aggregates() must be called to update it. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. - * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that - * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames + * @txq: per-TID data TX queues; note that the last entry (%IEEE80211_NUM_TIDS) + * is used for non-data frames * @deflink: This holds the default link STA information, for non MLO STA all link * specific STA information is accessed through @deflink or through * link[0] which points to address of @deflink. For MLO Link STA @@ -2308,6 +2316,10 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) rcu_dereference_protected((sta)->link[link_id], \ lockdep_sta_mutex_held(sta)) +#define link_sta_dereference_check(sta, link_id) \ + rcu_dereference_check((sta)->link[link_id], \ + lockdep_sta_mutex_held(sta)) + #define for_each_sta_active_link(vif, sta, link_sta, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ if ((!(vif)->active_links || \ @@ -3787,6 +3799,13 @@ struct ieee80211_prep_tx_info { * should be within a CONFIG_MAC80211_DEBUGFS conditional. This * callback can sleep. * + * @link_sta_add_debugfs: Drivers can use this callback to add debugfs files + * when a link is added to a mac80211 station. This callback + * should be within a CPTCFG_MAC80211_DEBUGFS conditional. This + * callback can sleep. + * For non-MLO the callback will be called once for the deflink with the + * station's directory rather than a separate subdirectory. + * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating * in AP mode, this callback will not be called when the flag @@ -4257,6 +4276,10 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); + void (*link_sta_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); @@ -5691,7 +5714,7 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). 
* - * Drivers should use this function instead of netif_wake_queue. + * Drivers must use this function instead of netif_wake_queue. */ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); @@ -5700,7 +5723,7 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_stop_queue. */ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); @@ -5709,7 +5732,7 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_queue_stopped. * * Return: %true if the queue is stopped. %false otherwise. */ @@ -5720,7 +5743,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue); * ieee80211_stop_queues - stop all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_tx_stop_all_queues. */ void ieee80211_stop_queues(struct ieee80211_hw *hw); @@ -5728,7 +5751,7 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw); * ieee80211_wake_queues - wake all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). * - * Drivers should use this function instead of netif_wake_queue. + * Drivers must use this function instead of netif_tx_wake_all_queues. */ void ieee80211_wake_queues(struct ieee80211_hw *hw); @@ -6950,6 +6973,18 @@ static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, } /** + * ieee80211_handle_wake_tx_queue - mac80211 handler for wake_tx_queue callback + * + * @hw: pointer as obtained from wake_tx_queue() callback(). + * @txq: pointer as obtained from wake_tx_queue() callback(). + * + * Drivers can use this function for the mandatory mac80211 wake_tx_queue + * callback in struct ieee80211_ops. They should not call this function. + */ +void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq); + +/** * ieee80211_next_txq - get next tx queue to pull packets from * * @hw: pointer as obtained from ieee80211_alloc_hw() diff --git a/include/net/mac802154.h b/include/net/mac802154.h index bdac0ddbdcdb..4a3a9de9da73 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -111,9 +111,6 @@ struct ieee802154_hw { * promiscuous mode setting. * * @IEEE802154_HW_RX_OMIT_CKSUM: Indicates that receiver omits FCS. - * - * @IEEE802154_HW_RX_DROP_BAD_CKSUM: Indicates that receiver will not filter - * frames with bad checksum. */ enum ieee802154_hw_flags { IEEE802154_HW_TX_OMIT_CKSUM = BIT(0), @@ -123,7 +120,6 @@ enum ieee802154_hw_flags { IEEE802154_HW_AFILT = BIT(4), IEEE802154_HW_PROMISCUOUS = BIT(5), IEEE802154_HW_RX_OMIT_CKSUM = BIT(6), - IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(7), }; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ @@ -460,33 +456,6 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw); */ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi); -/** - * ieee802154_wake_queue - wake ieee802154 queue - * @hw: pointer as obtained from ieee802154_alloc_hw(). 
- * - * Tranceivers usually have either one transmit framebuffer or one framebuffer - * for both transmitting and receiving. Hence, the core currently only handles - * one frame at a time for each phy, which means we had to stop the queue to - * avoid new skb to come during the transmission. The queue then needs to be - * woken up after the operation. - * - * Drivers should use this function instead of netif_wake_queue. - */ -void ieee802154_wake_queue(struct ieee802154_hw *hw); - -/** - * ieee802154_stop_queue - stop ieee802154 queue - * @hw: pointer as obtained from ieee802154_alloc_hw(). - * - * Tranceivers usually have either one transmit framebuffer or one framebuffer - * for both transmitting and receiving. Hence, the core currently only handles - * one frame at a time for each phy, which means we need to tell upper layers to - * stop giving us new skbs while we are busy with the transmitted one. The queue - * must then be stopped before transmitting. - * - * Drivers should use this function instead of netif_stop_queue. - */ -void ieee802154_stop_queue(struct ieee802154_hw *hw); /** * ieee802154_xmit_complete - frame transmission complete diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h new file mode 100644 index 000000000000..28d0687bf7da --- /dev/null +++ b/include/net/mana/gdma.h @@ -0,0 +1,834 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _GDMA_H +#define _GDMA_H + +#include <linux/dma-mapping.h> +#include <linux/netdevice.h> + +#include "shm_channel.h" + +#define GDMA_STATUS_MORE_ENTRIES 0x00000105 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +enum gdma_request_type { + GDMA_VERIFY_VF_DRIVER_VERSION = 1, + GDMA_QUERY_MAX_RESOURCES = 2, + GDMA_LIST_DEVICES = 3, + GDMA_REGISTER_DEVICE = 4, + GDMA_DEREGISTER_DEVICE = 5, + GDMA_GENERATE_TEST_EQE = 10, + GDMA_CREATE_QUEUE = 12, + GDMA_DISABLE_QUEUE = 13, + GDMA_ALLOCATE_RESOURCE_RANGE = 22, + GDMA_DESTROY_RESOURCE_RANGE = 24, + GDMA_CREATE_DMA_REGION = 25, + GDMA_DMA_REGION_ADD_PAGES = 26, + GDMA_DESTROY_DMA_REGION = 27, + GDMA_CREATE_PD = 29, + GDMA_DESTROY_PD = 30, + GDMA_CREATE_MR = 31, + GDMA_DESTROY_MR = 32, +}; + +#define GDMA_RESOURCE_DOORBELL_PAGE 27 + +enum gdma_queue_type { + GDMA_INVALID_QUEUE, + GDMA_SQ, + GDMA_RQ, + GDMA_CQ, + GDMA_EQ, +}; + +enum gdma_work_request_flags { + GDMA_WR_NONE = 0, + GDMA_WR_OOB_IN_SGL = BIT(0), + GDMA_WR_PAD_BY_SGE0 = BIT(1), +}; + +enum gdma_eqe_type { + GDMA_EQE_COMPLETION = 3, + GDMA_EQE_TEST_EVENT = 64, + GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, + GDMA_EQE_HWC_INIT_DATA = 130, + GDMA_EQE_HWC_INIT_DONE = 131, +}; + +enum { + GDMA_DEVICE_NONE = 0, + GDMA_DEVICE_HWC = 1, + GDMA_DEVICE_MANA = 2, +}; + +typedef u64 gdma_obj_handle_t; + +struct gdma_resource { + /* Protect the bitmap */ + spinlock_t lock; + + /* The bitmap size in bits. */ + u32 size; + + /* The bitmap tracks the resources. 
*/ + unsigned long *map; +}; + +union gdma_doorbell_entry { + u64 as_uint64; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 31; + u64 arm : 1; + } cq; + + struct { + u64 id : 24; + u64 wqe_cnt : 8; + u64 tail_ptr : 32; + } rq; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 32; + } sq; + + struct { + u64 id : 16; + u64 reserved : 16; + u64 tail_ptr : 31; + u64 arm : 1; + } eq; +}; /* HW DATA */ + +struct gdma_msg_hdr { + u32 hdr_type; + u32 msg_type; + u16 msg_version; + u16 hwc_msg_id; + u32 msg_size; +}; /* HW DATA */ + +struct gdma_dev_id { + union { + struct { + u16 type; + u16 instance; + }; + + u32 as_uint32; + }; +}; /* HW DATA */ + +struct gdma_req_hdr { + struct gdma_msg_hdr req; + struct gdma_msg_hdr resp; /* The expected response */ + struct gdma_dev_id dev_id; + u32 activity_id; +}; /* HW DATA */ + +struct gdma_resp_hdr { + struct gdma_msg_hdr response; + struct gdma_dev_id dev_id; + u32 activity_id; + u32 status; + u32 reserved; +}; /* HW DATA */ + +struct gdma_general_req { + struct gdma_req_hdr hdr; +}; /* HW DATA */ + +#define GDMA_MESSAGE_V1 1 + +struct gdma_general_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define GDMA_STANDARD_HEADER_TYPE 0 + +static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, + u32 req_size, u32 resp_size) +{ + hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->req.msg_type = code; + hdr->req.msg_version = GDMA_MESSAGE_V1; + hdr->req.msg_size = req_size; + + hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->resp.msg_type = code; + hdr->resp.msg_version = GDMA_MESSAGE_V1; + hdr->resp.msg_size = resp_size; +} + +/* The 16-byte struct is part of the GDMA work queue entry (WQE). */ +struct gdma_sge { + u64 address; + u32 mem_key; + u32 size; +}; /* HW DATA */ + +struct gdma_wqe_request { + struct gdma_sge *sgl; + u32 num_sge; + + u32 inline_oob_size; + const void *inline_oob_data; + + u32 flags; + u32 client_data_unit; +}; + +enum gdma_page_type { + GDMA_PAGE_TYPE_4K, +}; + +#define GDMA_INVALID_DMA_REGION 0 + +struct gdma_mem_info { + struct device *dev; + + dma_addr_t dma_handle; + void *virt_addr; + u64 length; + + /* Allocated by the PF driver */ + gdma_obj_handle_t dma_region_handle; +}; + +#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 + +struct gdma_dev { + struct gdma_context *gdma_context; + + struct gdma_dev_id dev_id; + + u32 pdid; + u32 doorbell; + u32 gpa_mkey; + + /* GDMA driver specific pointer */ + void *driver_data; + + struct auxiliary_device *adev; +}; + +#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE + +#define GDMA_CQE_SIZE 64 +#define GDMA_EQE_SIZE 16 +#define GDMA_MAX_SQE_SIZE 512 +#define GDMA_MAX_RQE_SIZE 256 + +#define GDMA_COMP_DATA_SIZE 0x3C + +#define GDMA_EVENT_DATA_SIZE 0xC + +/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */ +#define GDMA_WQE_BU_SIZE 32 + +#define INVALID_PDID UINT_MAX +#define INVALID_DOORBELL UINT_MAX +#define INVALID_MEM_KEY UINT_MAX +#define INVALID_QUEUE_ID UINT_MAX +#define INVALID_PCI_MSIX_INDEX UINT_MAX + +struct gdma_comp { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + u32 wq_num; + bool is_sq; +}; + +struct gdma_event { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u8 type; +}; + +struct gdma_queue; + +struct mana_eq { + struct gdma_queue *eq; +}; + +typedef void gdma_eq_callback(void *context, struct gdma_queue *q, + struct gdma_event *e); + +typedef void gdma_cq_callback(void *context, struct gdma_queue *q); + +/* The 'head' is the producer index. 
For SQ/RQ, when the driver posts a WQE + * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the + * driver increases the 'head' in BUs rather than in bytes, and notifies + * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track + * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. + * + * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is + * processed, the driver increases the 'tail' to indicate that WQEs have + * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. + * + * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures + * that the EQ/CQ is big enough so they can't overflow, and the driver uses + * the owner bits mechanism to detect if the queue has become empty. + */ +struct gdma_queue { + struct gdma_dev *gdma_dev; + + enum gdma_queue_type type; + u32 id; + + struct gdma_mem_info mem_info; + + void *queue_mem_ptr; + u32 queue_size; + + bool monitor_avl_buf; + + u32 head; + u32 tail; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + bool disable_needed; + + gdma_eq_callback *callback; + void *context; + + unsigned int msix_index; + + u32 log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent; /* For CQ/EQ relationship */ + } cq; + }; +}; + +struct gdma_queue_spec { + enum gdma_queue_type type; + bool monitor_avl_buf; + unsigned int queue_size; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + gdma_eq_callback *callback; + void *context; + + unsigned long log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent_eq; + + } cq; + }; +}; + +struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; +}; + +struct gdma_context { + struct device *dev; + + /* Per-vPort max number of queues */ + unsigned int max_num_queues; + unsigned int max_num_msix; + unsigned int num_msix_usable; + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + + /* This maps a CQ index to the queue structure. 
*/ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; + + /* Protect eq_test_event and test_event_eq_id */ + struct mutex eq_test_event_mutex; + struct completion eq_test_event; + u32 test_event_eq_id; + + bool is_pf; + phys_addr_t bar0_pa; + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; + phys_addr_t phys_db_page_base; + u32 db_page_size; + int numa_node; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; + + /* Hardware communication channel (HWC) */ + struct gdma_dev hwc; + + /* Azure network adapter */ + struct gdma_dev mana; +}; + +#define MAX_NUM_GDMA_DEVICES 4 + +static inline bool mana_gd_is_mana(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_MANA; +} + +static inline bool mana_gd_is_hwc(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_HWC; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); +u32 mana_gd_wq_avail_space(struct gdma_queue *wq); + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + +void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); + +struct gdma_wqe { + u32 reserved :24; + u32 last_vbytes :8; + + union { + u32 flags; + + struct { + u32 num_sge :8; + u32 inline_oob_size_div4:3; + u32 client_oob_in_sgl :1; + u32 reserved1 :4; + u32 client_data_unit :14; + u32 reserved2 :2; + }; + }; +}; /* HW DATA */ + +#define INLINE_OOB_SMALL_SIZE 8 +#define INLINE_OOB_LARGE_SIZE 24 + +#define MAX_TX_WQE_SIZE 512 +#define MAX_RX_WQE_SIZE 256 + +#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \ + sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \ + sizeof(struct gdma_sge)) + +#define MAX_RX_WQE_SGL_ENTRIES ((GDMA_MAX_RQE_SIZE - \ + sizeof(struct gdma_sge)) / sizeof(struct gdma_sge)) + +struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + + union { + u32 as_uint32; + + struct { + u32 wq_num : 24; + u32 is_sq : 1; + u32 reserved : 4; + u32 owner_bits : 3; + }; + } cqe_info; +}; /* HW DATA */ + +#define GDMA_CQE_OWNER_BITS 3 + +#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) + +#define SET_ARM_BIT 1 + +#define GDMA_EQE_OWNER_BITS 3 + +union gdma_eqe_info { + u32 as_uint32; + + struct { + u32 type : 8; + u32 reserved1 : 8; + u32 client_id : 2; + u32 reserved2 : 11; + u32 owner_bits : 3; + }; +}; /* HW DATA */ + +#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) +#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) + +struct gdma_eqe { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u32 eqe_info; +}; /* HW DATA */ + +#define GDMA_REG_DB_PAGE_OFFSET 8 +#define GDMA_REG_DB_PAGE_SIZE 0x10 +#define GDMA_REG_SHM_OFFSET 0x18 + +#define GDMA_PF_REG_DB_PAGE_SIZE 0xD0 +#define GDMA_PF_REG_DB_PAGE_OFF 0xC8 +#define GDMA_PF_REG_SHM_OFF 0x70 + +#define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108 + +#define MANA_PF_DEVICE_ID 0x00B9 +#define MANA_VF_DEVICE_ID 0x00BA + +struct gdma_posted_wqe_info { + u32 wqe_size_in_bu; +}; + +/* GDMA_GENERATE_TEST_EQE */ +struct 
gdma_generate_test_event_req { + struct gdma_req_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_VERIFY_VF_DRIVER_VERSION */ +enum { + GDMA_PROTOCOL_V1 = 1, + GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, +}; + +#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) + +#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT + +#define GDMA_DRV_CAP_FLAGS2 0 + +#define GDMA_DRV_CAP_FLAGS3 0 + +#define GDMA_DRV_CAP_FLAGS4 0 + +struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; + + /* Gdma Driver Capability Flags */ + u64 gd_drv_cap_flags1; + u64 gd_drv_cap_flags2; + u64 gd_drv_cap_flags3; + u64 gd_drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; + u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ + u32 reserved; + u32 os_ver_major; + u32 os_ver_minor; + u32 os_ver_build; + u32 os_ver_platform; + u64 reserved_2; + u8 os_ver_str1[128]; + u8 os_ver_str2[128]; + u8 os_ver_str3[128]; + u8 os_ver_str4[128]; +}; /* HW DATA */ + +struct gdma_verify_ver_resp { + struct gdma_resp_hdr hdr; + u64 gdma_protocol_ver; + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; +}; /* HW DATA */ + +/* GDMA_QUERY_MAX_RESOURCES */ +struct gdma_query_max_resources_resp { + struct gdma_resp_hdr hdr; + u32 status; + u32 max_sq; + u32 max_rq; + u32 max_cq; + u32 max_eq; + u32 max_db; + u32 max_mst; + u32 max_cq_mod_ctx; + u32 max_mod_cq; + u32 max_msix; +}; /* HW DATA */ + +/* GDMA_LIST_DEVICES */ +struct gdma_list_devices_resp { + struct gdma_resp_hdr hdr; + u32 num_of_devs; + u32 reserved; + struct gdma_dev_id devs[64]; +}; /* HW DATA */ + +/* GDMA_REGISTER_DEVICE */ +struct gdma_register_device_resp { + struct gdma_resp_hdr hdr; + u32 pdid; + u32 gpa_mkey; + u32 db_id; +}; /* HW DATA */ + +struct gdma_allocate_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 alignment; + u32 allocated_resources; +}; + +struct gdma_allocate_resource_range_resp { + struct gdma_resp_hdr hdr; + u32 allocated_resources; +}; + +struct gdma_destroy_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 allocated_resources; +}; + +/* GDMA_CREATE_QUEUE */ +struct gdma_create_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 reserved1; + u32 pdid; + u32 doolbell_id; + gdma_obj_handle_t gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; + u32 eq_pci_msix_index; + u32 cq_mod_ctx_id; + u32 cq_parent_eq_id; + u8 rq_drop_on_overrun; + u8 rq_err_on_wqe_overflow; + u8 rq_chain_rec_wqes; + u8 sq_hw_db; + u32 reserved3; +}; /* HW DATA */ + +struct gdma_create_queue_resp { + struct gdma_resp_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_DISABLE_QUEUE */ +struct gdma_disable_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 queue_index; + u32 alloc_res_id_on_creation; +}; /* HW DATA */ + +enum atb_page_size { + ATB_PAGE_SIZE_4K, + ATB_PAGE_SIZE_8K, + ATB_PAGE_SIZE_16K, + ATB_PAGE_SIZE_32K, + ATB_PAGE_SIZE_64K, + ATB_PAGE_SIZE_128K, + ATB_PAGE_SIZE_256K, + ATB_PAGE_SIZE_512K, + ATB_PAGE_SIZE_1M, + ATB_PAGE_SIZE_2M, + ATB_PAGE_SIZE_MAX, +}; + +enum gdma_mr_access_flags { + GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), + GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), + GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2), + GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3), + GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4), +}; 
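Editor's note: the gdma_queue comment earlier in this file describes 'head' and 'tail' as free-running producer/consumer indices that SQs/RQs advance in 32-byte Basic Units. A short sketch of the available-space computation that comment implies is below; example_wq_avail_space() is an assumed illustrative helper (the header itself exports mana_gd_wq_avail_space() for this purpose).

/* Free space in an SQ/RQ, per the head/tail comment above. Both
 * indices only ever increase, so unsigned subtraction yields the
 * in-flight span in Basic Units even after the u32 counters wrap. */
static u32 example_wq_avail_space(const struct gdma_queue *wq)
{
	u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE;

	return wq->queue_size - used_space;
}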
+ +/* GDMA_CREATE_DMA_REGION */ +struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; + + /* The total size of the DMA region */ + u64 length; + + /* The offset in the first page */ + u32 offset_in_page; + + /* enum gdma_page_type */ + u32 gdma_page_type; + + /* The total number of pages */ + u32 page_count; + + /* If page_addr_list_len is smaller than page_count, + * the remaining page addresses will be added via the + * message GDMA_DMA_REGION_ADD_PAGES. + */ + u32 page_addr_list_len; + u64 page_addr_list[]; +}; /* HW DATA */ + +struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t dma_region_handle; +}; /* HW DATA */ + +/* GDMA_DMA_REGION_ADD_PAGES */ +struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; + + u32 page_addr_list_len; + u32 reserved3; + + u64 page_addr_list[]; +}; /* HW DATA */ + +/* GDMA_DESTROY_DMA_REGION */ +struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; +}; /* HW DATA */ + +enum gdma_pd_flags { + GDMA_PD_FLAG_INVALID = 0, +}; + +struct gdma_create_pd_req { + struct gdma_req_hdr hdr; + enum gdma_pd_flags flags; + u32 reserved; +};/* HW DATA */ + +struct gdma_create_pd_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t pd_handle; + u32 pd_id; + u32 reserved; +};/* HW DATA */ + +struct gdma_destroy_pd_req { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; +};/* HW DATA */ + +struct gdma_destory_pd_resp { + struct gdma_resp_hdr hdr; +};/* HW DATA */ + +enum gdma_mr_type { + /* Guest Virtual Address - MRs of this type allow access + * to memory mapped by PTEs associated with this MR using a virtual + * address that is set up in the MST + */ + GDMA_MR_TYPE_GVA = 2, +}; + +struct gdma_create_mr_params { + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + }; +}; + +struct gdma_create_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + u32 reserved_1; + + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + + }; + u32 reserved_2; +};/* HW DATA */ + +struct gdma_create_mr_response { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t mr_handle; + u32 lkey; + u32 rkey; +};/* HW DATA */ + +struct gdma_destroy_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t mr_handle; +};/* HW DATA */ + +struct gdma_destroy_mr_response { + struct gdma_resp_hdr hdr; +};/* HW DATA */ + +int mana_gd_verify_vf_version(struct pci_dev *pdev); + +int mana_gd_register_device(struct gdma_dev *gd); +int mana_gd_deregister_device(struct gdma_dev *gd); + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); +void mana_gd_free_res_map(struct gdma_resource *r); + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, + struct gdma_queue *queue); + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi); + +void mana_gd_free_memory(struct gdma_mem_info *gmi); + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + 
u32 resp_len, void *resp); + +int mana_gd_destroy_dma_region(struct gdma_context *gc, + gdma_obj_handle_t dma_region_handle); + +#endif /* _GDMA_H */ diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h new file mode 100644 index 000000000000..6a757a6e2732 --- /dev/null +++ b/include/net/mana/hw_channel.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _HW_CHANNEL_H +#define _HW_CHANNEL_H + +#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4 + +#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000 +#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000 + +#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1 + +#define HWC_INIT_DATA_CQID 1 +#define HWC_INIT_DATA_RQID 2 +#define HWC_INIT_DATA_SQID 3 +#define HWC_INIT_DATA_QUEUE_DEPTH 4 +#define HWC_INIT_DATA_MAX_REQUEST 5 +#define HWC_INIT_DATA_MAX_RESPONSE 6 +#define HWC_INIT_DATA_MAX_NUM_CQS 7 +#define HWC_INIT_DATA_PDID 8 +#define HWC_INIT_DATA_GPA_MKEY 9 +#define HWC_INIT_DATA_PF_DEST_RQ_ID 10 +#define HWC_INIT_DATA_PF_DEST_CQ_ID 11 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +union hwc_init_eq_id_db { + u32 as_uint32; + + struct { + u32 eq_id : 16; + u32 doorbell : 16; + }; +}; /* HW DATA */ + +union hwc_init_type_data { + u32 as_uint32; + + struct { + u32 value : 24; + u32 type : 8; + }; +}; /* HW DATA */ + +struct hwc_rx_oob { + u32 type : 6; + u32 eom : 1; + u32 som : 1; + u32 vendor_err : 8; + u32 reserved1 : 16; + + u32 src_virt_wq : 24; + u32 src_vfid : 8; + + u32 reserved2; + + union { + u32 wqe_addr_low; + u32 wqe_offset; + }; + + u32 wqe_addr_high; + + u32 client_data_unit : 14; + u32 reserved3 : 18; + + u32 tx_oob_data_size; + + u32 chunk_offset : 21; + u32 reserved4 : 11; +}; /* HW DATA */ + +struct hwc_tx_oob { + u32 reserved1; + + u32 reserved2; + + u32 vrq_id : 24; + u32 dest_vfid : 8; + + u32 vrcq_id : 24; + u32 reserved3 : 8; + + u32 vscq_id : 24; + u32 loopback : 1; + u32 lso_override: 1; + u32 dest_pf : 1; + u32 reserved4 : 5; + + u32 vsq_id : 24; + u32 reserved5 : 8; +}; /* HW DATA */ + +struct hwc_work_request { + void *buf_va; + void *buf_sge_addr; + u32 buf_len; + u32 msg_size; + + struct gdma_wqe_request wqe_req; + struct hwc_tx_oob tx_oob; + + struct gdma_sge sge; +}; + +/* hwc_dma_buf represents the array of in-flight WQEs. + * mem_info, also known as the GDMA mapped memory, is partitioned and used by + * the in-flight WQEs. + * The number of WQEs is determined by the number of in-flight messages.
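+ * (Illustratively, mem_info is carved into num_reqs message-sized slots, one + * hwc_work_request per in-flight message; the slot size is decided by the HWC + * setup code, not by this header.)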
+ */ +struct hwc_dma_buf { + struct gdma_mem_info mem_info; + + u32 gpa_mkey; + + u32 num_reqs; + struct hwc_work_request reqs[]; +}; + +typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob); + +typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob); + +struct hwc_cq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_cq; + struct gdma_queue *gdma_eq; + struct gdma_comp *comp_buf; + u16 queue_depth; + + hwc_rx_event_handler_t *rx_event_handler; + void *rx_event_ctx; + + hwc_tx_event_handler_t *tx_event_handler; + void *tx_event_ctx; +}; + +struct hwc_wq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_wq; + struct hwc_dma_buf *msg_buf; + u16 queue_depth; + + struct hwc_cq *hwc_cq; +}; + +struct hwc_caller_ctx { + struct completion comp_event; + void *output_buf; + u32 output_buflen; + + u32 error; /* Linux error code */ + u32 status_code; +}; + +struct hw_channel_context { + struct gdma_dev *gdma_dev; + struct device *dev; + + u16 num_inflight_msg; + u32 max_req_msg_size; + + u16 hwc_init_q_depth_max; + u32 hwc_init_max_req_msg_size; + u32 hwc_init_max_resp_msg_size; + + struct completion hwc_init_eqe_comp; + + struct hwc_wq *rxq; + struct hwc_wq *txq; + struct hwc_cq *cq; + + struct semaphore sema; + struct gdma_resource inflight_msg_res; + + u32 pf_dest_vrq_id; + u32 pf_dest_vrcq_id; + + struct hwc_caller_ctx *caller_ctx; +}; + +int mana_hwc_create_channel(struct gdma_context *gc); +void mana_hwc_destroy_channel(struct gdma_context *gc); + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp); + +#endif /* _HW_CHANNEL_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h new file mode 100644 index 000000000000..575ea36ce606 --- /dev/null +++ b/include/net/mana/mana.h @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _MANA_H +#define _MANA_H + +#include "gdma.h" +#include "hw_channel.h" + +/* Microsoft Azure Network Adapter (MANA)'s definitions + * + * Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. 
+ */ + +/* MANA protocol version */ +#define MANA_MAJOR_VERSION 0 +#define MANA_MINOR_VERSION 1 +#define MANA_MICRO_VERSION 1 + +typedef u64 mana_handle_t; +#define INVALID_MANA_HANDLE ((mana_handle_t)-1) + +enum TRI_STATE { + TRI_STATE_UNKNOWN = -1, + TRI_STATE_FALSE = 0, + TRI_STATE_TRUE = 1 +}; + +/* Number of entries for hardware indirection table must be a power of 2 */ +#define MANA_INDIRECT_TABLE_SIZE 64 +#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) + +/* The Toeplitz hash key's length in bytes: should be a multiple of 8 */ +#define MANA_HASH_KEY_SIZE 40 + +#define COMP_ENTRY_SIZE 64 + +#define ADAPTER_MTU_SIZE 1500 +#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) + +#define RX_BUFFERS_PER_QUEUE 512 + +#define MAX_SEND_BUFFERS_PER_QUEUE 256 + +#define EQ_SIZE (8 * PAGE_SIZE) +#define LOG2_EQ_THROTTLE 3 + +#define MAX_PORTS_IN_MANA_DEV 256 + +struct mana_stats_rx { + u64 packets; + u64 bytes; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_redirect; + struct u64_stats_sync syncp; +}; + +struct mana_stats_tx { + u64 packets; + u64 bytes; + u64 xdp_xmit; + struct u64_stats_sync syncp; +}; + +struct mana_txq { + struct gdma_queue *gdma_sq; + + union { + u32 gdma_txq_id; + struct { + u32 reserved1 : 10; + u32 vsq_frame : 14; + u32 reserved2 : 8; + }; + }; + + u16 vp_offset; + + struct net_device *ndev; + + /* The SKBs are sent to the HW and we are waiting for the CQEs. */ + struct sk_buff_head pending_skbs; + struct netdev_queue *net_txq; + + atomic_t pending_sends; + + struct mana_stats_tx stats; +}; + +/* skb data and frags dma mappings */ +struct mana_skb_head { + dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + + u32 size[MAX_SKB_FRAGS + 1]; +}; + +#define MANA_HEADROOM sizeof(struct mana_skb_head) + +enum mana_tx_pkt_format { + MANA_SHORT_PKT_FMT = 0, + MANA_LONG_PKT_FMT = 1, +}; + +struct mana_tx_short_oob { + u32 pkt_fmt : 2; + u32 is_outer_ipv4 : 1; + u32 is_outer_ipv6 : 1; + u32 comp_iphdr_csum : 1; + u32 comp_tcp_csum : 1; + u32 comp_udp_csum : 1; + u32 supress_txcqe_gen : 1; + u32 vcq_num : 24; + + u32 trans_off : 10; /* Transport header offset */ + u32 vsq_frame : 14; + u32 short_vp_offset : 8; +}; /* HW DATA */ + +struct mana_tx_long_oob { + u32 is_encap : 1; + u32 inner_is_ipv6 : 1; + u32 inner_tcp_opt : 1; + u32 inject_vlan_pri_tag : 1; + u32 reserved1 : 12; + u32 pcp : 3; /* 802.1Q */ + u32 dei : 1; /* 802.1Q */ + u32 vlan_id : 12; /* 802.1Q */ + + u32 inner_frame_offset : 10; + u32 inner_ip_rel_offset : 6; + u32 long_vp_offset : 12; + u32 reserved2 : 4; + + u32 reserved3; + u32 reserved4; +}; /* HW DATA */ + +struct mana_tx_oob { + struct mana_tx_short_oob s_oob; + struct mana_tx_long_oob l_oob; +}; /* HW DATA */ + +enum mana_cq_type { + MANA_CQ_TYPE_RX, + MANA_CQ_TYPE_TX, +}; + +enum mana_cqe_type { + CQE_INVALID = 0, + CQE_RX_OKAY = 1, + CQE_RX_COALESCED_4 = 2, + CQE_RX_OBJECT_FENCE = 3, + CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, +}; + +#define MANA_CQE_COMPLETION 1 + +struct mana_cqe_header { + u32 cqe_type : 6; + u32 client_type : 2; + u32 vendor_err : 24; +}; /* HW DATA */ + +/* NDIS HASH Types */ +#define NDIS_HASH_IPV4 BIT(0) +#define NDIS_HASH_TCP_IPV4 BIT(1) +#define NDIS_HASH_UDP_IPV4 BIT(2) +#define NDIS_HASH_IPV6 BIT(3) +#define NDIS_HASH_TCP_IPV6 BIT(4) +#define NDIS_HASH_UDP_IPV6
BIT(5) +#define NDIS_HASH_IPV6_EX BIT(6) +#define NDIS_HASH_TCP_IPV6_EX BIT(7) +#define NDIS_HASH_UDP_IPV6_EX BIT(8) + +#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) +#define MANA_HASH_L4 \ + (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ + NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) + +struct mana_rxcomp_perpkt_info { + u32 pkt_len : 16; + u32 reserved1 : 16; + u32 reserved2; + u32 pkt_hash; +}; /* HW DATA */ + +#define MANA_RXCOMP_OOB_NUM_PPI 4 + +/* Receive completion OOB */ +struct mana_rxcomp_oob { + struct mana_cqe_header cqe_hdr; + + u32 rx_vlan_id : 12; + u32 rx_vlantag_present : 1; + u32 rx_outer_iphdr_csum_succeed : 1; + u32 rx_outer_iphdr_csum_fail : 1; + u32 reserved1 : 1; + u32 rx_hashtype : 9; + u32 rx_iphdr_csum_succeed : 1; + u32 rx_iphdr_csum_fail : 1; + u32 rx_tcp_csum_succeed : 1; + u32 rx_tcp_csum_fail : 1; + u32 rx_udp_csum_succeed : 1; + u32 rx_udp_csum_fail : 1; + u32 reserved2 : 1; + + struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; + + u32 rx_wqe_offset; +}; /* HW DATA */ + +struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + u32 tx_data_offset; + + u32 tx_sgl_offset : 5; + u32 tx_wqe_offset : 27; + + u32 reserved[12]; +}; /* HW DATA */ + +struct mana_rxq; + +#define CQE_POLLING_BUFFER 512 + +struct mana_cq { + struct gdma_queue *gdma_cq; + + /* Cache the CQ id (used to verify that each CQE comes to the right CQ). */ + u32 gdma_id; + + /* Type of the CQ: TX or RX */ + enum mana_cq_type type; + + /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. + * It must be non-NULL if and only if type is MANA_CQ_TYPE_RX. + */ + struct mana_rxq *rxq; + + /* Pointer to the mana_txq that is pushing TX CQEs to the queue. + * It must be non-NULL if and only if type is MANA_CQ_TYPE_TX. + */ + struct mana_txq *txq; + + /* Buffer which the CQ handler can copy the CQEs into. */ + struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER]; + + /* NAPI data */ + struct napi_struct napi; + int work_done; + int budget; +}; + +struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; + + void *buf_va; + dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent as part of the work request. */ + u32 num_sge; + struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; + + /* Required to store the result of mana_gd_post_work_request. + * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the + * work queue when the WQE is consumed. + */ + struct gdma_posted_wqe_info wqe_inf; +}; + +struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ + u32 gdma_id; + + /* Index of RQ in the vPort, not gdma receive queue id */ + u32 rxq_idx; + + u32 datasize; + + mana_handle_t rxobj; + + struct mana_cq rx_cq; + + struct completion fence_event; + + struct net_device *ndev; + + /* Total number of receive buffers to be allocated */ + u32 num_rx_buf; + + u32 buf_index; + + struct mana_stats_rx stats; + + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; + struct page *xdp_save_page; + bool xdp_flush; + int xdp_rc; /* XDP redirect return code */ + + /* MUST BE THE LAST MEMBER: + * Each receive buffer has an associated mana_recv_buf_oob.
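+ * (rx_oobs[] is a flexible array member, so the enclosing mana_rxq must be + * allocated with room for all of its entries; illustratively, something like + * kzalloc(struct_size(rxq, rx_oobs, num_rx_buf), GFP_KERNEL).)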
+ */ + struct mana_recv_buf_oob rx_oobs[]; +}; + +struct mana_tx_qp { + struct mana_txq txq; + + struct mana_cq tx_cq; + + mana_handle_t tx_object; +}; + +struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; +}; + +struct mana_context { + struct gdma_dev *gdma_dev; + + u16 num_ports; + + struct mana_eq *eqs; + + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; +}; + +struct mana_port_context { + struct mana_context *ac; + struct net_device *ndev; + + u8 mac_addr[ETH_ALEN]; + + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; + bool tx_shortform_allowed; + u16 tx_vp_offset; + + struct mana_tx_qp *tx_qp; + + /* Indirection Table for RX & TX. The values are queue indexes */ + u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Indirection table containing RxObject Handles */ + mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Hash key used by the NIC */ + u8 hashkey[MANA_HASH_KEY_SIZE]; + + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + + struct bpf_prog *bpf_prog; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ + unsigned int max_queues; + unsigned int num_queues; + + mana_handle_t port_handle; + mana_handle_t pf_filter_handle; + + /* Mutex for sharing access to vport_use_count */ + struct mutex vport_mutex; + int vport_use_count; + + u16 port_idx; + + bool port_is_up; + bool port_st_save; /* Saved port state */ + + struct mana_ethtool_stats eth_stats; +}; + +netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); +int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); + +int mana_alloc_queues(struct net_device *ndev); +int mana_attach(struct net_device *ndev); +int mana_detach(struct net_device *ndev, bool from_close); + +int mana_probe(struct gdma_dev *gd, bool resuming); +void mana_remove(struct gdma_dev *gd, bool suspending); + +void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags); +u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len); +struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); +void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); +int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); + +extern const struct ethtool_ops mana_ethtool_ops; + +struct mana_obj_spec { + u32 queue_index; + u64 gdma_region; + u32 queue_size; + u32 attached_eq; + u32 modr_ctx_id; +}; + +enum mana_command_code { + MANA_QUERY_DEV_CONFIG = 0x20001, + MANA_QUERY_GF_STAT = 0x20002, + MANA_CONFIG_VPORT_TX = 0x20003, + MANA_CREATE_WQ_OBJ = 0x20004, + MANA_DESTROY_WQ_OBJ = 0x20005, + MANA_FENCE_RQ = 0x20006, + MANA_CONFIG_VPORT_RX = 0x20007, + MANA_QUERY_VPORT_CONFIG = 0x20008, + + /* Privileged commands for the PF mode */ + MANA_REGISTER_FILTER = 0x28000, + MANA_DEREGISTER_FILTER = 0x28001, + MANA_REGISTER_HW_PORT = 0x28003, + MANA_DEREGISTER_HW_PORT = 0x28004, +}; + +/* Query Device Configuration */ +struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + + /* MANA Nic Driver Capability flags */ + u64 mn_drv_cap_flags1; + u64 mn_drv_cap_flags2; + u64 mn_drv_cap_flags3; + u64 mn_drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; + u32 proto_micro_ver; + + u32 reserved; +}; /* HW DATA */ + +struct mana_query_device_cfg_resp { + struct gdma_resp_hdr hdr; + + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + 
u64 pf_cap_flags4; + + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; +}; /* HW DATA */ + +/* Query vPort Configuration */ +struct mana_query_vport_cfg_req { + struct gdma_req_hdr hdr; + u32 vport_index; +}; /* HW DATA */ + +struct mana_query_vport_cfg_resp { + struct gdma_resp_hdr hdr; + u32 max_num_sq; + u32 max_num_rq; + u32 num_indirection_ent; + u32 reserved1; + u8 mac_addr[6]; + u8 reserved2[2]; + mana_handle_t vport; +}; /* HW DATA */ + +/* Configure vPort */ +struct mana_config_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 pdid; + u32 doorbell_pageid; +}; /* HW DATA */ + +struct mana_config_vport_resp { + struct gdma_resp_hdr hdr; + u16 tx_vport_offset; + u8 short_form_allowed; + u8 reserved; +}; /* HW DATA */ + +/* Create WQ Object */ +struct mana_create_wqobj_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 wq_type; + u32 reserved; + u64 wq_gdma_region; + u64 cq_gdma_region; + u32 wq_size; + u32 cq_size; + u32 cq_moderation_ctx_id; + u32 cq_parent_qid; +}; /* HW DATA */ + +struct mana_create_wqobj_resp { + struct gdma_resp_hdr hdr; + u32 wq_id; + u32 cq_id; + mana_handle_t wq_obj; +}; /* HW DATA */ + +/* Destroy WQ Object */ +struct mana_destroy_wqobj_req { + struct gdma_req_hdr hdr; + u32 wq_type; + u32 reserved; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_destroy_wqobj_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Fence RQ */ +struct mana_fence_rq_req { + struct gdma_req_hdr hdr; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Configure vPort Rx Steering */ +struct mana_cfg_rx_steer_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; + u16 indir_tab_offset; + u32 rx_enable; + u32 rss_enable; + u8 update_default_rxobj; + u8 update_hashkey; + u8 update_indir_tab; + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; +}; /* HW DATA */ + +struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Register HW vPort */ +struct mana_register_hw_vport_req { + struct gdma_req_hdr hdr; + u16 attached_gfid; + u8 is_pf_default_vport; + u8 reserved1; + u8 allow_all_ether_types; + u8 reserved2; + u8 reserved3; + u8 reserved4; +}; /* HW DATA */ + +struct mana_register_hw_vport_resp { + struct gdma_resp_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +/* Deregister HW vPort */ +struct mana_deregister_hw_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +struct mana_deregister_hw_vport_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Register filter */ +struct mana_register_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u8 mac_addr[6]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 reserved4; + u16 reserved5; + u32 reserved6; + u32 reserved7; + u32 reserved8; +}; /* HW DATA */ + +struct mana_register_filter_resp { + struct gdma_resp_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +/* Deregister filter */ +struct mana_deregister_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +struct mana_deregister_filter_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define MANA_MAX_NUM_QUEUES 64 + +#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + +struct mana_tx_package { + struct gdma_wqe_request wqe_req; + struct gdma_sge sgl_array[5]; + struct gdma_sge *sgl_ptr; + + struct mana_tx_oob tx_oob; + + struct 
gdma_posted_wqe_info wqe_info; +}; + +int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj); + +void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj); + +int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id); +void mana_uncfg_vport(struct mana_port_context *apc); +#endif /* _MANA_H */ diff --git a/include/net/mana/mana_auxiliary.h b/include/net/mana/mana_auxiliary.h new file mode 100644 index 000000000000..373d59756846 --- /dev/null +++ b/include/net/mana/mana_auxiliary.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2022, Microsoft Corporation. */ + +#include "mana.h" +#include <linux/auxiliary_bus.h> + +struct mana_adev { + struct auxiliary_device adev; + struct gdma_dev *mdev; +}; diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h new file mode 100644 index 000000000000..5199b41497ff --- /dev/null +++ b/include/net/mana/shm_channel.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _SHM_CHANNEL_H +#define _SHM_CHANNEL_H + +struct shm_channel { + struct device *dev; + void __iomem *base; +}; + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base); + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index); + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf); + +#endif /* _SHM_CHANNEL_H */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 8c3587d5c308..78beaa765c73 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -92,7 +92,9 @@ struct net { struct ns_common ns; struct ref_tracker_dir refcnt_tracker; - + struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not + * refcounted against netns + */ struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -320,19 +322,31 @@ static inline int check_net(const struct net *net) #endif -static inline void netns_tracker_alloc(struct net *net, - netns_tracker *tracker, gfp_t gfp) +static inline void __netns_tracker_alloc(struct net *net, + netns_tracker *tracker, + bool refcounted, + gfp_t gfp) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); + ref_tracker_alloc(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, + tracker, gfp); #endif } -static inline void netns_tracker_free(struct net *net, - netns_tracker *tracker) +static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, + gfp_t gfp) +{ + __netns_tracker_alloc(net, tracker, true, gfp); +} + +static inline void __netns_tracker_free(struct net *net, + netns_tracker *tracker, + bool refcounted) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_free(&net->refcnt_tracker, tracker); + ref_tracker_free(refcounted ? 
&net->refcnt_tracker : + &net->notrefcnt_tracker, tracker); #endif } @@ -346,7 +360,7 @@ static inline struct net *get_net_track(struct net *net, static inline void put_net_track(struct net *net, netns_tracker *tracker) { - netns_tracker_free(net, tracker); + __netns_tracker_free(net, tracker, true); put_net(net); } diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 9939c366f720..f30b1694b690 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -115,6 +115,11 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags); +int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, u16 proto); +int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family, + u8 proto, bool nat, struct nf_conntrack_helper **hp); + void nf_ct_helper_destroy(struct nf_conn *ct); static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cdb7db9b0e25..e69ce23566ea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -24,6 +24,7 @@ struct module; enum { NFT_PKTINFO_L4PROTO = (1 << 0), NFT_PKTINFO_INNER = (1 << 1), + NFT_PKTINFO_INNER_FULL = (1 << 2), }; struct nft_pktinfo { @@ -32,8 +33,8 @@ struct nft_pktinfo { u8 flags; u8 tprot; u16 fragoff; - unsigned int thoff; - unsigned int inneroff; + u16 thoff; + u16 inneroff; }; static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) @@ -375,10 +376,14 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) return (void *)expr->data; } +struct nft_expr_info; + +int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, + struct nft_expr_info *info); int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); bool nft_expr_reduce_bitwise(struct nft_regs_track *track, const struct nft_expr *expr); @@ -864,6 +869,7 @@ struct nft_expr_type { const struct nlattr * const tb[]); void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; + const struct nft_expr_ops *inner_ops; struct list_head list; const char *name; struct module *owner; @@ -921,7 +927,8 @@ struct nft_expr_ops { void (*destroy_clone)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, + bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 1223af68cd9a..3e825381ac5c 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -18,6 +18,8 @@ extern struct nft_expr_type nft_meta_type; extern struct nft_expr_type nft_rt_type; extern struct nft_expr_type nft_exthdr_type; extern struct nft_expr_type nft_last_type; +extern struct nft_expr_type nft_objref_type; +extern struct nft_expr_type nft_inner_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; @@ -66,16 +68,6 @@ struct nft_payload { u8 dreg; }; -struct nft_payload_set { - 
enum nft_payload_bases base:8; - u8 offset; - u8 len; - u8 sreg; - u8 csum_type; - u8 csum_offset; - u8 csum_flags; -}; - extern const struct nft_expr_ops nft_payload_fast_ops; extern const struct nft_expr_ops nft_bitwise_fast_ops; @@ -148,4 +140,28 @@ void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); + +enum { + NFT_PAYLOAD_CTX_INNER_TUN = (1 << 0), + NFT_PAYLOAD_CTX_INNER_LL = (1 << 1), + NFT_PAYLOAD_CTX_INNER_NH = (1 << 2), + NFT_PAYLOAD_CTX_INNER_TH = (1 << 3), +}; + +struct nft_inner_tun_ctx { + u16 type; + u16 inner_tunoff; + u16 inner_lloff; + u16 inner_nhoff; + u16 inner_thoff; + __be16 llproto; + u8 l4proto; + u8 flags; +}; + +int nft_payload_inner_offset(const struct nft_pktinfo *pkt); +void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *ctx); + #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index c4a6147b0ef8..112708f7a6b4 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -35,6 +35,8 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; else if (len < thoff) return -1; + else if (thoff < sizeof(*iph)) + return -1; pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; @@ -69,6 +71,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) return -1; } else if (len < thoff) { goto inhdr_error; + } else if (thoff < sizeof(*iph)) { + return -1; } pkt->flags = NFT_PKTINFO_L4PROTO; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index ec7eaeaf4f04..467d59b9e533 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -13,7 +13,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) unsigned short frag_off; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) { + if (protohdr < 0 || thoff > U16_MAX) { nft_set_pktinfo_unspec(pkt); return; } @@ -47,7 +47,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) + if (protohdr < 0 || thoff > U16_MAX) return -1; pkt->flags = NFT_PKTINFO_L4PROTO; @@ -93,7 +93,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) } protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) + if (protohdr < 0 || thoff > U16_MAX) goto inhdr_error; pkt->flags = NFT_PKTINFO_L4PROTO; diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index eed099eae672..167640b843ef 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -18,7 +18,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } -int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, diff --git a/include/net/netfilter/nft_meta.h 
b/include/net/netfilter/nft_meta.h index 9b51cc67de54..ba1238f12a48 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -24,10 +24,10 @@ int nft_meta_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]); int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -46,4 +46,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, bool nft_meta_get_reduce(struct nft_regs_track *track, const struct nft_expr *expr); + +struct nft_inner_tun_ctx; +void nft_meta_inner_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx); + #endif diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 56b123a42220..6d9ba62efd75 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -22,7 +22,8 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_reject_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); int nft_reject_icmp_code(u8 code); int nft_reject_icmpv6_code(u8 code); diff --git a/include/net/netlink.h b/include/net/netlink.h index 6bfa972f2fbf..6e1e670e06bc 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -906,6 +906,17 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) } /** + * nlmsg_seq - return the seq number of netlink message + * @nlh: netlink message header + * + * Returns 0 if netlink message is NULL + */ +static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) +{ + return nlh ? nlh->nlmsg_seq : 0; +} + +/** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header @@ -938,6 +949,27 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se } /** + * nlmsg_append - Add more data to a nlmsg in a skb + * @skb: socket buffer to store message in + * @size: length of message payload + * + * Append data to an existing nlmsg, used when constructing a message + * with multiple fixed-format headers (which is rare). + * Returns NULL if the tailroom of the skb is insufficient to store + * the extra payload. 
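+ * + * Illustrative use, with a hypothetical fixed-format struct my_hdr appended + * right after the header reserved by nlmsg_put(): + * + *	struct my_hdr *hdr = nlmsg_append(skb, sizeof(*hdr)); + * + *	if (!hdr) + *		return -EMSGSIZE;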
+ */ +static inline void *nlmsg_append(struct sk_buff *skb, u32 size) +{ + if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size))) + return NULL; + + if (NLMSG_ALIGN(size) - size) + memset(skb_tail_pointer(skb) + size, 0, + NLMSG_ALIGN(size) - size); + return __skb_put(skb, NLMSG_ALIGN(size)); +} + +/** * nlmsg_put_answer - Add a new callback based netlink message to an skb * @skb: socket buffer to store message in * @cb: netlink callback diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1b8004679445..db762e35aca9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -43,6 +43,7 @@ struct tcp_fastopen_context; struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; + struct udp_table *udp_table; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -183,6 +184,11 @@ struct netns_ipv4 { unsigned long tfo_active_disable_stamp; u32 tcp_challenge_timestamp; u32 tcp_challenge_count; + u8 sysctl_tcp_plb_enabled; + u8 sysctl_tcp_plb_idle_rehash_rounds; + u8 sysctl_tcp_plb_rehash_rounds; + u8 sysctl_tcp_plb_suspend_rto_sec; + int sysctl_tcp_plb_cong_thresh; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; @@ -202,6 +208,8 @@ struct netns_ipv4 { atomic_t dev_addr_genid; + unsigned int sysctl_udp_child_hash_entries; + #ifdef CONFIG_SYSCTL unsigned long *sysctl_local_reserved_ports; int sysctl_ip_prot_sock; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bf8bb3357825..d9076a7a430c 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -186,8 +186,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[], struct netlink_ext_ack *extack); -int rtnl_delete_link(struct net_device *dev); -int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); +int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh); +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, + u32 portid, const struct nlmsghdr *nlh); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 0eaf8650e3b2..60f6641290c3 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -35,8 +35,7 @@ struct sctp_ulpq { }; /* Prototypes. 
*/ -struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, - struct sctp_association *); +void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc); void sctp_ulpq_flush(struct sctp_ulpq *ulpq); void sctp_ulpq_free(struct sctp_ulpq *); diff --git a/include/net/sock.h b/include/net/sock.h index e0517ecc6531..6d207e7c4ad0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1908,7 +1908,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc, *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags }; } -int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, +int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index efc9085c6892..6ec140b0a61b 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -16,6 +16,7 @@ struct sock_reuseport { u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ u16 num_closed_socks; /* closed elements in socks */ + u16 incoming_cpu; /* The last synq overflow event timestamp of this * reuse->socks[] group. */ @@ -58,5 +59,6 @@ static inline bool reuseport_has_conns(struct sock *sk) } void reuseport_has_conns_set(struct sock *sk); +void reuseport_update_incoming_cpu(struct sock *sk, int val); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0bc3..ca0312b78294 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -248,6 +248,7 @@ struct switchdev_notifier_fdb_info { u16 vid; u8 added_by_user:1, is_local:1, + locked:1, offloaded:1; }; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 8250d6f0a462..b24ea2d9400b 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -10,6 +10,7 @@ #include <net/netfilter/nf_conntrack_labels.h> struct tcf_ct_params { + struct nf_conntrack_helper *helper; struct nf_conn *tmpl; u16 zone; diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index dc1079f28e13..9649600fb3dc 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -95,12 +95,41 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +static inline u16 tcf_skbedit_rx_queue_mapping(const struct tc_action *a) +{ + u16 rx_queue; + + rcu_read_lock(); + rx_queue = rcu_dereference(to_skbedit(a)->params)->queue_mapping; + rcu_read_unlock(); + + return rx_queue; +} + /* Return true iff action is queue_mapping */ static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) { return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); } +/* Return true if action is on ingress traffic */ +static inline bool is_tcf_skbedit_ingress(u32 flags) +{ + return flags & TCA_ACT_FLAGS_AT_INGRESS; +} + +static inline bool is_tcf_skbedit_tx_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_queue_mapping(a) && + !is_tcf_skbedit_ingress(a->tcfa_flags); +} + +static inline bool is_tcf_skbedit_rx_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_queue_mapping(a) && + is_tcf_skbedit_ingress(a->tcfa_flags); +} + /* Return true iff action is inheritdsfield */ static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 14d45661a84d..6b814e788f00 100644 --- a/include/net/tcp.h +++ 
b/include/net/tcp.h @@ -2140,6 +2140,34 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, extern void tcp_rack_reo_timeout(struct sock *sk); extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); +/* tcp_plb.c */ + +/* + * Scaling factor for fractions in PLB. For example, tcp_plb_update_state + * expects cong_ratio which represents fraction of traffic that experienced + * congestion over a single RTT. In order to avoid floating point operations, + * this fraction should be mapped to (1 << TCP_PLB_SCALE) and passed in. + */ +#define TCP_PLB_SCALE 8 + +/* State for PLB (Protective Load Balancing) for a single TCP connection. */ +struct tcp_plb_state { + u8 consec_cong_rounds:5, /* consecutive congested rounds */ + unused:3; + u32 pause_until; /* jiffies32 when PLB can resume rerouting */ +}; + +static inline void tcp_plb_init(const struct sock *sk, + struct tcp_plb_state *plb) +{ + plb->consec_cong_rounds = 0; + plb->pause_until = 0; +} +void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, + const int cong_ratio); +void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb); +void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb); + /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index b830463e3dff..d27b1caf3753 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -58,8 +58,6 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006) -void inet6_destroy_sock(struct sock *sk); - #define IPV6_SEQ_DGRAM_HEADER \ " sl " \ "local_address " \ diff --git a/include/net/udp.h b/include/net/udp.h index fee053bcd17c..de4b528522bb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); +static inline void udp_lib_init_sock(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + skb_queue_head_init(&up->reader_queue); + up->forward_threshold = sk->sk_rcvbuf >> 2; + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); +} + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 72394f441dad..0ca9b7a11baf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -68,8 +68,8 @@ typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, - struct sk_buff *skb, - unsigned int udp_offset); + struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d20bf4aa0204..b9886d1df825 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -17,7 +17,9 @@ * Declare tracing information enums and their string mappings for display. 
*/ #define rxrpc_skb_traces \ + EM(rxrpc_skb_ack, "ACK") \ EM(rxrpc_skb_cleaned, "CLN") \ + EM(rxrpc_skb_cloned_jumbo, "CLJ") \ EM(rxrpc_skb_freed, "FRE") \ EM(rxrpc_skb_got, "GOT") \ EM(rxrpc_skb_lost, "*L*") \ @@ -34,7 +36,8 @@ EM(rxrpc_local_new, "NEW") \ EM(rxrpc_local_processing, "PRO") \ EM(rxrpc_local_put, "PUT") \ - E_(rxrpc_local_queued, "QUE") + EM(rxrpc_local_queued, "QUE") \ + E_(rxrpc_local_tx_ack, "TAK") #define rxrpc_peer_traces \ EM(rxrpc_peer_got, "GOT") \ @@ -73,6 +76,7 @@ EM(rxrpc_call_got, "GOT") \ EM(rxrpc_call_got_kernel, "Gke") \ EM(rxrpc_call_got_timer, "GTM") \ + EM(rxrpc_call_got_tx, "Gtx") \ EM(rxrpc_call_got_userid, "Gus") \ EM(rxrpc_call_new_client, "NWc") \ EM(rxrpc_call_new_service, "NWs") \ @@ -81,20 +85,22 @@ EM(rxrpc_call_put_noqueue, "PnQ") \ EM(rxrpc_call_put_notimer, "PnT") \ EM(rxrpc_call_put_timer, "PTM") \ + EM(rxrpc_call_put_tx, "Ptx") \ EM(rxrpc_call_put_userid, "Pus") \ EM(rxrpc_call_queued, "QUE") \ EM(rxrpc_call_queued_ref, "QUR") \ EM(rxrpc_call_release, "RLS") \ E_(rxrpc_call_seen, "SEE") -#define rxrpc_transmit_traces \ - EM(rxrpc_transmit_await_reply, "AWR") \ - EM(rxrpc_transmit_end, "END") \ - EM(rxrpc_transmit_queue, "QUE") \ - EM(rxrpc_transmit_queue_last, "QLS") \ - EM(rxrpc_transmit_rotate, "ROT") \ - EM(rxrpc_transmit_rotate_last, "RLS") \ - E_(rxrpc_transmit_wait, "WAI") +#define rxrpc_txqueue_traces \ + EM(rxrpc_txqueue_await_reply, "AWR") \ + EM(rxrpc_txqueue_dequeue, "DEQ") \ + EM(rxrpc_txqueue_end, "END") \ + EM(rxrpc_txqueue_queue, "QUE") \ + EM(rxrpc_txqueue_queue_last, "QLS") \ + EM(rxrpc_txqueue_rotate, "ROT") \ + EM(rxrpc_txqueue_rotate_last, "RLS") \ + E_(rxrpc_txqueue_wait, "WAI") #define rxrpc_receive_traces \ EM(rxrpc_receive_end, "END") \ @@ -102,7 +108,12 @@ EM(rxrpc_receive_incoming, "INC") \ EM(rxrpc_receive_queue, "QUE") \ EM(rxrpc_receive_queue_last, "QLS") \ - E_(rxrpc_receive_rotate, "ROT") + EM(rxrpc_receive_queue_oos, "QUO") \ + EM(rxrpc_receive_queue_oos_last, "QOL") \ + EM(rxrpc_receive_oos, "OOS") \ + EM(rxrpc_receive_oos_last, "OSL") \ + EM(rxrpc_receive_rotate, "ROT") \ + E_(rxrpc_receive_rotate_last, "RLS") #define rxrpc_recvmsg_traces \ EM(rxrpc_recvmsg_cont, "CONT") \ @@ -133,7 +144,6 @@ #define rxrpc_timer_traces \ EM(rxrpc_timer_begin, "Begin ") \ - EM(rxrpc_timer_expired, "*EXPR*") \ EM(rxrpc_timer_exp_ack, "ExpAck") \ EM(rxrpc_timer_exp_hard, "ExpHrd") \ EM(rxrpc_timer_exp_idle, "ExpIdl") \ @@ -158,6 +168,7 @@ #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ + EM(rxrpc_propose_ack_input_data_hole, "DataInH") \ EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \ EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ @@ -170,11 +181,6 @@ EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \ E_(rxrpc_propose_ack_terminal_ack, "ClTerm ") -#define rxrpc_propose_ack_outcomes \ - EM(rxrpc_propose_ack_subsume, " Subsume") \ - EM(rxrpc_propose_ack_update, " Update") \ - E_(rxrpc_propose_ack_use, " New") - #define rxrpc_congest_modes \ EM(RXRPC_CALL_CONGEST_AVOIDANCE, "CongAvoid") \ EM(RXRPC_CALL_FAST_RETRANSMIT, "FastReTx ") \ @@ -187,6 +193,7 @@ EM(rxrpc_cong_new_low_nack, " NewLowN") \ EM(rxrpc_cong_no_change, " -") \ EM(rxrpc_cong_progress, " Progres") \ + EM(rxrpc_cong_idle_reset, " IdleRes") \ EM(rxrpc_cong_retransmit_again, " ReTxAgn") \ EM(rxrpc_cong_rtt_window_end, " RttWinE") \ E_(rxrpc_cong_saw_nack, " SawNack") @@ -242,6 +249,33 @@ EM(rxrpc_tx_point_version_keepalive, 
"VerKeepalive") \ E_(rxrpc_tx_point_version_reply, "VerReply") +#define rxrpc_req_ack_traces \ + EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ + EM(rxrpc_reqack_already_on, "ALREADY-ON") \ + EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ + EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ + EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ + EM(rxrpc_reqack_retrans, "RETRANS ") \ + EM(rxrpc_reqack_slow_start, "SLOW-START") \ + E_(rxrpc_reqack_small_txwin, "SMALL-TXWN") +/* ---- Must update size of stat_why_req_ack[] if more are added! */ + +#define rxrpc_txbuf_traces \ + EM(rxrpc_txbuf_alloc_ack, "ALLOC ACK ") \ + EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \ + EM(rxrpc_txbuf_free, "FREE ") \ + EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \ + EM(rxrpc_txbuf_get_trans, "GET TRANS ") \ + EM(rxrpc_txbuf_get_retrans, "GET RETRANS") \ + EM(rxrpc_txbuf_put_ack_tx, "PUT ACK TX ") \ + EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \ + EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \ + EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ + EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ + EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \ + EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ + E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") + /* * Generate enums for tracing information. */ @@ -263,12 +297,14 @@ enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte); enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte); +enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte); enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte); enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); -enum rxrpc_transmit_trace { rxrpc_transmit_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); +enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); +enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ @@ -286,16 +322,17 @@ rxrpc_congest_changes; rxrpc_congest_modes; rxrpc_conn_traces; rxrpc_local_traces; -rxrpc_propose_ack_outcomes; rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; +rxrpc_req_ack_traces; rxrpc_rtt_rx_traces; rxrpc_rtt_tx_traces; rxrpc_skb_traces; rxrpc_timer_traces; -rxrpc_transmit_traces; rxrpc_tx_points; +rxrpc_txbuf_traces; +rxrpc_txqueue_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -449,14 +486,13 @@ TRACE_EVENT(rxrpc_call, TRACE_EVENT(rxrpc_skb, TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, - int usage, int mod_count, u8 flags, const void *where), + int usage, int mod_count, const void *where), - TP_ARGS(skb, op, usage, mod_count, flags, where), + TP_ARGS(skb, op, usage, mod_count, where), TP_STRUCT__entry( __field(struct sk_buff *, skb ) __field(enum rxrpc_skb_trace, op ) - __field(u8, flags ) __field(int, usage ) __field(int, mod_count ) __field(const void *, where ) @@ -464,16 +500,14 @@ TRACE_EVENT(rxrpc_skb, TP_fast_assign( __entry->skb = skb; - __entry->flags = flags; __entry->op = op; __entry->usage = usage; __entry->mod_count = mod_count; __entry->where = where; ), - TP_printk("s=%p %cx %s u=%d m=%d p=%pSR", + TP_printk("s=%p Rx %s u=%d m=%d p=%pSR", __entry->skb, - __entry->flags & RXRPC_SKB_TX_BUFFER ? 
'T' : 'R', __print_symbolic(__entry->op, rxrpc_skb_traces), __entry->usage, __entry->mod_count, @@ -578,15 +612,16 @@ TRACE_EVENT(rxrpc_call_complete, __entry->abort_code) ); -TRACE_EVENT(rxrpc_transmit, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), +TRACE_EVENT(rxrpc_txqueue, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_txqueue_trace why), TP_ARGS(call, why), TP_STRUCT__entry( __field(unsigned int, call ) - __field(enum rxrpc_transmit_trace, why ) - __field(rxrpc_seq_t, tx_hard_ack ) + __field(enum rxrpc_txqueue_trace, why ) + __field(rxrpc_seq_t, acks_hard_ack ) + __field(rxrpc_seq_t, tx_bottom ) __field(rxrpc_seq_t, tx_top ) __field(int, tx_winsize ) ), @@ -594,31 +629,33 @@ TRACE_EVENT(rxrpc_transmit, TP_fast_assign( __entry->call = call->debug_id; __entry->why = why; - __entry->tx_hard_ack = call->tx_hard_ack; + __entry->acks_hard_ack = call->acks_hard_ack; + __entry->tx_bottom = call->tx_bottom; __entry->tx_top = call->tx_top; __entry->tx_winsize = call->tx_winsize; ), - TP_printk("c=%08x %s f=%08x n=%u/%u", + TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u", __entry->call, - __print_symbolic(__entry->why, rxrpc_transmit_traces), - __entry->tx_hard_ack + 1, - __entry->tx_top - __entry->tx_hard_ack, + __print_symbolic(__entry->why, rxrpc_txqueue_traces), + __entry->tx_bottom, + __entry->acks_hard_ack, + __entry->tx_top - __entry->tx_bottom, + __entry->tx_top - __entry->acks_hard_ack, __entry->tx_winsize) ); TRACE_EVENT(rxrpc_rx_data, TP_PROTO(unsigned int call, rxrpc_seq_t seq, - rxrpc_serial_t serial, u8 flags, u8 anno), + rxrpc_serial_t serial, u8 flags), - TP_ARGS(call, seq, serial, flags, anno), + TP_ARGS(call, seq, serial, flags), TP_STRUCT__entry( __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) __field(u8, flags ) - __field(u8, anno ) ), TP_fast_assign( @@ -626,15 +663,13 @@ TRACE_EVENT(rxrpc_rx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; - __entry->anno = anno; ), - TP_printk("c=%08x DATA %08x q=%08x fl=%02x a=%02x", + TP_printk("c=%08x DATA %08x q=%08x fl=%02x", __entry->call, __entry->serial, __entry->seq, - __entry->flags, - __entry->anno) + __entry->flags) ); TRACE_EVENT(rxrpc_rx_ack, @@ -841,8 +876,7 @@ TRACE_EVENT(rxrpc_receive, __field(enum rxrpc_receive_trace, why ) __field(rxrpc_serial_t, serial ) __field(rxrpc_seq_t, seq ) - __field(rxrpc_seq_t, hard_ack ) - __field(rxrpc_seq_t, top ) + __field(u64, window ) ), TP_fast_assign( @@ -850,8 +884,7 @@ TRACE_EVENT(rxrpc_receive, __entry->why = why; __entry->serial = serial; __entry->seq = seq; - __entry->hard_ack = call->rx_hard_ack; - __entry->top = call->rx_top; + __entry->window = atomic64_read(&call->ackr_window); ), TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x", @@ -859,12 +892,36 @@ TRACE_EVENT(rxrpc_receive, __print_symbolic(__entry->why, rxrpc_receive_traces), __entry->serial, __entry->seq, - __entry->hard_ack, - __entry->top) + lower_32_bits(__entry->window), + upper_32_bits(__entry->window)) ); TRACE_EVENT(rxrpc_recvmsg, TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + int ret), + + TP_ARGS(call, why, ret), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(enum rxrpc_recvmsg_trace, why ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call ? 
call->debug_id : 0; + __entry->why = why; + __entry->ret = ret; + ), + + TP_printk("c=%08x %s ret=%d", + __entry->call, + __print_symbolic(__entry->why, rxrpc_recvmsg_traces), + __entry->ret) + ); + +TRACE_EVENT(rxrpc_recvdata, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, rxrpc_seq_t seq, unsigned int offset, unsigned int len, int ret), @@ -986,7 +1043,7 @@ TRACE_EVENT(rxrpc_timer, __entry->call = call->debug_id; __entry->why = why; __entry->now = now; - __entry->ack_at = call->ack_at; + __entry->ack_at = call->delay_ack_at; __entry->ack_lost_at = call->ack_lost_at; __entry->resend_at = call->resend_at; __entry->expect_rx_by = call->expect_rx_by; @@ -1007,6 +1064,47 @@ TRACE_EVENT(rxrpc_timer, __entry->timer - __entry->now) ); +TRACE_EVENT(rxrpc_timer_expired, + TP_PROTO(struct rxrpc_call *call, unsigned long now), + + TP_ARGS(call, now), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(long, now ) + __field(long, ack_at ) + __field(long, ack_lost_at ) + __field(long, resend_at ) + __field(long, ping_at ) + __field(long, expect_rx_by ) + __field(long, expect_req_by ) + __field(long, expect_term_by ) + __field(long, timer ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->now = now; + __entry->ack_at = call->delay_ack_at; + __entry->ack_lost_at = call->ack_lost_at; + __entry->resend_at = call->resend_at; + __entry->expect_rx_by = call->expect_rx_by; + __entry->expect_req_by = call->expect_req_by; + __entry->expect_term_by = call->expect_term_by; + __entry->timer = call->timer.expires; + ), + + TP_printk("c=%08x EXPIRED a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + __entry->call, + __entry->ack_at - __entry->now, + __entry->ack_lost_at - __entry->now, + __entry->resend_at - __entry->now, + __entry->expect_rx_by - __entry->now, + __entry->expect_req_by - __entry->now, + __entry->expect_term_by - __entry->now, + __entry->timer - __entry->now) + ); + TRACE_EVENT(rxrpc_rx_lose, TP_PROTO(struct rxrpc_skb_priv *sp), @@ -1031,20 +1129,15 @@ TRACE_EVENT(rxrpc_rx_lose, TRACE_EVENT(rxrpc_propose_ack, TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, - u8 ack_reason, rxrpc_serial_t serial, bool immediate, - bool background, enum rxrpc_propose_ack_outcome outcome), + u8 ack_reason, rxrpc_serial_t serial), - TP_ARGS(call, why, ack_reason, serial, immediate, background, - outcome), + TP_ARGS(call, why, ack_reason, serial), TP_STRUCT__entry( __field(unsigned int, call ) __field(enum rxrpc_propose_ack_trace, why ) __field(rxrpc_serial_t, serial ) __field(u8, ack_reason ) - __field(bool, immediate ) - __field(bool, background ) - __field(enum rxrpc_propose_ack_outcome, outcome ) ), TP_fast_assign( @@ -1052,45 +1145,91 @@ TRACE_EVENT(rxrpc_propose_ack, __entry->why = why; __entry->serial = serial; __entry->ack_reason = ack_reason; - __entry->immediate = immediate; - __entry->background = background; - __entry->outcome = outcome; ), - TP_printk("c=%08x %s %s r=%08x i=%u b=%u%s", + TP_printk("c=%08x %s %s r=%08x", __entry->call, __print_symbolic(__entry->why, rxrpc_propose_ack_traces), __print_symbolic(__entry->ack_reason, rxrpc_ack_names), - __entry->serial, - __entry->immediate, - __entry->background, - __print_symbolic(__entry->outcome, rxrpc_propose_ack_outcomes)) + __entry->serial) + ); + +TRACE_EVENT(rxrpc_send_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial), + + TP_ARGS(call, why, ack_reason, serial), + + TP_STRUCT__entry( + __field(unsigned int, call ) + 
__field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + ), + + TP_printk("c=%08x %s %s r=%08x", + __entry->call, + __print_symbolic(__entry->why, rxrpc_propose_ack_traces), + __print_symbolic(__entry->ack_reason, rxrpc_ack_names), + __entry->serial) + ); + +TRACE_EVENT(rxrpc_drop_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial, bool nobuf), + + TP_ARGS(call, why, ack_reason, serial, nobuf), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + __field(bool, nobuf ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + __entry->nobuf = nobuf; + ), + + TP_printk("c=%08x %s %s r=%08x nbf=%u", + __entry->call, + __print_symbolic(__entry->why, rxrpc_propose_ack_traces), + __print_symbolic(__entry->ack_reason, rxrpc_ack_names), + __entry->serial, __entry->nobuf) ); TRACE_EVENT(rxrpc_retransmit, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation, - s64 expiry), + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, s64 expiry), - TP_ARGS(call, seq, annotation, expiry), + TP_ARGS(call, seq, expiry), TP_STRUCT__entry( __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) - __field(u8, annotation ) __field(s64, expiry ) ), TP_fast_assign( __entry->call = call->debug_id; __entry->seq = seq; - __entry->annotation = annotation; __entry->expiry = expiry; ), - TP_printk("c=%08x q=%x a=%02x xp=%lld", + TP_printk("c=%08x q=%x xp=%lld", __entry->call, __entry->seq, - __entry->annotation, __entry->expiry) ); @@ -1113,14 +1252,14 @@ TRACE_EVENT(rxrpc_congest, TP_fast_assign( __entry->call = call->debug_id; __entry->change = change; - __entry->hard_ack = call->tx_hard_ack; + __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1128,8 +1267,8 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), __entry->sum.cwnd, __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.nr_nacks, - __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks, + __entry->sum.nr_acks, __entry->sum.saw_nacks, + __entry->sum.nr_new_acks, __entry->sum.nr_rot_new_acks, __entry->top - __entry->hard_ack, __entry->sum.cumulative_acks, @@ -1230,26 +1369,23 @@ TRACE_EVENT(rxrpc_connect_call, ); TRACE_EVENT(rxrpc_resend, - TP_PROTO(struct rxrpc_call *call, int ix), + TP_PROTO(struct rxrpc_call *call), - TP_ARGS(call, ix), + TP_ARGS(call), TP_STRUCT__entry( __field(unsigned int, call ) - __field(int, ix ) - __array(u8, anno, 64 ) + __field(rxrpc_seq_t, seq ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->ix = ix; - memcpy(__entry->anno, call->rxtx_annotations, 64); + __entry->seq = call->acks_hard_ack; ), - TP_printk("c=%08x ix=%u a=%64phN", + TP_printk("c=%08x q=%x", __entry->call, 
- __entry->ix, - __entry->anno) + __entry->seq) ); TRACE_EVENT(rxrpc_rx_icmp, @@ -1329,8 +1465,8 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_id = call->call_id; __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->tx_hard_ack; - __entry->rx_seq = call->rx_hard_ack; + __entry->tx_seq = call->acks_hard_ack; + __entry->rx_seq = call->rx_highest_seq; ), TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", @@ -1395,6 +1531,61 @@ TRACE_EVENT(rxrpc_rx_discard_ack, __entry->call_ackr_prev) ); +TRACE_EVENT(rxrpc_req_ack, + TP_PROTO(unsigned int call_debug_id, rxrpc_seq_t seq, + enum rxrpc_req_ack_trace why), + + TP_ARGS(call_debug_id, seq, why), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id ) + __field(rxrpc_seq_t, seq ) + __field(enum rxrpc_req_ack_trace, why ) + ), + + TP_fast_assign( + __entry->call_debug_id = call_debug_id; + __entry->seq = seq; + __entry->why = why; + ), + + TP_printk("c=%08x q=%08x REQ-%s", + __entry->call_debug_id, + __entry->seq, + __print_symbolic(__entry->why, rxrpc_req_ack_traces)) + ); + +TRACE_EVENT(rxrpc_txbuf, + TP_PROTO(unsigned int debug_id, + unsigned int call_debug_id, rxrpc_seq_t seq, + int ref, enum rxrpc_txbuf_trace what), + + TP_ARGS(debug_id, call_debug_id, seq, ref, what), + + TP_STRUCT__entry( + __field(unsigned int, debug_id ) + __field(unsigned int, call_debug_id ) + __field(rxrpc_seq_t, seq ) + __field(int, ref ) + __field(enum rxrpc_txbuf_trace, what ) + ), + + TP_fast_assign( + __entry->debug_id = debug_id; + __entry->call_debug_id = call_debug_id; + __entry->seq = seq; + __entry->ref = ref; + __entry->what = what; + ), + + TP_printk("B=%08x c=%08x q=%08x %s r=%d", + __entry->debug_id, + __entry->call_debug_id, + __entry->seq, + __print_symbolic(__entry->what, rxrpc_txbuf_traces), + __entry->ref) + ); + #undef EM #undef E_ #endif /* _TRACE_RXRPC_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 51b9aa640ad2..fb4c911d2a03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,226 +5443,272 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. 
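As a concrete sketch of this cgroup-as-key view, here is a minimal BPF-side user of the new map type and helper, written in the style of the kernel's BPF selftests; the map name, attach point, and counter layout are illustrative only, and a libbpf build with a generated vmlinux.h is assumed:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* One long counter per cgroup, kept in the new map type. */
struct {
	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, long);
} cgrp_events SEC(".maps");

SEC("tp_btf/sys_enter")
int BPF_PROG(count_events)
{
	struct task_struct *task = bpf_get_current_task_btf();
	long *val;

	/* Fetch, or create on first use, the local-storage slot hanging
	 * off the current task's cgroup. */
	val = bpf_cgrp_storage_get(&cgrp_events, task->cgroups->dfl_cgrp,
				   NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (val)
		__sync_fetch_and_add(val, 1);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

A matching bpf_cgrp_storage_delete(&cgrp_events, task->cgroups->dfl_cgrp) would drop that cgroup's slot again; throughout, the cgroup pointer plays the role of the map key.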
From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - 
FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ - FN(ktime_get_tai_ns), \ - FN(user_ringbuf_drain), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, 
##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, 
##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -6391,6 +6445,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index a791a94013a6..99047223ab26 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -218,6 +218,9 @@ struct cee_pfc { #define IEEE_8021QAZ_APP_SEL_ANY 4 #define IEEE_8021QAZ_APP_SEL_DSCP 5 +/* Non-std selector values */ +#define DCB_APP_SEL_PCP 255 + /* This structure contains the IEEE 802.1Qaz APP managed object. This * object is also used for the CEE std as well. * @@ -247,6 +250,8 @@ struct dcb_app { __u16 protocol; }; +#define IEEE_8021QAZ_APP_SEL_MAX 255 + /** * struct dcb_peer_app_info - APP feature information sent by the peer * @@ -405,6 +410,7 @@ enum dcbnl_attrs { * @DCB_ATTR_IEEE_PEER_ETS: peer ETS configuration - get only * @DCB_ATTR_IEEE_PEER_PFC: peer PFC configuration - get only * @DCB_ATTR_IEEE_PEER_APP: peer APP tlv - get only + * @DCB_ATTR_DCB_APP_TRUST_TABLE: selector trust table */ enum ieee_attrs { DCB_ATTR_IEEE_UNSPEC, @@ -418,6 +424,7 @@ enum ieee_attrs { DCB_ATTR_IEEE_QCN, DCB_ATTR_IEEE_QCN_STATS, DCB_ATTR_DCB_BUFFER, + DCB_ATTR_DCB_APP_TRUST_TABLE, __DCB_ATTR_IEEE_MAX }; #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) @@ -425,6 +432,7 @@ enum ieee_attrs { enum ieee_attrs_app { DCB_ATTR_IEEE_APP_UNSPEC, DCB_ATTR_IEEE_APP, + DCB_ATTR_DCB_APP, __DCB_ATTR_IEEE_APP_MAX }; #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index dc2aa3d75b39..f341de2ae612 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1737,6 +1737,13 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, + ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT = 93, + ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT = 94, + ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT = 95, + ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96, + ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97, + ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98, + /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; @@ -1848,6 +1855,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_100000 100000 #define SPEED_200000 200000 #define SPEED_400000 400000 +#define SPEED_800000 800000 #define SPEED_UNKNOWN -1 diff --git a/include/uapi/linux/ethtool_netlink.h 
b/include/uapi/linux/ethtool_netlink.h
index bb57084ac524..aaf7c6963d61 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -262,6 +262,7 @@ enum {
ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */
ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */
ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */
+ ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */
/* add new constants above here */
__ETHTOOL_A_LINKSTATE_CNT,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 5e7a1041df3a..1021a7e47a86 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -372,6 +372,8 @@ enum {
IFLA_TSO_MAX_SEGS,
IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */
+ IFLA_DEVLINK_PORT,
+
__IFLA_MAX
};
@@ -561,6 +563,7 @@ enum {
IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
IFLA_BRPORT_LOCKED,
+ IFLA_BRPORT_MAB,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index c07caf7b40db..a8516b3594a4 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -70,6 +70,7 @@ struct sockaddr_ll {
#define PACKET_FANOUT_EBPF 7
#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000
#define PACKET_FANOUT_FLAG_UNIQUEID 0x2000
+#define PACKET_FANOUT_FLAG_IGNORE_OUTGOING 0x4000
#define PACKET_FANOUT_FLAG_DEFRAG 0x8000
struct tpacket_stats {
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index a998bf761635..5e67a7eaf4a7 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -52,7 +52,8 @@ enum {
#define NTF_STICKY (1 << 6)
#define NTF_ROUTER (1 << 7)
/* Extended flags under NDA_FLAGS_EXT: */
-#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_LOCKED (1 << 1)
/*
* Neighbor Cache Entry States.
@@ -86,6 +87,11 @@ enum {
* NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
* of a user space control plane, and automatically refreshed so that (if
* possible) they remain in NUD_REACHABLE state.
+ *
+ * NTF_EXT_LOCKED flagged bridge FDB entries are generated by the
+ * bridge in response to a host trying to communicate via a locked bridge port
+ * with MAB enabled. Their purpose is to notify user space that a host requires
+ * authentication.
*/ struct nda_cacheinfo { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 466fd3f4447c..cfa844da1ce6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -97,6 +97,7 @@ enum nft_verdicts { * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETRULE_RESET: get rules and reset stateful expressions (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -124,6 +125,7 @@ enum nf_tables_msg_types { NFT_MSG_NEWFLOWTABLE, NFT_MSG_GETFLOWTABLE, NFT_MSG_DELFLOWTABLE, + NFT_MSG_GETRULE_RESET, NFT_MSG_MAX, }; @@ -760,6 +762,7 @@ enum nft_payload_bases { NFT_PAYLOAD_NETWORK_HEADER, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_PAYLOAD_INNER_HEADER, + NFT_PAYLOAD_TUN_HEADER, }; /** @@ -779,6 +782,32 @@ enum nft_payload_csum_flags { NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), }; +enum nft_inner_type { + NFT_INNER_UNSPEC = 0, + NFT_INNER_VXLAN, + NFT_INNER_GENEVE, +}; + +enum nft_inner_flags { + NFT_INNER_HDRSIZE = (1 << 0), + NFT_INNER_LL = (1 << 1), + NFT_INNER_NH = (1 << 2), + NFT_INNER_TH = (1 << 3), +}; +#define NFT_INNER_MASK (NFT_INNER_HDRSIZE | NFT_INNER_LL | \ + NFT_INNER_NH | NFT_INNER_TH) + +enum nft_inner_attributes { + NFTA_INNER_UNSPEC, + NFTA_INNER_NUM, + NFTA_INNER_TYPE, + NFTA_INNER_FLAGS, + NFTA_INNER_HDRSIZE, + NFTA_INNER_EXPR, + __NFTA_INNER_MAX +}; +#define NFTA_INNER_MAX (__NFTA_INNER_MAX - 1) + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e2ae82e3f9f7..5da0da59bf01 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -48,6 +48,7 @@ struct sockaddr_nl { * @nlmsg_flags: Additional flags * @nlmsg_seq: Sequence number * @nlmsg_pid: Sending process port ID + * @nlmsg_data: Message payload */ struct nlmsghdr { __u32 nlmsg_len; @@ -55,6 +56,7 @@ struct nlmsghdr { __u16 nlmsg_flags; __u32 nlmsg_seq; __u32 nlmsg_pid; + __u8 nlmsg_data[]; }; /* Flags values */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c32e7616a366..c14a91bbca7c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2749,6 +2749,8 @@ enum nl80211_commands { * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the ack RX * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates * the incoming frame RX timestamp. + * @NL80211_ATTR_TD_BITMAP: Transition Disable bitmap, for subsequent + * (re)associations. 
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3276,6 +3278,7 @@ enum nl80211_attrs {
NL80211_ATTR_TX_HW_TIMESTAMP,
NL80211_ATTR_RX_HW_TIMESTAMP,
+ NL80211_ATTR_TD_BITMAP,
/* add attributes here, update the policy in nl80211.c */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 4d7470036a8b..6600cb0164c2 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,6 +292,7 @@ enum
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
+ LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h
index 5fb1d7ac1027..6c5200f0ed38 100644
--- a/include/uapi/linux/tc_act/tc_ct.h
+++ b/include/uapi/linux/tc_act/tc_ct.h
@@ -22,6 +22,9 @@ enum {
TCA_CT_NAT_PORT_MIN, /* be16 */
TCA_CT_NAT_PORT_MAX, /* be16 */
TCA_CT_PAD,
+ TCA_CT_HELPER_NAME, /* string */
+ TCA_CT_HELPER_FAMILY, /* u8 */
+ TCA_CT_HELPER_PROTO, /* u8 */
__TCA_CT_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..879eeb0a084b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -284,6 +284,11 @@ struct tcp_info {
__u32 tcpi_snd_wnd; /* peer's advertised receive window after
* scaling (bytes)
*/
+ __u32 tcpi_rcv_wnd; /* local advertised receive window after
+ * scaling (bytes)
+ */
+
+ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */
};
/* netlink attribute types for SCM_TIMESTAMPING_OPT_STATS */
@@ -315,6 +320,7 @@ enum {
TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */
TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */
TCP_NLA_TTL, /* TTL or hop limit of a packet received */
+ TCP_NLA_REHASH, /* PLB and timeout triggered rehash attempts */
};
/* for TCP_MD5SIG socket option */
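The tcp.h hunk above appends tcpi_rcv_wnd and tcpi_rehash to struct tcp_info, which userspace reads via getsockopt(TCP_INFO). Because the kernel copies at most the caller-supplied length, a sketch of consuming the new fields might guard on the returned length so the same binary also runs against older kernels (this assumes libc headers that already carry the patch):

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void print_rcv_wnd_and_rehash(int tcp_fd)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(tcp_fd, IPPROTO_TCP, TCP_INFO, &info, &len) < 0) {
		perror("getsockopt(TCP_INFO)");
		return;
	}
	/* An older kernel returns a shorter struct; the new trailing
	 * fields are only valid if it was copied in full. */
	if (len >= sizeof(info))
		printf("rcv_wnd=%u rehash=%u\n",
		       info.tcpi_rcv_wnd, info.tcpi_rehash);
}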
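Similarly, the if_packet.h hunk earlier adds PACKET_FANOUT_FLAG_IGNORE_OUTGOING, which keeps locally generated packets out of a fanout group. A hedged sketch of requesting it, where pkt_fd is assumed to be an AF_PACKET socket and, per packet(7), the option value packs the group id into the low 16 bits and the mode plus flag bits into the high 16 bits:

#include <sys/socket.h>
#include <linux/if_packet.h>

static int join_fanout_rx_only(int pkt_fd, unsigned short group_id)
{
	/* Hash mode, and do not fan out outgoing (TX) packets. */
	unsigned int mode = PACKET_FANOUT_HASH |
			    PACKET_FANOUT_FLAG_IGNORE_OUTGOING;
	unsigned int arg = group_id | (mode << 16);

	return setsockopt(pkt_fd, SOL_PACKET, PACKET_FANOUT,
			  &arg, sizeof(arg));
}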
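Finally, returning to the bpf.h mapper rework above: because ___BPF_FUNC_MAPPER now carries each helper's ID explicitly, the enum no longer depends on the textual order of entries, and the trailing "ctx..." parameter lets a consumer thread an extra argument through to its per-entry macro (while __BPF_FUNC_MAPPER keeps the old one-argument shape via __BPF_FUNC_MAPPER_APPLY). A hypothetical use, building an ID-indexed helper-name table from the uapi header; HELPER_NAME and the table are illustrative and assume headers that contain this patch:

#include <stdio.h>
#include <linux/bpf.h>

/* Each mapper entry expands to a designated initializer: [id] = "name", */
#define HELPER_NAME(name, id, unused) [id] = #name,

static const char *bpf_helper_names[] = {
	___BPF_FUNC_MAPPER(HELPER_NAME, 0)
};

int main(void)
{
	/* BPF_FUNC_map_lookup_elem is pinned to 1, so this prints
	 * "map_lookup_elem". */
	printf("%s\n", bpf_helper_names[BPF_FUNC_map_lookup_elem]);
	return 0;
}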