summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-18 12:54:24 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-18 12:54:24 -0800
commit8d0112ac6fd001f95aabb084ec2ccaa3637bc344 (patch)
tree323894ea1e3cf60935b3ded0c5b5b8ca55610ca7 /kernel/bpf
parent6fdf886424cf8c4fff96a20189c00606327e5df6 (diff)
parentc7521d3aa2fa7fc785682758c99b5bcae503f6be (diff)
Merge tag 'net-5.16-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from bpf, mac80211. Current release - regressions: - devlink: don't throw an error if flash notification sent before devlink visible - page_pool: Revert "page_pool: disable dma mapping support...", turns out there are active arches who need it Current release - new code bugs: - amt: cancel delayed_work synchronously in amt_fini() Previous releases - regressions: - xsk: fix crash on double free in buffer pool - bpf: fix inner map state pruning regression causing program rejections - mac80211: drop check for DONT_REORDER in __ieee80211_select_queue, preventing mis-selecting the best effort queue - mac80211: do not access the IV when it was stripped - mac80211: fix radiotap header generation, off-by-one - nl80211: fix getting radio statistics in survey dump - e100: fix device suspend/resume Previous releases - always broken: - tcp: fix uninitialized access in skb frags array for Rx 0cp - bpf: fix toctou on read-only map's constant scalar tracking - bpf: forbid bpf_ktime_get_coarse_ns and bpf_timer_* in tracing progs - tipc: only accept encrypted MSG_CRYPTO msgs - smc: transfer remaining wait queue entries during fallback, fix missing wake ups - udp: validate checksum in udp_read_sock() (when sockmap is used) - sched: act_mirred: drop dst for the direction from egress to ingress - virtio_net_hdr_to_skb: count transport header in UFO, prevent allowing bad skbs into the stack - nfc: reorder the logic in nfc_{un,}register_device, fix unregister - ipsec: check return value of ipv6_skip_exthdr - usb: r8152: add MAC passthrough support for more Lenovo Docks" * tag 'net-5.16-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (96 commits) ptp: ocp: Fix a couple NULL vs IS_ERR() checks net: ethernet: dec: tulip: de4x5: fix possible array overflows in type3_infoblock() net: tulip: de4x5: fix the problem that the array 'lp->phy[8]' may be out of bound ipv6: check return value of ipv6_skip_exthdr e100: fix device suspend/resume devlink: Don't throw an error if flash notification sent before devlink visible page_pool: Revert "page_pool: disable dma mapping support..." ethernet: hisilicon: hns: hns_dsaf_misc: fix a possible array overflow in hns_dsaf_ge_srst_by_port() octeontx2-af: debugfs: don't corrupt user memory NFC: add NCI_UNREG flag to eliminate the race NFC: reorder the logic in nfc_{un,}register_device NFC: reorganize the functions in nci_request tipc: check for null after calling kmemdup i40e: Fix display error code in dmesg i40e: Fix creation of first queue by omitting it if is not power of two i40e: Fix warning message and call stack during rmmod i40e driver i40e: Fix ping is lost after configuring ADq on VF i40e: Fix changing previously set num_queue_pairs for PFs i40e: Fix NULL ptr dereference on VSI filter sync i40e: Fix correct max_pkt_size on VF RX queue ...
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/cgroup.c2
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/syscall.c57
-rw-r--r--kernel/bpf/verifier.c27
4 files changed, 63 insertions, 25 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 2ca643af9a54..43eb3501721b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1809,6 +1809,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sysctl_get_new_value_proto;
case BPF_FUNC_sysctl_set_new_value:
return &bpf_sysctl_set_new_value_proto;
+ case BPF_FUNC_ktime_get_coarse_ns:
+ return &bpf_ktime_get_coarse_ns_proto;
default:
return cgroup_base_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469c217f..649f07623df6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1364,8 +1364,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_ktime_get_coarse_ns:
- return &bpf_ktime_get_coarse_ns_proto;
case BPF_FUNC_ringbuf_output:
return &bpf_ringbuf_output_proto;
case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 50f96ea4452a..1033ee8c0caf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
+static void bpf_map_write_active_inc(struct bpf_map *map)
+{
+ atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+ atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+ return atomic64_read(&map->writecnt) != 0;
+}
+
static u32 bpf_map_value_size(const struct bpf_map *map)
{
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -601,11 +616,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
- if (vma->vm_flags & VM_MAYWRITE) {
- mutex_lock(&map->freeze_mutex);
- map->writecnt++;
- mutex_unlock(&map->freeze_mutex);
- }
+ if (vma->vm_flags & VM_MAYWRITE)
+ bpf_map_write_active_inc(map);
}
/* called for all unmapped memory region (including initial) */
@@ -613,11 +625,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
- if (vma->vm_flags & VM_MAYWRITE) {
- mutex_lock(&map->freeze_mutex);
- map->writecnt--;
- mutex_unlock(&map->freeze_mutex);
- }
+ if (vma->vm_flags & VM_MAYWRITE)
+ bpf_map_write_active_dec(map);
}
static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -668,7 +677,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
goto out;
if (vma->vm_flags & VM_MAYWRITE)
- map->writecnt++;
+ bpf_map_write_active_inc(map);
out:
mutex_unlock(&map->freeze_mutex);
return err;
@@ -1139,6 +1148,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+ bpf_map_write_active_inc(map);
if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
@@ -1174,6 +1184,7 @@ free_value:
free_key:
kvfree(key);
err_put:
+ bpf_map_write_active_dec(map);
fdput(f);
return err;
}
@@ -1196,6 +1207,7 @@ static int map_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+ bpf_map_write_active_inc(map);
if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
@@ -1226,6 +1238,7 @@ static int map_delete_elem(union bpf_attr *attr)
out:
kvfree(key);
err_put:
+ bpf_map_write_active_dec(map);
fdput(f);
return err;
}
@@ -1533,6 +1546,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+ bpf_map_write_active_inc(map);
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
@@ -1597,6 +1611,7 @@ free_value:
free_key:
kvfree(key);
err_put:
+ bpf_map_write_active_dec(map);
fdput(f);
return err;
}
@@ -1624,8 +1639,7 @@ static int map_freeze(const union bpf_attr *attr)
}
mutex_lock(&map->freeze_mutex);
-
- if (map->writecnt) {
+ if (bpf_map_write_active(map)) {
err = -EBUSY;
goto err_put;
}
@@ -4171,6 +4185,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
union bpf_attr __user *uattr,
int cmd)
{
+ bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
+ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
+ bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
struct bpf_map *map;
int err, ufd;
struct fd f;
@@ -4183,16 +4200,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if ((cmd == BPF_MAP_LOOKUP_BATCH ||
- cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
- !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+ if (has_write)
+ bpf_map_write_active_inc(map);
+ if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
-
- if (cmd != BPF_MAP_LOOKUP_BATCH &&
- !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+ if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -4205,8 +4219,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
BPF_DO_BATCH(map->ops->map_update_batch);
else
BPF_DO_BATCH(map->ops->map_delete_batch);
-
err_put:
+ if (has_write)
+ bpf_map_write_active_dec(map);
fdput(f);
return err;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 890b3ec375a3..50efda51515b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1151,7 +1151,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
/* transfer reg's id which is unique for every map_lookup_elem
* as UID of the inner map.
*/
- reg->map_uid = reg->id;
+ if (map_value_has_timer(map->inner_map_meta))
+ reg->map_uid = reg->id;
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
reg->type = PTR_TO_XDP_SOCK;
} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -4055,7 +4056,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
static bool bpf_map_is_rdonly(const struct bpf_map *map)
{
- return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+ /* A map is considered read-only if the following condition are true:
+ *
+ * 1) BPF program side cannot change any of the map content. The
+ * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
+ * and was set at map creation time.
+ * 2) The map value(s) have been initialized from user space by a
+ * loader and then "frozen", such that no new map update/delete
+ * operations from syscall side are possible for the rest of
+ * the map's lifetime from that point onwards.
+ * 3) Any parallel/pending map update/delete operations from syscall
+ * side have been completed. Only after that point, it's safe to
+ * assume that map value(s) are immutable.
+ */
+ return (map->map_flags & BPF_F_RDONLY_PROG) &&
+ READ_ONCE(map->frozen) &&
+ !bpf_map_write_active(map);
}
static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
@@ -11631,6 +11647,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
}
+ if (map_value_has_timer(map)) {
+ if (is_tracing_prog_type(prog_type)) {
+ verbose(env, "tracing progs cannot use bpf_timer yet\n");
+ return -EINVAL;
+ }
+ }
+
if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
!bpf_offload_prog_map_match(prog, map)) {
verbose(env, "offload device mismatch between prog and map\n");