/* * Public API and common code for kernel->userspace relay file support. * * See Documentation/filesystems/relay.rst for an overview. * * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) * * Moved to kernel/relay.c by Paul Mundt, 2006. * November 2006 - CPU hotplug support by Mathieu Desnoyers * (mathieu.desnoyers@polymtl.ca) * * This file is released under the GPL. */ #include #include #include #include #include #include #include #include #include #include /* list of open channels, for cpu hotplug */ static DEFINE_MUTEX(relay_channels_mutex); static LIST_HEAD(relay_channels); /* * fault() vm_op implementation for relay file mapping. */ static vm_fault_t relay_buf_fault(struct vm_fault *vmf) { struct page *page; struct rchan_buf *buf = vmf->vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; if (!buf) return VM_FAULT_OOM; page = vmalloc_to_page(buf->start + (pgoff << PAGE_SHIFT)); if (!page) return VM_FAULT_SIGBUS; get_page(page); vmf->page = page; return 0; } /* * vm_ops for relay file mappings. */ static const struct vm_operations_struct relay_file_mmap_ops = { .fault = relay_buf_fault, }; /* * allocate an array of pointers of struct page */ static struct page **relay_alloc_page_array(unsigned int n_pages) { return kvcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); } /* * free an array of pointers of struct page */ static void relay_free_page_array(struct page **array) { kvfree(array); } /** * relay_mmap_buf: - mmap channel buffer to process address space * @buf: relay channel buffer * @vma: vm_area_struct describing memory to be mapped * * Returns 0 if ok, negative on error * * Caller should already have grabbed mmap_lock. */ static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) { unsigned long length = vma->vm_end - vma->vm_start; if (!buf) return -EBADF; if (length != (unsigned long)buf->chan->alloc_size) return -EINVAL; vma->vm_ops = &relay_file_mmap_ops; vm_flags_set(vma, VM_DONTEXPAND); vma->vm_private_data = buf; return 0; } /** * relay_alloc_buf - allocate a channel buffer * @buf: the buffer struct * @size: total size of the buffer * * Returns a pointer to the resulting buffer, %NULL if unsuccessful. The * passed in size will get page aligned, if it isn't already. */ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) { void *mem; unsigned int i, j, n_pages; *size = PAGE_ALIGN(*size); n_pages = *size >> PAGE_SHIFT; buf->page_array = relay_alloc_page_array(n_pages); if (!buf->page_array) return NULL; for (i = 0; i < n_pages; i++) { buf->page_array[i] = alloc_page(GFP_KERNEL); if (unlikely(!buf->page_array[i])) goto depopulate; set_page_private(buf->page_array[i], (unsigned long)buf); } mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); if (!mem) goto depopulate; memset(mem, 0, *size); buf->page_count = n_pages; return mem; depopulate: for (j = 0; j < i; j++) __free_page(buf->page_array[j]); relay_free_page_array(buf->page_array); return NULL; } /** * relay_create_buf - allocate and initialize a channel buffer * @chan: the relay channel * * Returns channel buffer if successful, %NULL otherwise. */ static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf; if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t)) return NULL; buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); if (!buf) return NULL; buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t), GFP_KERNEL); if (!buf->padding) goto free_buf; buf->start = relay_alloc_buf(buf, &chan->alloc_size); if (!buf->start) goto free_buf; buf->chan = chan; kref_get(&buf->chan->kref); return buf; free_buf: kfree(buf->padding); kfree(buf); return NULL; } /** * relay_destroy_channel - free the channel struct * @kref: target kernel reference that contains the relay channel * * Should only be called from kref_put(). */ static void relay_destroy_channel(struct kref *kref) { struct rchan *chan = container_of(kref, struct rchan, kref); free_percpu(chan->buf); kfree(chan); } /** * relay_destroy_buf - destroy an rchan_buf struct and associated buffer * @buf: the buffer struct */ static void relay_destroy_buf(struct rchan_buf *buf) { struct rchan *chan = buf->chan; unsigned int i; if (likely(buf->start)) { vunmap(buf->start); for (i = 0; i < buf->page_count; i++) __free_page(buf->page_array[i]); relay_free_page_array(buf->page_array); } *per_cpu_ptr(chan->buf, buf->cpu) = NULL; kfree(buf->padding); kfree(buf); kref_put(&chan->kref, relay_destroy_channel); } /** * relay_remove_buf - remove a channel buffer * @kref: target kernel reference that contains the relay buffer * * Removes the file from the filesystem, which also frees the * rchan_buf_struct and the channel buffer. Should only be called from * kref_put(). */ static void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); relay_destroy_buf(buf); } /** * relay_buf_empty - boolean, is the channel buffer empty? * @buf: channel buffer * * Returns 1 if the buffer is empty, 0 otherwise. */ static int relay_buf_empty(struct rchan_buf *buf) { return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; } /** * relay_buf_full - boolean, is the channel buffer full? * @buf: channel buffer * * Returns 1 if the buffer is full, 0 otherwise. */ int relay_buf_full(struct rchan_buf *buf) { size_t ready = buf->subbufs_produced - buf->subbufs_consumed; return (ready >= buf->chan->n_subbufs) ? 1 : 0; } EXPORT_SYMBOL_GPL(relay_buf_full); /* * High-level relay kernel API and associated functions. */ static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { if (!buf->chan->cb->subbuf_start) return !relay_buf_full(buf); return buf->chan->cb->subbuf_start(buf, subbuf, prev_subbuf, prev_padding); } /** * wakeup_readers - wake up readers waiting on a channel * @work: contains the channel buffer * * This is the function used to defer reader waking */ static void wakeup_readers(struct irq_work *work) { struct rchan_buf *buf; buf = container_of(work, struct rchan_buf, wakeup_work); wake_up_interruptible(&buf->read_wait); } /** * __relay_reset - reset a channel buffer * @buf: the channel buffer * @init: 1 if this is a first-time initialization * * See relay_reset() for description of effect. */ static void __relay_reset(struct rchan_buf *buf, unsigned int init) { size_t i; if (init) { init_waitqueue_head(&buf->read_wait); kref_init(&buf->kref); init_irq_work(&buf->wakeup_work, wakeup_readers); } else { irq_work_sync(&buf->wakeup_work); } buf->subbufs_produced = 0; buf->subbufs_consumed = 0; buf->bytes_consumed = 0; buf->finalized = 0; buf->data = buf->start; buf->offset = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; relay_subbuf_start(buf, buf->data, NULL, 0); } /** * relay_reset - reset the channel * @chan: the channel * * This has the effect of erasing all data from all channel buffers * and restarting the channel in its initial state. The buffers * are not freed, so any mappings are still in effect. * * NOTE. Care should be taken that the channel isn't actually * being used by anything when this call is made. */ void relay_reset(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) { __relay_reset(buf, 0); return; } mutex_lock(&relay_channels_mutex); for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) __relay_reset(buf, 0); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_reset); static inline void relay_set_buf_dentry(struct rchan_buf *buf, struct dentry *dentry) { buf->dentry = dentry; d_inode(buf->dentry)->i_size = buf->early_bytes; } static struct dentry *relay_create_buf_file(struct rchan *chan, struct rchan_buf *buf, unsigned int cpu) { struct dentry *dentry; char *tmpname; tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); if (!tmpname) return NULL; snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); /* Create file in fs */ dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); if (IS_ERR(dentry)) dentry = NULL; kfree(tmpname); return dentry; } /* * relay_open_buf - create a new relay channel buffer * * used by relay_open() and CPU hotplug. */ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { struct rchan_buf *buf; struct dentry *dentry; if (chan->is_global) return *per_cpu_ptr(chan->buf, 0); buf = relay_create_buf(chan); if (!buf) return NULL; if (chan->has_base_filename) { dentry = relay_create_buf_file(chan, buf, cpu); if (!dentry) goto free_buf; relay_set_buf_dentry(buf, dentry); } else { /* Only retrieve global info, nothing more, nothing less */ dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); if (IS_ERR_OR_NULL(dentry)) goto free_buf; } buf->cpu = cpu; __relay_reset(buf, 1); if(chan->is_global) { *per_cpu_ptr(chan->buf, 0) = buf; buf->cpu = 0; } return buf; free_buf: relay_destroy_buf(buf); return NULL; } /** * relay_close_buf - close a channel buffer * @buf: channel buffer * * Marks the buffer finalized and restores the default callbacks. * The channel buffer and channel buffer data structure are then freed * automatically when the last reference is given up. */ static void relay_close_buf(struct rchan_buf *buf) { buf->finalized = 1; irq_work_sync(&buf->wakeup_work); buf->chan->cb->remove_buf_file(buf->dentry); kref_put(&buf->kref, relay_remove_buf); } int relay_prepare_cpu(unsigned int cpu) { struct rchan *chan; struct rchan_buf *buf; mutex_lock(&relay_channels_mutex); list_for_each_entry(chan, &relay_channels, list) { if (*per_cpu_ptr(chan->buf, cpu)) continue; buf = relay_open_buf(chan, cpu); if (!buf) { pr_err("relay: cpu %d buffer creation failed\n", cpu); mutex_unlock(&relay_channels_mutex); return -ENOMEM; } *per_cpu_ptr(chan->buf, cpu) = buf; } mutex_unlock(&relay_channels_mutex); return 0; } /** * relay_open - create a new relay channel * @base_filename: base name of files to create, %NULL for buffering only * @parent: dentry of parent directory, %NULL for root directory or buffer * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions * @private_data: user-defined data * * Returns channel pointer if successful, %NULL otherwise. * * Creates a channel buffer for each cpu using the sizes and * attributes specified. The created channel buffer files * will be named base_filename0...base_filenameN-1. File * permissions will be %S_IRUSR. * * If opening a buffer (@parent = NULL) that you later wish to register * in a filesystem, call relay_late_setup_files() once the @parent dentry * is available. */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, const struct rchan_callbacks *cb, void *private_data) { unsigned int i; struct rchan *chan; struct rchan_buf *buf; if (!(subbuf_size && n_subbufs)) return NULL; if (subbuf_size > UINT_MAX / n_subbufs) return NULL; if (!cb || !cb->create_buf_file || !cb->remove_buf_file) return NULL; chan = kzalloc(sizeof(struct rchan), GFP_KERNEL); if (!chan) return NULL; chan->buf = alloc_percpu(struct rchan_buf *); if (!chan->buf) { kfree(chan); return NULL; } chan->version = RELAYFS_CHANNEL_VERSION; chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; if (base_filename) { chan->has_base_filename = 1; strscpy(chan->base_filename, base_filename, NAME_MAX); } chan->cb = cb; kref_init(&chan->kref); mutex_lock(&relay_channels_mutex); for_each_online_cpu(i) { buf = relay_open_buf(chan, i); if (!buf) goto free_bufs; *per_cpu_ptr(chan->buf, i) = buf; } list_add(&chan->list, &relay_channels); mutex_unlock(&relay_channels_mutex); return chan; free_bufs: for_each_possible_cpu(i) { if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); } kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); return NULL; } EXPORT_SYMBOL_GPL(relay_open); struct rchan_percpu_buf_dispatcher { struct rchan_buf *buf; struct dentry *dentry; }; /* Called in atomic context. */ static void __relay_set_buf_dentry(void *info) { struct rchan_percpu_buf_dispatcher *p = info; relay_set_buf_dentry(p->buf, p->dentry); } /** * relay_late_setup_files - triggers file creation * @chan: channel to operate on * @base_filename: base name of files to create * @parent: dentry of parent directory, %NULL for root directory * * Returns 0 if successful, non-zero otherwise. * * Use to setup files for a previously buffer-only channel created * by relay_open() with a NULL parent dentry. * * For example, this is useful for perfomring early tracing in kernel, * before VFS is up and then exposing the early results once the dentry * is available. */ int relay_late_setup_files(struct rchan *chan, const char *base_filename, struct dentry *parent) { int err = 0; unsigned int i, curr_cpu; unsigned long flags; struct dentry *dentry; struct rchan_buf *buf; struct rchan_percpu_buf_dispatcher disp; if (!chan || !base_filename) return -EINVAL; strscpy(chan->base_filename, base_filename, NAME_MAX); mutex_lock(&relay_channels_mutex); /* Is chan already set up? */ if (unlikely(chan->has_base_filename)) { mutex_unlock(&relay_channels_mutex); return -EEXIST; } chan->has_base_filename = 1; chan->parent = parent; if (chan->is_global) { err = -EINVAL; buf = *per_cpu_ptr(chan->buf, 0); if (!WARN_ON_ONCE(!buf)) { dentry = relay_create_buf_file(chan, buf, 0); if (dentry && !WARN_ON_ONCE(!chan->is_global)) { relay_set_buf_dentry(buf, dentry); err = 0; } } mutex_unlock(&relay_channels_mutex); return err; } curr_cpu = get_cpu(); /* * The CPU hotplug notifier ran before us and created buffers with * no files associated. So it's safe to call relay_setup_buf_file() * on all currently online CPUs. */ for_each_online_cpu(i) { buf = *per_cpu_ptr(chan->buf, i); if (unlikely(!buf)) { WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n"); err = -EINVAL; break; } dentry = relay_create_buf_file(chan, buf, i); if (unlikely(!dentry)) { err = -EINVAL; break; } if (curr_cpu == i) { local_irq_save(flags); relay_set_buf_dentry(buf, dentry); local_irq_restore(flags); } else { disp.buf = buf; disp.dentry = dentry; smp_mb(); /* relay_channels_mutex must be held, so wait. */ err = smp_call_function_single(i, __relay_set_buf_dentry, &disp, 1); } if (unlikely(err)) break; } put_cpu(); mutex_unlock(&relay_channels_mutex); return err; } EXPORT_SYMBOL_GPL(relay_late_setup_files); /** * relay_switch_subbuf - switch to a new sub-buffer * @buf: channel buffer * @length: size of current event * * Returns either the length passed in or 0 if full. * * Performs sub-buffer-switch tasks such as invoking callbacks, * updating padding counts, waking up readers, etc. */ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) { void *old, *new; size_t old_subbuf, new_subbuf; if (unlikely(length > buf->chan->subbuf_size)) goto toobig; if (buf->offset != buf->chan->subbuf_size + 1) { buf->prev_padding = buf->chan->subbuf_size - buf->offset; old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; if (buf->dentry) d_inode(buf->dentry)->i_size += buf->chan->subbuf_size - buf->padding[old_subbuf]; else buf->early_bytes += buf->chan->subbuf_size - buf->padding[old_subbuf]; smp_mb(); if (waitqueue_active(&buf->read_wait)) { /* * Calling wake_up_interruptible() from here * will deadlock if we happen to be logging * from the scheduler (trying to re-grab * rq->lock), so defer it. */ irq_work_queue(&buf->wakeup_work); } } old = buf->data; new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; new = buf->start + new_subbuf * buf->chan->subbuf_size; buf->offset = 0; if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) { buf->offset = buf->chan->subbuf_size + 1; return 0; } buf->data = new; buf->padding[new_subbuf] = 0; if (unlikely(length + buf->offset > buf->chan->subbuf_size)) goto toobig; return length; toobig: buf->chan->last_toobig = length; return 0; } EXPORT_SYMBOL_GPL(relay_switch_subbuf); /** * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count * @chan: the channel * @cpu: the cpu associated with the channel buffer to update * @subbufs_consumed: number of sub-buffers to add to current buf's count * * Adds to the channel buffer's consumed sub-buffer count. * subbufs_consumed should be the number of sub-buffers newly consumed, * not the total consumed. * * NOTE. Kernel clients don't need to call this function if the channel * mode is 'overwrite'. */ void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t subbufs_consumed) { struct rchan_buf *buf; if (!chan || cpu >= NR_CPUS) return; buf = *per_cpu_ptr(chan->buf, cpu); if (!buf || subbufs_consumed > chan->n_subbufs) return; if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed) buf->subbufs_consumed = buf->subbufs_produced; else buf->subbufs_consumed += subbufs_consumed; } EXPORT_SYMBOL_GPL(relay_subbufs_consumed); /** * relay_close - close the channel * @chan: the channel * * Closes all channel buffers and frees the channel. */ void relay_close(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; mutex_lock(&relay_channels_mutex); if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) relay_close_buf(buf); else for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); if (chan->last_toobig) printk(KERN_WARNING "relay: one or more items not logged " "[item size (%zd) > sub-buffer size (%zd)]\n", chan->last_toobig, chan->subbuf_size); list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_close); /** * relay_flush - close the channel * @chan: the channel * * Flushes all channel buffers, i.e. forces buffer switch. */ void relay_flush(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) { relay_switch_subbuf(buf, 0); return; } mutex_lock(&relay_channels_mutex); for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_switch_subbuf(buf, 0); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_flush); /** * relay_file_open - open file op for relay files * @inode: the inode * @filp: the file * * Increments the channel buffer refcount. */ static int relay_file_open(struct inode *inode, struct file *filp) { struct rchan_buf *buf = inode->i_private; kref_get(&buf->kref); filp->private_data = buf; return nonseekable_open(inode, filp); } /** * relay_file_mmap - mmap file op for relay files * @filp: the file * @vma: the vma describing what to map * * Calls upon relay_mmap_buf() to map the file into user space. */ static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { struct rchan_buf *buf = filp->private_data; return relay_mmap_buf(buf, vma); } /** * relay_file_poll - poll file op for relay files * @filp: the file * @wait: poll table * * Poll implemention. */ static __poll_t relay_file_poll(struct file *filp, poll_table *wait) { __poll_t mask = 0; struct rchan_buf *buf = filp->private_data; if (buf->finalized) return EPOLLERR; if (filp->f_mode & FMODE_READ) { poll_wait(filp, &buf->read_wait, wait); if (!relay_buf_empty(buf)) mask |= EPOLLIN | EPOLLRDNORM; } return mask; } /** * relay_file_release - release file op for relay files * @inode: the inode * @filp: the file * * Decrements the channel refcount, as the filesystem is * no longer using it. */ static int relay_file_release(struct inode *inode, struct file *filp) { struct rchan_buf *buf = filp->private_data; kref_put(&buf->kref, relay_remove_buf); return 0; } /* * relay_file_read_consume - update the consumed count for the buffer */ static void relay_file_read_consume(struct rchan_buf *buf, size_t read_pos, size_t bytes_consumed) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t read_subbuf; if (buf->subbufs_produced == buf->subbufs_consumed && buf->offset == buf->bytes_consumed) return; if (buf->bytes_consumed + bytes_consumed > subbuf_size) { relay_subbufs_consumed(buf->chan, buf->cpu, 1); buf->bytes_consumed = 0; } buf->bytes_consumed += bytes_consumed; if (!read_pos) read_subbuf = buf->subbufs_consumed % n_subbufs; else read_subbuf = read_pos / buf->chan->subbuf_size; if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { if ((read_subbuf == buf->subbufs_produced % n_subbufs) && (buf->offset == subbuf_size)) return; relay_subbufs_consumed(buf->chan, buf->cpu, 1); buf->bytes_consumed = 0; } } /* * relay_file_read_avail - boolean, are there unconsumed bytes available? */ static int relay_file_read_avail(struct rchan_buf *buf) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t produced = buf->subbufs_produced; size_t consumed; relay_file_read_consume(buf, 0, 0); consumed = buf->subbufs_consumed; if (unlikely(buf->offset > subbuf_size)) { if (produced == consumed) return 0; return 1; } if (unlikely(produced - consumed >= n_subbufs)) { consumed = produced - n_subbufs + 1; buf->subbufs_consumed = consumed; buf->bytes_consumed = 0; } produced = (produced % n_subbufs) * subbuf_size + buf->offset; consumed = (consumed % n_subbufs) * subbuf_size + buf->bytes_consumed; if (consumed > produced) produced += n_subbufs * subbuf_size; if (consumed == produced) { if (buf->offset == subbuf_size && buf->subbufs_produced > buf->subbufs_consumed) return 1; return 0; } return 1; } /** * relay_file_read_subbuf_avail - return bytes available in sub-buffer * @read_pos: file read position * @buf: relay channel buffer */ static size_t relay_file_read_subbuf_avail(size_t read_pos, struct rchan_buf *buf) { size_t padding, avail = 0; size_t read_subbuf, read_offset, write_subbuf, write_offset; size_t subbuf_size = buf->chan->subbuf_size; write_subbuf = (buf->data - buf->start) / subbuf_size; write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; read_subbuf = read_pos / subbuf_size; read_offset = read_pos % subbuf_size; padding = buf->padding[read_subbuf]; if (read_subbuf == write_subbuf) { if (read_offset + padding < write_offset) avail = write_offset - (read_offset + padding); } else avail = (subbuf_size - padding) - read_offset; return avail; } /** * relay_file_read_start_pos - find the first available byte to read * @buf: relay channel buffer * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ static size_t relay_file_read_start_pos(struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t consumed = buf->subbufs_consumed % n_subbufs; size_t read_pos = (consumed * subbuf_size + buf->bytes_consumed) % (n_subbufs * subbuf_size); read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; padding_start = (read_subbuf + 1) * subbuf_size - padding; padding_end = (read_subbuf + 1) * subbuf_size; if (read_pos >= padding_start && read_pos < padding_end) { read_subbuf = (read_subbuf + 1) % n_subbufs; read_pos = read_subbuf * subbuf_size; } return read_pos; } /** * relay_file_read_end_pos - return the new read position * @read_pos: file read position * @buf: relay channel buffer * @count: number of bytes to be read */ static size_t relay_file_read_end_pos(struct rchan_buf *buf, size_t read_pos, size_t count) { size_t read_subbuf, padding, end_pos; size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; if (read_pos % subbuf_size + count + padding == subbuf_size) end_pos = (read_subbuf + 1) * subbuf_size; else end_pos = read_pos + count; if (end_pos >= subbuf_size * n_subbufs) end_pos = 0; return end_pos; } static ssize_t relay_file_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; size_t written = 0; int ret; if (!count) return 0; inode_lock(file_inode(filp)); do { void *from; if (!relay_file_read_avail(buf)) break; read_start = relay_file_read_start_pos(buf); avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) break; avail = min(count, avail); from = buf->start + read_start; ret = avail; if (copy_to_user(buffer, from, avail)) break; buffer += ret; written += ret; count -= ret; relay_file_read_consume(buf, read_start, ret); *ppos = relay_file_read_end_pos(buf, read_start, ret); } while (count); inode_unlock(file_inode(filp)); return written; } static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) { rbuf->bytes_consumed += bytes_consumed; if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) { relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1); rbuf->bytes_consumed %= rbuf->chan->subbuf_size; } } static void relay_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct rchan_buf *rbuf; rbuf = (struct rchan_buf *)page_private(buf->page); relay_consume_bytes(rbuf, buf->private); } static const struct pipe_buf_operations relay_pipe_buf_ops = { .release = relay_pipe_buf_release, .try_steal = generic_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i) { } /* * subbuf_splice_actor - splice up to one subbuf's worth of data */ static ssize_t subbuf_splice_actor(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags, int *nonpad_ret) { unsigned int pidx, poff, total_len, subbuf_pages, nr_pages; struct rchan_buf *rbuf = in->private_data; unsigned int subbuf_size = rbuf->chan->subbuf_size; uint64_t pos = (uint64_t) *ppos; uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size; size_t read_start = (size_t) do_div(pos, alloc_size); size_t read_subbuf = read_start / subbuf_size; size_t padding = rbuf->padding[read_subbuf]; size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; struct page *pages[PIPE_DEF_BUFFERS]; struct partial_page partial[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .nr_pages = 0, .nr_pages_max = PIPE_DEF_BUFFERS, .partial = partial, .ops = &relay_pipe_buf_ops, .spd_release = relay_page_release, }; ssize_t ret; if (rbuf->subbufs_produced == rbuf->subbufs_consumed) return 0; if (splice_grow_spd(pipe, &spd)) return -ENOMEM; /* * Adjust read len, if longer than what is available */ if (len > (subbuf_size - read_start % subbuf_size)) len = subbuf_size - read_start % subbuf_size; subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; pidx = (read_start / PAGE_SIZE) % subbuf_pages; poff = read_start & ~PAGE_MASK; nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max); for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { unsigned int this_len, this_end, private; unsigned int cur_pos = read_start + total_len; if (!len) break; this_len = min_t(unsigned long, len, PAGE_SIZE - poff); private = this_len; spd.pages[spd.nr_pages] = rbuf->page_array[pidx]; spd.partial[spd.nr_pages].offset = poff; this_end = cur_pos + this_len; if (this_end >= nonpad_end) { this_len = nonpad_end - cur_pos; private = this_len + padding; } spd.partial[spd.nr_pages].len = this_len; spd.partial[spd.nr_pages].private = private; len -= this_len; total_len += this_len; poff = 0; pidx = (pidx + 1) % subbuf_pages; if (this_end >= nonpad_end) { spd.nr_pages++; break; } } ret = 0; if (!spd.nr_pages) goto out; ret = *nonpad_ret = splice_to_pipe(pipe, &spd); if (ret < 0 || ret < total_len) goto out; if (read_start + ret == nonpad_end) ret += padding; out: splice_shrink_spd(&spd); return ret; } static ssize_t relay_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { ssize_t spliced; int ret; int nonpad_ret = 0; ret = 0; spliced = 0; while (len && !spliced) { ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); if (ret < 0) break; else if (!ret) { if (flags & SPLICE_F_NONBLOCK) ret = -EAGAIN; break; } *ppos += ret; if (ret > len) len = 0; else len -= ret; spliced += nonpad_ret; nonpad_ret = 0; } if (spliced) return spliced; return ret; } const struct file_operations relay_file_operations = { .open = relay_file_open, .poll = relay_file_poll, .mmap = relay_file_mmap, .read = relay_file_read, .llseek = no_llseek, .release = relay_file_release, .splice_read = relay_file_splice_read, }; EXPORT_SYMBOL_GPL(relay_file_operations); m.h?id2=bfb1d2b9021c21891427acc86eb848ccedeb274e'>tools/testing/scatterlist/linux/mm.h1
-rw-r--r--tools/testing/selftests/.gitignore1
-rw-r--r--tools/testing/selftests/Makefile23
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c2
-rw-r--r--tools/testing/selftests/alsa/conf.c4
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c10
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c10
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c2
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c3
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c40
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c2
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c154
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h1
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c140
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c8
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c5
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c179
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c3
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c3
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S2
-rw-r--r--tools/testing/selftests/arm64/gcs/Makefile6
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c75
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-locking.c1
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c4
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c12
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c8
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c29
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c6
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c896
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c29
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c10
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c4
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c84
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h9
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h8
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c7
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c2
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6412
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile134
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs6
-rw-r--r--tools/testing/selftests/bpf/bench.c38
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c3
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c65
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c599
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c107
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_strsearch.h128
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h140
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h79
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h22
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h3
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c8
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c41
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h5
-rw-r--r--tools/testing/selftests/bpf/config18
-rw-r--r--tools/testing/selftests/bpf/config.aarch6413
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el92
-rw-r--r--tools/testing/selftests/bpf/config.riscv641
-rw-r--r--tools/testing/selftests/bpf/config.s390x12
-rw-r--r--tools/testing/selftests/bpf/config.x86_646
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c62
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c191
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c189
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_strsearch.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c112
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c504
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c267
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c143
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sysfs.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c617
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c122
-rw-r--r--tools/testing/selftests/bpf/prog_tests/compute_live_registers.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c192
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/file_reader.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c247
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c540
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/net_timestamping.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prepare.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pro_epilogue.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sha256.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c885
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c129
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c389
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c459
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_redir.c465
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_strp.c454
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_btf_ext.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c (renamed from tools/testing/selftests/bpf/test_sysctl.c)37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_edt.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c714
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c590
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c261
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c638
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c2596
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c182
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c486
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c340
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c39
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_vlan.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xsk.c151
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c124
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c54
-rw-r--r--tools/testing/selftests/bpf/progs/arena_strsearch.c146
-rw-r--r--tools/testing/selftests/bpf/progs/bench_sockmap_prog.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h542
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c448
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c110
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h74
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h20
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c22
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c9
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_mprog.c30
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_preorder.c41
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c158
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c12
-rw-r--r--tools/testing/selftests/bpf/progs/changes_pkt_data.c39
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c440
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c4
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c21
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h10
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c38
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c120
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c101
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c258
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c578
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c34
-rw-r--r--tools/testing/selftests/bpf/progs/fd_htab_lookup.c25
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c145
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader_fail.c52
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c19
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c5
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c124
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c414
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c6
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c46
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c6
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_peek.c113
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c8
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_tailcall.c8
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c229
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_subflow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/net_timestamping.c248
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c68
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c27
-rw-r--r--tools/testing/selftests/bpf/progs/priv_freplace_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c88
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null.c2
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c14
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c29
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c206
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c106
-rw-r--r--tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c11
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock.c147
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock_fail.c244
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c11
-rw-r--r--tools/testing/selftests/bpf/progs/security_bpf_map.c69
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c106
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c14
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c52
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)2
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c237
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c33
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c105
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c26
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c56
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c5
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h6
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c26
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c34
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c25
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c31
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c36
-rw-r--r--tools/testing/selftests/bpf/progs/summarization.c78
-rw-r--r--tools/testing/selftests/bpf/progs/summarization_freplace.c (renamed from tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c)17
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_ext.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_kernel_flag.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_set_remove_xattr.c133
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_redir.c68
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_strp.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c69
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_change_tail.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c95
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c654
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c20
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c12
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c33
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c18
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c60
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_and.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c106
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c99
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c15
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c181
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c573
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c11
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c73
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_trap.c71
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c76
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_overflow.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c128
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c389
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c178
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c344
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c234
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c21
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_in_map.c118
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c28
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c179
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c95
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ref_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c40
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c52
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c301
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_tailcall.c31
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c233
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c47
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_reject.c15
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c17
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c23
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c13
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c88
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c50
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_map.sh398
-rw-r--r--tools/testing/selftests/bpf/test_btf.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile2
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c393
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c322
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h12
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h4
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c399
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c105
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh476
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh156
-rw-r--r--tools/testing/selftests/bpf/test_maps.c14
-rw-r--r--tools/testing/selftests/bpf/test_progs.c85
-rw-r--r--tools/testing/selftests/bpf/test_progs.h53
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rw-r--r--tools/testing/selftests/bpf/test_tag.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh100
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh320
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c20
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh645
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh214
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh233
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c234
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c94
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c32
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh11
-rw-r--r--tools/testing/selftests/bpf/veristat.c964
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh9
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c168
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.h4
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h1
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2370
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h153
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c43
-rw-r--r--tools/testing/selftests/cachestat/.gitignore1
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c66
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c2
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c2
-rw-r--r--tools/testing/selftests/cgroup/Makefile21
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c (renamed from tools/testing/selftests/cgroup/cgroup_util.c)130
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h (renamed from tools/testing/selftests/cgroup/cgroup_util.h)39
-rw-r--r--tools/testing/selftests/cgroup/lib/libcgroup.mk19
-rw-r--r--tools/testing/selftests/cgroup/test_core.c95
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c86
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c9
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh617
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c672
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c9
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c14
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c109
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c5
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c11
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h4
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/connector/proc_filter.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c2
-rw-r--r--tools/testing/selftests/core/unshare_test.c2
-rw-r--r--tools/testing/selftests/coredump/.gitignore4
-rw-r--r--tools/testing/selftests/coredump/Makefile10
-rw-r--r--tools/testing/selftests/coredump/config3
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_protocol_test.c1568
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_test.c742
-rw-r--r--tools/testing/selftests/coredump/coredump_test.h59
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c383
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c80
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh4
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh18
-rw-r--r--tools/testing/selftests/damon/Makefile7
-rw-r--r--tools/testing/selftests/damon/_chk_dependency.sh52
-rw-r--r--tools/testing/selftests/damon/_common.sh11
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py324
-rw-r--r--tools/testing/selftests/damon/_debugfs_common.sh64
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c1
-rwxr-xr-xtools/testing/selftests/damon/damon_nr_regions.py2
-rwxr-xr-xtools/testing/selftests/damon/damos_quota.py9
-rwxr-xr-xtools/testing/selftests/damon/damos_quota_goal.py3
-rwxr-xr-xtools/testing/selftests/damon/drgn_dump_damon_status.py223
-rwxr-xr-xtools/testing/selftests/damon/lru_sort.sh8
-rwxr-xr-xtools/testing/selftests/damon/reclaim.sh8
-rwxr-xr-xtools/testing/selftests/damon/sysfs.py303
-rwxr-xr-xtools/testing/selftests/damon/sysfs.sh11
-rwxr-xr-xtools/testing/selftests/damon/sysfs_memcg_path_leak.sh43
-rwxr-xr-xtools/testing/selftests/damon/sysfs_no_op_commit_break.py72
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh8
-rw-r--r--tools/testing/selftests/dma/Makefile7
-rw-r--r--tools/testing/selftests/dma/config1
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c128
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c22
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore4
-rw-r--r--tools/testing/selftests/drivers/net/Makefile31
-rw-r--r--tools/testing/selftests/drivers/net/README.rst4
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile20
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh156
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh108
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh201
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh105
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config12
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh361
-rw-r--r--tools/testing/selftests/drivers/net/config8
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile12
l---------tools/testing/selftests/drivers/net/dsa/tc_taprio.sh1
-rw-r--r--tools/testing/selftests/drivers/net/gro.c (renamed from tools/testing/selftests/net/gro.c)83
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py164
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py215
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore3
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile49
-rw-r--r--tools/testing/selftests/drivers/net/hw/config11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py54
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py439
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py46
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c464
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py145
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/irq.py99
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py47
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py222
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1100
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_link_layer.py113
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_performance.py137
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py113
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py36
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py125
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py92
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c (renamed from tools/testing/selftests/net/toeplitz.c)72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py261
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py52
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py179
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py119
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh258
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh12
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py23
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c100
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py143
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh62
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh65
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_fragmented_msg.sh122
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh272
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_torture.sh130
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile9
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh171
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh23
-rwxr-xr-xtools/testing/selftests/drivers/net/netpoll_basic.py396
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py230
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py640
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c483
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py52
-rwxr-xr-xtools/testing/selftests/drivers/net/ring_reconfig.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py85
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/team/config2
-rwxr-xr-xtools/testing/selftests/drivers/net/team/options.sh188
-rwxr-xr-xtools/testing/selftests/drivers/net/team/propagation.sh80
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile13
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py779
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c2
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c2
-rw-r--r--tools/testing/selftests/exec/check-exec.c2
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/exec/load_address.c2
-rw-r--r--tools/testing/selftests/exec/non-regular.c2
-rw-r--r--tools/testing/selftests/exec/null-argv.c2
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c2
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c2
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c2
-rw-r--r--tools/testing/selftests/filesystems/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c69
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c3
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c9
-rw-r--r--tools/testing/selftests/filesystems/fclog.c130
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c4
-rw-r--r--tools/testing/selftests/filesystems/fuse/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/fuse/Makefile21
-rw-r--r--tools/testing/selftests/filesystems/fuse/fuse_mnt.c146
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c140
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c38
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c528
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c555
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c16
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c4
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c511
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/wrappers.h47
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h38
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c15
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c88
-rw-r--r--tools/testing/selftests/filesystems/utils.c589
-rw-r--r--tools/testing/selftests/filesystems/utils.h48
-rw-r--r--tools/testing/selftests/filesystems/wrappers.h108
-rw-r--r--tools/testing/selftests/ftrace/.gitignore1
-rw-r--r--tools/testing/selftests/ftrace/Makefile2
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest34
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc107
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc56
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc63
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc1
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc28
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc24
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc177
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions14
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc1
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc1
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore7
-rw-r--r--tools/testing/selftests/futex/functional/Makefile15
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c263
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c219
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c270
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c266
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c129
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c103
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c139
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c78
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c99
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh59
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h78
-rw-r--r--tools/testing/selftests/futex/include/futextest.h22
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/gpio/config1
-rwxr-xr-xtools/testing/selftests/gpio/gpio-aggregator.sh727
-rw-r--r--tools/testing/selftests/hid/Makefile2
-rw-r--r--tools/testing/selftests/hid/config.common2
-rw-r--r--tools/testing/selftests/hid/hid_common.h8
-rw-r--r--tools/testing/selftests/hid/hidraw.c473
-rw-r--r--tools/testing/selftests/hid/tests/base.py46
-rw-r--r--tools/testing/selftests/hid/tests/base_device.py49
-rw-r--r--tools/testing/selftests/hid/tests/test_apple_keyboard.py3
-rw-r--r--tools/testing/selftests/hid/tests/test_gamepad.py3
-rw-r--r--tools/testing/selftests/hid/tests/test_ite_keyboard.py3
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py70
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py57
-rw-r--r--tools/testing/selftests/hid/tests/test_sony.py7
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py82
-rw-r--r--tools/testing/selftests/hid/tests/test_wacom_generic.py445
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh668
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c1017
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c72
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h369
-rw-r--r--tools/testing/selftests/ipc/msgque.c49
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c2
-rw-r--r--tools/testing/selftests/kexec/.gitignore2
-rw-r--r--tools/testing/selftests/kexec/Makefile7
-rw-r--r--tools/testing/selftests/kexec/test_kexec_jump.c72
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_jump.sh42
-rw-r--r--tools/testing/selftests/kho/arm64.conf9
-rw-r--r--tools/testing/selftests/kho/init.c95
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh186
-rw-r--r--tools/testing/selftests/kho/x86.conf7
-rw-r--r--tools/testing/selftests/kmod/config5
-rw-r--r--tools/testing/selftests/kselftest.h27
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh2
-rw-r--r--tools/testing/selftests/kselftest/runner.sh14
-rw-r--r--tools/testing/selftests/kselftest_harness.h189
-rw-r--r--tools/testing/selftests/kselftest_harness/.gitignore2
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile8
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c136
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.expected64
-rwxr-xr-xtools/testing/selftests/kselftest_harness/harness-selftest.sh13
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm96
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c281
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c7
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c81
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c166
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c16
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c415
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c306
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c48
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c159
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c8
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c331
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c163
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c17
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c261
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c303
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c77
-rw-r--r--tools/testing/selftests/kvm/config1
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c35
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c524
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c9
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c367
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h24
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h5
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h161
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h3
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h81
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h188
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h220
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h51
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h24
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h22
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h26
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h99
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h53
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h3
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c143
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c4
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c28
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c6
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h1
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c22
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c19
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c177
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c64
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c433
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S65
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c389
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/lru_gen_util.c387
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S139
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c4
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c49
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c49
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c142
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c76
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c9
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c1
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c36
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c157
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c204
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c105
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c31
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c16
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c19
-rw-r--r--tools/testing/selftests/kvm/steal_time.c2
-rw-r--r--tools/testing/selftests/kvm/x86/aperfmperf_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c209
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c23
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c24
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_buslock_test.c135
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c115
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c)42
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nested_exceptions_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c)79
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c)48
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c225
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c13
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c80
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c26
-rw-r--r--tools/testing/selftests/landlock/.gitignore3
-rw-r--r--tools/testing/selftests/landlock/Makefile8
-rw-r--r--tools/testing/selftests/landlock/audit.h478
-rw-r--r--tools/testing/selftests/landlock/audit_test.c672
-rw-r--r--tools/testing/selftests/landlock/base_test.c130
-rw-r--r--tools/testing/selftests/landlock/common.h27
-rw-r--r--tools/testing/selftests/landlock/config4
-rw-r--r--tools/testing/selftests/landlock/fs_test.c2109
-rw-r--r--tools/testing/selftests/landlock/net_test.c256
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c140
-rw-r--r--tools/testing/selftests/landlock/scoped_abstract_unix_test.c111
-rw-r--r--tools/testing/selftests/landlock/scoped_signal_test.c108
-rw-r--r--tools/testing/selftests/landlock/wait-pipe-sandbox.c131
-rw-r--r--tools/testing/selftests/lib.mk11
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config2
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh4
-rwxr-xr-xtools/testing/selftests/lib/printf.sh4
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/livepatch/functions.sh53
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh34
-rwxr-xr-xtools/testing/selftests/livepatch/test-kprobe.sh2
-rw-r--r--tools/testing/selftests/liveupdate/.gitignore9
-rw-r--r--tools/testing/selftests/liveupdate/Makefile34
-rw-r--r--tools/testing/selftests/liveupdate/config11
-rwxr-xr-xtools/testing/selftests/liveupdate/do_kexec.sh16
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c348
-rw-r--r--tools/testing/selftests/liveupdate/luo_kexec_simple.c89
-rw-r--r--tools/testing/selftests/liveupdate/luo_multi_session.c162
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c266
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h44
-rw-r--r--tools/testing/selftests/lkdtm/config2
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h2
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c23
-rw-r--r--tools/testing/selftests/mm/.gitignore10
-rw-r--r--tools/testing/selftests/mm/Makefile10
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh4
-rw-r--r--tools/testing/selftests/mm/compaction_test.c21
-rw-r--r--tools/testing/selftests/mm/config3
-rw-r--r--tools/testing/selftests/mm/cow.c428
-rw-r--r--tools/testing/selftests/mm/droppable.c2
-rw-r--r--tools/testing/selftests/mm/guard-pages.c1320
-rw-r--r--tools/testing/selftests/mm/guard-regions.c2326
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c170
-rw-r--r--tools/testing/selftests/mm/gup_test.c28
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c926
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c2
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c20
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c9
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c2
-rwxr-xr-xtools/testing/selftests/mm/hugetlb_reparenting_test.sh100
-rw-r--r--tools/testing/selftests/mm/khugepaged.c9
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c294
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c40
-rw-r--r--tools/testing/selftests/mm/madv_populate.c41
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c4
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/mm/map_populate.c7
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c2
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c16
-rw-r--r--tools/testing/selftests/mm/merge.c1174
-rw-r--r--tools/testing/selftests/mm/migration.c23
-rw-r--r--tools/testing/selftests/mm/mkdirty.c10
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c6
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c4
-rw-r--r--tools/testing/selftests/mm/mlock2.h9
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c2
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c2
-rw-r--r--tools/testing/selftests/mm/mremap_test.c630
-rw-r--r--tools/testing/selftests/mm/mseal_test.c8
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c110
-rw-r--r--tools/testing/selftests/mm/pfnmap.c269
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h8
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h14
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c6
-rw-r--r--tools/testing/selftests/mm/pkey_util.c1
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c291
-rw-r--r--tools/testing/selftests/mm/process_madv.c344
-rw-r--r--tools/testing/selftests/mm/protection_keys.c10
-rw-r--r--tools/testing/selftests/mm/rmap.c433
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh149
-rw-r--r--tools/testing/selftests/mm/settings2
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c141
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c559
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh6
-rw-r--r--tools/testing/selftests/mm/thp_settings.c18
-rw-r--r--tools/testing/selftests/mm/thp_settings.h3
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c53
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.c301
-rw-r--r--tools/testing/selftests/mm/uffd-common.h80
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c266
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c781
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c36
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c6
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh77
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c22
-rw-r--r--tools/testing/selftests/mm/vm_util.c305
-rw-r--r--tools/testing/selftests/mm/vm_util.h93
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c703
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/.gitignore2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/Makefile6
-rw-r--r--tools/testing/selftests/mseal_system_mappings/config1
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c119
-rw-r--r--tools/testing/selftests/namespaces/.gitignore12
-rw-r--r--tools/testing/selftests/namespaces/Makefile29
-rw-r--r--tools/testing/selftests/namespaces/config7
-rw-r--r--tools/testing/selftests/namespaces/cred_change_test.c814
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c1429
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c61
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c530
-rw-r--r--tools/testing/selftests/namespaces/listns_pagination_bug.c138
-rw-r--r--tools/testing/selftests/namespaces/listns_permissions_test.c759
-rw-r--r--tools/testing/selftests/namespaces/listns_test.c679
-rw-r--r--tools/testing/selftests/namespaces/ns_active_ref_test.c2672
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c981
-rw-r--r--tools/testing/selftests/namespaces/regression_pidfd_setns_test.c113
-rw-r--r--tools/testing/selftests/namespaces/siocgskns_test.c1824
-rw-r--r--tools/testing/selftests/namespaces/stress_test.c626
-rw-r--r--tools/testing/selftests/namespaces/wrappers.h35
-rw-r--r--tools/testing/selftests/nci/nci_dev.c4
-rw-r--r--tools/testing/selftests/net/.gitignore13
-rw-r--r--tools/testing/selftests/net/Makefile286
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c144
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c123
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c217
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c110
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/amt.sh20
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh51
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py9
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh26
-rw-r--r--tools/testing/selftests/net/busy_poller.c16
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rwxr-xr-xtools/testing/selftests/net/cmsg_ip.sh187
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh154
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c126
-rw-r--r--tools/testing/selftests/net/config131
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh439
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh2
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh26
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh61
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh111
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh187
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile58
-rw-r--r--tools/testing/selftests/net/forwarding/README17
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh96
-rw-r--r--tools/testing/selftests/net/forwarding/config29
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh9
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh15
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh766
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh33
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh184
-rwxr-xr-xtools/testing/selftests/net/gro.sh104
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh4
-rw-r--r--tools/testing/selftests/net/ipsec.c2
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh1
-rw-r--r--tools/testing/selftests/net/lib.sh202
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py36
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py149
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py18
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py2
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py167
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py16
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh20
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.bpf.c)6
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c131
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c680
-rwxr-xr-xtools/testing/selftests/net/link_netns.py141
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore1
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile35
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh60
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c74
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh154
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c435
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c28
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh626
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh91
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh45
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c25
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh48
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh49
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore1
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile86
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/config49
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh174
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh515
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh37
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh8
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh361
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh638
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh213
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh157
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh85
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh39
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh18
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c46
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh23
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py143
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh99
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile34
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2387
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config4
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh3
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt6
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt2
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh10
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c690
-rw-r--r--tools/testing/selftests/net/psock_fanout.c2
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c8
-rw-r--r--tools/testing/selftests/net/rds/Makefile10
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c4
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh129
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c2
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh44
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c2
-rw-r--r--tools/testing/selftests/net/skf_net_off.c244
-rwxr-xr-xtools/testing/selftests/net/skf_net_off.sh30
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/socket.c13
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh4
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh79
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh133
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh76
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh85
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config2
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c58
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c22
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c17
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c76
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h114
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c7
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c315
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c75
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c47
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c21
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c32
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c118
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rw-r--r--tools/testing/selftests/net/tls.c626
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh561
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c2
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh10
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh4
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh46
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh98
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
-rw-r--r--tools/testing/selftests/net/ynl.mk8
-rw-r--r--tools/testing/selftests/nolibc/Makefile309
-rw-r--r--tools/testing/selftests/nolibc/Makefile.include10
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc382
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c8
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c541
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh39
-rw-r--r--tools/testing/selftests/openat2/helpers.h2
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c2
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c2
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c2
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c65
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile1
-rw-r--r--tools/testing/selftests/perf_events/.gitignore1
-rw-r--r--tools/testing/selftests/perf_events/Makefile2
-rw-r--r--tools/testing/selftests/perf_events/mmap.c236
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c2
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c6
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c3
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c2
-rw-r--r--tools/testing/selftests/pidfd/.gitignore4
-rw-r--r--tools/testing/selftests/pidfd/Makefile7
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h165
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c76
-rw-r--r--tools/testing/selftests/pidfd/pidfd_exec_helper.c12
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c3
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c62
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c766
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c32
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c69
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c47
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c78
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c132
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h2
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h5
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c4
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c5
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c17
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/check_extended_reg_test.c35
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c20
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h12
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c6
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c6
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c2
-rw-r--r--tools/testing/selftests/proc/.gitignore3
-rw-r--r--tools/testing/selftests/proc/Makefile3
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c806
-rw-r--r--tools/testing/selftests/proc/proc-net-dev-lseek.c68
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c14
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c211
-rw-r--r--tools/testing/selftests/ptp/testptp.c44
-rw-r--r--tools/testing/selftests/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/ptrace/Makefile2
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c2
-rw-r--r--tools/testing/selftests/ptrace/get_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c2
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c519
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh27
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh116
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh15
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mktestid.sh29
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh44
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh166
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-L10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE041
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE073
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE103
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c2
-rw-r--r--tools/testing/selftests/riscv/README24
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c217
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile5
-rw-r--r--tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c10
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_ptrace.c134
-rw-r--r--tools/testing/selftests/rseq/.gitignore1
-rw-r--r--tools/testing/selftests/rseq/Makefile9
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c2
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv-bits.h6
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h5
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h39
-rw-r--r--tools/testing/selftests/rseq/rseq.c37
-rw-r--r--tools/testing/selftests/rseq/rseq.h5
-rwxr-xr-xtools/testing/selftests/rseq/run_syscall_errors_test.sh5
-rw-r--r--tools/testing/selftests/rseq/syscall_errors_test.c124
-rw-r--r--tools/testing/selftests/rtc/.gitignore1
-rw-r--r--tools/testing/selftests/rtc/Makefile2
-rw-r--r--tools/testing/selftests/rtc/rtctest.c21
-rw-r--r--tools/testing/selftests/rtc/setdate.c77
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh23
-rw-r--r--tools/testing/selftests/sched/config2
-rw-r--r--tools/testing/selftests/sched_ext/Makefile5
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.bpf.c144
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.c84
-rw-r--r--tools/testing/selftests/sched_ext/config1
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c74
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.c88
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c43
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.c61
-rw-r--r--tools/testing/selftests/sched_ext/exit.c8
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c1
-rw-r--r--tools/testing/selftests/sched_ext/maximal.bpf.c5
-rw-r--r--tools/testing/selftests/sched_ext/numa.bpf.c100
-rw-r--r--tools/testing/selftests/sched_ext/numa.c59
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c251
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.c224
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c4
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c259
-rw-r--r--tools/testing/selftests/sgx/main.c2
-rw-r--r--tools/testing/selftests/signal/mangle_uc_sigmask.c2
-rw-r--r--tools/testing/selftests/signal/sas.c2
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c2
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c142
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh42
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json14
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json12
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json909
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json254
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json81
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json72
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh10
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c2
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c2
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c18
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c4
-rw-r--r--tools/testing/selftests/timens/exec.c2
-rw-r--r--tools/testing/selftests/timens/futex.c2
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c2
-rw-r--r--tools/testing/selftests/timens/procfs.c2
-rw-r--r--tools/testing/selftests/timens/timens.c2
-rw-r--r--tools/testing/selftests/timens/timens.h2
-rw-r--r--tools/testing/selftests/timens/timer.c4
-rw-r--r--tools/testing/selftests/timens/timerfd.c6
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c2
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c2
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
-rw-r--r--tools/testing/selftests/timers/freq-step.c2
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c2
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c2
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c2
-rw-r--r--tools/testing/selftests/timers/nanosleep.c57
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c2
-rw-r--r--tools/testing/selftests/timers/posix_timers.c105
-rw-r--r--tools/testing/selftests/timers/raw_skew.c2
-rw-r--r--tools/testing/selftests/timers/rtcpie.c2
-rw-r--r--tools/testing/selftests/timers/set-2038.c2
-rw-r--r--tools/testing/selftests/timers/set-tai.c2
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c2
-rw-r--r--tools/testing/selftests/timers/set-tz.c2
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c4
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c2
-rw-r--r--tools/testing/selftests/tpm2/.gitignore3
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh2
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py4
-rw-r--r--tools/testing/selftests/tty/.gitignore1
-rw-r--r--tools/testing/selftests/tty/Makefile6
-rw-r--r--tools/testing/selftests/tty/config1
-rw-r--r--tools/testing/selftests/tty/tty_tiocsti_test.c650
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c2
-rw-r--r--tools/testing/selftests/ublk/.gitignore3
-rw-r--r--tools/testing/selftests/ublk/Makefile51
-rw-r--r--tools/testing/selftests/ublk/common.c55
-rw-r--r--tools/testing/selftests/ublk/config1
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c106
-rw-r--r--tools/testing/selftests/ublk/file_backed.c182
-rw-r--r--tools/testing/selftests/ublk/kublk.c1729
-rw-r--r--tools/testing/selftests/ublk/kublk.h421
-rw-r--r--tools/testing/selftests/ublk/null.c152
-rw-r--r--tools/testing/selftests/ublk/stripe.c391
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh384
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh48
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh48
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_03.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh40
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh41
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_07.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_08.sh32
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_09.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_10.sh30
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_11.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh59
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_13.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh25
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh21
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh24
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh24
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh34
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh36
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh54
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh51
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh84
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh21
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh21
-rw-r--r--tools/testing/selftests/ublk/trace/count_ios_per_tid.bt11
-rw-r--r--tools/testing/selftests/ublk/trace/seq_io.bt25
-rw-r--r--tools/testing/selftests/ublk/ublk_dep.h18
-rw-r--r--tools/testing/selftests/ublk/utils.h68
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c2
-rw-r--r--tools/testing/selftests/user_events/abi_test.c2
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c4
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c2
-rw-r--r--tools/testing/selftests/user_events/perf_test.c4
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h2
-rw-r--r--tools/testing/selftests/vDSO/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/Makefile15
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c29
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.h1
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h7
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h6
l---------[-rw-r--r--]tools/testing/selftests/vDSO/vdso_standalone_test_x86.c143
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c103
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c5
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c124
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c12
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c6
-rw-r--r--tools/testing/selftests/vDSO/vgetrandom-chacha.S2
-rw-r--r--tools/testing/selftests/verification/.gitignore2
-rw-r--r--tools/testing/selftests/verification/Makefile8
-rw-r--r--tools/testing/selftests/verification/config1
-rw-r--r--tools/testing/selftests/verification/settings1
-rw-r--r--tools/testing/selftests/verification/test.d/functions39
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc75
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc68
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitors_available.tc18
-rw-r--r--tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc30
-rwxr-xr-xtools/testing/selftests/verification/verificationtest-ktap8
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile29
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h26
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h76
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h23
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c465
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c94
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c78
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c378
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
-rwxr-xr-xtools/testing/selftests/vfio/scripts/cleanup.sh41
-rwxr-xr-xtools/testing/selftests/vfio/scripts/lib.sh42
-rwxr-xr-xtools/testing/selftests/vfio/scripts/run.sh16
-rwxr-xr-xtools/testing/selftests/vfio/scripts/setup.sh48
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c312
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c168
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c182
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c263
-rw-r--r--tools/testing/selftests/vsock/.gitignore2
-rw-r--r--tools/testing/selftests/vsock/Makefile17
-rw-r--r--tools/testing/selftests/vsock/config111
-rw-r--r--tools/testing/selftests/vsock/settings1
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh607
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c6
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh29
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile3
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config8
-rw-r--r--tools/testing/selftests/x86/Makefile9
-rw-r--r--tools/testing/selftests/x86/amx.c442
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/avx.c12
-rw-r--r--tools/testing/selftests/x86/bugs/Makefile3
-rwxr-xr-xtools/testing/selftests/x86/bugs/common.py164
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_indirect_alignment.py150
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_permutations.py109
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_ret_alignment.py139
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_sysfs.py65
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c16
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c24
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c24
-rw-r--r--tools/testing/selftests/x86/helpers.h28
-rw-r--r--tools/testing/selftests/x86/ioperm.c25
-rw-r--r--tools/testing/selftests/x86/iopl.c25
-rw-r--r--tools/testing/selftests/x86/lam.c162
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c18
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c14
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c24
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c26
-rw-r--r--tools/testing/selftests/x86/sigreturn.c24
-rw-r--r--tools/testing/selftests/x86/sigtrap_loop.c101
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c22
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c12
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c12
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c3
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c24
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c45
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c36
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c12
-rw-r--r--tools/testing/selftests/x86/xstate.c478
-rw-r--r--tools/testing/selftests/x86/xstate.h197
-rw-r--r--tools/testing/selftests/zram/README1
-rw-r--r--tools/testing/shared/interval_tree-shim.c5
-rw-r--r--tools/testing/shared/linux.c124
-rw-r--r--tools/testing/shared/linux/cleanup.h2
-rw-r--r--tools/testing/shared/linux/idr.h4
-rw-r--r--tools/testing/shared/linux/interval_tree.h7
-rw-r--r--tools/testing/shared/linux/interval_tree_generic.h2
-rw-r--r--tools/testing/shared/linux/maple_tree.h6
-rw-r--r--tools/testing/shared/linux/rbtree.h8
-rw-r--r--tools/testing/shared/linux/rbtree_augmented.h7
-rw-r--r--tools/testing/shared/linux/rbtree_types.h8
-rw-r--r--tools/testing/shared/maple-shared.h11
-rw-r--r--tools/testing/shared/maple-shim.c7
-rw-r--r--tools/testing/shared/rbtree-shim.c6
-rw-r--r--tools/testing/shared/shared.mk6
-rw-r--r--tools/testing/vma/Makefile2
-rw-r--r--tools/testing/vma/linux/atomic.h12
-rw-r--r--tools/testing/vma/vma.c566
-rw-r--r--tools/testing/vma/vma_internal.h1125
-rw-r--r--tools/testing/vsock/Makefile1
-rw-r--r--tools/testing/vsock/timeout.c18
-rw-r--r--tools/testing/vsock/timeout.h1
-rw-r--r--tools/testing/vsock/util.c131
-rw-r--r--tools/testing/vsock/util.h37
-rw-r--r--tools/testing/vsock/vsock_test.c485
1778 files changed, 145244 insertions, 23374 deletions
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
index 8141d45df51a..827507844e8f 100644
--- a/tools/testing/crypto/chacha20-s390/test-cipher.c
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -50,7 +50,7 @@ struct skcipher_def {
/* Perform cipher operations with the chacha lib */
static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u8 iv[16], key[32];
u64 start, end;
@@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
}
/* Encrypt */
- chacha_init_arch(chacha_state, (u32*)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+ chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20);
end = ktime_get_ns();
@@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
pr_info("lib encryption took: %lld nsec", end - start);
/* Decrypt */
- chacha_init_arch(chacha_state, (u32 *)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+ chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20);
end = ktime_get_ns();
if (debug)
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b1256fee3567..0e151d0572d1 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -4,23 +4,19 @@ ldflags-y += --wrap=is_acpi_device_node
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
-ldflags-y += --wrap=devm_cxl_setup_hdm
-ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
-ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_hdm_decode_init
-ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
-ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
+ldflags-y += --wrap=hmat_get_extended_linear_cache_size
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
CXL_CORE_SRC := $(DRIVERS)/cxl/core
ccflags-y := -I$(srctree)/drivers/cxl/
ccflags-y += -D__mock=__weak
+ccflags-y += -DCXL_TEST_ENABLE=1
ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
obj-m += cxl_acpi.o
@@ -61,8 +57,12 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
+cxl_core-y += $(CXL_CORE_SRC)/ras.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
+cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
+cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o
diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c
index 0902c5d6e410..a80bc2c062fe 100644
--- a/tools/testing/cxl/config_check.c
+++ b/tools/testing/cxl/config_check.c
@@ -14,4 +14,5 @@ void check(void)
BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST));
BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST));
BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS));
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_MEMORY_HOTPLUG));
}
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index f088792a8925..6754de35598d 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -2,6 +2,28 @@
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include "cxl.h"
+#include "exports.h"
/* Exporting of cxl_core symbols that are only used by cxl_test */
EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
+
+cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
+ __devm_cxl_add_dport_by_dev;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
+
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ return _devm_cxl_add_dport_by_dev(port, dport_dev);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
+
+cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
+
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ return _devm_cxl_switch_port_decoders_setup(port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
new file mode 100644
index 000000000000..7ebee7c0bd67
--- /dev/null
+++ b/tools/testing/cxl/exports.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef __MOCK_CXL_EXPORTS_H_
+#define __MOCK_CXL_EXPORTS_H_
+
+typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
+ struct device *dport_dev);
+extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
+
+typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
+extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
+
+#endif
diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
index 6b1927897856..af50972c8b6d 100644
--- a/tools/testing/cxl/test/Kbuild
+++ b/tools/testing/cxl/test/Kbuild
@@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
obj-m += cxl_test.o
obj-m += cxl_mock.o
obj-m += cxl_mock_mem.o
+obj-m += cxl_translate.o
cxl_test-y := cxl.o
cxl_mock-y := mock.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cc8948f49117..81e2aef3627a 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -2,6 +2,7 @@
// Copyright(c) 2021 Intel Corporation. All rights reserved.
#include <linux/platform_device.h>
+#include <linux/memory_hotplug.h>
#include <linux/genalloc.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -14,6 +15,7 @@
#include "mock.h"
static int interleave_arithmetic;
+static bool extended_linear_cache;
#define FAKE_QTG_ID 42
@@ -25,6 +27,9 @@ static int interleave_arithmetic;
#define NR_CXL_PORT_DECODERS 8
#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
+#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M
+static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT;
+
static struct platform_device *cxl_acpi;
static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS)
@@ -155,7 +160,7 @@ static struct {
} cfmws7;
struct {
struct acpi_cedt_cfmws cfmws;
- u32 target[4];
+ u32 target[3];
} cfmws8;
struct {
struct acpi_cedt_cxims cxims;
@@ -209,7 +214,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -224,7 +229,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -239,7 +244,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -254,7 +259,7 @@ static struct {
},
.interleave_ways = 1,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -269,7 +274,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
@@ -284,7 +289,7 @@ static struct {
},
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M,
@@ -301,7 +306,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 0,
.granularity = 4,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -317,7 +322,7 @@ static struct {
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
.interleave_ways = 1,
.granularity = 0,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
@@ -331,14 +336,14 @@ static struct {
.length = sizeof(mock_cedt.cfmws8),
},
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
- .interleave_ways = 2,
- .granularity = 0,
- .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+ .interleave_ways = 8,
+ .granularity = 1,
+ .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
- .window_size = SZ_256M * 16UL,
+ .window_size = SZ_512M * 6UL,
},
- .target = { 0, 1, 0, 1, },
+ .target = { 0, 1, 2, },
},
.cxims0 = {
.cxims = {
@@ -425,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
return res;
}
+/* Only update CFMWS0 as this is used by the auto region. */
+static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index)
+{
+ if (!extended_linear_cache)
+ return;
+
+ if (index != 0)
+ return;
+
+ /*
+ * The window size should be 2x of the CXL region size where half is
+ * DRAM and half is CXL
+ */
+ window->window_size = mock_auto_region_size * 2;
+}
+
static int populate_cedt(void)
{
struct cxl_mock_res *res;
@@ -449,6 +470,7 @@ static int populate_cedt(void)
for (i = cfmws_start; i <= cfmws_end; i++) {
struct acpi_cedt_cfmws *window = mock_cfmws[i];
+ cfmws_elc_update(window, i);
res = alloc_mock_res(window->window_size, SZ_256M);
if (!res)
return -ENOMEM;
@@ -590,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
return AE_OK;
}
+static int
+mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid, resource_size_t *cache_size)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct resource cfmws0_res =
+ DEFINE_RES_MEM(window->base_hpa, window->window_size);
+
+ if (!extended_linear_cache ||
+ !resource_contains(&cfmws0_res, backing_res)) {
+ return hmat_get_extended_linear_cache_size(backing_res,
+ nid, cache_size);
+ }
+
+ *cache_size = mock_auto_region_size;
+
+ return 0;
+}
+
static struct pci_bus mock_pci_bus[NR_BRIDGES];
static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
[0] = {
@@ -642,15 +683,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
return cxlhdm;
}
-static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
-{
- dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
- return -EOPNOTSUPP;
-}
-
-
struct target_map_ctx {
- int *target_map;
+ u32 *target_map;
int index;
int target_count;
};
@@ -744,7 +778,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
struct cxl_endpoint_decoder *cxled;
struct cxl_switch_decoder *cxlsd;
struct cxl_port *port, *iter;
- const int size = SZ_512M;
struct cxl_memdev *cxlmd;
struct cxl_dport *dport;
struct device *dev;
@@ -787,9 +820,11 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
}
base = window->base_hpa;
+ if (extended_linear_cache)
+ base += mock_auto_region_size;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
cxld->interleave_ways = 2;
@@ -798,7 +833,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->flags = CXL_DECODER_F_ENABLE;
cxled->state = CXL_DECODER_STATE_AUTO;
port->commit_end = cxld->id;
- devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0);
+ devm_cxl_dpa_reserve(cxled, 0,
+ mock_auto_region_size / cxld->interleave_ways, 0);
cxld->commit = mock_decoder_commit;
cxld->reset = mock_decoder_reset;
@@ -817,15 +853,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
*/
if (WARN_ON(!dev))
continue;
+
cxlsd = to_cxl_switch_decoder(dev);
if (i == 0) {
/* put cxl_mem.4 second in the decode order */
- if (pdev->id == 4)
+ if (pdev->id == 4) {
cxlsd->target[1] = dport;
- else
+ cxld->target_map[1] = dport->port_id;
+ } else {
cxlsd->target[0] = dport;
- } else
+ cxld->target_map[0] = dport->port_id;
+ }
+ } else {
cxlsd->target[0] = dport;
+ cxld->target_map[0] = dport->port_id;
+ }
cxld = &cxlsd->cxld;
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
cxld->flags = CXL_DECODER_F_ENABLE;
@@ -841,7 +883,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
cxld->interleave_granularity = 4096;
cxld->hpa_range = (struct range) {
.start = base,
- .end = base + size - 1,
+ .end = base + mock_auto_region_size - 1,
};
put_device(dev);
}
@@ -862,9 +904,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
target_count = NR_CXL_SWITCH_PORTS;
for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
- int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
struct target_map_ctx ctx = {
- .target_map = target_map,
.target_count = target_count,
};
struct cxl_decoder *cxld;
@@ -893,6 +933,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
cxld = &cxled->cxld;
}
+ ctx.target_map = cxld->target_map;
+
mock_init_hdm_decoder(cxld);
if (target_count) {
@@ -904,7 +946,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
}
}
- rc = cxl_decoder_add_locked(cxld, target_map);
+ rc = cxl_decoder_add_locked(cxld);
if (rc) {
put_device(&cxld->dev);
dev_err(&port->dev, "Failed to add decoder\n");
@@ -920,10 +962,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
return 0;
}
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+static int __mock_cxl_decoders_setup(struct cxl_port *port)
+{
+ struct cxl_hdm *cxlhdm;
+
+ cxlhdm = mock_cxl_setup_hdm(port, NULL);
+ if (IS_ERR(cxlhdm)) {
+ if (PTR_ERR(cxlhdm) != -ENODEV)
+ dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+ return PTR_ERR(cxlhdm);
+ }
+
+ return mock_cxl_enumerate_decoders(cxlhdm, NULL);
+}
+
+static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+ if (is_cxl_root(port) || is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+ if (!is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ return __mock_cxl_decoders_setup(port);
+}
+
+static int get_port_array(struct cxl_port *port,
+ struct platform_device ***port_array,
+ int *port_array_size)
{
struct platform_device **array;
- int i, array_size;
+ int array_size;
if (port->depth == 1) {
if (is_multi_bridge(port->uport_dev)) {
@@ -957,9 +1031,24 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return -ENXIO;
}
+ *port_array = array;
+ *port_array_size = array_size;
+
+ return 0;
+}
+
+static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ struct platform_device **array;
+ int rc, i, array_size;
+
+ rc = get_port_array(port, &array, &array_size);
+ if (rc)
+ return ERR_PTR(rc);
+
for (i = 0; i < array_size; i++) {
struct platform_device *pdev = array[i];
- struct cxl_dport *dport;
if (pdev->dev.parent != port->uport_dev) {
dev_dbg(&port->dev, "%s: mismatch parent %s\n",
@@ -968,14 +1057,14 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
continue;
}
- dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
- CXL_RESOURCE_NONE);
+ if (&pdev->dev != dport_dev)
+ continue;
- if (IS_ERR(dport))
- return PTR_ERR(dport);
+ return devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+ CXL_RESOURCE_NONE);
}
- return 0;
+ return ERR_PTR(-ENODEV);
}
/*
@@ -1000,25 +1089,21 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
find_cxl_root(port);
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
- struct range pmem_range = {
- .start = cxlds->pmem_res.start,
- .end = cxlds->pmem_res.end,
- };
- struct range ram_range = {
- .start = cxlds->ram_res.start,
- .end = cxlds->ram_res.end,
- };
if (!cxl_root)
return;
- if (range_len(&ram_range))
- dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
- if (range_len(&pmem_range))
- dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+ dpa_perf_setup(port, &range, perf);
+ }
cxl_memdev_update_perf(cxlmd);
@@ -1038,11 +1123,12 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
.acpi_evaluate_integer = mock_acpi_evaluate_integer,
.acpi_pci_find_root = mock_acpi_pci_find_root,
- .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
- .devm_cxl_setup_hdm = mock_cxl_setup_hdm,
- .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
- .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
+ .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
+ .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
+ .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
+ .hmat_get_extended_linear_cache_size =
+ mock_hmat_get_extended_linear_cache_size,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};
@@ -1332,6 +1418,7 @@ err_mem:
static __init int cxl_test_init(void)
{
int rc, i;
+ struct range mappable;
cxl_acpi_test();
cxl_core_test();
@@ -1346,8 +1433,11 @@ static __init int cxl_test_init(void)
rc = -ENOMEM;
goto err_gen_pool_create;
}
+ mappable = mhp_get_pluggable_range(true);
- rc = gen_pool_add(cxl_mock_pool, iomem_resource.end + 1 - SZ_64G,
+ rc = gen_pool_add(cxl_mock_pool,
+ min(iomem_resource.end + 1 - SZ_64G,
+ mappable.end + 1 - SZ_64G),
SZ_64G, NUMA_NO_NODE);
if (rc)
goto err_gen_pool_add;
@@ -1528,8 +1618,11 @@ static __exit void cxl_test_exit(void)
module_param(interleave_arithmetic, int, 0444);
MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
+module_param(extended_linear_cache, bool, 0444);
+MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support");
module_init(cxl_test_init);
module_exit(cxl_test_exit);
MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: setup module");
MODULE_IMPORT_NS("ACPI");
MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
new file mode 100644
index 000000000000..2200ae21795c
--- /dev/null
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2025 Intel Corporation. All rights reserved.
+
+/* Preface all log entries with "cxl_translate" */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+/* Maximum number of test vectors and entry length */
+#define MAX_TABLE_ENTRIES 128
+#define MAX_ENTRY_LEN 128
+
+/* Expected number of parameters in each test vector */
+#define EXPECTED_PARAMS 7
+
+/* Module parameters for test vectors */
+static char *table[MAX_TABLE_ENTRIES];
+static int table_num;
+
+/* Interleave Arithmetic */
+#define MODULO_MATH 0
+#define XOR_MATH 1
+
+/*
+ * XOR mapping configuration
+ * The test data sets all use the same set of xormaps. When additional
+ * data sets arrive for validation, this static setup will need to
+ * be changed to accept xormaps as additional parameters.
+ */
+struct cxl_cxims_data *cximsd;
+static u64 xormaps[] = {
+ 0x2020900,
+ 0x4041200,
+ 0x1010400,
+ 0x800,
+};
+
+static int nr_maps = ARRAY_SIZE(xormaps);
+
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+ [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+/**
+ * to_hpa - calculate an HPA offset from a DPA offset and position
+ *
+ * dpa_offset: device physical address offset
+ * pos: devices position in interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: host physical address offset
+ */
+static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
+ u8 math)
+{
+ u64 hpa_offset;
+
+ /* Calculate base HPA offset from DPA and position */
+ hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return hpa_offset;
+}
+
+/**
+ * to_dpa - translate an HPA offset to DPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device physical address offset
+ */
+static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ if (math == XOR_MATH) {
+ cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+ if (cximsd->nr_maps)
+ offset =
+ cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+ }
+ return cxl_calculate_dpa_offset(offset, r_eiw, r_eig);
+}
+
+/**
+ * to_pos - extract an interleave position from an HPA offset
+ *
+ * hpa_offset: host physical address offset
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: devices position in region interleave
+ */
+static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+ u64 offset = hpa_offset;
+
+ /* Reverse XOR mapping if specified */
+ if (math == XOR_MATH)
+ offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+
+ return cxl_calculate_position(offset, r_eiw, r_eig);
+}
+
+/**
+ * run_translation_test - execute forward and reverse translations
+ *
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig,
+ u8 hb_ways, int math, u64 expect_hpa)
+{
+ u64 translated_spa, reverse_dpa;
+ int reverse_pos;
+
+ /* Test Device to Host translation: DPA + POS -> SPA */
+ translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math);
+ if (translated_spa != expect_hpa) {
+ pr_err("Device to host failed: expected HPA %llu, got %llu\n",
+ expect_hpa, translated_spa);
+ return -1;
+ }
+
+ /* Test Host to Device DPA translation: SPA -> DPA */
+ reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_dpa != dpa) {
+ pr_err("Host to Device DPA failed: expected %llu, got %llu\n",
+ dpa, reverse_dpa);
+ return -1;
+ }
+
+ /* Test Host to Device Position translation: SPA -> POS */
+ reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math);
+ if (reverse_pos != pos) {
+ pr_err("Position lookup failed: expected %d, got %d\n", pos,
+ reverse_pos);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * parse_test_vector - parse a single test vector string
+ *
+ * entry: test vector string to parse
+ * dpa: device physical address
+ * pos: expected position in region interleave
+ * r_eiw: region encoded interleave ways
+ * r_eig: region encoded interleave granularity
+ * hb_ways: host bridge interleave ways
+ * math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * expect_spa: expected system physical address
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw,
+ u16 *r_eig, u8 *hb_ways, int *math,
+ u64 *expect_hpa)
+{
+ unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways;
+ int parsed;
+
+ parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw,
+ &tmp_r_eig, &tmp_hb_ways, math, expect_hpa);
+
+ if (parsed != EXPECTED_PARAMS) {
+ pr_err("Parse error: expected %d parameters, got %d in '%s'\n",
+ EXPECTED_PARAMS, parsed, entry);
+ return -EINVAL;
+ }
+ if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) {
+ pr_err("Parameter overflow in entry: '%s'\n", entry);
+ return -ERANGE;
+ }
+ if (*math != MODULO_MATH && *math != XOR_MATH) {
+ pr_err("Invalid math type %d in entry: '%s'\n", *math, entry);
+ return -EINVAL;
+ }
+ *r_eiw = tmp_r_eiw;
+ *r_eig = tmp_r_eig;
+ *hb_ways = tmp_hb_ways;
+
+ return 0;
+}
+
+/*
+ * setup_xor_mapping - Initialize XOR mapping data structure
+ *
+ * The test data sets all use the same HBIG so we can use one set
+ * of xormaps, and set the number to apply based on HBIW before
+ * calling cxl_do_xormap_calc().
+ *
+ * When additional data sets arrive for validation with different
+ * HBIG's this static setup will need to be updated.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int setup_xor_mapping(void)
+{
+ if (nr_maps <= 0)
+ return -EINVAL;
+
+ cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL);
+ if (!cximsd)
+ return -ENOMEM;
+
+ memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps));
+ cximsd->nr_maps = nr_maps;
+
+ return 0;
+}
+
+static int test_random_params(void)
+{
+ u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 };
+ u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 };
+ int i, ways, pos, reverse_pos;
+ u64 dpa, hpa, reverse_dpa;
+ int iterations = 10000;
+ int failures = 0;
+
+ for (i = 0; i < iterations; i++) {
+ /* Generate valid random parameters for eiw, eig, pos, dpa */
+ u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)];
+ u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)];
+
+ eiw_to_ways(eiw, &ways);
+ pos = get_random_u32() % ways;
+ dpa = get_random_u64() >> 12;
+
+ hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
+ reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+ reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+
+ if (reverse_dpa != dpa || reverse_pos != pos) {
+ pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+ i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
+ eig);
+
+ if (failures++ > 10) {
+ pr_err("test random too many failures, stop\n");
+ break;
+ }
+ }
+ }
+ pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+struct param_test {
+ u8 eiw;
+ u16 eig;
+ int pos;
+ bool expect; /* true: expect pass, false: expect fail */
+ const char *desc;
+};
+
+static struct param_test param_tests[] = {
+ { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" },
+ { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" },
+ { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" },
+ { 0x1, 0, 0, true, "2-way, eig=0, pos=0" },
+ { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" },
+ { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" },
+ { 0x2, 0, 0, true, "4-way, eig=0, pos=0" },
+ { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" },
+ { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" },
+ { 0x3, 0, 0, true, "8-way, eig=0, pos=0" },
+ { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" },
+ { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" },
+ { 0x4, 0, 0, true, "16-way, eig=0, pos=0" },
+ { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" },
+ { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" },
+ { 0x8, 0, 0, true, "3-way, eig=0, pos=0" },
+ { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" },
+ { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" },
+ { 0x9, 0, 0, true, "6-way, eig=0, pos=0" },
+ { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" },
+ { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" },
+ { 0xA, 0, 0, true, "12-way, eig=0, pos=0" },
+ { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" },
+ { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" },
+ { 0x5, 0, 0, false, "invalid eiw=5" },
+ { 0x7, 0, 0, false, "invalid eiw=7" },
+ { 0xB, 0, 0, false, "invalid eiw=0xB" },
+ { 0xFF, 0, 0, false, "invalid eiw=0xFF" },
+ { 0x1, 7, 0, false, "invalid eig=7 (out of range)" },
+ { 0x2, 0x10, 0, false, "invalid eig=0x10" },
+ { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" },
+ { 0x1, 0, -1, false, "pos < 0" },
+ { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" },
+ { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" },
+ { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" },
+ { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" },
+ { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" },
+ { 0x9, 0, 6, false, "6-way, pos=6 (>= ways)" },
+ { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" },
+};
+
+static int test_cxl_validate_translation_params(void)
+{
+ int i, rc, failures = 0;
+ bool valid;
+
+ for (i = 0; i < ARRAY_SIZE(param_tests); i++) {
+ struct param_test *t = &param_tests[i];
+
+ rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos);
+ valid = (rc == 0);
+
+ if (valid != t->expect) {
+ pr_err("test params failed: %s\n", t->desc);
+ failures++;
+ }
+ }
+ pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures);
+
+ if (failures)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * cxl_translate_init
+ *
+ * Run the internal validation tests when no params are passed.
+ * Otherwise, parse the parameters (test vectors), and kick off
+ * the translation test.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int __init cxl_translate_init(void)
+{
+ int rc, i;
+
+ /* If no tables are passed, validate module params only */
+ if (table_num == 0) {
+ pr_info("Internal validation test start...\n");
+ rc = test_cxl_validate_translation_params();
+ if (rc)
+ return rc;
+
+ rc = test_random_params();
+ if (rc)
+ return rc;
+
+ pr_info("Internal validation test completed successfully\n");
+
+ return 0;
+ }
+
+ pr_info("CXL translate test module loaded with %d test vectors\n",
+ table_num);
+
+ rc = setup_xor_mapping();
+ if (rc)
+ return rc;
+
+ /* Process each test vector */
+ for (i = 0; i < table_num; i++) {
+ u64 dpa, expect_spa;
+ int pos, math;
+ u8 r_eiw, hb_ways;
+ u16 r_eig;
+
+ pr_debug("Processing test vector %d: '%s'\n", i, table[i]);
+
+ /* Parse the test vector */
+ rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig,
+ &hb_ways, &math, &expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " Failed to parse test vector '%s'\n",
+ i, table[i]);
+ continue;
+ }
+ /* Run the translation test */
+ rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math,
+ expect_spa);
+ if (rc) {
+ pr_err("CXL Translate Test %d: FAIL\n"
+ " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n",
+ i, dpa, pos, r_eiw, r_eig, hb_ways,
+ (math == XOR_MATH) ? "XOR" : "MODULO",
+ expect_spa);
+ } else {
+ pr_info("CXL Translate Test %d: PASS\n", i);
+ }
+ }
+
+ kfree(cximsd);
+ pr_info("CXL translate test completed\n");
+
+ return 0;
+}
+
+static void __exit cxl_translate_exit(void)
+{
+ pr_info("CXL translate test module unloaded\n");
+}
+
+module_param_array(table, charp, &table_num, 0444);
+MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cxl_test: cxl address translation test module");
+MODULE_IMPORT_NS("CXL");
+
+module_init(cxl_translate_init);
+module_exit(cxl_translate_exit);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 8d731bd63988..176dcde570cd 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -45,6 +45,18 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = CXL_CMD_EFFECT_NONE,
},
{
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE),
+ .effect = CXL_CMD_EFFECT_NONE,
+ },
+ {
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE),
+ .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)),
+ },
+ {
.opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
.effect = CXL_CMD_EFFECT_NONE,
},
@@ -66,6 +78,10 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = CXL_CMD_EFFECT_NONE,
},
{
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE),
+ .effect = POLICY_CHANGE_IMMEDIATE,
+ },
+ {
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
.effect = CXL_CMD_EFFECT_NONE,
},
@@ -145,6 +161,10 @@ struct mock_event_store {
u32 ev_status;
};
+struct vendor_test_feat {
+ __le32 data;
+} __packed;
+
struct cxl_mockmem_data {
void *lsa;
void *fw;
@@ -161,6 +181,8 @@ struct cxl_mockmem_data {
u8 event_buf[SZ_4K];
u64 timestamp;
unsigned long sanitize_timeout;
+ struct vendor_test_feat test_feat;
+ u8 shutdown_state;
};
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -228,22 +250,21 @@ static void mes_add_event(struct mock_event_store *mes,
* Vary the number of events returned to simulate events occuring while the
* logs are being read.
*/
-static int ret_limit = 0;
+static atomic_t event_counter = ATOMIC_INIT(0);
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
struct mock_event_log *log;
- u16 nr_overflow;
+ int ret_limit;
u8 log_type;
int i;
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
- if (!ret_limit)
- ret_limit = 1;
+ /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */
+ ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1;
if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
@@ -277,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
u64 ns;
pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
- pl->overflow_err_count = cpu_to_le16(nr_overflow);
+ pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
ns = ktime_get_real_ns();
ns -= 5000000000; /* 5s ago */
pl->first_overflow_timestamp = cpu_to_le64(ns);
@@ -1088,6 +1109,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd)
return 0;
}
+static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in;
+
+ if (cmd->size_in != sizeof(*ss))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ mdata->shutdown_state = ss->state;
+ return 0;
+}
+
static struct mock_poison {
struct cxl_dev_state *cxlds;
u64 dpa;
@@ -1354,6 +1390,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata,
return -EINVAL;
}
+#define CXL_VENDOR_FEATURE_TEST \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+ 0xff, 0xff, 0xff)
+
+static void fill_feature_vendor_test(struct cxl_feat_entry *feat)
+{
+ feat->uuid = CXL_VENDOR_FEATURE_TEST;
+ feat->id = 0;
+ feat->get_feat_size = cpu_to_le16(0x4);
+ feat->set_feat_size = cpu_to_le16(0x4);
+ feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE |
+ CXL_FEATURE_F_DEFAULT_SEL |
+ CXL_FEATURE_F_SAVED_SEL);
+ feat->get_feat_ver = 1;
+ feat->set_feat_ver = 1;
+ feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET |
+ CXL_CMD_EFFECTS_VALID);
+}
+
+#define MAX_CXL_TEST_FEATS 1
+
+static int mock_get_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct vendor_test_feat *output = cmd->payload_out;
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+ u16 offset = le16_to_cpu(input->offset);
+ u16 count = le16_to_cpu(input->count);
+ u8 *ptr;
+
+ if (offset > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ if (offset + count > sizeof(*output)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ ptr = (u8 *)&mdata->test_feat + offset;
+ memcpy((u8 *)output + offset, ptr, count);
+
+ return 0;
+}
+
+static int mock_get_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_get_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_set_test_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+ struct vendor_test_feat *test =
+ (struct vendor_test_feat *)input->feat_data;
+ u32 action;
+
+ action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK,
+ le32_to_cpu(input->hdr.flags));
+ /*
+ * While it is spec compliant to support other set actions, it is not
+ * necessary to add the complication in the emulation currently. Reject
+ * anything besides full xfer.
+ */
+ if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ /* Offset should be reserved when doing full transfer */
+ if (input->hdr.offset) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ memcpy(&mdata->test_feat.data, &test->data, sizeof(u32));
+
+ return 0;
+}
+
+static int mock_set_feature(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_feat_in *input = cmd->payload_in;
+
+ if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST))
+ return mock_set_test_feature(mdata, cmd);
+
+ cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
+
+ return -EOPNOTSUPP;
+}
+
+static int mock_get_supported_features(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in;
+ struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out;
+ struct cxl_feat_entry *feat;
+ u16 start_idx, count;
+
+ if (cmd->size_out < sizeof(*out)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ /*
+ * Current emulation only supports 1 feature
+ */
+ start_idx = le16_to_cpu(in->start_idx);
+ if (start_idx != 0) {
+ cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
+ return -EINVAL;
+ }
+
+ count = le16_to_cpu(in->count);
+ if (count < struct_size(out, ents, 0)) {
+ cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
+ return -EINVAL;
+ }
+
+ out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ cmd->return_code = 0;
+ if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) {
+ out->num_entries = 0;
+ return 0;
+ }
+
+ out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS);
+ feat = out->ents;
+ fill_feature_vendor_test(feat);
+
+ return 0;
+}
+
static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
@@ -1421,6 +1602,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
rc = mock_passphrase_secure_erase(mdata, cmd);
break;
+ case CXL_MBOX_OP_SET_SHUTDOWN_STATE:
+ rc = mock_set_shutdown_state(mdata, cmd);
+ break;
case CXL_MBOX_OP_GET_POISON:
rc = mock_get_poison(cxlds, cmd);
break;
@@ -1439,6 +1623,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
case CXL_MBOX_OP_ACTIVATE_FW:
rc = mock_activate_fw(mdata, cmd);
break;
+ case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+ rc = mock_get_supported_features(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_GET_FEATURE:
+ rc = mock_get_feature(mdata, cmd);
+ break;
+ case CXL_MBOX_OP_SET_FEATURE:
+ rc = mock_set_feature(mdata, cmd);
+ break;
default:
break;
}
@@ -1486,6 +1679,11 @@ static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
return 0;
}
+static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata)
+{
+ mdata->test_feat.data = cpu_to_le32(0xdeadbeef);
+}
+
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -1494,6 +1692,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
struct cxl_dev_state *cxlds;
struct cxl_mockmem_data *mdata;
struct cxl_mailbox *cxl_mbox;
+ struct cxl_dpa_info range_info = { 0 };
int rc;
mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
@@ -1533,7 +1732,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
- cxlds->serial = pdev->id;
+ cxlds->serial = pdev->id + 1;
if (is_rcd(pdev))
cxlds->rcd = true;
@@ -1554,10 +1753,18 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = cxl_mem_create_range_info(mds);
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
if (rc)
return rc;
+ rc = cxl_dpa_setup(cxlds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = devm_cxl_setup_features(cxlds);
+ if (rc)
+ dev_dbg(dev, "No CXL Features discovered\n");
+
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
@@ -1572,7 +1779,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
+ if (rc)
+ dev_dbg(dev, "No CXL FWCTL setup\n");
+
cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
+ cxl_mock_test_feat_init(mdata);
return 0;
}
@@ -1615,27 +1827,10 @@ static ssize_t fw_buf_checksum_show(struct device *dev,
{
struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
u8 hash[SHA256_DIGEST_SIZE];
- unsigned char *hstr, *hptr;
- struct sha256_state sctx;
- ssize_t written = 0;
- int i;
-
- sha256_init(&sctx);
- sha256_update(&sctx, mdata->fw, mdata->fw_size);
- sha256_final(&sctx, hash);
-
- hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL);
- if (!hstr)
- return -ENOMEM;
-
- hptr = hstr;
- for (i = 0; i < SHA256_DIGEST_SIZE; i++)
- hptr += sprintf(hptr, "%02x", hash[i]);
- written = sysfs_emit(buf, "%s\n", hstr);
+ sha256(mdata->fw, mdata->fw_size, hash);
- kfree(hstr);
- return written;
+ return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash);
}
static DEVICE_ATTR_RO(fw_buf_checksum);
@@ -1696,4 +1891,5 @@ static struct platform_driver cxl_mock_mem_driver = {
module_platform_driver(cxl_mock_mem_driver);
MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: mem device mock module");
MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index af2594e4f35d..44bce80ef3ff 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -10,12 +10,21 @@
#include <cxlmem.h>
#include <cxlpci.h>
#include "mock.h"
+#include "../exports.h"
static LIST_HEAD(mock);
+static struct cxl_dport *
+redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev);
+static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+
void register_cxl_mock_ops(struct cxl_mock_ops *ops)
{
list_add_rcu(&ops->list, &mock);
+ _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
+ _devm_cxl_switch_port_decoders_setup =
+ redirect_devm_cxl_switch_port_decoders_setup;
}
EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
@@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
{
+ _devm_cxl_switch_port_decoders_setup =
+ __devm_cxl_switch_port_decoders_setup;
+ _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
list_del_rcu(&ops->list);
synchronize_srcu(&cxl_mock_srcu);
}
@@ -99,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_integer);
+int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops)
+ rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+ else
+ rc = hmat_get_extended_linear_cache_size(backing_res, nid,
+ cache_size);
+
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size);
+
struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
{
int index;
@@ -131,70 +163,34 @@ __wrap_nvdimm_bus_register(struct device *dev,
}
EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
-struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
- struct cxl_endpoint_dvsec_info *info)
-
-{
- int index;
- struct cxl_hdm *cxlhdm;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- cxlhdm = ops->devm_cxl_setup_hdm(port, info);
- else
- cxlhdm = devm_cxl_setup_hdm(port, info);
- put_cxl_mock_ops(index);
-
- return cxlhdm;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL");
-
-int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
int rc, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_add_passthrough_decoder(port);
+ rc = ops->devm_cxl_switch_port_decoders_setup(port);
else
- rc = devm_cxl_add_passthrough_decoder(port);
+ rc = __devm_cxl_switch_port_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL");
-int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
+int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
int rc, index;
- struct cxl_port *port = cxlhdm->port;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = ops->devm_cxl_endpoint_decoders_setup(port);
else
- rc = devm_cxl_enumerate_decoders(cxlhdm, info);
+ rc = devm_cxl_endpoint_decoders_setup(port);
put_cxl_mock_ops(index);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL");
-
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- int rc, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_port_enumerate_dports(port);
- else
- rc = devm_cxl_port_enumerate_dports(port);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
@@ -211,39 +207,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL");
-int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
- struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_hdm_decode_init(cxlds, cxlhdm, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
-
-int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
- struct cxl_endpoint_dvsec_info *info)
-{
- int rc = 0, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_dev(cxlds->dev))
- rc = 0;
- else
- rc = cxl_dvsec_rr_decode(cxlds, info);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL");
-
struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
struct device *dport_dev,
int port_id,
@@ -268,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL");
-resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
- struct cxl_dport *dport)
-{
- int index;
- resource_size_t component_reg_phys;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(dev))
- component_reg_phys = CXL_RESOURCE_NONE;
- else
- component_reg_phys = cxl_rcd_component_reg_phys(dev, dport);
- put_cxl_mock_ops(index);
-
- return component_reg_phys;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL");
-
void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
{
int index;
@@ -311,6 +257,23 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
+struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+ struct device *dport_dev)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+ struct cxl_dport *dport;
+
+ if (ops && ops->is_mock_port(port->uport_dev))
+ dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
+ else
+ dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+ put_cxl_mock_ops(index);
+
+ return dport;
+}
+
MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("cxl_test: emulation module");
MODULE_IMPORT_NS("ACPI");
MODULE_IMPORT_NS("CXL");
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index d1b0271d2822..2684b89c8aa2 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,13 +19,14 @@ struct cxl_mock_ops {
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
- int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
- struct cxl_hdm *(*devm_cxl_setup_hdm)(
- struct cxl_port *port, struct cxl_endpoint_dvsec_info *info);
- int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
- int (*devm_cxl_enumerate_decoders)(
- struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+ int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
+ int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
+ struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
+ struct device *dport_dev);
+ int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res,
+ int nid,
+ resource_size_t *cache_size);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 6fd864935319..bee7cb34a289 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -741,9 +741,9 @@ if ($start) {
die "Can not find file $bad\n";
}
if ($val eq "good") {
- run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
} elsif ($val eq "bad") {
- run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
}
}
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8c8da966c641..001c4df9f7df 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -21,6 +21,8 @@ my %opt;
my %repeat_tests;
my %repeats;
my %evals;
+my @command_vars;
+my %command_tmp_vars;
#default opts
my %default = (
@@ -216,6 +218,7 @@ my $patchcheck_type;
my $patchcheck_start;
my $patchcheck_cherry;
my $patchcheck_end;
+my $patchcheck_skip;
my $build_time;
my $install_time;
@@ -380,6 +383,7 @@ my %option_map = (
"PATCHCHECK_START" => \$patchcheck_start,
"PATCHCHECK_CHERRY" => \$patchcheck_cherry,
"PATCHCHECK_END" => \$patchcheck_end,
+ "PATCHCHECK_SKIP" => \$patchcheck_skip,
);
# Options may be used by other options, record them.
@@ -900,14 +904,22 @@ sub set_eval {
}
sub set_variable {
- my ($lvalue, $rvalue) = @_;
+ my ($lvalue, $rvalue, $command) = @_;
+ # Command line variables override all others
+ if (defined($command_tmp_vars{$lvalue})) {
+ return;
+ }
if ($rvalue =~ /^\s*$/) {
delete $variable{$lvalue};
} else {
$rvalue = process_variables($rvalue);
$variable{$lvalue} = $rvalue;
}
+
+ if (defined($command)) {
+ $command_tmp_vars{$lvalue} = 1;
+ }
}
sub process_compare {
@@ -1286,6 +1298,19 @@ sub read_config {
$test_case = __read_config $config, \$test_num;
+ foreach my $val (@command_vars) {
+ chomp $val;
+ my %command_overrides;
+ if ($val =~ m/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ set_value($lvalue, $rvalue, 1, \%command_overrides, "COMMAND LINE");
+ } else {
+ die "Invalid option definition '$val'\n";
+ }
+ }
+
# make sure we have all mandatory configs
get_mandatory_configs;
@@ -1371,7 +1396,10 @@ sub __eval_option {
# If a variable contains itself, use the default var
if (($var eq $name) && defined($opt{$var})) {
$o = $opt{$var};
- $retval = "$retval$o";
+ # Only append if the default doesn't contain itself
+ if ($o !~ m/\$\{$var\}/) {
+ $retval = "$retval$o";
+ }
} elsif (defined($opt{$o})) {
$o = $opt{$o};
$retval = "$retval$o";
@@ -3511,11 +3539,37 @@ sub patchcheck {
@list = reverse @list;
}
+ my %skip_list;
+ my $will_skip = 0;
+
+ if (defined($patchcheck_skip)) {
+ foreach my $s (split /\s+/, $patchcheck_skip) {
+ $s = `git log --pretty=oneline $s~1..$s`;
+ $s =~ s/^(\S+).*/$1/;
+ chomp $s;
+ $skip_list{$s} = 1;
+ $will_skip++;
+ }
+ }
+
doprint("Going to test the following commits:\n");
foreach my $l (@list) {
+ my $sha1 = $l;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ next if (defined($skip_list{$sha1}));
doprint "$l\n";
}
+ if ($will_skip) {
+ doprint("\nSkipping the following commits:\n");
+ foreach my $l (@list) {
+ my $sha1 = $l;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ next if (!defined($skip_list{$sha1}));
+ doprint "$l\n";
+ }
+ }
+
my $save_clean = $noclean;
my %ignored_warnings;
@@ -3530,6 +3584,11 @@ sub patchcheck {
my $sha1 = $item;
$sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+ if (defined($skip_list{$sha1})) {
+ doprint "\nSkipping \"$item\"\n\n";
+ next;
+ }
+
doprint "\nProcessing commit \"$item\"\n\n";
run_command "git checkout $sha1" or
@@ -4242,8 +4301,55 @@ sub cancel_test {
die "\nCaught Sig Int, test interrupted: $!\n"
}
-$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
+sub die_usage {
+ die << "EOF"
+ktest.pl version: $VERSION
+ usage: ktest.pl [options] [config-file]
+ [options]:
+ -D value: Where value can act as an option override.
+ -D BUILD_NOCLEAN=1
+ Sets global BUILD_NOCLEAN to 1
+ -D TEST_TYPE[2]=build
+ Sets TEST_TYPE of test 2 to "build"
+
+ It can also override all temp variables.
+ -D USE_TEMP_DIR:=1
+ Will override all variables that use
+ "USE_TEMP_DIR="
+
+EOF
+;
+}
+
+while ( $#ARGV >= 0 ) {
+ if ( $ARGV[0] eq "-D" ) {
+ shift;
+ die_usage if ($#ARGV < 1);
+ my $val = shift;
+
+ if ($val =~ m/(.*?):=(.*)$/) {
+ set_variable($1, $2, 1);
+ } else {
+ $command_vars[$#command_vars + 1] = $val;
+ }
+
+ } elsif ( $ARGV[0] =~ m/^-D(.*)/) {
+ my $val = $1;
+ shift;
+ if ($val =~ m/(.*?):=(.*)$/) {
+ set_variable($1, $2, 1);
+ } else {
+ $command_vars[$#command_vars + 1] = $val;
+ }
+ } elsif ( $ARGV[0] eq "-h" ) {
+ die_usage;
+ } else {
+ last;
+ }
+}
+
+$#ARGV < 1 or die_usage;
if ($#ARGV == 0) {
$ktest_config = $ARGV[0];
if (! -f $ktest_config) {
@@ -4303,6 +4409,14 @@ if (defined($opt{"LOG_FILE"})) {
if ($opt{"CLEAR_LOG"}) {
unlink $opt{"LOG_FILE"};
}
+
+ if (! -e $opt{"LOG_FILE"} && $opt{"LOG_FILE"} =~ m,^(.*/),) {
+ my $dir = $1;
+ if (! -d $dir) {
+ mkpath($dir) or die "Failed to create directories '$dir': $!";
+ print "\nThe log directory $dir did not exist, so it was created.\n";
+ }
+ }
open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
LOG->autoflush(1);
}
@@ -4458,6 +4572,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
+ # Always show which build directory and output directory is being used
+ doprint "BUILD_DIR=$builddir\n";
+ doprint "OUTPUT_DIR=$outputdir\n\n";
+
if (defined($pre_test)) {
my $ret = run_command $pre_test;
if (!$ret && defined($pre_test_die) &&
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index f43477a9b857..9c4c449a8f3e 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -1017,6 +1017,8 @@
# Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined.
# (default 0)
#
+# PATCHCHECK_SKIP is an optional list of shas to skip testing
+#
# PATCHCHECK_TYPE is required and is the type of test to run:
# build, boot, test.
#
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index b0049be00c70..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y
CONFIG_PCI=y
CONFIG_USB4=y
+CONFIG_I2C=y
CONFIG_NET=y
CONFIG_MCTP=y
@@ -41,6 +42,10 @@ CONFIG_DAMON_PADDR=y
CONFIG_REGMAP_BUILD=y
+CONFIG_AUDIT=y
+
+CONFIG_PRIME_NUMBERS=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y
@@ -49,3 +54,4 @@ CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SOC=y
CONFIG_SND_SOC_TOPOLOGY_BUILD=y
+CONFIG_SND_SOC_CS35L56_I2C=y
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config
index 54ad8972681a..28edf816aa70 100644
--- a/tools/testing/kunit/configs/arch_uml.config
+++ b/tools/testing/kunit/configs/arch_uml.config
@@ -1,8 +1,7 @@
# Config options which are added to UML builds by default
-# Enable virtio/pci, as a lot of tests require it.
-CONFIG_VIRTIO_UML=y
-CONFIG_UML_PCI_OVER_VIRTIO=y
+# Enable pci, as a lot of tests require it.
+CONFIG_KUNIT_UML_PCI=y
# Enable FORTIFY_SOURCE for wider checking.
CONFIG_FORTIFY_SOURCE=y
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 7f9ae55fd6d5..cd99c1956331 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -228,7 +228,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input
fake_test.counts.passed = 1
output: Iterable[str] = input_data
- if request.raw_output == 'all':
+ if request.raw_output == 'all' or request.raw_output == 'full':
pass
elif request.raw_output == 'kunit':
output = kunit_parser.extract_tap_lines(output)
@@ -425,7 +425,7 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None:
parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. '
'By default, filters to just KUnit output. Use '
'--raw_output=all to show everything',
- type=str, nargs='?', const='all', default=None, choices=['all', 'kunit'])
+ type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit'])
parser.add_argument('--json',
nargs='?',
help='Prints parsed test results as JSON to stdout or a file if '
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 10ff65689dd8..80fa4e354a17 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj:
status = _status_map.get(subtest.status, "FAIL")
test_cases.append({"name": subtest.name, "status": status})
+ test_counts = test.counts
+ counts_json = {
+ "tests": test_counts.total(),
+ "passed": test_counts.passed,
+ "failed": test_counts.failed,
+ "crashed": test_counts.crashed,
+ "skipped": test_counts.skipped,
+ "errors": test_counts.errors,
+ }
test_group = {
"name": test.name,
"sub_groups": sub_groups,
"test_cases": test_cases,
+ "misc": counts_json
}
test_group.update(common_fields)
return test_group
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d30f90eae9a4..260d8d9aa1db 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -14,6 +14,7 @@ import os
import shlex
import shutil
import signal
+import sys
import threading
from typing import Iterator, List, Optional, Tuple
from types import FrameType
@@ -72,8 +73,8 @@ class LinuxSourceTreeOperations:
raise ConfigError(e.output.decode())
def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None:
- command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch,
- 'O=' + build_dir, '--jobs=' + str(jobs)]
+ command = ['make', 'all', 'compile_commands.json', 'scripts_gdb',
+ 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)]
if make_options:
command.extend(make_options)
if self._cross_compile:
@@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str:
return config_path
options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
+
+ if arch == 'help':
+ print('um')
+ for option in options:
+ print(option)
+ sys.exit()
+
raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options)))
def _get_qemu_ops(config_path: str,
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 29fc27e8949b..333cd3a4a56b 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -352,9 +352,9 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool:
lines.pop()
return True
-TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$')
+TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$')
-TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$')
+TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$')
def peek_test_name_match(lines: LineStream, test: Test) -> bool:
"""
@@ -379,6 +379,8 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool:
if not match:
return False
name = match.group(4)
+ if not name:
+ return False
return name == test.name
def parse_test_result(lines: LineStream, test: Test,
@@ -416,7 +418,7 @@ def parse_test_result(lines: LineStream, test: Test,
# Set name of test object
if skip_match:
- test.name = skip_match.group(4)
+ test.name = skip_match.group(4) or skip_match.group(5)
else:
test.name = match.group(4)
@@ -759,7 +761,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
# If parsing the main/top-level test, parse KTAP version line and
# test plan
test.name = "main"
- ktap_line = parse_ktap_header(lines, test, printer)
+ parse_ktap_header(lines, test, printer)
test.log.extend(parse_diagnostic(lines))
parse_test_plan(lines, test)
parent_test = True
@@ -768,13 +770,12 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
# the KTAP version line and/or subtest header line
ktap_line = parse_ktap_header(lines, test, printer)
subtest_line = parse_test_header(lines, test)
+ test.log.extend(parse_diagnostic(lines))
+ parse_test_plan(lines, test)
parent_test = (ktap_line or subtest_line)
if parent_test:
- # If KTAP version line and/or subtest header is found, attempt
- # to parse test plan and print test header
- test.log.extend(parse_diagnostic(lines))
- parse_test_plan(lines, test)
print_test_header(test, printer)
+
expected_count = test.expected_count
subtests = []
test_num = 1
@@ -810,6 +811,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
test.log.extend(parse_diagnostic(lines))
if test.name != "" and not peek_test_name_match(lines, test):
test.add_error(printer, 'missing subtest result line!')
+ elif not lines:
+ print_log(test.log, printer)
+ test.status = TestStatus.NO_TESTS
+ test.add_error(printer, 'No more test results!')
else:
parse_test_result(lines, test, expected_num, printer)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 0bcb0cc002f8..bbba921e0eac 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -363,6 +363,17 @@ class KUnitParserTest(unittest.TestCase):
self.print_mock.assert_any_call(StrContains(' Indented more.'))
self.noPrintCallContains('not ok 1 test1')
+ def test_parse_late_test_plan(self):
+ output = """
+ TAP version 13
+ ok 4 test4
+ 1..4
+ """
+ result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
+ # Missing test results after test plan should alert a suspected test crash.
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2))
+
def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
return kunit_parser.LineStream(enumerate(strs, start=1))
diff --git a/tools/testing/kunit/qemu_configs/mips.py b/tools/testing/kunit/qemu_configs/mips.py
new file mode 100644
index 000000000000..8899ac157b30
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/qemu_configs/mips64.py b/tools/testing/kunit/qemu_configs/mips64.py
new file mode 100644
index 000000000000..1478aed05b94
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mips64el.py b/tools/testing/kunit/qemu_configs/mips64el.py
new file mode 100644
index 000000000000..300c711d7a82
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mips64el.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_64BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mips64el',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta', '-cpu', '5KEc'])
diff --git a/tools/testing/kunit/qemu_configs/mipsel.py b/tools/testing/kunit/qemu_configs/mipsel.py
new file mode 100644
index 000000000000..3d3543315b45
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/mipsel.py
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='mips',
+ kconfig='''
+CONFIG_32BIT=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_MALTA=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+''',
+ qemu_arch='mipsel',
+ kernel_path='vmlinuz',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'malta'])
diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py
index 7ec38d4131f7..5b4c895d5d5a 100644
--- a/tools/testing/kunit/qemu_configs/powerpc.py
+++ b/tools/testing/kunit/qemu_configs/powerpc.py
@@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
kconfig='''
CONFIG_PPC64=y
+CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HVC_CONSOLE=y''',
diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py
new file mode 100644
index 000000000000..88bd60dbb948
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpc32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC32=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_ADB_CUDA=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+''',
+ qemu_arch='ppc',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'g3beige', '-cpu', 'max'])
diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py
new file mode 100644
index 000000000000..7ddee8af4bd7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpcle.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_HVC_CONSOLE=y
+''',
+ qemu_arch='ppc64',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py
new file mode 100644
index 000000000000..b79ba0ae30f8
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/riscv32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='riscv',
+ kconfig='''
+CONFIG_NONPORTABLE=y
+CONFIG_ARCH_RV32I=y
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+''',
+ qemu_arch='riscv32',
+ kernel_path='arch/riscv/boot/Image',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py
index 78a474a5b95f..f00cb89fdef6 100644
--- a/tools/testing/kunit/qemu_configs/sh.py
+++ b/tools/testing/kunit/qemu_configs/sh.py
@@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
CONFIG_MEMORY_START=0x0c000000
CONFIG_SH_RTS7751R2D=y
CONFIG_RTS7751R2D_PLUS=y
-CONFIG_SERIAL_SH_SCI=y''',
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_CMDLINE_EXTEND=y
+''',
qemu_arch='sh4',
kernel_path='arch/sh/boot/zImage',
kernel_command_line='console=ttySC1',
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
index e975c4331a7c..2019550a1b69 100644
--- a/tools/testing/kunit/qemu_configs/sparc.py
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -2,8 +2,11 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='sparc',
kconfig='''
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y''',
+CONFIG_KUNIT_FAULT_TEST=n
+CONFIG_SPARC32=y
+CONFIG_SERIAL_SUNZILOG=y
+CONFIG_SERIAL_SUNZILOG_CONSOLE=y
+''',
qemu_arch='sparc',
kernel_path='arch/sparc/boot/zImage',
kernel_command_line='console=ttyS0 mem=256M',
diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py
new file mode 100644
index 000000000000..53d4e5a8c972
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sparc64.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sparc',
+ kconfig='''
+CONFIG_64BIT=y
+CONFIG_SPARC64=y
+CONFIG_PCI=y
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+''',
+ qemu_arch='sparc64',
+ kernel_path='arch/sparc/boot/image',
+ kernel_command_line='console=ttyS0 kunit_shutdown=poweroff',
+ extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py
index dc7949076863..4a6bf4e048f5 100644
--- a/tools/testing/kunit/qemu_configs/x86_64.py
+++ b/tools/testing/kunit/qemu_configs/x86_64.py
@@ -7,4 +7,6 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
qemu_arch='x86_64',
kernel_path='arch/x86/boot/bzImage',
kernel_command_line='console=ttyS0',
- extra_qemu_params=[])
+ # qboot is faster than SeaBIOS and doesn't mess up
+ # the terminal.
+ extra_qemu_params=['-bios', 'qboot.rom'])
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
index 65d3f27feaf2..30d9ef18bcec 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log
@@ -1,5 +1,5 @@
TAP version 13
-1..2
+1..3
# selftests: membarrier: membarrier_test_single_thread
# TAP version 13
# 1..2
@@ -12,3 +12,4 @@ ok 1 selftests: membarrier: membarrier_test_single_thread
# ok 1 sys_membarrier available
# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected
ok 2 selftests: membarrier: membarrier_test_multi_thread
+ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
index 1cf82acb2a3e..0ab4b53bb4f3 100644
--- a/tools/testing/memblock/internal.h
+++ b/tools/testing/memblock/internal.h
@@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size)
{
}
+static inline unsigned long free_reserved_area(void *start, void *end,
+ int poison, const char *s)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h
new file mode 100644
index 000000000000..ae3f497165d6
--- /dev/null
+++ b/tools/testing/memblock/linux/mutex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MUTEX_H
+#define _MUTEX_H
+
+#define DEFINE_MUTEX(name) int name
+
+static inline void dummy_mutex_guard(int *name)
+{
+}
+
+#define guard(mutex) \
+ dummy_##mutex##_guard
+
+#endif /* _MUTEX_H */ \ No newline at end of file
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
index 68f1a75cd72c..c55f67dd367d 100644
--- a/tools/testing/memblock/tests/alloc_api.c
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -134,7 +134,7 @@ static int alloc_top_down_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size);
+ memblock_reserve_kern(memblock_end_of_DRAM() - total_size, r1_size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -182,7 +182,7 @@ static int alloc_top_down_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -231,8 +231,8 @@ static int alloc_top_down_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -285,8 +285,8 @@ static int alloc_in_between_generic_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -422,7 +422,7 @@ static int alloc_limited_space_generic_check(void)
setup_memblock();
/* Simulate almost-full memory */
- memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+ memblock_reserve_kern(memblock_start_of_DRAM(), reserved_size);
allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES);
@@ -608,7 +608,7 @@ static int alloc_bottom_up_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size);
+ memblock_reserve_kern(memblock_start_of_DRAM() + r1_size, r2_size);
allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES);
@@ -655,7 +655,7 @@ static int alloc_bottom_up_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -705,8 +705,8 @@ static int alloc_bottom_up_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
index 3ef9486da8a0..e5362cfd2ff3 100644
--- a/tools/testing/memblock/tests/alloc_helpers_api.c
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -163,7 +163,7 @@ static int alloc_from_top_down_no_space_above_check(void)
min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
/* No space above this address */
- memblock_reserve(min_addr, r2_size);
+ memblock_reserve_kern(min_addr, r2_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
@@ -199,7 +199,7 @@ static int alloc_from_top_down_min_addr_cap_check(void)
start_addr = (phys_addr_t)memblock_start_of_DRAM();
min_addr = start_addr - SMP_CACHE_BYTES * 3;
- memblock_reserve(start_addr + r1_size, MEM_SIZE - r1_size);
+ memblock_reserve_kern(start_addr + r1_size, MEM_SIZE - r1_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 49bb416d34ff..562e4701b0e0 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -324,7 +324,7 @@ static int alloc_nid_min_reserved_generic_check(void)
min_addr = max_addr - r2_size;
reserved_base = min_addr - r1_size;
- memblock_reserve(reserved_base, r1_size);
+ memblock_reserve_kern(reserved_base, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -374,7 +374,7 @@ static int alloc_nid_max_reserved_generic_check(void)
max_addr = memblock_end_of_DRAM() - r1_size;
min_addr = max_addr - r2_size;
- memblock_reserve(max_addr, r1_size);
+ memblock_reserve_kern(max_addr, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -436,8 +436,8 @@ static int alloc_nid_top_down_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -499,8 +499,8 @@ static int alloc_nid_reserved_full_merge_generic_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -563,8 +563,8 @@ static int alloc_nid_top_down_reserved_no_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -909,8 +909,8 @@ static int alloc_nid_bottom_up_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 67503089e6a0..01e836fba488 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void)
return 0;
}
+#ifdef CONFIG_NUMA
+static int memblock_set_node_check(void)
+{
+ unsigned long i, max_reserved;
+ struct memblock_region *rgn;
+ void *orig_region;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+ orig_region = memblock.reserved.regions;
+
+ /* Equally Split range to node 0 and 1*/
+ memblock_set_node(memblock_start_of_DRAM(),
+ memblock_phys_mem_size() / 2, &memblock.memory, 0);
+ memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2,
+ memblock_phys_mem_size() / 2, &memblock.memory, 1);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ rgn = &memblock.memory.regions[0];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 0);
+ rgn = &memblock.memory.regions[1];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2);
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 1);
+
+ /* Reserve 126 regions with the last one across node boundary */
+ for (i = 0; i < 125; i++)
+ memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8);
+
+ memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8,
+ SZ_16);
+
+ /*
+ * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")
+ * do following process to set nid to each memblock.reserved region.
+ * But it may miss some region if memblock_set_node() double the
+ * array.
+ *
+ * By checking 'max', we make sure all region nid is set properly.
+ */
+repeat:
+ max_reserved = memblock.reserved.max;
+ for_each_mem_region(rgn) {
+ int nid = memblock_get_region_node(rgn);
+
+ memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid);
+ }
+ if (max_reserved != memblock.reserved.max)
+ goto repeat;
+
+ /* Confirm each region has valid node set */
+ for_each_reserved_mem_region(rgn) {
+ ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn)));
+ if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1))
+ ASSERT_EQ(1, memblock_get_region_node(rgn));
+ else
+ ASSERT_EQ(0, memblock_get_region_node(rgn));
+ }
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_set_node_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_set_node");
+ test_print("Running memblock_set_node tests...\n");
+
+ memblock_set_node_check();
+
+ prefix_pop();
+
+ return 0;
+}
+#else
+static int memblock_set_node_checks(void)
+{
+ return 0;
+}
+#endif
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2444,6 +2545,7 @@ int memblock_basic_checks(void)
memblock_bottom_up_checks();
memblock_trim_memory_checks();
memblock_overlaps_region_checks();
+ memblock_set_node_checks();
return 0;
}
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index c1ec099a3b1d..05e763aab104 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -10,7 +10,7 @@
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
- pfn_t *pfn)
+ unsigned long *pfn)
{
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
@@ -29,7 +29,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
*kaddr = pmem->virt_addr + offset;
page = vmalloc_to_page(pmem->virt_addr + offset);
if (pfn)
- *pfn = page_to_pfn_t(page);
+ *pfn = page_to_pfn(page);
pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
__func__, pmem, pgoff, page_to_pfn(page));
@@ -39,7 +39,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
if (kaddr)
*kaddr = pmem->virt_addr + offset;
if (pfn)
- *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+ *pfn = PHYS_PFN(pmem->phys_addr + offset);
/*
* If badblocks are present, limit known good range to the
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e4313726fae3..f7e7bfe9bb85 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -8,7 +8,6 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <linux/pfn_t.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/mm.h>
@@ -135,16 +134,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
}
EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
-pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
-{
- struct nfit_test_resource *nfit_res = get_nfit_res(addr);
-
- if (nfit_res)
- flags &= ~PFN_MAP;
- return phys_to_pfn_t(addr, flags);
-}
-EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags)
{
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 68a064ce598c..8e3b6be53839 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -850,11 +850,22 @@ static int ndtest_probe(struct platform_device *pdev)
p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->label_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
-
+ if (!p->dimm_dma) {
+ rc = -ENOMEM;
+ goto err;
+ }
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cfd4378e2129..f87e9f251d13 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -670,6 +670,7 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
.addr = spa->spa,
.region = NULL,
};
+ struct nfit_mem *nfit_mem;
u64 dpa;
ret = device_for_each_child(&bus->dev, &ctx,
@@ -687,8 +688,12 @@ static int nfit_test_search_spa(struct nvdimm_bus *bus,
*/
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
+ nfit_mem = nvdimm_provider_data(nvdimm);
+ if (!nfit_mem)
+ return -EINVAL;
- spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->devices[0].nfit_device_handle =
+ __to_nfit_memdev(nfit_mem)->device_handle;
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index b00583d1eace..b9047fb8ea4a 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -212,7 +212,6 @@ void __iomem *__wrap_devm_ioremap(struct device *dev,
void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
size_t size, unsigned long flags);
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
-pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags);
void __wrap_devm_memunmap(struct device *dev, void *addr);
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 8b3591a51e1f..b2a6660bbd92 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -14,6 +14,7 @@ include ../shared/shared.mk
main: $(OFILES)
+xarray.o: ../../../lib/test_xarray.c
idr-test.o: ../../../lib/test_ida.c
idr-test: idr-test.o $(CORE_OFILES)
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 84b8c3c92c79..2f830ff8396c 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -499,19 +499,17 @@ void ida_check_random(void)
goto repeat;
}
-void ida_simple_get_remove_test(void)
+void ida_alloc_free_test(void)
{
DEFINE_IDA(ida);
unsigned long i;
- for (i = 0; i < 10000; i++) {
- assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
- }
- assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+ for (i = 0; i < 10000; i++)
+ assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i);
+ assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0);
- for (i = 0; i < 10000; i++) {
- ida_simple_remove(&ida, i);
- }
+ for (i = 0; i < 10000; i++)
+ ida_free(&ida, i);
assert(ida_is_empty(&ida));
ida_destroy(&ida);
@@ -524,7 +522,7 @@ void user_ida_checks(void)
ida_check_nomem();
ida_check_conv_user();
ida_check_random();
- ida_simple_get_remove_test();
+ ida_alloc_free_test();
radix_tree_cpu_dead(1);
}
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index bc30050227fd..5c1b18e3ed21 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -8,14 +8,6 @@
* difficult to handle in kernel tests.
*/
-#define CONFIG_DEBUG_MAPLE_TREE
-#define CONFIG_MAPLE_SEARCH
-#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31)
-#include "test.h"
-#include <stdlib.h>
-#include <time.h>
-#include <linux/init.h>
-
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
@@ -23,7 +15,9 @@
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
-#include "../../../lib/maple_tree.c"
+#include "test.h"
+
+#include "../shared/maple-shim.c"
#include "../../../lib/test_maple_tree.c"
#define RCU_RANGE_COUNT 1000
@@ -63,430 +57,6 @@ struct rcu_reader_struct {
struct rcu_test_struct2 *test;
};
-static int get_alloc_node_count(struct ma_state *mas)
-{
- int count = 1;
- struct maple_alloc *node = mas->alloc;
-
- if (!node || ((unsigned long)node & 0x1))
- return 0;
- while (node->node_count) {
- count += node->node_count;
- node = node->slot[0];
- }
- return count;
-}
-
-static void check_mas_alloc_node_count(struct ma_state *mas)
-{
- mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL);
- mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL);
- MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total);
- mas_destroy(mas);
-}
-
-/*
- * check_new_node() - Check the creation of new nodes and error path
- * verification.
- */
-static noinline void __init check_new_node(struct maple_tree *mt)
-{
-
- struct maple_node *mn, *mn2, *mn3;
- struct maple_alloc *smn;
- struct maple_node *nodes[100];
- int i, j, total;
-
- MA_STATE(mas, mt, 0, 0);
-
- check_mas_alloc_node_count(&mas);
-
- /* Try allocating 3 nodes */
- mtree_lock(mt);
- mt_set_non_kernel(0);
- /* request 3 nodes to be allocated. */
- mas_node_count(&mas, 3);
- /* Allocation request of 3. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 3);
- /* Allocate failed. */
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- mas_push_node(&mas, mn);
- mas_reset(&mas);
- mas_destroy(&mas);
- mtree_unlock(mt);
-
-
- /* Try allocating 1 node, then 2 more */
- mtree_lock(mt);
- /* Set allocation request to 1. */
- mas_set_alloc_req(&mas, 1);
- /* Check Allocation request of 1. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- mas_set_err(&mas, -ENOMEM);
- /* Validate allocation request. */
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- /* Eat the requested node. */
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mn->slot[0] != NULL);
- MT_BUG_ON(mt, mn->slot[1] != NULL);
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas.status = ma_start;
- mas_destroy(&mas);
- /* Allocate 3 nodes, will fail. */
- mas_node_count(&mas, 3);
- /* Drop the lock and allocate 3 nodes. */
- mas_nomem(&mas, GFP_KERNEL);
- /* Ensure 3 are allocated. */
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- /* Allocation request of 0. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 0);
-
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
- /* Ensure we counted 3. */
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- /* Free. */
- mas_reset(&mas);
- mas_destroy(&mas);
-
- /* Set allocation request to 1. */
- mas_set_alloc_req(&mas, 1);
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- mas_set_err(&mas, -ENOMEM);
- /* Validate allocation request. */
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != 1);
- /* Check the node is only one node. */
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, mn->slot[0] != NULL);
- MT_BUG_ON(mt, mn->slot[1] != NULL);
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != 1);
- MT_BUG_ON(mt, mas.alloc->node_count);
-
- mas_set_alloc_req(&mas, 2); /* request 2 more. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 2);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != 3);
- MT_BUG_ON(mt, mas.alloc == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
- MT_BUG_ON(mt, mas.alloc->slot[1] == NULL);
- for (i = 2; i >= 0; i--) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != i);
- MT_BUG_ON(mt, !mn);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
-
- total = 64;
- mas_set_alloc_req(&mas, total); /* request 2 more. */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != total);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (i = total; i > 0; i--) {
- unsigned int e = 0; /* expected node_count */
-
- if (!MAPLE_32BIT) {
- if (i >= 35)
- e = i - 34;
- else if (i >= 5)
- e = i - 4;
- else if (i >= 2)
- e = i - 1;
- } else {
- if (i >= 4)
- e = i - 3;
- else if (i >= 1)
- e = i - 1;
- else
- e = 0;
- }
-
- MT_BUG_ON(mt, mas.alloc->node_count != e);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
- MT_BUG_ON(mt, !mn);
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
-
- total = 100;
- for (i = 1; i < total; i++) {
- mas_set_alloc_req(&mas, i);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (j = i; j > 0; j--) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
- MT_BUG_ON(mt, !mn);
- MT_BUG_ON(mt, not_empty(mn));
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != j);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
- mas_set_alloc_req(&mas, i);
- mas_set_err(&mas, -ENOMEM);
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- for (j = 0; j <= i/2; j++) {
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- nodes[j] = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
- }
-
- while (j) {
- j--;
- mas_push_node(&mas, nodes[j]);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != i);
- for (j = 0; j <= i/2; j++) {
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
- }
- mas_reset(&mas);
- MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL));
- mas_destroy(&mas);
-
- }
-
- /* Set allocation request. */
- total = 500;
- mas_node_count(&mas, total);
- /* Drop the lock and allocate the nodes. */
- mas_nomem(&mas, GFP_KERNEL);
- MT_BUG_ON(mt, !mas.alloc);
- i = 1;
- smn = mas.alloc;
- while (i < total) {
- for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) {
- i++;
- MT_BUG_ON(mt, !smn->slot[j]);
- if (i == total)
- break;
- }
- smn = smn->slot[0]; /* next. */
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != total);
- mas_reset(&mas);
- mas_destroy(&mas); /* Free. */
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- for (i = 1; i < 128; i++) {
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
- for (j = i; j > 0; j--) { /*Free the requests */
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- }
-
- for (i = 1; i < MAPLE_NODE_MASK + 1; i++) {
- MA_STATE(mas2, mt, 0, 0);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */
- for (j = 1; j <= i; j++) { /* Move the allocations to mas2 */
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mas_push_node(&mas2, mn);
- MT_BUG_ON(mt, mas_allocated(&mas2) != j);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_allocated(&mas2) != i);
-
- for (j = i; j > 0; j--) { /*Free the requests */
- MT_BUG_ON(mt, mas_allocated(&mas2) != j);
- mn = mas_pop_node(&mas2); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
- }
-
-
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
-
- mn = mas_pop_node(&mas); /* get the next node. */
- MT_BUG_ON(mt, mn == NULL);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
-
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
-
- /* Check the limit of pop/push/pop */
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
- MT_BUG_ON(mt, mas_alloc_req(&mas) != 1);
- MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
- MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
- MT_BUG_ON(mt, mas_alloc_req(&mas));
- MT_BUG_ON(mt, mas.alloc->node_count != 1);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas.alloc->node_count != 1);
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, not_empty(mn));
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- }
- MT_BUG_ON(mt, mas_allocated(&mas) != 0);
-
-
- for (i = 3; i < MAPLE_NODE_MASK * 3; i++) {
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put it back */
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn2 = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put them back */
- mas_push_node(&mas, mn2);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn2 = mas_pop_node(&mas); /* get the next node. */
- mn3 = mas_pop_node(&mas); /* get the next node. */
- mas_push_node(&mas, mn); /* put them back */
- mas_push_node(&mas, mn2);
- mas_push_node(&mas, mn3);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, i); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mn = mas_pop_node(&mas); /* get the next node. */
- mn->parent = ma_parent_ptr(mn);
- ma_free_rcu(mn);
- mas_destroy(&mas);
- }
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 5); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != 5);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 10); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != 10);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS - 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS - 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2);
- mas_destroy(&mas);
-
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1);
- mas.node = MA_ERROR(-ENOMEM);
- mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */
- mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.status = ma_start;
- MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2);
- mas_destroy(&mas);
-
- mtree_unlock(mt);
-}
-
/*
* Check erasing including RCU.
*/
@@ -35062,7 +34632,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
int i;
void *(*function)(void *);
- pthread_t readers[20];
+ pthread_t readers[30];
unsigned int index = vals->index;
mt_set_in_rcu(mt);
@@ -35080,14 +34650,14 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
}
}
- usleep(5); /* small yield to ensure all threads are at least started. */
+ usleep(3); /* small yield to ensure all threads are at least started. */
while (index <= vals->last) {
mtree_store(mt, index,
(index % 2 ? vals->entry2 : vals->entry3),
GFP_KERNEL);
index++;
- usleep(5);
+ usleep(2);
}
while (i--)
@@ -35098,6 +34668,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
MT_BUG_ON(mt, !vals->seen_entry3);
MT_BUG_ON(mt, !vals->seen_both);
}
+
static noinline void __init check_rcu_simulated(struct maple_tree *mt)
{
unsigned long i, nr_entries = 1000;
@@ -35454,17 +35025,6 @@ static void check_dfs_preorder(struct maple_tree *mt)
MT_BUG_ON(mt, count != e);
mtree_destroy(mt);
- mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
- mas_reset(&mas);
- mt_zero_nr_tallocated();
- mt_set_non_kernel(200);
- mas_expected_entries(&mas, max);
- for (count = 0; count <= max; count++) {
- mas.index = mas.last = count;
- mas_store(&mas, xa_mk_value(count));
- MT_BUG_ON(mt, mas_is_err(&mas));
- }
- mas_destroy(&mas);
rcu_barrier();
/*
* pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n",
@@ -35475,15 +35035,77 @@ static void check_dfs_preorder(struct maple_tree *mt)
}
/* End of depth first search tests */
+/* get height of the lowest non-leaf node with free space */
+static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+ char vacant_height = 0;
+ enum maple_type type;
+ unsigned long *pivots;
+ unsigned long min = 0;
+ unsigned long max = ULONG_MAX;
+ unsigned char offset;
+
+ /* start traversal */
+ mas_reset(mas);
+ mas_start(mas);
+ if (!xa_is_node(mas_root(mas)))
+ return 0;
+
+ type = mte_node_type(mas->node);
+ wr_mas->type = type;
+ while (!ma_is_leaf(type)) {
+ mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
+ offset = mas->offset;
+ mas->end = mas_data_end(mas);
+ pivots = ma_pivots(mte_to_node(mas->node), type);
+
+ if (pivots) {
+ if (offset)
+ min = pivots[mas->offset - 1];
+ if (offset < mas->end)
+ max = pivots[mas->offset];
+ }
+ wr_mas->r_max = offset < mas->end ? pivots[offset] : mas->max;
+
+ /* detect spanning write */
+ if (mas_is_span_wr(wr_mas))
+ break;
+
+ if (mas->end < mt_slot_count(mas->node) - 1)
+ vacant_height = mas->depth + 1;
+
+ mas_descend(mas);
+ type = mte_node_type(mas->node);
+ mas->depth++;
+ }
+
+ return vacant_height;
+}
+
+static int mas_allocated(struct ma_state *mas)
+{
+ int total = 0;
+
+ if (mas->alloc)
+ total++;
+
+ if (mas->sheaf)
+ total += kmem_cache_sheaf_size(mas->sheaf);
+
+ return total;
+}
/* Preallocation testing */
static noinline void __init check_prealloc(struct maple_tree *mt)
{
unsigned long i, max = 100;
unsigned long allocated;
unsigned char height;
+ unsigned char vacant_height;
struct maple_node *mn;
void *ptr = check_prealloc;
MA_STATE(mas, mt, 10, 20);
+ MA_WR_STATE(wr_mas, &mas, ptr);
mt_set_non_kernel(1000);
for (i = 0; i <= max; i++)
@@ -35491,20 +35113,26 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
/* Spanning store */
mas_set_range(&mas, 470, 500);
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+
+ mas_wr_preallocate(&wr_mas, ptr);
+ MT_BUG_ON(mt, mas.store_type != wr_spanning_store);
+ MT_BUG_ON(mt, mas_is_err(&mas));
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
+ mas_wr_preallocate(&wr_mas, ptr);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
@@ -35514,7 +35142,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
@@ -35527,7 +35156,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
@@ -35540,20 +35170,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
- mn = mas_pop_node(&mas);
- MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
- mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas_allocated(&mas) != allocated);
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
- mas_destroy(&mas);
- allocated = mas_allocated(&mas);
- MT_BUG_ON(mt, allocated != 0);
-
- MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
- allocated = mas_allocated(&mas);
- height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -35578,7 +35196,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 2);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mt_set_non_kernel(1);
@@ -35595,8 +35214,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ /*
+ * vacant height cannot be used to compute the number of nodes needed
+ * as the root contains two entries which means it is on the verge of
+ * insufficiency. The worst case full height of the tree is needed.
+ */
+ MT_BUG_ON(mt, allocated != height * 3 + 1);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_set_range(&mas, 0, 200);
@@ -35605,6 +35230,18 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated != 0);
+
+ /* Chaining multiple preallocations */
+ mt_set_in_rcu(mt);
+ mas_set_range(&mas, 800, 805); /* Slot store, should be 0 allocations */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 0);
+ mas.last = 809; /* Node store */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
+ allocated = mas_allocated(&mas);
+ MT_BUG_ON(mt, allocated != 1);
+ mas_store_prealloc(&mas, ptr);
}
/* End of preallocation testing */
@@ -36248,6 +35885,50 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt)
extern void test_kmem_cache_bulk(void);
+static inline void check_spanning_store_height(struct maple_tree *mt)
+{
+ int index = 0;
+ int last = 140;
+ MA_STATE(mas, mt, 0, 0);
+ mas_lock(&mas);
+ while (mt_height(mt) != 3) {
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ mas_set(&mas, ++index);
+ }
+
+ if (MAPLE_32BIT)
+ last = 155; /* 32 bit higher branching factor. */
+
+ mas_set_range(&mas, 90, last);
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
+ mas_unlock(&mas);
+}
+
+/*
+ * Test to check the path of a spanning rebalance which results in
+ * a collapse where the rebalancing of the child node leads to
+ * insufficieny in the parent node.
+ */
+static void check_collapsing_rebalance(struct maple_tree *mt)
+{
+ int i = 0;
+ MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
+
+ /* create a height 6 tree */
+ while (mt_height(mt) < 6) {
+ mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
+ i += 9;
+ }
+
+ /* delete all entries one at a time, starting from the right */
+ do {
+ mas_erase(&mas);
+ } while (mas_prev(&mas, 0) != NULL);
+
+ mtree_unlock(mt);
+}
+
/* callback function used for check_nomem_writer_race() */
static void writer2(void *maple_tree)
{
@@ -36291,11 +35972,17 @@ static void check_nomem_writer_race(struct maple_tree *mt)
check_load(mt, 6, xa_mk_value(0xC));
mtree_unlock(mt);
+ mt_set_non_kernel(0);
/* test for the same race but with mas_store_gfp() */
mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL);
mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL);
mas_set_range(&mas, 0, 5);
+
+ /* setup writer 2 that will trigger the race condition */
+ mt_set_private(mt);
+ mt_set_callback(writer2);
+
mtree_lock(mt);
mas_store_gfp(&mas, NULL, GFP_KERNEL);
@@ -36313,6 +36000,7 @@ static void check_nomem_writer_race(struct maple_tree *mt)
*/
static inline int check_vma_modification(struct maple_tree *mt)
{
+#if defined(CONFIG_64BIT)
MA_STATE(mas, mt, 0, 0);
mtree_lock(mt);
@@ -36336,30 +36024,11 @@ static inline int check_vma_modification(struct maple_tree *mt)
mas_destroy(&mas);
mtree_unlock(mt);
+#endif
+
return 0;
}
-/*
- * test to check that bulk stores do not use wr_rebalance as the store
- * type.
- */
-static inline void check_bulk_rebalance(struct maple_tree *mt)
-{
- MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
- int max = 10;
-
- build_full_tree(mt, 0, 2);
-
- /* erase every entry in the tree */
- do {
- /* set up bulk store mode */
- mas_expected_entries(&mas, max);
- mas_erase(&mas);
- MT_BUG_ON(mt, mas.store_type == wr_rebalance);
- } while (mas_prev(&mas, 0) != NULL);
-
- mas_destroy(&mas);
-}
void farmer_tests(void)
{
@@ -36372,10 +36041,6 @@ void farmer_tests(void)
check_vma_modification(&tree);
mtree_destroy(&tree);
- mt_init(&tree);
- check_bulk_rebalance(&tree);
- mtree_destroy(&tree);
-
tree.ma_root = xa_mk_value(0);
mt_dump(&tree, mt_dump_dec);
@@ -36415,6 +36080,14 @@ void farmer_tests(void)
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_store_height(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_collapsing_rebalance(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_null_expand(&tree);
mtree_destroy(&tree);
@@ -36427,10 +36100,6 @@ void farmer_tests(void)
check_erase_testset(&tree);
mtree_destroy(&tree);
- mt_init_flags(&tree, 0);
- check_new_node(&tree);
- mtree_destroy(&tree);
-
if (!MAPLE_32BIT) {
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_rcu_simulated(&tree);
diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile
new file mode 100644
index 000000000000..d7bbae2af4c7
--- /dev/null
+++ b/tools/testing/rbtree/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+.PHONY: clean
+
+TARGETS = rbtree_test interval_tree_test
+OFILES = $(SHARED_OFILES) rbtree-shim.o interval_tree-shim.o maple-shim.o
+DEPS = ../../../include/linux/rbtree.h \
+ ../../../include/linux/rbtree_types.h \
+ ../../../include/linux/rbtree_augmented.h \
+ ../../../include/linux/interval_tree.h \
+ ../../../include/linux/interval_tree_generic.h \
+ ../../../lib/rbtree.c \
+ ../../../lib/interval_tree.c
+
+targets: $(TARGETS)
+
+include ../shared/shared.mk
+
+ifeq ($(DEBUG), 1)
+ CFLAGS += -g
+endif
+
+$(TARGETS): $(OFILES)
+
+rbtree-shim.o: $(DEPS)
+rbtree_test.o: ../../../lib/rbtree_test.c
+interval_tree-shim.o: $(DEPS)
+interval_tree-shim.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+interval_tree_test.o: ../../../lib/interval_tree_test.c
+interval_tree_test.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+
+clean:
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/*
diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c
new file mode 100644
index 000000000000..49bc5b534330
--- /dev/null
+++ b/tools/testing/rbtree/interval_tree_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * interval_tree.c: Userspace Interval Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+#include "maple-shared.h"
+
+#include "../../../lib/interval_tree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland interval tree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the interval tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the tree\n");
+ fprintf(stderr, " -q: Number of searches to the interval tree\n");
+ fprintf(stderr, " -s: Number of iterations searching the tree\n");
+ fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n");
+ fprintf(stderr, " -m: Largest value for the interval's endpoint\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void interval_tree_tests(void)
+{
+ interval_tree_test_init();
+ interval_tree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:q:s:am:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'q')
+ nsearches = strtoul(optarg, NULL, 0);
+ else if (opt == 's')
+ search_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'a')
+ search_all = true;
+ else if (opt == 'm')
+ max_endpoint = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ maple_tree_init();
+ interval_tree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c
new file mode 100644
index 000000000000..585c970f679e
--- /dev/null
+++ b/tools/testing/rbtree/rbtree_test.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rbtree_test.c: Userspace Red Black Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+
+#include "../../../lib/rbtree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland rbtree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the rb-tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n");
+ fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void rbtree_tests(void)
+{
+ rbtree_test_init();
+ rbtree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:c:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'c')
+ check_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ rbtree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/test.h b/tools/testing/rbtree/test.h
new file mode 100644
index 000000000000..f1f1b545b55a
--- /dev/null
+++ b/tools/testing/rbtree/test.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+void rbtree_tests(void);
+void interval_tree_tests(void);
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 5bd9e6e80625..121ae78d6e88 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -51,7 +51,6 @@ static inline unsigned long page_to_phys(struct page *page)
#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
-#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
#define __min(t1, t2, min1, min2, x, y) ({ \
t1 min1 = (x); \
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index cb24124ac5b9..674aaa02e396 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -4,7 +4,6 @@ gpiogpio-hammer
gpioinclude/
gpiolsgpio
kselftest_install/
-tpm2/SpaceTest.log
# Python bytecode and cache
__pycache__/
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 8daac70c2f9d..56e44a98d6a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -35,6 +35,8 @@ TARGETS += filesystems/epoll
TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
+TARGETS += filesystems/mount-notify
+TARGETS += filesystems/fuse
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
@@ -47,10 +49,12 @@ TARGETS += ipc
TARGETS += ir
TARGETS += kcmp
TARGETS += kexec
+TARGETS += kselftest_harness
TARGETS += kvm
TARGETS += landlock
TARGETS += lib
TARGETS += livepatch
+TARGETS += liveupdate
TARGETS += lkdtm
TARGETS += lsm
TARGETS += membarrier
@@ -61,17 +65,21 @@ TARGETS += mount
TARGETS += mount_setattr
TARGETS += move_mount_set_group
TARGETS += mqueue
+TARGETS += mseal_system_mappings
TARGETS += nci
TARGETS += net
TARGETS += net/af_unix
+TARGETS += net/can
TARGETS += net/forwarding
TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
+TARGETS += nolibc
TARGETS += nsfs
TARGETS += pci_endpoint
TARGETS += pcie_bwctrl
@@ -113,11 +121,14 @@ endif
TARGETS += tmpfs
TARGETS += tpm2
TARGETS += tty
+TARGETS += ublk
TARGETS += uevent
TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
+TARGETS += vfio
TARGETS += x86
+TARGETS += x86/bugs
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
# Run "make quicktest=1 run_tests" or
@@ -198,7 +209,7 @@ export KHDR_INCLUDES
all:
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
@@ -286,6 +297,14 @@ ifdef INSTALL_PATH
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
-C $$TARGET emit_tests >> $(TEST_LIST); \
done;
+ @VERSION=$$(git describe HEAD 2>/dev/null); \
+ if [ -n "$$VERSION" ]; then \
+ echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \
+ printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \
+ else \
+ printf "Unable to get version from git describe\n"; \
+ fi
+ @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**"
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -298,7 +317,7 @@ gen_tar: install
@echo "Created ${TAR_PATH}"
clean:
- @for TARGET in $(TARGETS); do \
+ @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
index 87c044fb9293..421adbdc299d 100644
--- a/tools/testing/selftests/acct/acct_syscall.c
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -9,7 +9,7 @@
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index e2b3a5810f47..317212078e36 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -14,7 +14,7 @@
#include <regex.h>
#include <sys/stat.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define SYSFS_ROOT "/sys"
@@ -448,7 +448,7 @@ int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int de
ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
ret = snd_config_get_bool(cfg);
if (ret < 0)
- ksft_exit_fail_msg("key '%s'.'%s' is not an bool\n", key1, key2);
+ ksft_exit_fail_msg("key '%s'.'%s' is not a bool\n", key1, key2);
return !!ret;
}
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 2a4b2662035e..d4f845c32804 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -25,7 +25,7 @@
#include <poll.h>
#include <stdint.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
#define TESTS_PER_CONTROL 7
@@ -53,10 +53,10 @@ struct ctl_data {
struct ctl_data *next;
};
-int num_cards = 0;
-int num_controls = 0;
-struct card_data *card_list = NULL;
-struct ctl_data *ctl_list = NULL;
+int num_cards;
+int num_controls;
+struct card_data *card_list;
+struct ctl_data *ctl_list;
static void find_controls(void)
{
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index dbd7c222ce93..ee04ccef7d7c 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -17,7 +17,7 @@
#include <assert.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "alsa-local.h"
typedef struct timespec timestamp_t;
@@ -30,7 +30,7 @@ struct card_data {
struct card_data *next;
};
-struct card_data *card_list = NULL;
+struct card_data *card_list;
struct pcm_data {
snd_pcm_t *handle;
@@ -43,10 +43,10 @@ struct pcm_data {
struct pcm_data *next;
};
-struct pcm_data *pcm_list = NULL;
+struct pcm_data *pcm_list;
-int num_missing = 0;
-struct pcm_data *pcm_missing = NULL;
+int num_missing;
+struct pcm_data *pcm_missing;
snd_config_t *default_pcm_config;
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index ca81afa4ee90..95065ef3b441 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -7,7 +7,7 @@
*/
#include <string.h>
#include <alsa/asoundlib.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define CH_NUM 4
diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c
index 32ee3ce57721..c45cb226bd8f 100644
--- a/tools/testing/selftests/alsa/utimer-test.c
+++ b/tools/testing/selftests/alsa/utimer-test.c
@@ -6,7 +6,7 @@
*
* Author: Ivan Orlov <ivan.orlov0322@gmail.com>
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <sound/asound.h>
#include <unistd.h>
#include <fcntl.h>
@@ -135,6 +135,7 @@ TEST_F(timer_f, utimer) {
pthread_join(ticking_thread, NULL);
ASSERT_EQ(total_ticks, TICKS_COUNT);
pclose(rfp);
+ free(buf);
}
TEST(wrong_timers_test) {
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 22029e60eff3..c4c72ee2ef55 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -21,6 +21,8 @@ CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -I$(top_srcdir)/tools/include
+OUTPUT ?= $(CURDIR)
+
export CFLAGS
export top_srcdir
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index a6d30c620908..483488f8c2ad 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
$(OUTPUT)/tpidr2: tpidr2.c
$(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
-static -include ../../../../include/nolibc/nolibc.h \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -I../.. -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 35f521e5f41c..c41640f18e4e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -17,10 +17,16 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
-#include "../../kselftest.h"
+#include <linux/auxvec.h>
+
+#include "kselftest.h"
#define TESTS_PER_HWCAP 3
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
/*
* Function expected to generate exception when the feature is not
* supported and return when it is supported. If the specific exception
@@ -51,7 +57,6 @@ static void cmpbr_sigill(void)
/* Not implemented, too complicated and unreliable anyway */
}
-
static void crc32_sigill(void)
{
/* CRC32W W0, W0, W1 */
@@ -165,6 +170,18 @@ static void lse128_sigill(void)
: "cc", "memory");
}
+static void lsfe_sigill(void)
+{
+ float __attribute__ ((aligned (16))) mem;
+ register float *memp asm ("x0") = &mem;
+
+ /* STFADD H0, [X0] */
+ asm volatile(".inst 0x7c20801f"
+ : "+r" (memp)
+ :
+ : "memory");
+}
+
static void lut_sigill(void)
{
/* LUTI2 V0.16B, { V0.16B }, V[0] */
@@ -759,6 +776,13 @@ static const struct hwcap_data {
.sigill_fn = lse128_sigill,
},
{
+ .name = "LSFE",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_LSFE,
+ .cpuinfo = "lsfe",
+ .sigill_fn = lsfe_sigill,
+ },
+ {
.name = "LUT",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LUT,
@@ -1098,6 +1122,18 @@ static const struct hwcap_data {
.sigill_fn = hbc_sigill,
.sigill_reliable = true,
},
+ {
+ .name = "MTE_FAR",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_FAR,
+ .cpuinfo = "mtefar",
+ },
+ {
+ .name = "MTE_STOREONLY",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_MTE_STORE_ONLY,
+ .cpuinfo = "mtestoreonly",
+ },
};
typedef void (*sighandler_fn)(int, siginfo_t *, void *);
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index b51d21f78cf9..0e46ac21c81d 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define EXPECTED_TESTS 11
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 5ec9a18ec802..b67e3e26fa6d 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -16,7 +16,7 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "syscall-abi.h"
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 285c47dd42f6..1703543fb7c7 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -3,31 +3,12 @@
#include <linux/sched.h>
#include <linux/wait.h>
+#include "kselftest.h"
+
#define SYS_TPIDR2 "S3_3_C13_C0_5"
#define EXPECTED_TESTS 5
-static void putstr(const char *str)
-{
- write(1, str, strlen(str));
-}
-
-static void putnum(unsigned int num)
-{
- char c;
-
- if (num / 10)
- putnum(num / 10);
-
- c = '0' + (num % 10);
- write(1, &c, 1);
-}
-
-static int tests_run;
-static int tests_passed;
-static int tests_failed;
-static int tests_skipped;
-
static void set_tpidr2(uint64_t val)
{
asm volatile (
@@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void)
return val;
}
-static void print_summary(void)
-{
- if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
- putstr("# UNEXPECTED TEST COUNT: ");
-
- putstr("# Totals: pass:");
- putnum(tests_passed);
- putstr(" fail:");
- putnum(tests_failed);
- putstr(" xfail:0 xpass:0 skip:");
- putnum(tests_skipped);
- putstr(" error:0\n");
-}
-
/* Processes should start with TPIDR2 == 0 */
static int default_value(void)
{
@@ -105,9 +72,8 @@ static int write_fork_read(void)
if (newpid == 0) {
/* In child */
if (get_tpidr2() != oldpid) {
- putstr("# TPIDR2 changed in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 changed in child: %llx\n",
+ get_tpidr2());
exit(0);
}
@@ -115,14 +81,12 @@ static int write_fork_read(void)
if (get_tpidr2() == getpid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
if (newpid < 0) {
- putstr("# fork() failed: -");
- putnum(-newpid);
- putstr("\n");
+ ksft_print_msg("fork() failed: %d\n", newpid);
return 0;
}
@@ -132,23 +96,22 @@ static int write_fork_read(void)
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# waitpid() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("waitpid() failed: %d\n", errno);
return 0;
}
if (waiting != newpid) {
- putstr("# waitpid() returned wrong PID\n");
+ ksft_print_msg("waitpid() returned wrong PID: %d != %d\n",
+ waiting, newpid);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (getpid() != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -169,8 +132,10 @@ static int sys_clone(unsigned long clone_flags, unsigned long newsp,
child_tidptr);
}
+#define __STACK_SIZE (8 * 1024 * 1024)
+
/*
- * If we clone with CLONE_SETTLS then the value in the parent should
+ * If we clone with CLONE_VM then the value in the parent should
* be unchanged and the child should start with zero and be able to
* set its own value.
*/
@@ -179,63 +144,65 @@ static int write_clone_read(void)
int parent_tid, child_tid;
pid_t parent, waiting;
int ret, status;
+ void *stack;
parent = getpid();
set_tpidr2(parent);
- ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+ stack = malloc(__STACK_SIZE);
+ if (!stack) {
+ ksft_print_msg("malloc() failed\n");
+ return 0;
+ }
+
+ ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE,
+ &parent_tid, 0, &child_tid);
if (ret == -1) {
- putstr("# clone() failed\n");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("clone() failed: %d\n", errno);
return 0;
}
if (ret == 0) {
/* In child */
if (get_tpidr2() != 0) {
- putstr("# TPIDR2 non-zero in child: ");
- putnum(get_tpidr2());
- putstr("\n");
+ ksft_print_msg("TPIDR2 non-zero in child: %llx\n",
+ get_tpidr2());
exit(0);
}
if (gettid() == 0)
- putstr("# Child TID==0\n");
+ ksft_print_msg("Child TID==0\n");
set_tpidr2(gettid());
if (get_tpidr2() == gettid()) {
exit(1);
} else {
- putstr("# Failed to set TPIDR2 in child\n");
+ ksft_print_msg("Failed to set TPIDR2 in child\n");
exit(0);
}
}
for (;;) {
- waiting = wait4(ret, &status, __WCLONE, NULL);
+ waiting = waitpid(ret, &status, __WCLONE);
if (waiting < 0) {
if (errno == EINTR)
continue;
- putstr("# wait4() failed: ");
- putnum(errno);
- putstr("\n");
+ ksft_print_msg("waitpid() failed: %d\n", errno);
return 0;
}
if (waiting != ret) {
- putstr("# wait4() returned wrong PID ");
- putnum(waiting);
- putstr("\n");
+ ksft_print_msg("waitpid() returned wrong PID %d\n",
+ waiting);
return 0;
}
if (!WIFEXITED(status)) {
- putstr("# child did not exit\n");
+ ksft_print_msg("child did not exit\n");
return 0;
}
if (parent != get_tpidr2()) {
- putstr("# TPIDR2 corrupted in parent\n");
+ ksft_print_msg("TPIDR2 corrupted in parent\n");
return 0;
}
@@ -243,35 +210,14 @@ static int write_clone_read(void)
}
}
-#define run_test(name) \
- if (name()) { \
- tests_passed++; \
- } else { \
- tests_failed++; \
- putstr("not "); \
- } \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" " #name "\n");
-
-#define skip_test(name) \
- tests_skipped++; \
- putstr("ok "); \
- putnum(++tests_run); \
- putstr(" # SKIP " #name "\n");
-
int main(int argc, char **argv)
{
int ret;
- putstr("TAP version 13\n");
- putstr("1..");
- putnum(EXPECTED_TESTS);
- putstr("\n");
+ ksft_print_header();
+ ksft_set_plan(5);
- putstr("# PID: ");
- putnum(getpid());
- putstr("\n");
+ ksft_print_msg("PID: %d\n", getpid());
/*
* This test is run with nolibc which doesn't support hwcap and
@@ -280,23 +226,21 @@ int main(int argc, char **argv)
*/
ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
if (ret >= 0) {
- run_test(default_value);
- run_test(write_read);
- run_test(write_sleep_read);
- run_test(write_fork_read);
- run_test(write_clone_read);
+ ksft_test_result(default_value(), "default_value\n");
+ ksft_test_result(write_read(), "write_read\n");
+ ksft_test_result(write_sleep_read(), "write_sleep_read\n");
+ ksft_test_result(write_fork_read(), "write_fork_read\n");
+ ksft_test_result(write_clone_read(), "write_clone_read\n");
} else {
- putstr("# SME support not present\n");
+ ksft_print_msg("SME support not present\n");
- skip_test(default_value);
- skip_test(write_read);
- skip_test(write_sleep_read);
- skip_test(write_fork_read);
- skip_test(write_clone_read);
+ ksft_test_result_skip("default_value\n");
+ ksft_test_result_skip("write_read\n");
+ ksft_test_result_skip("write_sleep_read\n");
+ ksft_test_result_skip("write_fork_read\n");
+ ksft_test_result_skip("write_clone_read\n");
}
- print_summary();
-
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
index 04e7b72880ef..141cdcbf0b8f 100644
--- a/tools/testing/selftests/arm64/bti/assembler.h
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -14,7 +14,6 @@
#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
-
.macro startfn name:req
.globl \name
\name:
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 4930e03a7b99..22c584b78be5 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -27,7 +27,7 @@
#include <asm/sve_context.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "fp-ptrace.h"
@@ -439,10 +439,17 @@ static bool check_ptrace_values_sve(pid_t child, struct test_config *config)
pass = false;
}
- if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
- ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
- sve->size, SVE_PT_SIZE(vq, sve->flags));
- pass = false;
+ if (svcr_in & SVCR_SM) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SVE reports data with PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
}
/* The registers might be in completely different formats! */
@@ -515,10 +522,17 @@ static bool check_ptrace_values_ssve(pid_t child, struct test_config *config)
pass = false;
}
- if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
- ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
- sve->size, SVE_PT_SIZE(vq, sve->flags));
- pass = false;
+ if (!(svcr_in & SVCR_SM)) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SSVE reports data without PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
}
/* The registers might be in completely different formats! */
@@ -891,18 +905,11 @@ static void set_initial_values(struct test_config *config)
{
int vq = __sve_vq_from_vl(vl_in(config));
int sme_vq = __sve_vq_from_vl(config->sme_vl_in);
- bool sm_change;
svcr_in = config->svcr_in;
svcr_expected = config->svcr_expected;
svcr_out = 0;
- if (sme_supported() &&
- (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM))
- sm_change = true;
- else
- sm_change = false;
-
fill_random(&v_in, sizeof(v_in));
memcpy(v_expected, v_in, sizeof(v_in));
memset(v_out, 0, sizeof(v_out));
@@ -953,12 +960,7 @@ static void set_initial_values(struct test_config *config)
if (fpmr_supported()) {
fill_random(&fpmr_in, sizeof(fpmr_in));
fpmr_in &= FPMR_SAFE_BITS;
-
- /* Entering or exiting streaming mode clears FPMR */
- if (sm_change)
- fpmr_expected = 0;
- else
- fpmr_expected = fpmr_in;
+ fpmr_expected = fpmr_in;
} else {
fpmr_in = 0;
fpmr_expected = 0;
@@ -1059,11 +1061,31 @@ static bool sve_write_supported(struct test_config *config)
if (config->sme_vl_in != config->sme_vl_expected) {
return false;
}
+
+ if (!sve_supported())
+ return false;
}
return true;
}
+static bool sve_write_fpsimd_supported(struct test_config *config)
+{
+ if (!sve_supported() && !sme_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ return true;
+}
+
static void fpsimd_write_expected(struct test_config *config)
{
int vl;
@@ -1132,6 +1154,9 @@ static void sve_write_expected(struct test_config *config)
int vl = vl_expected(config);
int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+ if (!vl)
+ return;
+
fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
@@ -1150,7 +1175,7 @@ static void sve_write_expected(struct test_config *config)
}
}
-static void sve_write(pid_t child, struct test_config *config)
+static void sve_write_sve(pid_t child, struct test_config *config)
{
struct user_sve_header *sve;
struct iovec iov;
@@ -1159,7 +1184,10 @@ static void sve_write(pid_t child, struct test_config *config)
vl = vl_expected(config);
vq = __sve_vq_from_vl(vl);
- iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ if (!vl)
+ return;
+
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
@@ -1193,20 +1221,45 @@ static void sve_write(pid_t child, struct test_config *config)
free(iov.iov_base);
}
-static bool za_write_supported(struct test_config *config)
+static void sve_write_fpsimd(pid_t child, struct test_config *config)
{
- if (config->sme_vl_in != config->sme_vl_expected) {
- /* Changing the SME VL exits streaming mode. */
- if (config->svcr_expected & SVCR_SM) {
- return false;
- }
- } else {
- /* Otherwise we can't change streaming mode */
- if ((config->svcr_in & SVCR_SM) !=
- (config->svcr_expected & SVCR_SM)) {
- return false;
- }
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vl, vq;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
}
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->vl = vl;
+
+ fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET;
+ memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static bool za_write_supported(struct test_config *config)
+{
+ if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
+ return false;
return true;
}
@@ -1224,10 +1277,8 @@ static void za_write_expected(struct test_config *config)
memset(zt_expected, 0, sizeof(zt_expected));
}
- /* Changing the SME VL flushes ZT, SVE state and exits SM */
+ /* Changing the SME VL flushes ZT, SVE state */
if (config->sme_vl_in != config->sme_vl_expected) {
- svcr_expected &= ~SVCR_SM;
-
sve_vq = __sve_vq_from_vl(vl_expected(config));
memset(z_expected, 0, __SVE_ZREGS_SIZE(sve_vq));
memset(p_expected, 0, __SVE_PREGS_SIZE(sve_vq));
@@ -1396,7 +1447,13 @@ static struct test_definition sve_test_defs[] = {
.name = "SVE write",
.supported = sve_write_supported,
.set_expected_values = sve_write_expected,
- .modify_values = sve_write,
+ .modify_values = sve_write_sve,
+ },
+ {
+ .name = "SVE write FPSIMD format",
+ .supported = sve_write_fpsimd_supported,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = sve_write_fpsimd,
},
};
@@ -1508,7 +1565,6 @@ static void run_sve_tests(void)
&test_config);
}
}
-
}
static void run_sme_tests(void)
@@ -1617,7 +1673,7 @@ int main(void)
* Run the test set if there is no SVE or SME, with those we
* have to pick a VL for each run.
*/
- if (!sve_supported()) {
+ if (!sve_supported() && !sme_supported()) {
test_config.sve_vl_in = 0;
test_config.sve_vl_expected = 0;
test_config.sme_vl_in = 0;
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
index 74e23208b94c..65e01aba96ff 100644
--- a/tools/testing/selftests/arm64/fp/fp-stress.c
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define MAX_VLS 16
@@ -105,8 +105,8 @@ static void child_start(struct child_data *child, const char *program)
/*
* Read from the startup pipe, there should be no data
- * and we should block until it is closed. We just
- * carry on on error since this isn't super critical.
+ * and we should block until it is closed. We just
+ * carry-on on error since this isn't super critical.
*/
ret = read(3, &i, sizeof(i));
if (ret < 0)
@@ -549,7 +549,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < cpus; i++) {
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index 348e8bef62c7..0c40007d1282 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -46,7 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context)
}
static char *drivers[] = {
- "crct10dif-arm64",
"sha1-ce",
"sha224-arm64",
"sha224-arm64-neon",
@@ -189,13 +188,13 @@ static bool create_socket(void)
ref = malloc(digest_len);
if (!ref) {
- printf("Failed to allocated %d byte reference\n", digest_len);
+ printf("Failed to allocate %d byte reference\n", digest_len);
return false;
}
digest = malloc(digest_len);
if (!digest) {
- printf("Failed to allocated %d byte digest\n", digest_len);
+ printf("Failed to allocate %d byte digest\n", digest_len);
return false;
}
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index a24eca7a4ecb..df0c1b6eb114 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -12,7 +12,7 @@
#include <sys/prctl.h>
#include <asm/sigcontext.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 577b6e05e860..28f6b996c5e2 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_SVE
@@ -66,7 +66,7 @@ static const struct vec_type vec_types[] = {
};
#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
-#define FLAG_TESTS 2
+#define FLAG_TESTS 4
#define FPSIMD_TESTS 2
#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
@@ -95,19 +95,27 @@ static int do_child(void)
static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
+ return ret;
}
static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
}
static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
@@ -115,8 +123,9 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
{
struct user_sve_header *sve;
void *p;
- size_t sz = sizeof *sve;
+ size_t sz = sizeof(*sve);
struct iovec iov;
+ int ret;
while (1) {
if (*size < sz) {
@@ -132,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
iov.iov_base = *buf;
iov.iov_len = sz;
- if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov))
+ ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov);
+ if (ret) {
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
goto error;
+ }
sve = *buf;
if (sve->size <= sz)
@@ -152,10 +164,46 @@ static int set_sve(pid_t pid, const struct vec_type *type,
const struct user_sve_header *sve)
{
struct iovec iov;
+ int ret;
iov.iov_base = (void *)sve;
iov.iov_len = sve->size;
- return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
+}
+
+/* A read operation fails */
+static void read_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ void *ret;
+
+ ret = get_sve(child, type, (void **)&new_sve, &new_sve_size);
+
+ ksft_test_result(ret == NULL, "%s unsupported read fails\n",
+ type->name);
+
+ free(new_sve);
+}
+
+/* A write operation fails */
+static void write_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ /* Just the header, no data */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.vl = SVE_VL_MIN;
+ ret = set_sve(child, type, &sve);
+
+ ksft_test_result(ret != 0, "%s unsupported write fails\n",
+ type->name);
}
/* Validate setting and getting the inherit flag */
@@ -170,7 +218,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
- sve.flags = SVE_PT_VL_INHERIT;
+ sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE;
ret = set_sve(child, type, &sve);
if (ret != 0) {
ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
@@ -235,6 +283,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
/* Set the VL by doing a set with no register payload */
memset(&sve, 0, sizeof(sve));
sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
sve.vl = vl;
ret = set_sve(child, type, &sve);
if (ret != 0) {
@@ -253,7 +302,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
return;
}
- ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n",
+ ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n",
type->name, vl);
free(new_sve);
@@ -269,6 +318,25 @@ static void check_u32(unsigned int vl, const char *reg,
}
}
+/* Set out of range VLs */
+static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ memset(&sve, 0, sizeof(sve));
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.size = sizeof(sve);
+
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name);
+
+ sve.vl = SVE_VL_MAX + SVE_VQ_BYTES;
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+}
+
/* Access the FPSIMD registers via the SVE regset */
static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
{
@@ -301,8 +369,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
p[j] = j;
}
+ /* This should only succeed for SVE */
ret = set_sve(child, type, sve);
- ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+ ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0),
+ "%s FPSIMD set via SVE: %d\n",
type->name, ret);
if (ret)
goto out;
@@ -324,6 +394,58 @@ out:
free(svebuf);
}
+/* Write the FPSIMD registers via the SVE regset when SVE is not supported */
+static void ptrace_sve_fpsimd_no_sve(pid_t child)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ /* On a system without SVE the VL should be set to 0 */
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 0;
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_sve(child, &vec_types[0], sve);
+ ksft_test_result(ret == 0, "FPSIMD write via SVE\n");
+ if (ret) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ goto out;
+ }
+ ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0,
+ "Verify FPSIMD write via SVE\n");
+
+out:
+ free(svebuf);
+}
+
/* Validate attempting to set SVE data and read SVE data */
static void ptrace_set_sve_get_sve_data(pid_t child,
const struct vec_type *type,
@@ -680,6 +802,20 @@ static int do_parent(pid_t child)
}
for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /*
+ * If the vector type isn't supported reads and writes
+ * should fail.
+ */
+ if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) {
+ read_fails(child, &vec_types[i]);
+ write_fails(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s unsupported read fails\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s unsupported write fails\n",
+ vec_types[i].name);
+ }
+
/* FPSIMD via SVE regset */
if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
ptrace_sve_fpsimd(child, &vec_types[i]);
@@ -700,6 +836,17 @@ static int do_parent(pid_t child)
vec_types[i].name);
}
+ /* Setting out of bounds VLs should fail */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_vl_ranges(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s Set invalid VL 0\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s Set invalid VL %d\n",
+ vec_types[i].name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+ }
+
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
@@ -731,6 +878,15 @@ static int do_parent(pid_t child)
}
}
+ /* We support SVE writes of FPSMID format on SME only systems */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE) &&
+ (getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ptrace_sve_fpsimd_no_sve(child);
+ } else {
+ ksft_test_result_skip("FPSIMD write via SVE\n");
+ ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+ }
+
ret = EXIT_SUCCESS;
error:
@@ -750,9 +906,6 @@ int main(void)
ksft_print_header();
ksft_set_plan(EXPECTED_TESTS);
- if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available\n");
-
child = fork();
if (!child)
return do_child();
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index ea9c7d47790f..8dd932fdcdc4 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -19,7 +19,7 @@
#include <asm/sigcontext.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "rdvl.h"
#define ARCH_MIN_VL SVE_VL_MIN
@@ -690,7 +690,6 @@ static inline void smstop(void)
asm volatile("msr S0_3_C4_C6_3, xzr");
}
-
/*
* Verify we can change the SVE vector length while SME is active and
* continue to use SME afterwards.
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
index 08c777f87ea2..787eed22d059 100644
--- a/tools/testing/selftests/arm64/fp/za-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
index 584b8d59b7ea..f3fa49fd0fbd 100644
--- a/tools/testing/selftests/arm64/fp/zt-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -18,7 +18,7 @@
#include <asm/sigcontext.h>
#include <asm/ptrace.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
#ifndef NT_ARM_ZA
@@ -108,7 +108,6 @@ static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
}
-
static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
{
struct iovec iov;
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index 38080f3c3280..a8df05771670 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -276,7 +276,7 @@ function barf
bl putdec
puts ", iteration="
mov x0, x22
- bl putdec
+ bl putdecn
puts "\tExpected ["
mov x0, x10
mov x1, x12
diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
index d2f3497a9103..1fbbf0ca1f02 100644
--- a/tools/testing/selftests/arm64/gcs/Makefile
+++ b/tools/testing/selftests/arm64/gcs/Makefile
@@ -14,11 +14,11 @@ LDLIBS+=-lpthread
include ../../lib.mk
$(OUTPUT)/basic-gcs: basic-gcs.c
- $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
- -static -include ../../../../include/nolibc/nolibc.h \
+ $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \
+ -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \
-I../../../../../usr/include \
-std=gnu99 -I../.. -g \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -ffreestanding $^ -o $@ -lgcc
$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S
$(CC) -nostdlib $^ -o $@
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 3fb9742342a3..250977abc398 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -10,6 +10,7 @@
#include <sys/mman.h>
#include <asm/mman.h>
+#include <asm/hwcap.h>
#include <linux/sched.h>
#include "kselftest.h"
@@ -298,6 +299,68 @@ out:
return pass;
}
+/* A vfork()ed process can run and exit */
+static bool test_vfork(void)
+{
+ unsigned long child_mode;
+ int ret, status;
+ pid_t pid;
+ bool pass = true;
+
+ pid = vfork();
+ if (pid == -1) {
+ ksft_print_msg("vfork() failed: %d\n", errno);
+ pass = false;
+ goto out;
+ }
+ if (pid == 0) {
+ /*
+ * In child, make sure we can call a function, read
+ * the GCS pointer and status and then exit.
+ */
+ valid_gcs_function();
+ get_gcspr();
+
+ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+ &child_mode, 0, 0, 0);
+ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+ ksft_print_msg("GCS not enabled in child\n");
+ ret = EXIT_FAILURE;
+ }
+
+ _exit(ret);
+ }
+
+ /*
+ * In parent, check we can still do function calls then check
+ * on the child.
+ */
+ valid_gcs_function();
+
+ ksft_print_msg("Waiting for child %d\n", pid);
+
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ ksft_print_msg("Failed to wait for child: %d\n",
+ errno);
+ return false;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child exited due to signal %d\n",
+ WTERMSIG(status));
+ pass = false;
+ } else if (WEXITSTATUS(status)) {
+ ksft_print_msg("Child exited with status %d\n",
+ WEXITSTATUS(status));
+ pass = false;
+ }
+
+out:
+
+ return pass;
+}
+
typedef bool (*gcs_test)(void);
static struct {
@@ -314,6 +377,7 @@ static struct {
{ "enable_invalid", enable_invalid, true },
{ "map_guarded_stack", map_guarded_stack },
{ "fork", test_fork },
+ { "vfork", test_vfork },
};
int main(void)
@@ -323,14 +387,13 @@ int main(void)
ksft_print_header();
- /*
- * We don't have getauxval() with nolibc so treat a failure to
- * read GCS state as a lack of support and skip.
- */
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
&gcs_mode, 0, 0, 0);
if (ret != 0)
- ksft_exit_skip("Failed to read GCS state: %d\n", ret);
+ ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret);
if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
gcs_mode = PR_SHADOW_STACK_ENABLE;
@@ -347,7 +410,7 @@ int main(void)
}
/* One last test: disable GCS, we can do this one time */
- my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
if (ret != 0)
ksft_print_msg("Failed to disable GCS: %d\n", ret);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
index 989f75a491b7..1e6abb136ffd 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-locking.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
@@ -165,7 +165,6 @@ TEST_F(valid_modes, lock_enable_disable_others)
ASSERT_EQ(ret, 0);
ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES);
-
ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
variant->mode);
ASSERT_EQ(ret, 0);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
index bbc7f4950c13..86d8cd42aee7 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-stress.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
@@ -24,7 +24,7 @@
#include <sys/wait.h>
#include <asm/hwcap.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
struct child_data {
char *name, *output;
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < gcs_threads; i++)
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index 2ee7f114d7fa..ff4e07503349 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode)
int i, j, item;
bool err;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
@@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
bool err;
char *und_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode,
size_t tagged_size, overflow_size;
char *over_ptr = NULL;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
for (i = 0; i < item; i++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
@@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode)
{
int i, item, result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
item = ARRAY_SIZE(sizes);
cur_mte_cxt.fault_valid = false;
for (i = 0; i < item; i++) {
@@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping)
int run, fd;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
/* check initial tags for anonymous mmap */
ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
@@ -415,7 +415,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(20);
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 7597fc632cad..5e97ee792e4d 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
int item = ARRAY_SIZE(sizes);
item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < item; run++) {
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
UNDERFLOW, OVERFLOW);
@@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
int run, fd, map_size, result = KSFT_PASS;
int total = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -160,8 +160,8 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
- mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(12);
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 303260a6dc65..aad1234c7e0f 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
unsigned long map_size;
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
map_size = default_huge_page_size();
map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
@@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
map_size = default_huge_page_size();
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping,
0, 0);
if (check_allocated_memory_range(ptr, map_size, mem_type,
@@ -227,14 +227,16 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
int main(int argc, char *argv[])
{
int err;
+ void *map_ptr;
+ unsigned long map_size;
err = mte_default_setup();
if (err)
return err;
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
allocate_hugetlb();
@@ -243,6 +245,15 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
+ /* Check if MTE supports hugetlb mappings */
+ map_size = default_huge_page_size();
+ map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ if (map_ptr == MAP_FAILED)
+ ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n");
+ else
+ munmap(map_ptr, map_size);
+
/* Set test plan */
ksft_set_plan(12);
@@ -270,13 +281,13 @@ int main(int argc, char *argv[])
"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n");
mte_restore_setup();
free_hugetlb();
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 88c74bc46d4f..0cf5faef1724 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping)
return err;
}
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
@@ -141,8 +141,8 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
/* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler, false);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
index 17694caaff53..c100af3012cb 100644
--- a/tools/testing/selftests/arm64/mte/check_mmap_options.c
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
@@ -23,6 +24,35 @@
#define OVERFLOW MT_GRANULE_SIZE
#define TAG_CHECK_ON 0
#define TAG_CHECK_OFF 1
+#define ATAG_CHECK_ON 1
+#define ATAG_CHECK_OFF 0
+
+#define TEST_NAME_MAX 256
+
+enum mte_mem_check_type {
+ CHECK_ANON_MEM = 0,
+ CHECK_FILE_MEM = 1,
+ CHECK_CLEAR_PROT_MTE = 2,
+};
+
+enum mte_tag_op_type {
+ TAG_OP_ALL = 0,
+ TAG_OP_STONLY = 1,
+};
+
+struct check_mmap_testcase {
+ int check_type;
+ int mem_type;
+ int mte_sync;
+ int mapping;
+ int tag_check;
+ int atag_check;
+ int tag_op;
+ bool enable_tco;
+};
+
+#define TAG_OP_ALL 0
+#define TAG_OP_STONLY 1
static size_t page_size;
static int sizes[] = {
@@ -30,8 +60,17 @@ static int sizes[] = {
/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
};
-static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+static int check_mte_memory(char *ptr, int size, int mode,
+ int tag_check,int atag_check, int tag_op)
{
+ char buf[MT_GRANULE_SIZE];
+
+ if (!mtefar_support && atag_check == ATAG_CHECK_ON)
+ return KSFT_SKIP;
+
+ if (atag_check == ATAG_CHECK_ON)
+ ptr = mte_insert_atag(ptr);
+
mte_initialize_current_context(mode, (uintptr_t)ptr, size);
memset(ptr, '1', size);
mte_wait_after_trig();
@@ -54,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
return KSFT_FAIL;
+ if (tag_op == TAG_OP_STONLY) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memcpy(buf, ptr + size, MT_GRANULE_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+ }
+
return KSFT_PASS;
}
-static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, result, map_size;
int item = ARRAY_SIZE(sizes);
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < item; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
@@ -79,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i
munmap((void *)map_ptr, map_size);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
mte_free_memory((void *)map_ptr, map_size, mem_type, false);
- if (result == KSFT_FAIL)
- return KSFT_FAIL;
+ if (result != KSFT_PASS)
+ return result;
}
return KSFT_PASS;
}
-static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+static int check_file_memory_mapping(int mem_type, int mode, int mapping,
+ int tag_check, int atag_check, int tag_op)
{
char *ptr, *map_ptr;
int run, fd, map_size;
int total = ARRAY_SIZE(sizes);
int result = KSFT_PASS;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (tag_op == TAG_OP_STONLY && !mtestonly_support)
+ return KSFT_SKIP;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op);
for (run = 0; run < total; run++) {
fd = create_temp_file();
if (fd == -1)
@@ -117,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op);
mte_clear_tags((void *)ptr, sizes[run]);
munmap((void *)map_ptr, map_size);
close(fd);
- if (result == KSFT_FAIL)
- break;
+ if (result != KSFT_PASS)
+ return result;
}
- return result;
+ return KSFT_PASS;
}
-static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check)
{
char *ptr, *map_ptr;
int run, prot_flag, result, fd, map_size;
int total = ARRAY_SIZE(sizes);
prot_flag = PROT_READ | PROT_WRITE;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
for (run = 0; run < total; run++) {
map_size = sizes[run] + OVERFLOW + UNDERFLOW;
ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -150,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
fd = create_temp_file();
if (fd == -1)
@@ -174,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
close(fd);
return KSFT_FAIL;
}
- result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL);
mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
close(fd);
if (result != KSFT_PASS)
- return KSFT_FAIL;
+ return result;
}
return KSFT_PASS;
}
+const char *format_test_name(struct check_mmap_testcase *tc)
+{
+ static char test_name[TEST_NAME_MAX];
+ const char *check_type_str;
+ const char *mem_type_str;
+ const char *sync_str;
+ const char *mapping_str;
+ const char *tag_check_str;
+ const char *atag_check_str;
+ const char *tag_op_str;
+
+ switch (tc->check_type) {
+ case CHECK_ANON_MEM:
+ check_type_str = "anonymous memory";
+ break;
+ case CHECK_FILE_MEM:
+ check_type_str = "file memory";
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ check_type_str = "clear PROT_MTE flags";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mem_type) {
+ case USE_MMAP:
+ mem_type_str = "mmap";
+ break;
+ case USE_MPROTECT:
+ mem_type_str = "mmap/mprotect";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mte_sync) {
+ case MTE_NONE_ERR:
+ sync_str = "no error";
+ break;
+ case MTE_SYNC_ERR:
+ sync_str = "sync error";
+ break;
+ case MTE_ASYNC_ERR:
+ sync_str = "async error";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->mapping) {
+ case MAP_SHARED:
+ mapping_str = "shared";
+ break;
+ case MAP_PRIVATE:
+ mapping_str = "private";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->tag_check) {
+ case TAG_CHECK_ON:
+ tag_check_str = "tag check on";
+ break;
+ case TAG_CHECK_OFF:
+ tag_check_str = "tag check off";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (tc->atag_check) {
+ case ATAG_CHECK_ON:
+ atag_check_str = "with address tag [63:60]";
+ break;
+ case ATAG_CHECK_OFF:
+ atag_check_str = "without address tag [63:60]";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, sizeof(test_name),
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str);
+
+ switch (tc->tag_op) {
+ case TAG_OP_ALL:
+ tag_op_str = "";
+ break;
+ case TAG_OP_STONLY:
+ tag_op_str = " / store-only";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(test_name, TEST_NAME_MAX,
+ "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n",
+ check_type_str, mapping_str, sync_str, mem_type_str,
+ tag_check_str, atag_check_str, tag_op_str);
+
+ return test_name;
+}
+
int main(int argc, char *argv[])
{
- int err;
+ int err, i;
int item = ARRAY_SIZE(sizes);
+ struct check_mmap_testcase test_cases[]= {
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = true,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_NONE_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_OFF,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_OFF,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_ANON_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_SHARED,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_FILE_MEM,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_ASYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_STONLY,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MMAP,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ {
+ .check_type = CHECK_CLEAR_PROT_MTE,
+ .mem_type = USE_MPROTECT,
+ .mte_sync = MTE_SYNC_ERR,
+ .mapping = MAP_PRIVATE,
+ .tag_check = TAG_CHECK_ON,
+ .atag_check = ATAG_CHECK_ON,
+ .tag_op = TAG_OP_ALL,
+ .enable_tco = false,
+ },
+ };
err = mte_default_setup();
if (err)
@@ -200,64 +957,51 @@ int main(int argc, char *argv[])
sizes[item - 2] = page_size;
sizes[item - 1] = page_size + 1;
- /* Register signal handlers */
- mte_register_signal(SIGBUS, mte_default_handler);
- mte_register_signal(SIGSEGV, mte_default_handler);
-
/* Set test plan */
- ksft_set_plan(22);
-
- mte_enable_pstate_tco();
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
-
- mte_disable_pstate_tco();
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
- "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
-
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
- "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
- evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
- "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
-
- evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
- evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
- "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+ ksft_set_plan(ARRAY_SIZE(test_cases));
+
+ for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) {
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+ mte_register_signal(SIGSEGV, mte_default_handler,
+ test_cases[i].atag_check == ATAG_CHECK_ON);
+
+ if (test_cases[i].enable_tco)
+ mte_enable_pstate_tco();
+ else
+ mte_disable_pstate_tco();
+
+ switch (test_cases[i].check_type) {
+ case CHECK_ANON_MEM:
+ evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_FILE_MEM:
+ evaluate_test(check_file_memory_mapping(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].tag_check,
+ test_cases[i].atag_check,
+ test_cases[i].tag_op),
+ format_test_name(&test_cases[i]));
+ break;
+ case CHECK_CLEAR_PROT_MTE:
+ evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type,
+ test_cases[i].mte_sync,
+ test_cases[i].mapping,
+ test_cases[i].atag_check),
+ format_test_name(&test_cases[i]));
+ break;
+ default:
+ exit(KSFT_FAIL);
+ }
+ }
mte_restore_setup();
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
index 4c89e9538ca0..f7f320defa7b 100644
--- a/tools/testing/selftests/arm64/mte/check_prctl.c
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -12,6 +12,10 @@
#include "kselftest.h"
+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
static int set_tagged_addr_ctrl(int val)
{
int ret;
@@ -60,7 +64,7 @@ void check_basic_read(void)
/*
* Attempt to set a specified combination of modes.
*/
-void set_mode_test(const char *name, int hwcap2, int mask)
+void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask)
{
int ret;
@@ -69,6 +73,11 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
+ if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
ret = set_tagged_addr_ctrl(mask);
if (ret < 0) {
ksft_test_result_fail("%s\n", name);
@@ -81,7 +90,7 @@ void set_mode_test(const char *name, int hwcap2, int mask)
return;
}
- if ((ret & PR_MTE_TCF_MASK) == mask) {
+ if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) {
ksft_test_result_pass("%s\n", name);
} else {
ksft_print_msg("Got %x, expected %x\n",
@@ -93,12 +102,16 @@ void set_mode_test(const char *name, int hwcap2, int mask)
struct mte_mode {
int mask;
int hwcap2;
+ int hwcap3;
const char *name;
} mte_modes[] = {
- { PR_MTE_TCF_NONE, 0, "NONE" },
- { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" },
- { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
- { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" },
+ { PR_MTE_TCF_NONE, 0, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" },
+ { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" },
};
int main(void)
@@ -106,11 +119,11 @@ int main(void)
int i;
ksft_print_header();
- ksft_set_plan(5);
+ ksft_set_plan(ARRAY_SIZE(mte_modes));
check_basic_read();
for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
- set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3,
mte_modes[i].mask);
ksft_print_cnts();
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index a3d1e23fe02a..4b764f2a8185 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode)
return KSFT_FAIL;
for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
- ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false);
if (ret != 0)
result = KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
excl_mask |= 1 << tag;
- mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false);
/* Try to catch a excluded tag by a number of tries. */
for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
ptr = mte_insert_tags(ptr, BUFFER_SIZE);
@@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode)
mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode)
if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
return KSFT_FAIL;
- ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false);
if (ret != 0)
return KSFT_FAIL;
/* Try to catch a excluded tag by a number of tries. */
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
return err;
/* Register SIGSEGV handler */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(4);
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index f4ae5f87a3b7..fb7936c4e097 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
err = KSFT_PASS;
len = 2 * page_sz;
- mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false);
fd = create_temp_file();
if (fd == -1)
return KSFT_FAIL;
@@ -211,7 +211,7 @@ int main(int argc, char *argv[])
return err;
/* Register signal handlers */
- mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler, false);
/* Set test plan */
ksft_set_plan(64);
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index a1dc2fe5285b..397e57dd946a 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -6,6 +6,7 @@
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
+#include <time.h>
#include <unistd.h>
#include <linux/auxvec.h>
@@ -19,20 +20,40 @@
#include "mte_common_util.h"
#include "mte_def.h"
+#ifndef SA_EXPOSE_TAGBITS
+#define SA_EXPOSE_TAGBITS 0x00000800
+#endif
+
#define INIT_BUFFER_SIZE 256
struct mte_fault_cxt cur_mte_cxt;
+bool mtefar_support;
+bool mtestonly_support;
static unsigned int mte_cur_mode;
static unsigned int mte_cur_pstate_tco;
+static bool mte_cur_stonly;
void mte_default_handler(int signum, siginfo_t *si, void *uc)
{
+ struct sigaction sa;
unsigned long addr = (unsigned long)si->si_addr;
+ unsigned char si_tag, si_atag;
+
+ sigaction(signum, NULL, &sa);
+
+ if (sa.sa_flags & SA_EXPOSE_TAGBITS) {
+ si_tag = MT_FETCH_TAG(addr);
+ si_atag = MT_FETCH_ATAG(addr);
+ addr = MT_CLEAR_TAGS(addr);
+ } else {
+ si_tag = 0;
+ si_atag = 0;
+ }
if (signum == SIGSEGV) {
#ifdef DEBUG
- ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
- ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag);
#endif
if (si->si_code == SEGV_MTEAERR) {
if (cur_mte_cxt.trig_si_code == si->si_code)
@@ -45,13 +66,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
/* Compare the context for precise error */
else if (si->si_code == SEGV_MTESERR) {
+ if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) {
+ ksft_print_msg("Invalid MTE synchronous exception caught for address tag! si_tag=%x, si_atag: %x\n", si_tag, si_atag);
+ exit(KSFT_FAIL);
+ }
+
if (cur_mte_cxt.trig_si_code == si->si_code &&
((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -67,11 +93,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n",
((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
if ((cur_mte_cxt.trig_range >= 0 &&
- addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
(cur_mte_cxt.trig_range < 0 &&
- addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
- addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
cur_mte_cxt.fault_valid = true;
/* Adjust the pc by 4 */
((ucontext_t *)uc)->uc_mcontext.pc += 4;
@@ -79,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
}
}
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags)
{
struct sigaction sa;
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO;
+
+ if (export_tags && signal == SIGSEGV)
+ sa.sa_flags |= SA_EXPOSE_TAGBITS;
+
sigemptyset(&sa.sa_mask);
sigaction(signal, &sa, NULL);
}
@@ -120,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size)
mte_clear_tag_address_range(ptr, size);
}
+void *mte_insert_atag(void *ptr)
+{
+ unsigned char atag;
+
+ atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0;
+ return (void *)MT_SET_ATAG((unsigned long)ptr, atag);
+}
+
+void *mte_clear_atag(void *ptr)
+{
+ return (void *)MT_CLEAR_ATAG((unsigned long)ptr);
+}
+
static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
size_t range_before, size_t range_after,
bool tags, int fd)
@@ -272,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
cur_mte_cxt.trig_si_code = 0;
}
-int mte_switch_mode(int mte_option, unsigned long incl_mask)
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly)
{
unsigned long en = 0;
@@ -304,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
break;
}
+ if (mtestonly_support && stonly)
+ en |= PR_MTE_STORE_ONLY;
+
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
@@ -316,12 +363,21 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
int mte_default_setup(void)
{
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+ unsigned long hwcaps3 = getauxval(AT_HWCAP3);
unsigned long en = 0;
int ret;
+ /* To generate random address tag */
+ srandom(time(NULL));
+
if (!(hwcaps2 & HWCAP2_MTE))
ksft_exit_skip("MTE features unavailable\n");
+ mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR);
+
+ if (hwcaps3 & HWCAP3_MTE_STORE_ONLY)
+ mtestonly_support = true;
+
/* Get current mte mode */
ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
if (ret < 0) {
@@ -335,6 +391,8 @@ int mte_default_setup(void)
else if (ret & PR_MTE_TCF_NONE)
mte_cur_mode = MTE_NONE_ERR;
+ mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false;
+
mte_cur_pstate_tco = mte_get_pstate_tco();
/* Disable PSTATE.TCO */
mte_disable_pstate_tco();
@@ -343,7 +401,7 @@ int mte_default_setup(void)
void mte_restore_setup(void)
{
- mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly);
if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
mte_enable_pstate_tco();
else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index a0017a303beb..250d671329a5 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -37,10 +37,13 @@ struct mte_fault_cxt {
};
extern struct mte_fault_cxt cur_mte_cxt;
+extern bool mtefar_support;
+extern bool mtestonly_support;
/* MTE utility functions */
void mte_default_handler(int signum, siginfo_t *si, void *uc);
-void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *),
+ bool export_tags);
void mte_wait_after_trig(void);
void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
@@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
size_t range_before, size_t range_after);
void *mte_insert_tags(void *ptr, size_t size);
void mte_clear_tags(void *ptr, size_t size);
+void *mte_insert_atag(void *ptr);
+void *mte_clear_atag(void *ptr);
int mte_default_setup(void);
void mte_restore_setup(void);
-int mte_switch_mode(int mte_option, unsigned long incl_mask);
+int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly);
void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
/* Common utility functions */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
index 9b188254b61a..6ad22f07c9b8 100644
--- a/tools/testing/selftests/arm64/mte/mte_def.h
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -42,6 +42,8 @@
#define MT_TAG_COUNT 16
#define MT_INCLUDE_TAG_MASK 0xFFFF
#define MT_EXCLUDE_TAG_MASK 0x0
+#define MT_ATAG_SHIFT 60
+#define MT_ATAG_MASK 0xFUL
#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
@@ -49,6 +51,12 @@
#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT))
+#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT))
+#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK))
+
+#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x)))
+
#define MT_PSTATE_TCO_SHIFT 25
#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
#define MT_PSTATE_TCO_EN 1
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
index 4435600ca400..e597861b26d6 100644
--- a/tools/testing/selftests/arm64/pauth/exec_target.c
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -13,7 +13,12 @@ int main(void)
unsigned long hwcaps;
size_t val;
- fread(&val, sizeof(size_t), 1, stdin);
+ size_t size = fread(&val, sizeof(size_t), 1, stdin);
+
+ if (size != 1) {
+ fprintf(stderr, "Could not read input from stdin\n");
+ return EXIT_FAILURE;
+ }
/* don't try to execute illegal (unimplemented) instructions) caller
* should have checked this and keep worker simple
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 6d21b2fc758d..67d138057707 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -10,7 +10,7 @@
#include <setjmp.h>
#include <sched.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "helper.h"
#define PAC_COLLISION_ATTEMPTS 1000
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 8ae26e496c89..375ab47f0edb 100644
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -6,7 +6,7 @@
#include <stdint.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index e2a2c46c008b..19c1638e312a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -21,9 +21,9 @@ test_lirc_mode2_user
flow_dissector_load
test_tcpnotify_user
test_libbpf
-test_sysctl
xdping
test_cpp
+test_progs_verification_cert
*.d
*.subskel.h
*.skel.h
@@ -33,7 +33,6 @@ test_cpp
/cpuv4
/host-tools
/tools
-/runqslower
/bench
/veristat
/sign-file
@@ -45,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
deleted file mode 100644
index 901349da680f..000000000000
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ /dev/null
@@ -1,12 +0,0 @@
-bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-kprobe_multi_bench_attach # needs CONFIG_FPROBE
-kprobe_multi_test # needs CONFIG_FPROBE
-module_attach # prog 'kprobe_multi': failed to auto-attach: -95
-fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
-fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
-tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
-fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 3ebd77206f98..a17baf8c6fd7 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 87551628e112..b7030a6e2e76 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -43,6 +46,7 @@ endif
CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \
-Wall -Werror -fno-omit-frame-pointer \
+ -Wno-unused-but-set-variable \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
@@ -70,7 +74,7 @@ endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
- test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
@@ -95,52 +99,42 @@ TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_xdp_redirect_multi.sh \
- test_tunnel.sh \
- test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_xdp_vlan_mode_generic.sh \
- test_xdp_vlan_mode_native.sh \
- test_lwt_ip_encap.sh \
- test_tc_tunnel.sh \
- test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
+ test_bpftool_map.sh \
test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
test_xdp_features.sh
-TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
- test_xdp_vlan.sh test_bpftool.py
+TEST_PROGS_EXTENDED := \
+ ima_setup.sh verify_sig_setup.sh \
+ test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = \
bench \
flow_dissector_load \
- runqslower \
test_cpp \
test_lirc_mode2_user \
veristat \
xdp_features \
xdp_hw_metadata \
- xdp_redirect_multi \
xdp_synproxy \
xdping \
xskxceiver
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
ifneq ($(V),1)
submake_extras := feature_display=0
@@ -179,16 +173,23 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
+ifneq ($(SKIP_LLVM),1)
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict
LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += -lstdc++
+ # Prefer linking statically if it's available, otherwise fallback to shared
+ ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
+ LLVM_LDLIBS += -lstdc++
+ else
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
+endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -205,8 +206,6 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR)
endif
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
-RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
-
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
@@ -217,7 +216,7 @@ ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
-# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# Define simple and short `make test_progs`, `make test_maps`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
@@ -228,7 +227,7 @@ $(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
$(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
$(HOST_BUILD_DIR)/resolve_btfids \
- $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
+ $(INCLUDE_DIR))
$(MAKE_DIRS):
$(call msg,MKDIR,,$@)
$(Q)mkdir -p $@
@@ -300,16 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
USE_BOOTSTRAP := "bootstrap/"
endif
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
- OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
- EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
- EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
- cp $(RUNQSLOWER_OUTPUT)runqslower $@
-
TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
@@ -325,7 +314,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tag: $(TESTING_HELPERS)
$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
$(OUTPUT)/xdping: $(TESTING_HELPERS)
@@ -359,7 +347,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
+ifneq ($(SKIP_DOCS),1)
all: docs
+endif
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
@@ -392,7 +382,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
@@ -447,7 +437,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
- -Wno-compare-distinct-pointer-types
+ -Wno-compare-distinct-pointer-types \
+ -Wno-initializer-overrides \
+ #
# TODO: enable me -Wsign-compare
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
@@ -490,15 +482,17 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
test_subskeleton.skel.h test_subskeleton_lib.skel.h \
test_usdt.skel.h
-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
- trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_overwrite.c
+
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
@@ -529,12 +523,15 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
$$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
+ $$(filter %.c,$(TRUNNER_LIB_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
@@ -544,6 +541,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -598,6 +596,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
@@ -647,6 +654,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
@@ -661,11 +669,16 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
+ $$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
# non-flavored in-srctree builds receive special treatment, in particular, we
# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
@@ -674,25 +687,37 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
-$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
-$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
-
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(TRUNNER_LIB_OBJS) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
+ $(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)ln -fs $< test_progs_verification_cert && \
+ xxd -i test_progs_verification_cert > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
@@ -712,8 +737,10 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
+ $(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h
+TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
@@ -721,7 +748,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/uprobe_multi \
$(TEST_KMOD_TARGETS) \
ima_setup.sh \
- verify_sig_setup.sh \
+ $(VERIFY_SIG_SETUP) \
$(wildcard progs/btf_dump_test_case_*.c) \
$(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -751,6 +778,7 @@ endif
TRUNNER_TESTS_DIR := map_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_LIB_SOURCES :=
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
@@ -772,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Include find_bit.c to compile xskxceiver.
-EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -811,6 +839,8 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -831,10 +861,17 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+# This works around GCC warning about snprintf truncating strings like:
+#
+# char a[PATH_MAX], b[PATH_MAX];
+# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation
+$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation
$(OUTPUT)/veristat.o: $(BPFOBJ)
$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(call msg,BINARY,,$@)
@@ -853,7 +890,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
no_alu32 cpuv4 bpf_gcc \
liburandom_read.so) \
- $(OUTPUT)/FEATURE-DUMP.selftests
+ $(OUTPUT)/FEATURE-DUMP.selftests \
+ test_progs_verification_cert
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index eb6a4fea8c79..f7f9e7088bb3 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -7,12 +7,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man2dir = $(mandir)/man2
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1bd403a5ef7b..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -283,6 +283,8 @@ extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -297,6 +299,8 @@ static const struct argp_child bench_parsers[] = {
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
@@ -497,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;
@@ -508,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -526,6 +532,12 @@ extern const struct bench bench_trig_uprobe_multi_push;
extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -549,6 +561,14 @@ extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -569,6 +589,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
@@ -586,6 +608,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_multi_push,
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+ &bench_trig_uprobe_nop5,
+ &bench_trig_uretprobe_nop5,
+ &bench_trig_uprobe_multi_nop5,
+ &bench_trig_uretprobe_multi_nop5,
+#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
@@ -609,6 +637,14 @@ static const struct bench *benchs[] = {
&bench_htab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
+ &bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 005c401b3e22..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -46,6 +46,7 @@ struct bench_res {
unsigned long gp_ns;
unsigned long gp_ct;
unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
index 926ee822143e..297e32390cd1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
}
got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
if (got <= 0) {
*value = 0;
return;
@@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
buf[got] = 0;
*value = strtoull(buf, NULL, 0);
-
- close(fd);
}
static void htab_mem_measure(struct bench_res *res)
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..01bdce692799 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,8 @@ static struct {
int ringbuf_sz; /* per-ringbuf, in bytes */
bool ringbuf_use_output; /* use slower output API */
int perfbuf_sz; /* per-CPU size, in pages */
+ bool overwrite;
+ bool bench_producer;
} args = {
.back2back = false,
.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
.ringbuf_sz = 512 * 1024,
.ringbuf_use_output = false,
.perfbuf_sz = 128,
+ .overwrite = false,
+ .bench_producer = false,
};
enum {
@@ -35,6 +39,8 @@ enum {
ARG_RB_BATCH_CNT = 2002,
ARG_RB_SAMPLED = 2003,
ARG_RB_SAMPLE_RATE = 2004,
+ ARG_RB_OVERWRITE = 2005,
+ ARG_RB_BENCH_PRODUCER = 2006,
};
static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
+ { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
{},
};
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case ARG_RB_OVERWRITE:
+ args.overwrite = true;
+ break;
+ case ARG_RB_BENCH_PRODUCER:
+ args.bench_producer = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
static void bufs_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
+ if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
+ fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
+ exit(1);
+ }
+
+ if (args.overwrite && !args.bench_producer) {
+ fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && env.consumer_cnt != 0) {
+ fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.back2back) {
+ fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.sampled) {
+ fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (!args.bench_producer && env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
exit(1);
}
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
- res->hits = atomic_swap(&buf_hits.value, 0);
+ if (args.bench_producer)
+ res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+ else
+ res->hits = atomic_swap(&buf_hits.value, 0);
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
+ __u32 flags;
+ struct bpf_map *ringbuf;
struct ringbuf_bench *skel;
setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
skel->rodata->batch_cnt = args.batch_cnt;
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+ skel->rodata->bench_producer = args.bench_producer;
if (args.sampled)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+ ringbuf = skel->maps.ringbuf;
+ if (args.overwrite) {
+ flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
+ bpf_map__set_map_flags(ringbuf, flags);
+ }
+
+ bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
+ int map_fd;
ctx->skel = ringbuf_setup_skeleton();
- ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
- buf_process_sample, NULL, NULL);
+
+ map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
if (!ctx->ringbuf) {
fprintf(stderr, "failed to create ringbuf\n");
exit(1);
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..cfc072aa7fff
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_INGRESS:
+ * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDIC):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct socmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
+ close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool blocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+ flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+ return err;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+ return err;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+ int err;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+ /* it's only for RX_NORMAL which service as reserve-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+ sent = send(ctx.p2, buf, recv_buf_size, 0);
+ if (sent < 0) {
+ fprintf(stderr, "fail to send p2");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+ /* read real endpoint by consumer 0 */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+ /* we need simple verify */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+ long verdict_total = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 32e9f194d449..34018fc3927f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
- bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
load_ctx();
/* override driver program */
- ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
}
static void trigger_kprobe_setup(void)
@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+ * bpf_get_ksyms -> skip_entry, as they can introduce sufficient
+ * overhead. However, it's difficut to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -333,6 +392,20 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+ asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
+
+static void *uprobe_producer_nop5(void *input)
+{
+ while (true)
+ uprobe_target_nop5();
+ return NULL;
+}
+#endif
+
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
@@ -448,6 +521,28 @@ static void uretprobe_multi_ret_setup(void)
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
const struct bench bench_trig_syscall_count = {
.name = "trig-syscall-count",
.validate = trigger_validate,
@@ -476,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
@@ -506,3 +603,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..83e05e837871 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index af169f831f2f..03f55405484b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index 68a51dcc0669..16f8ce832004 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -46,8 +46,11 @@
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
int node_id, __u64 flags) __ksym __weak;
+int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
+
#else /* when compiled as user space code */
#define __arena
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index 85dbc3ea4da5..e16fa7d95fcf 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
static inline void __list_del(arena_list_node_t *n)
{
- arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t *next = n->next;
arena_list_node_t * __arena *pprev = n->pprev;
cast_user(next);
cast_kern(pprev);
- tmp = *pprev;
- cast_kern(tmp);
- WRITE_ONCE(tmp, next);
+ WRITE_ONCE(*pprev, next);
if (next) {
cast_user(pprev);
cast_kern(next);
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+ const char __arena *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ cond_break;
+ return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const __arena *back_pat = NULL, *back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const __arena *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ cond_break;
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ __attribute__((__fallthrough__));
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ cond_break;
+ }
+ return false;
+}
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
new file mode 100644
index 000000000000..c550e5711967
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ATOMIC_H
+#define BPF_ATOMIC_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+extern bool CONFIG_X86_64 __kconfig __weak;
+
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+/* No-op for BPF */
+#define cpu_relax() ({})
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val))
+
+#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new)
+
+#define try_cmpxchg(p, pold, new) \
+ ({ \
+ __unqual_typeof(*(pold)) __o = *(pold); \
+ __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \
+ if (__r != __o) \
+ *(pold) = __r; \
+ __r == __o; \
+ })
+
+#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define smp_mb() \
+ ({ \
+ volatile unsigned long __val; \
+ __sync_fetch_and_add(&__val, 0); \
+ })
+
+#define smp_rmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+#define smp_wmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */
+#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); })
+
+#define smp_load_acquire(p) \
+ ({ \
+ __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ __v; \
+ })
+
+#define smp_store_release(p, val) \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ WRITE_ONCE(*(p), val); \
+ })
+
+#define smp_cond_load_relaxed_label(p, cond_expr, label) \
+ ({ \
+ typeof(p) __ptr = (p); \
+ __unqual_typeof(*(p)) VAL; \
+ for (;;) { \
+ VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \
+ if (cond_expr) \
+ break; \
+ cond_break_label(label); \
+ cpu_relax(); \
+ } \
+ (typeof(*(p)))VAL; \
+ })
+
+#define smp_cond_load_acquire_label(p, cond_expr, label) \
+ ({ \
+ __unqual_typeof(*p) __val = \
+ smp_cond_load_relaxed_label(p, cond_expr, label); \
+ smp_acquire__after_ctrl_dep(); \
+ (typeof(*(p)))__val; \
+ })
+
+#define atomic_read(p) READ_ONCE((p)->counter)
+
+#define atomic_cond_read_relaxed_label(p, cond_expr, label) \
+ smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label)
+
+#define atomic_cond_read_acquire_label(p, cond_expr, label) \
+ smp_cond_load_acquire_label(&(p)->counter, cond_expr, label)
+
+#define atomic_try_cmpxchg_relaxed(p, pold, new) \
+ try_cmpxchg_relaxed(&(p)->counter, pold, new)
+
+#define atomic_try_cmpxchg_acquire(p, pold, new) \
+ try_cmpxchg_acquire(&(p)->counter, pold, new)
+
+#endif /* BPF_ATOMIC_H */
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index cd8ecd39c3f3..2cd9165c7348 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym;
* including the NULL termination character, stored in the supplied
* buffer. On error, a negative integer is returned.
*/
-extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;
+extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
/* This macro must be used to mark the exception callback corresponding to the
* main program. For example:
@@ -368,12 +368,12 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -392,7 +392,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -400,7 +400,7 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -418,7 +418,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -426,12 +426,15 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#endif
#endif
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
@@ -588,4 +591,66 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
+extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
+ struct bpf_dynptr *value_p) __weak __ksym;
+
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether it is in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 2eb3483f2fb0..e0189254bb6e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,14 +19,17 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
+ void *buffer, __u64 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -34,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
+ __u64 buffer__szk) __ksym __weak;
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description
@@ -69,7 +72,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
@@ -87,4 +90,9 @@ struct dentry;
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
+
+extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags) __ksym __weak;
+extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 5f6963a320d7..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
+/* and poison usage to ensure it does not creep back in. */
+#pragma GCC poison gettid
+
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
index d5ac507401d7..98f840c3a38f 100644
--- a/tools/testing/selftests/bpf/cap_helpers.c
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
data[1].effective |= cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
@@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
data[1].effective &= ~cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
index 6d163530cb0f..8dcb28557f76 100644
--- a/tools/testing/selftests/bpf/cap_helpers.h
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/capability.h>
+#include <errno.h>
#ifndef CAP_PERFMON
#define CAP_PERFMON 38
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e4535451322e..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -4,6 +4,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/xattr.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -319,6 +320,26 @@ int join_parent_cgroup(const char *relative_path)
}
/**
+ * set_cgroup_xattr() - Set xattr on a cgroup dir
+ * @relative_path: The cgroup path, relative to the workdir, to set xattr
+ * @name: xattr name
+ * @value: xattr value
+ *
+ * This function set xattr on cgroup dir.
+ *
+ * On success, it returns 0, otherwise on failure it returns -1.
+ */
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);
+}
+
+/**
* __cleanup_cgroup_environment() - Delete temporary cgroups
*
* This is a helper for cleanup_cgroup_environment() that is responsible for
@@ -391,6 +412,26 @@ void remove_cgroup(const char *relative_path)
log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
+/*
+ * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 502845160d88..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
unsigned long long get_cgroup_id(const char *relative_path);
int get_cgroup1_hierarchy_id(const char *subsys_name);
@@ -26,6 +27,10 @@ int join_cgroup(const char *relative_path);
int join_root_cgroup(void);
int join_parent_cgroup(const char *relative_path);
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value);
+
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c378d5d07e02..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
@@ -48,6 +50,7 @@ CONFIG_IPV6_SIT=y
CONFIG_IPV6_TUNNEL=y
CONFIG_KEYS=y
CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
CONFIG_LWTUNNEL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SRCVERSION_ALL=y
@@ -59,6 +62,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
@@ -71,8 +75,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
@@ -93,6 +99,9 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y
@@ -101,11 +110,15 @@ CONFIG_IP_NF_IPTABLES=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
+CONFIG_PACKET=y
CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
@@ -113,3 +126,8 @@ CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
CONFIG_TCP_CONG_DCTCP=y
CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 3720b7611523..7efad36ceb26 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -31,10 +31,7 @@ CONFIG_COMPAT=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
@@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
CONFIG_DRM=y
-CONFIG_DUMMY=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -70,13 +66,11 @@ CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
-CONFIG_INET_ESP=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NET=y
-CONFIG_NF_TABLES=y
CONFIG_NLMON=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_OVERLAY_FS=y
CONFIG_PACKET_DIAG=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
@@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TCG_TIS=y
CONFIG_TCG_TPM=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS=y
@@ -158,10 +147,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
new file mode 100644
index 000000000000..b53afb5e0b71
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -0,0 +1,92 @@
+CONFIG_ALTIVEC=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FRAME_WARN=1280
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NONPORTABLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_PSERIES=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_THREAD_SHIFT=14
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VSX=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
index bb7043a80e1a..7bee24a79a71 100644
--- a/tools/testing/selftests/bpf/config.riscv64
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -48,7 +48,6 @@ CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 706931a8c2c6..db61878148e4 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
CONFIG_INET=y
-CONFIG_INET_ESP=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
-CONFIG_NF_TABLES=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASK_XACCT=y
CONFIG_TASKSTATS=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -128,10 +118,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5680befae8c6..42ad817b00ae 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -39,13 +39,11 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEFAULT_FQ_CODEL=y
@@ -105,12 +103,10 @@ CONFIG_HZ_1000=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_INTEL_POWERCLAMP=y
-CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_IP_ROUTE_MULTIPATH=y
@@ -163,7 +159,6 @@ CONFIG_NUMA=y
CONFIG_NUMA_BALANCING=y
CONFIG_NVMEM=y
CONFIG_OSF_PARTITION=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -221,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 66191ae9863c..79c3ccadb962 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd,
static void fetch_and_validate(int outer_map_fd,
struct bpf_map_batch_opts *opts,
- __u32 batch_size, bool delete_entries)
+ __u32 batch_size, bool delete_entries,
+ bool has_holes)
{
- __u32 *fetched_keys, *fetched_values, total_fetched = 0;
- __u32 batch_key = 0, fetch_count, step_size;
- int err, max_entries = OUTER_MAP_ENTRIES;
+ int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+ __u32 batch_key = 0, fetch_count, step_size = batch_size;
__u32 value_size = sizeof(__u32);
/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd,
"Memory allocation failed for fetched_keys or fetched_values",
"error=%s\n", strerror(errno));
- for (step_size = batch_size;
- step_size <= max_entries;
- step_size += batch_size) {
+ /* hash map may not always return full batch */
+ for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
fetch_count = step_size;
err = delete_entries
? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd,
if (err && errno == ENOSPC) {
/* Fetch again with higher batch size */
total_fetched = 0;
+ step_size += batch_size;
continue;
}
@@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd,
}
static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
- enum bpf_map_type inner_map_type)
+ enum bpf_map_type inner_map_type,
+ bool has_holes)
{
+ __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
__u32 *outer_map_keys, *inner_map_fds;
- __u32 max_entries = OUTER_MAP_ENTRIES;
LIBBPF_OPTS(bpf_map_batch_opts, opts);
__u32 value_size = sizeof(__u32);
int batch_size[2] = {5, 10};
__u32 map_index, op_index;
int outer_map_fd, ret;
- outer_map_keys = calloc(max_entries, value_size);
- inner_map_fds = calloc(max_entries, value_size);
+ outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+ inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
CHECK((!outer_map_keys || !inner_map_fds),
"Memory allocation failed for outer_map_keys or inner_map_fds",
"error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
? 9 : 1000) - map_index;
+ /* This condition is only meaningful for array of maps.
+ *
+ * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+ * max_entries is short for n, then outer_map_keys looks like:
+ *
+ * [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ * [n, n-1, ... 2, 0]
+ *
+ * So it will leave key 1 as a hole. It will serve to test the
+ * correctness when batch on an array: a "non-exist" key might be
+ * actually allocated and returned from key iteration.
+ */
+ if (has_holes)
+ outer_map_keys[max_entries - 1]--;
+
/* batch operation - map_update */
ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
/* batch operation - map_lookup */
for (op_index = 0; op_index < 2; ++op_index)
fetch_and_validate(outer_map_fd, &opts,
- batch_size[op_index], false);
+ batch_size[op_index], false,
+ has_holes);
/* batch operation - map_lookup_delete */
if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
fetch_and_validate(outer_map_fd, &opts,
- max_entries, true /*delete*/);
+ max_entries, true /*delete*/,
+ has_holes);
/* close all map fds */
- for (map_index = 0; map_index < max_entries; map_index++)
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
close(inner_map_fds[map_index]);
close(outer_map_fd);
@@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
void test_map_in_map_batch_ops_array(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+ printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+ printf("%s:PASS with inner HASH map with holes\n", __func__);
}
void test_map_in_map_batch_ops_hash(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
}
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 80844a5fb1fe..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
{
- int fd;
+ int on = 1, fd;
if (!opts)
opts = &default_opts;
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
if (settimeo(fd, opts->timeout_ms))
goto error_close;
+ if (type == SOCK_STREAM &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+ log_err("Failed to enable SO_REUSEADDR");
+ goto error_close;
+ }
+
if (opts->post_socket_cb &&
opts->post_socket_cb(fd, opts->cb_opts)) {
log_err("Failed to call post_socket_cb");
@@ -446,6 +452,23 @@ char *ping_command(int family)
return "ping";
}
+int append_tid(char *str, size_t sz)
+{
+ size_t end;
+
+ if (!str)
+ return -1;
+
+ end = strlen(str);
+ if (end + 8 > sz)
+ return -1;
+
+ sprintf(&str[end], "%07ld", sys_gettid());
+ str[end + 7] = '\0';
+
+ return 0;
+}
+
int remove_netns(const char *name)
{
char *cmd;
@@ -548,6 +571,34 @@ void close_netns(struct nstoken *token)
free(token);
}
+int open_tuntap(const char *dev_name, bool need_mac)
+{
+ int err = 0;
+ struct ifreq ifr;
+ int fd = open("/dev/net/tun", O_RDWR);
+
+ if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
+ return -1;
+
+ ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
+ close(fd);
+ return -1;
+ }
+
+ err = fcntl(fd, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
int get_socket_local_port(int sock_fd)
{
struct sockaddr_storage addr;
@@ -721,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
return err;
}
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+ int ifindex, ret;
+
+ if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+ "at least one program fd is valid"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+ .priority = 1, .prog_fd = ingress_fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+ .priority = 1, .prog_fd = egress_fd);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (ingress_fd >= 0) {
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ if (egress_fd >= 0) {
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx {
pcap_t *pcap;
@@ -733,6 +828,36 @@ struct tmonitor_ctx {
int pcap_fd;
};
+static int __base_pr(const char *format, va_list args)
+{
+ return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ tm_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+ return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+ tm_print_fn_t print_fn;
+ va_list args;
+
+ print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
+ return;
+
+ va_start(args, format);
+ print_fn(format, args);
+ va_end(args);
+}
+
/* Is this packet captured with a Ethernet protocol type? */
static bool is_ethernet(const u_char *packet)
{
@@ -750,7 +875,7 @@ static bool is_ethernet(const u_char *packet)
case 770: /* ARPHRD_FRAD */
case 778: /* ARPHDR_IPGRE */
case 803: /* ARPHRD_IEEE80211_RADIOTAP */
- printf("Packet captured: arphdr_type=%d\n", arphdr_type);
+ tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
return false;
}
return true;
@@ -771,12 +896,13 @@ static const char *pkt_type_str(u16 pkt_type)
return "Unknown";
}
+#define MAX_FLAGS_STRLEN 21
/* Show the information of the transport layer in the packet */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
const char *src_addr, const char *dst_addr,
u16 proto, bool ipv6, u8 pkt_type)
{
- char *ifname, _ifname[IF_NAMESIZE];
+ char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
const char *transport_str;
u16 src_port, dst_port;
struct udphdr *udp;
@@ -799,47 +925,39 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
dst_port = ntohs(tcp->dest);
transport_str = "TCP";
} else if (proto == IPPROTO_ICMP) {
- printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else if (proto == IPPROTO_ICMPV6) {
- printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else {
- printf("%-7s %-3s %s %s > %s: protocol %d\n",
- ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
- src_addr, dst_addr, proto);
+ tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+ ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+ src_addr, dst_addr, proto);
return;
}
/* TCP or UDP*/
- flockfile(stdout);
+ if (proto == IPPROTO_TCP)
+ snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
+ tcp->fin ? ", FIN" : "",
+ tcp->syn ? ", SYN" : "",
+ tcp->rst ? ", RST" : "",
+ tcp->ack ? ", ACK" : "");
+
if (ipv6)
- printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
+ tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
else
- printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
-
- if (proto == IPPROTO_TCP) {
- if (tcp->fin)
- printf(", FIN");
- if (tcp->syn)
- printf(", SYN");
- if (tcp->rst)
- printf(", RST");
- if (tcp->ack)
- printf(", ACK");
- }
-
- printf("\n");
- funlockfile(stdout);
+ tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
}
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
@@ -954,8 +1072,8 @@ static void *traffic_monitor_thread(void *arg)
ifname = _ifname;
}
- printf("%-7s %-3s Unknown network protocol type 0x%x\n",
- ifname, pkt_type_str(ptype), proto);
+ tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+ ifname, pkt_type_str(ptype), proto);
}
}
@@ -1155,8 +1273,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx)
write(ctx->wake_fd, &w, sizeof(w));
pthread_join(ctx->thread, NULL);
- printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+ tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
traffic_monitor_release(ctx);
}
+
#endif /* TRAFFIC_MONITOR */
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index ebec8a8d6f81..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -8,6 +8,7 @@
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
+#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ethtool.h>
@@ -17,6 +18,7 @@ typedef __u16 __sum16;
#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <net/if.h>
+#include <stdio.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
@@ -85,6 +87,8 @@ int get_socket_local_port(int sock_fd);
int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int open_tuntap(const char *dev_name, bool need_mac);
+
struct nstoken;
/**
* open_netns() - Switch to specified network namespace by name.
@@ -98,6 +102,18 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes);
int make_netns(const char *name);
int remove_netns(const char *name);
+/**
+ * append_tid() - Append thread ID to the given string.
+ *
+ * @str: string to extend
+ * @sz: string's size
+ *
+ * 8 characters are used to append the thread ID (7 digits + '\0')
+ *
+ * Returns -1 on errors, 0 otherwise
+ */
+int append_tid(char *str, size_t sz);
+
static __u16 csum_fold(__u32 csum)
{
csum = (csum & 0xffff) + (csum >> 16);
@@ -237,10 +253,29 @@ static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
struct tmonitor_ctx;
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors pointing to at least one, at most two BPF
+ * programs, and attach those programs to an interface ingress, egress or
+ * both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found, if
+ * the interface name is invalid or if an error ocurred during attach.
+ */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
#else
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
@@ -251,6 +286,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons
static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ return NULL;
+}
#endif
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4ebd0da898f5..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "2"},
- {1, "R3_w", "4"},
- {2, "R3_w", "8"},
- {3, "R3_w", "16"},
- {4, "R3_w", "32"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "1"},
- {1, "R3_w", "2"},
- {2, "R3_w", "4"},
- {3, "R3_w", "8"},
- {4, "R3_w", "16"},
- {5, "R3_w", "1"},
- {6, "R4_w", "32"},
- {7, "R4_w", "16"},
- {8, "R4_w", "8"},
- {9, "R4_w", "4"},
- {10, "R4_w", "2"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "4"},
- {1, "R3_w", "8"},
- {2, "R3_w", "10"},
- {3, "R4_w", "8"},
- {4, "R4_w", "12"},
- {5, "R4_w", "14"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "7"},
- {1, "R3_w", "7"},
- {2, "R3_w", "14"},
- {3, "R3_w", "56"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8)"},
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R3_w", "var_off=(0x0; 0x1fe)"},
- {8, "R3_w", "var_off=(0x0; 0x3fc)"},
- {9, "R3_w", "var_off=(0x0; 0x7f8)"},
- {10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end()"},
- {17, "R4_w", "var_off=(0x0; 0xff)"},
- {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
- {19, "R4_w", "var_off=(0x0; 0xff0)"},
- {20, "R4_w", "var_off=(0x0; 0x7f8)"},
- {21, "R4_w", "var_off=(0x0; 0x3fc)"},
- {22, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R4_w", "var_off=(0x0; 0xff)"},
- {8, "R4_w", "var_off=(0x0; 0xff)"},
- {9, "R4_w", "var_off=(0x0; 0xff)"},
- {10, "R4_w", "var_off=(0x0; 0x1fe)"},
- {11, "R4_w", "var_off=(0x0; 0xff)"},
- {12, "R4_w", "var_off=(0x0; 0x3fc)"},
- {13, "R4_w", "var_off=(0x0; 0xff)"},
- {14, "R4_w", "var_off=(0x0; 0x7f8)"},
- {15, "R4_w", "var_off=(0x0; 0xff0)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(r=0)"},
- {4, "R5_w", "pkt(off=14,r=0)"},
- {5, "R4_w", "pkt(off=14,r=0)"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
{9, "R2", "pkt(r=18)"},
{10, "R5", "pkt(off=14,r=18)"},
- {10, "R4_w", "var_off=(0x0; 0xff)"},
- {13, "R4_w", "var_off=(0x0; 0xffff)"},
- {14, "R4_w", "var_off=(0x0; 0xffff)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w", "pkt(id=1,off=14,"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w", "var_off=(0x0; 0x3fc)"},
- {18, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w", "pkt(id=2,off=14,"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8)"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w", "var_off=(0x0; 0x3fc)"},
- {28, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w", "pkt(id=3,off=18,"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w", "var_off=(0x0; 0x7fc)"},
- {31, "R5_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
{12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
{20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end()"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {8, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w", "var_off=(0x0; 0x3fc)"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
@@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {9, "R6_w", "var_off=(0x0; 0x3c)"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w", "var_off=(0x2; 0x7c)"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w", "var_off=(0x0; 0x7fc)"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
@@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test)
.log_size = sizeof(bpf_vlog),
.log_level = 2,
);
+ const char *main_pass_start = "0: R1=ctx() R10=fp0";
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
+ char *start;
int fd_prog;
int ret;
@@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test)
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- line_ptr = strtok(bpf_vlog_copy, "\n");
+ start = strstr(bpf_vlog_copy, main_pass_start);
+ if (!start) {
+ ret = 1;
+ printf("Can't find initial line '%s'\n", main_pass_start);
+ goto out;
+ }
+ line_ptr = strtok(start, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
const char *p;
@@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test)
break;
}
}
+out:
if (fd_prog >= 0)
close(fd_prog);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
index 26e7c06c6cb4..d98577a6babc 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel)
ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
}
+static void test_load_acquire(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.load_acquire);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->load_acquire8_result, 0x12,
+ "load_acquire8_result");
+ ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234,
+ "load_acquire16_result");
+ ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678,
+ "load_acquire32_result");
+ ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef,
+ "load_acquire64_result");
+}
+
+static void test_store_release(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.store_release);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->store_release8_result, 0x12,
+ "store_release8_result");
+ ASSERT_EQ(skel->arena->store_release16_result, 0x1234,
+ "store_release16_result");
+ ASSERT_EQ(skel->arena->store_release32_result, 0x12345678,
+ "store_release32_result");
+ ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef,
+ "store_release64_result");
+}
+
void test_arena_atomics(void)
{
struct arena_atomics *skel;
@@ -171,7 +231,7 @@ void test_arena_atomics(void)
if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
return;
- if (skel->data->skip_tests) {
+ if (skel->data->skip_all_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
__func__);
test__skip();
@@ -198,6 +258,10 @@ void test_arena_atomics(void)
test_xchg(skel);
if (test__start_subtest("uaf"))
test_uaf(skel);
+ if (test__start_subtest("load_acquire"))
+ test_load_acquire(skel);
+ if (test__start_subtest("store_release"))
+ test_store_release(skel);
cleanup:
arena_atomics__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
new file mode 100644
index 000000000000..693fd86fbde6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+struct __qspinlock { int val; };
+typedef struct __qspinlock arena_spinlock_t;
+
+struct arena_qnode {
+ unsigned long next;
+ int count;
+ int locked;
+};
+
+#include "arena_spin_lock.skel.h"
+
+static long cpu;
+static int repeat;
+
+pthread_barrier_t barrier;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = repeat,
+ );
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity");
+
+ err = pthread_barrier_wait(&barrier);
+ if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0)
+ ASSERT_FALSE(true, "pthread_barrier");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
+ ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+
+end:
+ pthread_exit(arg);
+}
+
+static void test_arena_spin_lock_size(int size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct arena_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ int nthreads;
+ void *ret;
+
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = arena_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
+ return;
+
+ if (skel->data->test_skip == 2) {
+ test__skip();
+ goto end;
+ }
+ skel->bss->cs_count = size;
+ skel->bss->limit = repeat * nthreads;
+
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
+
+ prog_fd = bpf_program__fd(skel->progs.prog);
+ for (i = 0; i < nthreads; i++) {
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end_barrier;
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end_barrier;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end_barrier;
+ }
+
+ if (skel->data->test_skip == 3) {
+ printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
+
+end_barrier:
+ pthread_barrier_destroy(&barrier);
+end:
+ arena_spin_lock__destroy(skel);
+ return;
+}
+
+void test_arena_spin_lock(void)
+{
+ repeat = 1000;
+ if (test__start_subtest("arena_spin_lock_1"))
+ test_arena_spin_lock_size(1);
+ cpu = 0;
+ if (test__start_subtest("arena_spin_lock_1000"))
+ test_arena_spin_lock_size(1000);
+ cpu = 0;
+ repeat = 100;
+ if (test__start_subtest("arena_spin_lock_50000"))
+ test_arena_spin_lock_size(50000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
new file mode 100644
index 000000000000..f81a0c066505
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_strsearch.skel.h"
+
+static void test_arena_str(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_strsearch *skel;
+ int ret;
+
+ skel = arena_strsearch__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ }
+ arena_strsearch__destroy(skel);
+}
+
+void test_arena_strsearch(void)
+{
+ if (test__start_subtest("arena_strsearch"))
+ test_arena_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index bb143de68875..e27d66b75fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -144,11 +144,17 @@ static void test_parse_test_list_file(void)
if (!ASSERT_OK(ferror(fp), "prepare tmp"))
goto out_fclose;
+ if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+ goto out_fclose;
+
init_test_filter_set(&set);
- ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+ if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+ goto out_fclose;
+
+ if (!ASSERT_EQ(set.cnt, 4, "test count"))
+ goto out_free_set;
- ASSERT_EQ(set.cnt, 4, "test count");
ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
@@ -158,8 +164,8 @@ static void test_parse_test_list_file(void)
ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+out_free_set:
free_test_filter_set(&set);
-
out_fclose:
fclose(fp);
out_remove:
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 13e101f370a1..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel)
void test_atomics(void)
{
struct atomics_lskel *skel;
+ int err;
- skel = atomics_lskel__open_and_load();
- if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 329c7862b52d..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -3,6 +3,7 @@
#include "test_attach_kprobe_sleepable.skel.h"
#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
@@ -122,6 +123,110 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* attach uprobe/uretprobe long event name testings */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
+ goto cleanup;
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+#ifdef __x86_64__
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -323,6 +428,13 @@ void test_attach_probe(void)
if (test__start_subtest("uprobe-ref_ctr"))
test_uprobe_ref_ctr(skel);
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
+
cleanup:
test_attach_probe__destroy(skel);
ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index cc184e4420f6..42b49870e520 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -6,10 +6,14 @@
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
static void test_fail_cases(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
- __u32 value;
+ __u32 value = 0;
int fd, err;
/* Invalid key size */
@@ -69,6 +73,7 @@ static void test_success_cases(void)
/* Create a map */
opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ opts.numa_node = NUMA_NO_NODE;
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 6befa870434b..75f4dff7d042 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
- attr.freq = 1;
- attr.sample_freq = 10000;
+ attr.sample_period = 100000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
@@ -489,10 +488,28 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_tracing_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie");
+
+ return 0;
+}
+
static void tracing_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd;
+ int prog_fd, err;
int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
LIBBPF_OPTS(bpf_test_run_opts, opts);
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
@@ -507,6 +524,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
goto cleanup;
+ err = verify_tracing_link_info(fentry_fd, cookie);
+ if (!ASSERT_OK(err, "verify_tracing_link_info"))
+ goto cleanup;
+
cookie = 0x20000000000000L;
prog_fd = bpf_program__fd(skel->progs.fexit_test1);
link_opts.tracing.cookie = cookie;
@@ -635,10 +656,29 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_raw_tp_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie");
+
+ return 0;
+}
+
static void raw_tp_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd, link_fd = -1;
+ int err, prog_fd, link_fd = -1;
struct bpf_link *link = NULL;
LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
@@ -656,6 +696,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel)
goto cleanup;
usleep(1); /* trigger */
+
+ err = verify_raw_tp_link_info(link_fd, cookie);
+ if (!ASSERT_OK(err, "verify_raw_tp_link_info"))
+ goto cleanup;
+
close(link_fd); /* detach */
link_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
new file mode 100644
index 000000000000..d138cc7b1bda
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <sys/syscall.h>
+#include <bpf/bpf.h>
+
+#include "bpf_gotox.skel.h"
+
+static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = ctx_in,
+ .ctx_size_in = ctx_size_in,
+ );
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+}
+
+static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
+{
+ if (skel->data->skip)
+ test__skip();
+ else
+ check(skel);
+}
+
+static void check_simple(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->ret_user = 0;
+
+ __test_run(prog, &ctx_in, sizeof(ctx_in));
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+static void check_simple_fentry(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->in_user = ctx_in;
+ skel->bss->ret_user = 0;
+
+ /* trigger */
+ usleep(1);
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+/* validate that for two loads of the same jump table libbpf generates only one map */
+static void check_one_map_two_jumps(struct bpf_gotox *skel)
+{
+ struct bpf_prog_info prog_info;
+ struct bpf_map_info map_info;
+ __u32 len;
+ __u32 map_ids[16];
+ int prog_fd, map_fd;
+ int ret;
+ int i;
+ bool seen = false;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.map_ids = (long)map_ids;
+ prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
+ prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
+ return;
+
+ len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
+ return;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
+ return;
+
+ len = sizeof(map_info);
+ memset(&map_info, 0, len);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
+ close(map_fd);
+ return;
+ }
+
+ if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
+ if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
+ close(map_fd);
+ return;
+ }
+ seen = true;
+ }
+ close(map_fd);
+ }
+
+ ASSERT_EQ(seen, true, "no INSN_ARRAY map");
+}
+
+static void check_one_switch(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch, in[i], out[i]);
+}
+
+static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]);
+}
+
+static void check_two_switches(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.two_switches, in[i], out[i]);
+}
+
+static void check_big_jump_table(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 11, 27, 31, 22, 45, 99};
+ __u64 out[] = {2, 3, 4, 5, 19, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.big_jump_table, in[i], out[i]);
+}
+
+static void check_one_jump_two_maps(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {12, 15, 7 , 15, 12, 15, 15};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]);
+}
+
+static void check_static_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global1, in[i], out[i]);
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global2, in[i], out[i]);
+}
+
+static void check_nonstatic_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]);
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]);
+}
+
+static void check_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.simple_test_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_static_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_static_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+void test_bpf_gotox(void)
+{
+ struct bpf_gotox *skel;
+ int ret;
+
+ skel = bpf_gotox__open();
+ if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
+ return;
+
+ ret = bpf_gotox__load(skel);
+ if (!ASSERT_OK(ret, "bpf_gotox__load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("one-switch"))
+ __subtest(skel, check_one_switch);
+
+ if (test__start_subtest("one-switch-non-zero-sec-offset"))
+ __subtest(skel, check_one_switch_non_zero_sec_off);
+
+ if (test__start_subtest("two-switches"))
+ __subtest(skel, check_two_switches);
+
+ if (test__start_subtest("big-jump-table"))
+ __subtest(skel, check_big_jump_table);
+
+ if (test__start_subtest("static-global"))
+ __subtest(skel, check_static_global);
+
+ if (test__start_subtest("nonstatic-global"))
+ __subtest(skel, check_nonstatic_global);
+
+ if (test__start_subtest("other-sec"))
+ __subtest(skel, check_other_sec);
+
+ if (test__start_subtest("static-global-other-sec"))
+ __subtest(skel, check_static_global_other_sec);
+
+ if (test__start_subtest("nonstatic-global-other-sec"))
+ __subtest(skel, check_nonstatic_global_other_sec);
+
+ if (test__start_subtest("one-jump-two-maps"))
+ __subtest(skel, check_one_jump_two_maps);
+
+ if (test__start_subtest("one-map-two-jumps"))
+ __subtest(skel, check_one_map_two_jumps);
+
+ bpf_gotox__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
new file mode 100644
index 000000000000..269870bec941
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/bpf.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+static int map_create(__u32 map_type, __u32 max_entries)
+{
+ const char *map_name = "insn_array";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts);
+}
+
+static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
+{
+ struct bpf_insn_array_value val = {};
+ int prog_fd = -1, map_fd, i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < insn_cnt; i++) {
+ val.orig_off = map_in[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < insn_cnt; i++) {
+ char buf[64];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i);
+ ASSERT_EQ(val.xlated_off, map_out[i], buf);
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/*
+ * Load a program, which will not be anyhow mangled by the verifier. Add an
+ * insn_array map pointing to every instruction. Check that it hasn't changed
+ * after the program load.
+ */
+static void check_one_to_one_mapping(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 2, 3, 4, 5};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Load a program with two patches (get jiffies, for simplicity). Add an
+ * insn_array map pointing to every instruction. Check how it was changed
+ * after the program load.
+ */
+static void check_simple(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 4, 5, 8, 9};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Verifier can delete code in two cases: nops & dead code. From insn
+ * array's point of view, the two cases are the same, so test using
+ * the simplest method: by loading some nops
+ */
+static void check_deletions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, -1, 1, -1, 2, 3};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Same test as check_deletions, but also add code which adds instructions
+ */
+static void check_deletions_with_functions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10};
+ __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Try to load a program with a map which points to outside of the program
+ */
+static void check_out_of_bounds_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = ARRAY_SIZE(insns); /* too big */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+/*
+ * Try to load a program with a map which points to the middle of 16-bit insn
+ */
+static void check_mid_insn_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = 1; /* middle of 16-byte instruction */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+ check_out_of_bounds_index();
+ check_mid_insn_index();
+}
+
+static int set_bpf_jit_harden(char *level)
+{
+ char old_level;
+ int err = -1;
+ int fd = -1;
+
+ fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+ return -1;
+ }
+
+ err = read(fd, &old_level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ err = write(fd, level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ err = 0;
+ *level = old_level;
+end:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+static void check_blindness(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ char bpf_jit_harden = '@'; /* non-exizsting value */
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ bpf_jit_harden = '2';
+ if (set_bpf_jit_harden(&bpf_jit_harden)) {
+ bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ char fmt[32];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i);
+ ASSERT_EQ(val.xlated_off, i * 3, fmt);
+ }
+
+cleanup:
+ /* restore the old one */
+ if (bpf_jit_harden != '@')
+ set_bpf_jit_harden(&bpf_jit_harden);
+
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Once map was initialized, it should be frozen */
+static void check_load_unfrozen_map(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: now freeze the map, the program should load fine */
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Map can be used only by one BPF program */
+static void check_no_map_reuse(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd, extra_fd = -1;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable without fd_array */
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error"))
+ goto cleanup;
+
+cleanup:
+ close(extra_fd);
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_no_lookup(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ insns[0].imm = map_fd;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable with normal map */
+ close(map_fd);
+ map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
+ insns[0].imm = map_fd;
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_side(void)
+{
+ check_bpf_no_lookup();
+}
+
+static void __test_bpf_insn_array(void)
+{
+ /* Test if offsets are adjusted properly */
+
+ if (test__start_subtest("one2one"))
+ check_one_to_one_mapping();
+
+ if (test__start_subtest("simple"))
+ check_simple();
+
+ if (test__start_subtest("deletions"))
+ check_deletions();
+
+ if (test__start_subtest("deletions-with-functions"))
+ check_deletions_with_functions();
+
+ if (test__start_subtest("blindness"))
+ check_blindness();
+
+ /* Check all kinds of operations and related restrictions */
+
+ if (test__start_subtest("incorrect-index"))
+ check_incorrect_index();
+
+ if (test__start_subtest("load-unfrozen-map"))
+ check_load_unfrozen_map();
+
+ if (test__start_subtest("no-map-reuse"))
+ check_no_map_reuse();
+
+ if (test__start_subtest("bpf-side-ops"))
+ check_bpf_side();
+}
+#else
+static void __test_bpf_insn_array(void)
+{
+ test__skip();
+}
+#endif
+
+void test_bpf_insn_array(void)
+{
+ __test_bpf_insn_array();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f1bfacd7375..5225d69bf79b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -323,19 +323,87 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
struct bpf_iter_tasks *skel;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c = 0;
+ char *test_data = NULL;
+ char *test_data_long = NULL;
+ char *data[2];
+
+ if (!ASSERT_OK(pipe(data_pipe), "data_pipe") ||
+ !ASSERT_OK(pipe(finish_pipe), "finish_pipe"))
+ return;
skel = bpf_iter_tasks__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ return;
+
+ if (pid == 0) {
+ /* child */
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
+
+ test_data = malloc(sizeof(char) * 10);
+ strncpy(test_data, "test_data", 10);
+ test_data[9] = '\0';
+
+ test_data_long = malloc(sizeof(char) * 5000);
+ for (int i = 0; i < 5000; ++i) {
+ if (i % 2 == 0)
+ test_data_long[i] = 'b';
+ else
+ test_data_long[i] = 'a';
+ }
+ test_data_long[4999] = '\0';
+
+ data[0] = test_data;
+ data[1] = test_data_long;
+
+ write(data_pipe[1], &data, sizeof(data));
+
+ /* keep child alive until after the test */
+ err = read(finish_pipe[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+ _exit(0);
+ }
+
+ /* parent */
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+
+ err = read(data_pipe[0], &data, sizeof(data));
+ ASSERT_EQ(err, sizeof(data), "read_check");
+
+ skel->bss->user_ptr = data[0];
+ skel->bss->user_ptr_long = data[1];
+ skel->bss->pid = pid;
+
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
bpf_iter_tasks__destroy(skel);
+
+ write(finish_pipe[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+ ASSERT_EQ(status, 0, "zero_child_exit");
+
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
}
static void test_task_stack(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index fe2c502e5089..ecc3d47919ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr)
}
uffd_register.range.start = (unsigned long)fault_addr;
- uffd_register.range.len = 4096;
+ uffd_register.range.len = getpagesize();
uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
close(uffd);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index a4a1f93878d4..dd6512fa652b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode)
.repeat = 1,
);
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
skel = test_bpf_nf__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
@@ -72,11 +78,14 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(system(cmd), cmd))
goto end;
- srv_port = (mode == TEST_XDP) ? 5005 : 5006;
- srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto end;
+ srv_port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(srv_port, 0, "get_sock_local_port"))
+ goto end;
+
client_fd = connect_to_server(srv_fd);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto end;
@@ -91,7 +100,7 @@ static void test_bpf_nf_ct(int mode)
skel->bss->saddr = peer_addr.sin_addr.s_addr;
skel->bss->sport = peer_addr.sin_port;
skel->bss->daddr = peer_addr.sin_addr.s_addr;
- skel->bss->dport = htons(srv_port);
+ skel->bss->dport = srv_port;
if (mode == TEST_XDP)
prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index e63d74ce046f..054ecb6b1e9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "vlen != 0",
},
{
- .descr = "decl_tag test #8, invalid kflag",
+ .descr = "decl_tag test #8, tag with kflag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+ BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),
BTF_END_RAW,
},
BTF_STR_SEC("\0local\0tag1"),
@@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
},
{
.descr = "decl_tag test #9, var, invalid component_idx",
@@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Type tags don't precede modifiers",
},
{
+ .descr = "type_tag test #7, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
.descr = "enum64 test #1, unsigned, size 8",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -7481,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = {
},
},
{
+ .descr = "dedup: recursive typedef",
+ /*
+ * This test simulates a recursive typedef, which in GO is defined as such:
+ *
+ * type Foo func() Foo
+ *
+ * In BTF terms, this is represented as a TYPEDEF referencing
+ * a FUNC_PROTO that returns the same TYPEDEF.
+ */
+ .input = {
+ .raw_types = {
+ /*
+ * [1] typedef Foo -> func() Foo
+ * [2] func_proto() -> Foo
+ * [3] typedef Foo -> func() Foo
+ * [4] func_proto() -> Foo
+ */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+},
+{
+ .descr = "dedup: typedef",
+ /*
+ * // CU 1:
+ * typedef int foo;
+ *
+ * // CU 2:
+ * typedef int foo;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ /* CU 2 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+},
+{
.descr = "dedup: typedef tags",
.input = {
.raw_types = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index d9024c7a892a..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -440,6 +440,105 @@ cleanup:
btf__free(btf1);
}
+/* Ensure module split BTF dedup worked correctly; when dedup fails badly
+ * core kernel types are in split BTF also, so ensure that references to
+ * such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters;
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -450,4 +549,6 @@ void test_btf_dedup_split()
test_split_fwd_resolve();
if (test__start_subtest("split_dup_struct_in_cu"))
test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index b293b8501fd6..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
@@ -126,26 +126,69 @@ done:
return err;
}
-static char *dump_buf;
-static size_t dump_buf_sz;
-static FILE *dump_buf_file;
+struct test_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *dump_buf;
+ size_t dump_buf_sz;
+ FILE *dump_buf_file;
+};
-static void test_btf_dump_incremental(void)
+static void test_ctx__free(struct test_ctx *t)
{
- struct btf *btf = NULL;
- struct btf_dump *d = NULL;
- int id, err, i;
+ fclose(t->dump_buf_file);
+ free(t->dump_buf);
+ btf_dump__free(t->d);
+ btf__free(t->btf);
+}
- dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
- if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
- return;
- btf = btf__new_empty();
- if (!ASSERT_OK_PTR(btf, "new_empty"))
+static int test_ctx__init(struct test_ctx *t)
+{
+ t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz);
+ if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream"))
+ return -1;
+ t->btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(t->btf, "new_empty"))
goto err_out;
- d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
- if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new"))
goto err_out;
+ return 0;
+
+err_out:
+ test_ctx__free(t);
+ return -1;
+}
+
+static void test_ctx__dump_and_compare(struct test_ctx *t,
+ const char *expected_output,
+ const char *message)
+{
+ int i, err;
+
+ for (i = 1; i < btf__type_cnt(t->btf); i++) {
+ err = btf_dump__dump_type(t->d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(t->dump_buf_file);
+ t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(t->dump_buf, expected_output, message);
+}
+
+static void test_btf_dump_incremental(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
/* First, generate BTF corresponding to the following C code:
*
* enum x;
@@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "x", 4, 0, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
-
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"enum x;\n"
"\n"
"enum x {\n"
@@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void)
* enum values don't conflict;
*
*/
- fseek(dump_buf_file, 0, SEEK_SET);
+ fseek(t.dump_buf_file, 0, SEEK_SET);
id = btf__add_struct(btf, "s", 4);
ASSERT_EQ(id, 7, "struct_id");
@@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "s", 6, 64, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"struct s___2 {\n"
" enum x x;\n"
" enum {\n"
@@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void)
" struct s s;\n"
"};\n\n" , "c_dump1");
-err_out:
- fclose(dump_buf_file);
- free(dump_buf);
- btf_dump__free(d);
- btf__free(btf);
+ test_ctx__free(&t);
+}
+
+static void test_btf_dump_type_tags(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
+ /* Generate BTF corresponding to the following C code:
+ *
+ * struct s {
+ * void __attribute__((btf_type_tag(\"void_tag\"))) *p1;
+ * void __attribute__((void_attr)) *p2;
+ * };
+ *
+ */
+
+ id = btf__add_type_tag(btf, "void_tag", 0);
+ ASSERT_EQ(id, 1, "type_tag_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 2, "void_ptr_id1");
+
+ id = btf__add_type_attr(btf, "void_attr", 0);
+ ASSERT_EQ(id, 3, "type_attr_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 4, "void_ptr_id2");
+
+ id = btf__add_struct(btf, "s", 8);
+ ASSERT_EQ(id, 5, "struct_id");
+ err = btf__add_field(btf, "p1", 2, 0, 0);
+ ASSERT_OK(err, "field_ok1");
+ err = btf__add_field(btf, "p2", 4, 0, 0);
+ ASSERT_OK(err, "field_ok2");
+
+ test_ctx__dump_and_compare(&t,
+"struct s {\n"
+" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n"
+" void __attribute__((void_attr)) *p2;\n"
+"};\n\n", "dump_and_compare");
+
+ test_ctx__free(&t);
}
#define STRSIZE 4096
@@ -809,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
"static int bpf_cgrp_storage_busy = (int)2", 2);
}
+struct btf_dump_string_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *str;
+ struct btf_dump_type_data_opts *opts;
+ int array_id;
+};
+
+static int btf_dump_one_string(struct btf_dump_string_ctx *ctx,
+ char *ptr, size_t ptr_sz,
+ const char *expected_val)
+{
+ size_t type_sz;
+ int ret;
+
+ ctx->str[0] = '\0';
+ type_sz = btf__resolve_size(ctx->btf, ctx->array_id);
+ ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+static void btf_dump_strings(struct btf_dump_string_ctx *ctx)
+{
+ struct btf_dump_type_data_opts *opts = ctx->opts;
+
+ opts->emit_strings = true;
+
+ opts->compact = true;
+ opts->emit_zeroes = false;
+
+ opts->skip_names = false;
+ btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\"");
+
+ opts->skip_names = true;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->emit_zeroes = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->compact = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* Non-printable characters come out as hex. */
+ btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\"");
+ btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\"");
+
+ /*
+ * Strings that are too long for the specified type ("char[4]")
+ * should fall back to the current behavior.
+ */
+ opts->compact = true;
+ btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]");
+
+ /*
+ * Strings that are too short for the specified type ("char[4]")
+ * should work normally.
+ */
+ btf_dump_one_string(ctx, "ab", 3, "\"ab\"");
+
+ /* Non-NUL-terminated arrays don't get printed as strings. */
+ char food[4] = { 'f', 'o', 'o', 'd' };
+ char bye[3] = { 'b', 'y', 'e' };
+
+ btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]");
+ btf_dump_one_string(ctx, bye, 3, "['b','y','e',]");
+
+ /* The embedded NUL should terminate the string. */
+ char embed[4] = { 'f', 'o', '\0', 'd' };
+
+ btf_dump_one_string(ctx, embed, 4, "\"fo\"");
+}
+
+static void test_btf_dump_string_data(void)
+{
+ struct test_ctx t = {};
+ char str[STRSIZE];
+ struct btf_dump *d;
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ struct btf_dump_string_ctx ctx;
+ int char_id, int_id, array_id;
+
+ if (test_ctx__init(&t))
+ return;
+
+ d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Generate BTF for a four-element char array. */
+ char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR);
+ ASSERT_EQ(char_id, 1, "char_id");
+ int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(int_id, 2, "int_id");
+ array_id = btf__add_array(t.btf, int_id, char_id, 4);
+ ASSERT_EQ(array_id, 3, "array_id");
+
+ ctx.btf = t.btf;
+ ctx.d = d;
+ ctx.str = str;
+ ctx.opts = &opts;
+ ctx.array_id = array_id;
+
+ btf_dump_strings(&ctx);
+
+ btf_dump__free(d);
+ test_ctx__free(&t);
+}
+
static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
const char *name, const char *expected_val,
void *data, size_t data_sz)
@@ -874,6 +1060,9 @@ void test_btf_dump() {
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+ if (test__start_subtest("btf_dump: type_tags"))
+ test_btf_dump_type_tags();
+
btf = libbpf_find_kernel_btf();
if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
return;
@@ -897,6 +1086,8 @@ void test_btf_dump() {
test_btf_dump_struct_data(btf, d, str);
if (test__start_subtest("btf_dump: var_data"))
test_btf_dump_var_data(btf, d, str);
+ if (test__start_subtest("btf_dump: string_data"))
+ test_btf_dump_string_data();
btf_dump__free(d);
btf__free(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index eef1158676ed..2d47cad50a51 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,10 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
+/* Write raw BTF to file, return number of bytes written or negative errno */
+static ssize_t btf_raw_write(struct btf *btf, char *file)
+{
+ ssize_t written = 0;
+ const void *data;
+ __u32 size = 0;
+ int fd, ret;
+
+ fd = mkstemp(file);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return -errno;
+
+ data = btf__raw_data(btf, &size);
+ if (!ASSERT_OK_PTR(data, "btf__raw_data")) {
+ close(fd);
+ return -EINVAL;
+ }
+ while (written < size) {
+ ret = write(fd, data + written, size - written);
+ if (!ASSERT_GE(ret, 0, "write succeeded")) {
+ close(fd);
+ return -errno;
+ }
+ written += ret;
+ }
+ close(fd);
+ return written;
+}
+
+static void __test_btf_split(bool multi)
+{
+ char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
+ char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
+ char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
+ ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
struct btf_dump *d = NULL;
- const struct btf_type *t;
- struct btf *btf1, *btf2;
+ const struct btf_type *t, *ot;
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
+ struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -63,14 +98,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i < btf__type_cnt(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -79,14 +146,56 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
+ /* write base, split BTFs to files and ensure parsing succeeds */
+ base_btf_sz = btf_raw_write(btf1, base_btf_file);
+ if (base_btf_sz < 0)
+ goto cleanup;
+ split_btf_sz = btf_raw_write(btf2, split_btf_file);
+ if (split_btf_sz < 0)
+ goto cleanup;
+ btf4 = btf__parse(base_btf_file, NULL);
+ if (!ASSERT_OK_PTR(btf4, "parse_base"))
+ goto cleanup;
+ btf5 = btf__parse_split(split_btf_file, btf4);
+ if (!ASSERT_OK_PTR(btf5, "parse_split"))
+ goto cleanup;
+ if (multi) {
+ multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
+ if (multisplit_btf_sz < 0)
+ goto cleanup;
+ btf6 = btf__parse_split(multisplit_btf_file, btf5);
+ if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
+ goto cleanup;
+ } else {
+ btf6 = btf5;
+ }
+
+ if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
+ goto cleanup;
+
+ /* compare parsed to original BTF */
+ for (i = 1; i < btf__type_cnt(btf6); i++) {
+ t = btf__type_by_id(btf6, i);
+ if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
+ goto cleanup;
+ ot = btf__type_by_id(btf3, i);
+ if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
+ goto cleanup;
+ if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
+ goto cleanup;
+ }
+
cleanup:
if (dump_buf_file)
fclose(dump_buf_file);
@@ -94,4 +203,24 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+ btf__free(btf4);
+ btf__free(btf5);
+ if (btf5 != btf6)
+ btf__free(btf6);
+ if (base_btf_sz > 0)
+ unlink(base_btf_file);
+ if (split_btf_sz > 0)
+ unlink(split_btf_file);
+ if (multisplit_btf_sz > 0)
+ unlink(multisplit_btf_file);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+ for (int i = btf_size; i < end; i++) {
+ if (((__u8 *)raw_data)[i] != 0) {
+ PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
new file mode 100644
index 000000000000..bb60704a3ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_mprog.skel.h"
+
+static void assert_mprog_count(int cg, int atype, int expected)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(cg, atype, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static void test_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in below query */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ optd.expected_revision = 5;
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ LIBBPF_OPTS_RESET(optd);
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ LIBBPF_OPTS(bpf_cgroup_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = id_from_link_fd(bpf_link__fd(link1)),
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "link_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in below query */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, fd4;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/preorder_prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, -EINVAL, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ struct cgroup_mprog *skel;
+ __u32 fd2;
+ int cg;
+
+ cg = test__join_cgroup("/preorder_link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_ERR_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_invalid_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ __u32 fd1, fd2, id2;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/invalid_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ id2 = id_from_prog_fd(fd2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+void test_cgroup_mprog_opts(void)
+{
+ if (test__start_subtest("prog_attach_detach"))
+ test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("link_attach_detach"))
+ test_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_prog_attach_detach"))
+ test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_link_attach_detach"))
+ test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("invalid_attach_detach"))
+ test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
new file mode 100644
index 000000000000..a36d2e968bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_p_atype, prog_p2_atype;
+ int prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen = 1;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ LIBBPF_OPTS_RESET(opts);
+ opts.flags = BPF_F_ALLOW_MULTI;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
+ if (has_relative_fd)
+ opts.relative_fd = prog_p_fd;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ result = skel->bss->result;
+ ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_mprog_ordering(void)
+{
+ int cg_parent = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
new file mode 100644
index 000000000000..d4d583872fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype;
+ int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ buf = 0x00;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.child;
+ prog_c_fd = bpf_program__fd(prog);
+ prog_c_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.child_2;
+ prog_c2_fd = bpf_program__fd(prog);
+ prog_c2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2"))
+ goto detach_child;
+
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_child_2;
+
+ result = skel->bss->result;
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only");
+ else
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only");
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto detach_child_2;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2,
+ "parent and child");
+ else
+ ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3,
+ "parent and child");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+detach_child_2:
+ ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype),
+ "bpf_prog_detach2-child_2");
+detach_child:
+ ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype),
+ "bpf_prog_detach2-child");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_preorder(void)
+{
+ int cg_parent = -1, cg_child = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ cg_child = test__join_cgroup("/parent/child");
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_child);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 64abba72ac10..37c1cc52ed98 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -10,12 +10,18 @@
static int run_test(int cgroup_fd, int server_fd, bool classid)
{
struct connect4_dropper *skel;
- int fd, err = 0;
+ int fd, err = 0, port;
skel = connect4_dropper__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return -1;
+ port = get_socket_local_port(server_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+
+ skel->bss->port = ntohs(port);
+
skel->links.connect_v4_dropper =
bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
cgroup_fd);
@@ -48,10 +54,9 @@ void test_cgroup_v1v2(void)
{
struct network_helper_opts opts = {};
int server_fd, client_fd, cgroup_fd;
- static const int port = 60120;
/* Step 1: Check base connectivity works without any BPF. */
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
client_fd = connect_to_fd_opts(server_fd, &opts);
@@ -66,7 +71,7 @@ void test_cgroup_v1v2(void)
cgroup_fd = test__join_cgroup("/connect_dropper");
if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd")) {
close(cgroup_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
new file mode 100644
index 000000000000..5ad904e9d15d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "read_cgroupfs_xattr.skel.h"
+#include "cgroup_read_xattr.skel.h"
+
+#define CGROUP_FS_PARENT "foo/"
+#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/"
+#define TMP_FILE "/tmp/selftests_cgroup_xattr"
+
+static const char xattr_value_a[] = "bpf_selftest_value_a";
+static const char xattr_value_b[] = "bpf_selftest_value_b";
+static const char xattr_name[] = "user.bpf_test";
+
+static void test_read_cgroup_xattr(void)
+{
+ int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1;
+ struct read_cgroupfs_xattr *skel = NULL;
+
+ parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT);
+ if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup"))
+ return;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a),
+ "set parent xattr"))
+ goto out;
+
+ child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD);
+ if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup"))
+ goto out;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b),
+ "set child xattr"))
+ goto out;
+
+ skel = read_cgroupfs_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->target_pid = sys_gettid();
+
+ if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
+ goto out;
+
+ tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT);
+ ASSERT_OK_FD(tmp_fd, "open tmp file");
+ close(tmp_fd);
+
+ ASSERT_TRUE(skel->bss->found_value_a, "found_value_a");
+ ASSERT_TRUE(skel->bss->found_value_b, "found_value_b");
+
+out:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+ read_cgroupfs_xattr__destroy(skel);
+ unlink(TMP_FILE);
+}
+
+void test_cgroup_xattr(void)
+{
+ RUN_TESTS(cgroup_read_xattr);
+
+ if (test__start_subtest("read_cgroupfs_xattr"))
+ test_read_cgroup_xattr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index adda85f97058..4b42fbc96efc 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -4,6 +4,8 @@
#define _GNU_SOURCE
#include <cgroup_helpers.h>
#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
#include "cgrp_kfunc_failure.skel.h"
#include "cgrp_kfunc_success.skel.h"
@@ -87,6 +89,72 @@ static const char * const success_tests[] = {
"test_cgrp_from_id",
};
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
void test_cgrp_kfunc(void)
{
int i, err;
@@ -102,6 +170,9 @@ void test_cgrp_kfunc(void)
run_success_test(success_tests[i]);
}
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
RUN_TESTS(cgrp_kfunc_failure);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
deleted file mode 100644
index 7526de379081..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "bpf/libbpf.h"
-#include "changes_pkt_data_freplace.skel.h"
-#include "changes_pkt_data.skel.h"
-#include <test_progs.h>
-
-static void print_verifier_log(const char *log)
-{
- if (env.verbosity >= VERBOSE_VERY)
- fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
-}
-
-static void test_aux(const char *main_prog_name,
- const char *to_be_replaced,
- const char *replacement,
- bool expect_load)
-{
- struct changes_pkt_data_freplace *freplace = NULL;
- struct bpf_program *freplace_prog = NULL;
- struct bpf_program *main_prog = NULL;
- LIBBPF_OPTS(bpf_object_open_opts, opts);
- struct changes_pkt_data *main = NULL;
- char log[16*1024];
- int err;
-
- opts.kernel_log_buf = log;
- opts.kernel_log_size = sizeof(log);
- if (env.verbosity >= VERBOSE_SUPER)
- opts.kernel_log_level = 1 | 2 | 4;
- main = changes_pkt_data__open_opts(&opts);
- if (!ASSERT_OK_PTR(main, "changes_pkt_data__open"))
- goto out;
- main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
- if (!ASSERT_OK_PTR(main_prog, "main_prog"))
- goto out;
- bpf_program__set_autoload(main_prog, true);
- err = changes_pkt_data__load(main);
- print_verifier_log(log);
- if (!ASSERT_OK(err, "changes_pkt_data__load"))
- goto out;
- freplace = changes_pkt_data_freplace__open_opts(&opts);
- if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open"))
- goto out;
- freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
- if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
- goto out;
- bpf_program__set_autoload(freplace_prog, true);
- bpf_program__set_autoattach(freplace_prog, true);
- bpf_program__set_attach_target(freplace_prog,
- bpf_program__fd(main_prog),
- to_be_replaced);
- err = changes_pkt_data_freplace__load(freplace);
- print_verifier_log(log);
- if (expect_load) {
- ASSERT_OK(err, "changes_pkt_data_freplace__load");
- } else {
- ASSERT_ERR(err, "changes_pkt_data_freplace__load");
- ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log");
- }
-
-out:
- changes_pkt_data_freplace__destroy(freplace);
- changes_pkt_data__destroy(main);
-}
-
-/* There are two global subprograms in both changes_pkt_data.skel.h:
- * - one changes packet data;
- * - another does not.
- * It is ok to freplace subprograms that change packet data with those
- * that either do or do not. It is only ok to freplace subprograms
- * that do not change packet data with those that do not as well.
- * The below tests check outcomes for each combination of such freplace.
- * Also test a case when main subprogram itself is replaced and is a single
- * subprogram in a program.
- */
-void test_changes_pkt_data_freplace(void)
-{
- struct {
- const char *main;
- const char *to_be_replaced;
- bool changes;
- } mains[] = {
- { "main_with_subprogs", "changes_pkt_data", true },
- { "main_with_subprogs", "does_not_change_pkt_data", false },
- { "main_changes", "main_changes", true },
- { "main_does_not_change", "main_does_not_change", false },
- };
- struct {
- const char *func;
- bool changes;
- } replacements[] = {
- { "changes_pkt_data", true },
- { "does_not_change_pkt_data", false }
- };
- char buf[64];
-
- for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
- for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
- snprintf(buf, sizeof(buf), "%s_with_%s",
- mains[i].to_be_replaced, replacements[j].func);
- if (!test__start_subtest(buf))
- continue;
- test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func,
- mains[i].changes || !replacements[j].changes);
- }
- }
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 2a9a30650350..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
}
+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+ int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+ struct __sk_buff skb = {
+ .gso_size = 10,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ /* Lower the mtu to test the BPF_MTU_CHK_SEGS */
+ SYS_NOFAIL("ip link set dev lo mtu 10");
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, BPF_OK, "retval");
+}
static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
{
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+ test_chk_segs_flag(skel, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
-void serial_test_check_mtu(void)
+void test_ns_check_mtu(void)
{
int mtu_lo;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 34b59f6baca1..7488a7606e6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -22,79 +22,37 @@
static int duration = 0;
-struct addr_port {
- in_port_t port;
- union {
- struct in_addr in_addr;
- struct in6_addr in6_addr;
- };
-};
-
-struct tuple {
- int family;
- struct addr_port src;
- struct addr_port dst;
-};
-
-static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
-{
- const struct sockaddr_in6 *in6;
- const struct sockaddr_in *in;
-
- switch (sa->sa_family) {
- case AF_INET:
- in = (const struct sockaddr_in *)sa;
- ap->in_addr = in->sin_addr;
- ap->port = in->sin_port;
- return true;
-
- case AF_INET6:
- in6 = (const struct sockaddr_in6 *)sa;
- ap->in6_addr = in6->sin6_addr;
- ap->port = in6->sin6_port;
- return true;
-
- default:
- return false;
- }
-}
-static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
- int *server, int *conn, struct tuple *tuple)
+static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
+ int *server, int *conn,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
{
struct sockaddr_storage ss;
socklen_t slen = sizeof(ss);
- struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
+ *server = start_server_addr(type, addr, len, NULL);
if (*server < 0)
return false;
- if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
goto close_server;
- *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
+ *conn = connect_to_addr(type, &ss, slen, NULL);
if (*conn < 0)
goto close_server;
/* We want to simulate packets arriving at conn, so we have to
* swap src and dst.
*/
- slen = sizeof(ss);
- if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
- goto close_conn;
-
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ slen = sizeof(*dst);
+ if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
goto close_conn;
- slen = sizeof(ss);
- if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+ slen = sizeof(*src);
+ if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
goto close_conn;
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
- goto close_conn;
-
- tuple->family = ss.ss_family;
return true;
close_conn:
@@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
{
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
+ memset(addr, 0, sizeof(*addr));
switch (family) {
case AF_INET:
addr4 = (struct sockaddr_in *)addr;
- memset(addr4, 0, sizeof(*addr4));
addr4->sin_family = family;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
return sizeof(*addr4);
case AF_INET6:
addr6 = (struct sockaddr_in6 *)addr;
- memset(addr6, 0, sizeof(*addr6));
addr6->sin6_family = family;
addr6->sin6_addr = in6addr_loopback;
return sizeof(*addr6);
@@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
}
static size_t build_input(const struct test_cfg *test, void *const buf,
- const struct tuple *tuple)
+ const struct sockaddr_storage *src,
+ const struct sockaddr_storage *dst)
{
- in_port_t sport = tuple->src.port;
+ struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src;
+ struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
+ sa_family_t family = src->ss_family;
+ in_port_t sport, dport;
encap_headers_t encap;
struct iphdr ip;
struct ipv6hdr ipv6;
@@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
uint8_t *p = buf;
int proto;
+ sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
+ dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
+
proto = IPPROTO_IPIP;
- if (tuple->family == AF_INET6)
+ if (family == AF_INET6)
proto = IPPROTO_IPV6;
encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
@@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
if (test->type == UDP)
proto = IPPROTO_UDP;
- switch (tuple->family) {
+ switch (family) {
case AF_INET:
ip = (struct iphdr){
.ihl = 5,
.version = 4,
.ttl = IPDEFTTL,
.protocol = proto,
- .saddr = tuple->src.in_addr.s_addr,
- .daddr = tuple->dst.in_addr.s_addr,
+ .saddr = src_in->sin_addr.s_addr,
+ .daddr = dst_in->sin_addr.s_addr,
};
p = mempcpy(p, &ip, sizeof(ip));
break;
@@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
.version = 6,
.hop_limit = IPDEFTTL,
.nexthdr = proto,
- .saddr = tuple->src.in6_addr,
- .daddr = tuple->dst.in6_addr,
+ .saddr = src_in6->sin6_addr,
+ .daddr = dst_in6->sin6_addr,
};
p = mempcpy(p, &ipv6, sizeof(ipv6));
break;
@@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
case TCP:
tcp = (struct tcphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
+ .syn = (test->flags == SYN),
+ .ack = (test->flags == ACK),
};
- if (test->flags == SYN)
- tcp.syn = true;
- if (test->flags == ACK)
- tcp.ack = true;
p = mempcpy(p, &tcp, sizeof(tcp));
break;
case UDP:
udp = (struct udphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
};
p = mempcpy(p, &udp, sizeof(udp));
break;
@@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog)
LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
- struct sockaddr *addr;
socklen_t slen;
int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
- struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
- addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
if (CHECK_FAIL(!slen))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
&servers[UDP][i], &conns[UDP][i],
- &tuples[UDP][i])))
+ &srcs[UDP][i], &dsts[UDP][i])))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
&servers[TCP][i], &conns[TCP][i],
- &tuples[TCP][i])))
+ &srcs[TCP][i], &dsts[TCP][i])))
goto cleanup;
}
@@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog)
struct test_cfg *test = &tests[i];
for (j = 0; j < ARRAY_SIZE(families); j++) {
- struct tuple *tuple = &tuples[test->type][j];
+ struct sockaddr_storage *src = &srcs[test->type][j];
+ struct sockaddr_storage *dst = &dsts[test->type][j];
char input[256];
char tmp[256];
- test_str(tmp, sizeof(tmp), test, tuple->family);
+ test_str(tmp, sizeof(tmp), test, families[j]);
if (!test__start_subtest(tmp))
continue;
@@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
tattr.data_size_out = sizeof(tmp);
tattr.data_in = input;
- tattr.data_size_in = build_input(test, input, tuple);
+ tattr.data_size_in = build_input(test, input, src, dst);
if (CHECK_FAIL(!tattr.data_size_in))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
new file mode 100644
index 000000000000..285f20241fe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "compute_live_registers.skel.h"
+#include "test_progs.h"
+
+void test_compute_live_registers(void)
+{
+ RUN_TESTS(compute_live_registers);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index e10ea92c3fe2..08963c82f30b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -85,11 +85,11 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
- .run_btfgen_fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
- .a = { [2] = 1 }, \
+ .a = { [2] = 1, [3] = 11 }, \
.b = { [1] = { [2] = { [3] = 2 } } }, \
.c = { [1] = { .c = 3 } }, \
.d = { [0] = { [0] = { .d = 4 } } }, \
@@ -108,6 +108,7 @@ static int duration = 0;
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
.a2 = 1, \
+ .a3 = 12, \
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
@@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+ ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index e58a04654238..6c45330a5ca3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = {
"test_global_mask_nested_deep_rcu",
"test_global_mask_nested_deep_array_rcu",
"test_cpumask_weight",
+ "test_refcount_null_tracking",
+ "test_populate_reject_small_mask",
+ "test_populate_reject_unaligned",
+ "test_populate",
};
static void verify_success(const char *prog_name)
@@ -78,6 +82,5 @@ void test_cpumask(void)
verify_success(cpumask_success_testcases[i]);
}
- RUN_TESTS(cpumask_success);
RUN_TESTS(cpumask_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b614a5272dfd..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -10,6 +10,7 @@ enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
SETUP_SKB_PROG_TP,
+ SETUP_XDP_PROG,
};
static struct {
@@ -18,9 +19,21 @@ static struct {
} success_tests[] = {
{"test_read_write", SETUP_SYSCALL_SLEEP},
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy_xdp", SETUP_XDP_PROG},
+ {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
@@ -30,10 +43,21 @@ static struct {
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
+#define PAGE_SIZE_64K 65536
+
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
@@ -55,6 +79,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
@@ -120,6 +148,28 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_XDP_PROG:
+ {
+ char data[90000];
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .repeat = 1,
+ );
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = sizeof(data);
+ else
+ opts.data_size_in = 5000;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index a1d52e73fb16..c534b4d5f9da 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma
int err;
memset(&info, 0, len);
- info.nr_map_ids = *nr_map_ids,
- info.map_ids = ptr_to_u64(map_ids),
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
@@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id)
* 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
* 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
* 3) Close the btf_fd, now refcnt=1
- * Wait and check that BTF stil exists.
+ * Wait and check that BTF still exists.
*/
static void check_fd_array_cnt__referenced_btfs(void)
{
@@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void)
/* btf should still exist when original file descriptor is closed */
err = get_btf_id_by_fd(extra_fds[0], &btf_id);
- if (!ASSERT_GE(err, 0, "get_btf_id_by_fd"))
+ if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd"))
goto cleanup;
Close(extra_fds[0]);
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -12,13 +12,24 @@ void test_fentry_fexit(void)
int err, prog_fd, i;
LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test_lskel__open_and_load();
+ fentry_skel = fentry_test_lskel__open();
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test_lskel__open_and_load();
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto close_prog;
+
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index aee1bc77a17f..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -43,8 +43,13 @@ static void fentry_test(void)
struct fentry_test_lskel *fentry_skel = NULL;
int err;
- fentry_skel = fentry_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
goto cleanup;
err = fentry_test_common(fentry_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 1c13007e37dd..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -43,8 +43,13 @@ static void fexit_test(void)
struct fexit_test_lskel *fexit_skel = NULL;
int err;
- fexit_skel = fexit_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto cleanup;
err = fexit_test_common(fexit_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 000000000000..5cde32b35da4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+void *get_executable_base_addr(void)
+{
+ Dl_info info;
+
+ if (!dladdr((void *)&get_executable_base_addr, &info)) {
+ fprintf(stderr, "dladdr failed\n");
+ return NULL;
+ }
+
+ return info.dli_fbase;
+}
+
+static int initialize_file_contents(void)
+{
+ int fd, page_sz = sysconf(_SC_PAGESIZE);
+ ssize_t n = 0, cur, off;
+ void *addr;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
+ return 1;
+
+ do {
+ cur = read(fd, file_contents + n, sizeof(file_contents) - n);
+ if (!ASSERT_GT(cur, 0, "read success"))
+ break;
+ n += cur;
+ } while (n < sizeof(file_contents));
+
+ close(fd);
+
+ if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n"))
+ return 1;
+
+ addr = get_executable_base_addr();
+ if (!ASSERT_NEQ(addr, NULL, "get executable address"))
+ return 1;
+
+ /* page-align base file address */
+ addr = (void *)((unsigned long)addr & ~(page_sz - 1));
+
+ /*
+ * Page out range 0..512K, use 0..256K for positive tests and
+ * 256K..512K for negative tests expecting page faults
+ */
+ for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
+ if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
+ "madvise pageout"))
+ return errno;
+ }
+
+ return 0;
+}
+
+static void run_test(const char *prog_name)
+{
+ struct file_reader *skel;
+ struct bpf_program *prog;
+ int err, fd;
+
+ err = initialize_file_contents();
+ if (!ASSERT_OK(err, "initialize file contents"))
+ return;
+
+ skel = file_reader__open();
+ if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0);
+ }
+
+ memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
+ skel->bss->pid = getpid();
+
+ err = file_reader__load(skel);
+ if (!ASSERT_OK(err, "file_reader__load"))
+ goto cleanup;
+
+ err = file_reader__attach(skel);
+ if (!ASSERT_OK(err, "file_reader__attach"))
+ goto cleanup;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (fd >= 0)
+ close(fd);
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+ ASSERT_EQ(skel->bss->run_success, 1, "run_success");
+cleanup:
+ file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+ if (test__start_subtest("on_open_expect_fault"))
+ run_test("on_open_expect_fault");
+
+ if (test__start_subtest("on_open_validate_file_read"))
+ run_test("on_open_validate_file_read");
+
+ if (test__start_subtest("negative"))
+ RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e59af2aa6601..e40114620751 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -37,6 +37,7 @@ static noinline void uprobe_func(void)
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
+ ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */;
struct bpf_link_info info;
__u32 len = sizeof(info);
char buf[PATH_MAX];
@@ -97,6 +98,7 @@ again:
case BPF_PERF_EVENT_UPROBE:
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
"name_len");
@@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
.retprobe = type == BPF_PERF_EVENT_URETPROBE,
.bpf_cookie = PERF_EVENT_COOKIE,
);
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
struct bpf_link *link;
int link_fd, err;
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
0, /* self pid */
UPROBE_FILE, uprobe_offset,
&opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
- return;
+ goto out;
link_fd = bpf_link__fd(link);
- err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
ASSERT_OK(err, "verify_perf_link_info");
bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
}
static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
index 3729fbfd3084..80b153d3ddec 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd)
static int set_port_drop(int pf, bool multi_port)
{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
SYS(fail, "tc qdisc add dev lo ingress");
- SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s",
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
"dev lo",
"parent FFFF:",
"protocol", pf == PF_INET6 ? "ipv6" : "ip",
@@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port)
"flower",
"ip_proto udp",
"src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
"action drop");
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 09f6487f58b9..5fea3209566e 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -6,6 +6,7 @@
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
static unsigned int duration;
@@ -203,6 +204,40 @@ out:
for_each_multi_maps__destroy(skel);
}
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -213,4 +248,6 @@ void test_for_each(void)
test_write_map_key();
if (test__start_subtest("multi_maps"))
test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
index b7b77a6b2979..0de8facca4c5 100644
--- a/tools/testing/selftests/bpf/prog_tests/free_timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -124,6 +124,10 @@ void test_free_timer(void)
int err;
skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "open_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
index 5a0b51157451..43a26ec69a8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -8,11 +8,12 @@
#include <unistd.h>
#include <test_progs.h>
#include "test_get_xattr.skel.h"
+#include "test_set_remove_xattr.skel.h"
#include "test_fsverity.skel.h"
static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
-static void test_xattr(void)
+static void test_get_xattr(const char *name, const char *value, bool allow_access)
{
struct test_get_xattr *skel = NULL;
int fd = -1, err;
@@ -25,7 +26,7 @@ static void test_xattr(void)
close(fd);
fd = -1;
- err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ err = setxattr(testfile, name, value, strlen(value) + 1, 0);
if (err && errno == EOPNOTSUPP) {
printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
"To run this test, make sure /tmp filesystem supports xattr.\n",
@@ -48,16 +49,23 @@ static void test_xattr(void)
goto out;
fd = open(testfile, O_RDONLY, 0644);
+
if (!ASSERT_GE(fd, 0, "open_file"))
goto out;
- ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
-
/* Trigger security_inode_getxattr */
- err = getxattr(testfile, "user.kfuncs", v, sizeof(v));
- ASSERT_EQ(err, -1, "getxattr_return");
- ASSERT_EQ(errno, EINVAL, "getxattr_errno");
- ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ err = getxattr(testfile, name, v, sizeof(v));
+
+ if (allow_access) {
+ ASSERT_EQ(err, -1, "getxattr_return");
+ ASSERT_EQ(errno, EINVAL, "getxattr_errno");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ } else {
+ ASSERT_EQ(err, strlen(value) + 1, "getxattr_return");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry");
+ }
out:
close(fd);
@@ -65,6 +73,127 @@ out:
remove(testfile);
}
+/* xattr value we will set to security.bpf.foo */
+static const char value_foo[] = "hello";
+
+static void read_and_validate_foo(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo");
+ ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo");
+}
+
+static void set_foo(struct test_set_remove_xattr *skel)
+{
+ ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0),
+ "setxattr foo");
+}
+
+static void validate_bar_match(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar");
+ ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0,
+ "strncmp value_bar");
+}
+
+static void validate_bar_removed(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_LT(err, 0, "getxattr size bar should fail");
+}
+
+static void test_set_remove_xattr(void)
+{
+ struct test_set_remove_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ skel = test_set_remove_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load"))
+ return;
+
+ /* Set security.bpf.foo to "hello" */
+ err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_set_remove_xattr__attach(skel);
+ if (!ASSERT_OK(err, "test_set_remove_xattr__attach"))
+ goto out;
+
+ /* First, test not _locked version of the kfuncs with getxattr. */
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ read_and_validate_foo(skel);
+ validate_bar_match(skel);
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ read_and_validate_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail");
+
+ /* Second, test _locked version of the kfuncs, with setxattr */
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ set_foo(skel);
+ validate_bar_match(skel);
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ set_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success,
+ "locked_set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success,
+ "locked_remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail,
+ "locked_set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail,
+ "locked_remove_security_selinux_fail");
+
+out:
+ close(fd);
+ test_set_remove_xattr__destroy(skel);
+ remove(testfile);
+}
+
#ifndef SHA256_DIGEST_SIZE
#define SHA256_DIGEST_SIZE 32
#endif
@@ -141,8 +270,21 @@ out:
void test_fs_kfuncs(void)
{
- if (test__start_subtest("xattr"))
- test_xattr();
+ /* Matches xattr_names in progs/test_get_xattr.c */
+ if (test__start_subtest("user_xattr"))
+ test_get_xattr("user.kfuncs", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr"))
+ test_get_xattr("security.bpf.xxx", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr_error"))
+ test_get_xattr("security.bpf", "hello", false);
+
+ if (test__start_subtest("security_selinux_xattr_error"))
+ test_get_xattr("security.selinux", "hello", false);
+
+ if (test__start_subtest("set_remove_xattr"))
+ test_set_remove_xattr();
if (test__start_subtest("fsverity"))
test_fsverity();
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index 2bc85f4814f4..d0b405eb2966 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -15,17 +15,17 @@ struct htab_update_ctx {
static void test_reenter_update(void)
{
struct htab_update *skel;
- unsigned int key, value;
+ void *value = NULL;
+ unsigned int key, value_size;
int err;
skel = htab_update__open();
if (!ASSERT_OK_PTR(skel, "htab_update__open"))
return;
- /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
- bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+ bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
err = htab_update__load(skel);
- if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+ if (!ASSERT_TRUE(!err, "htab_update__load") || err)
goto out;
skel->bss->pid = getpid();
@@ -33,14 +33,33 @@ static void test_reenter_update(void)
if (!ASSERT_OK(err, "htab_update__attach"))
goto out;
- /* Will trigger the reentrancy of bpf_map_update_elem() */
+ value_size = bpf_map__value_size(skel->maps.htab);
+
+ value = calloc(1, value_size);
+ if (!ASSERT_OK_PTR(value, "calloc value"))
+ goto out;
+ /*
+ * First update: plain insert. This should NOT trigger the re-entrancy
+ * path, because there is no old element to free yet.
+ */
key = 0;
- value = 0;
- err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
- if (!ASSERT_OK(err, "add element"))
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "first update (insert)"))
+ goto out;
+
+ /*
+ * Second update: replace existing element with same key and trigger
+ * the reentrancy of bpf_map_update_elem().
+ * check_and_free_fields() calls bpf_obj_free_fields() on the old
+ * value, which is where fentry program runs and performs a nested
+ * bpf_map_update_elem(), triggering -EDEADLK.
+ */
+ memset(value, 0, value_size);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "second update (replace)"))
goto out;
- ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+ ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
out:
htab_update__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
new file mode 100644
index 000000000000..97b00c7efe94
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Microsoft */
+#include <test_progs.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "test_kernel_flag.skel.h"
+
+void test_kernel_flag(void)
+{
+ struct test_kernel_flag *lsm_skel;
+ struct kfunc_call_test *skel = NULL;
+ struct kfunc_call_test_lskel *lskel = NULL;
+ int ret;
+
+ lsm_skel = test_kernel_flag__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
+ return;
+
+ lsm_skel->bss->monitored_tid = sys_gettid();
+
+ ret = test_kernel_flag__attach(lsm_skel);
+ if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
+ goto close_prog;
+
+ /* Test with skel. This should pass the gatekeeper */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_prog;
+
+ /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_ERR_PTR(lskel, "lskel"))
+ goto close_prog;
+
+close_prog:
+ if (skel)
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+
+ lsm_skel->bss->monitored_tid = 0;
+ test_kernel_flag__destroy(lsm_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 8e13a3416a21..6e35e13c2022 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
if (!ASSERT_OK(ret, "kmem_cache_lookup"))
break;
- ASSERT_STREQ(r.name, name, "kmem_cache_name");
+ ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
+ "kmem_cache_name");
ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
seen++;
@@ -104,7 +105,7 @@ void test_kmem_cache_iter(void)
goto destroy;
memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf) > 0)) {
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
/* Read out all contents */
printf("%s", buf);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e19ef509ebf8..6cfaa978bc9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -7,6 +7,7 @@
#include "kprobe_multi_session.skel.h"
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -422,220 +423,6 @@ static void test_unique_match(void)
kprobe_multi__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
-{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
-
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
-
- *addrsp = addrs;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
-}
-
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
long attach_start_ns, attach_end_ns;
@@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -753,6 +540,30 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -792,5 +603,7 @@ void test_kprobe_multi_test(void)
test_session_cookie_skel_api();
if (test__start_subtest("unique_match"))
test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 77d07e0a4a55..14c5a7ef0e87 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -7,6 +7,7 @@
#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
static char log_buf[1024 * 1024];
@@ -71,7 +72,7 @@ static struct {
{ "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
{ "obj_new_acq", "Unreleased reference id=" },
{ "use_after_drop", "invalid mem access 'scalar'" },
- { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" },
+ { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" },
{ "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_read_head", "direct access to bpf_list_head is disallowed" },
@@ -805,3 +806,8 @@ void test_linked_list(void)
test_linked_list_success(LIST_IN_LIST, true);
test_linked_list_success(TEST_ALL, false);
}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+ char path[4096];
+
+ /* CI will set KBUILD_OUTPUT */
+ snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko",
+ getenv("KBUILD_OUTPUT") ? : "../../../..");
+
+ return load_module(path, env_verbosity > VERBOSE_NONE);
+}
+
+static void unload_livepatch(void)
+{
+ /* Disable the livepatch before unloading the module */
+ system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+ unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+static void read_proc_cmdline(void)
+{
+ char buf[4096];
+ int fd, ret;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+ return;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+ goto out;
+
+ ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp");
+
+out:
+ close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+ struct livepatch_trampoline *skel = NULL;
+ int err;
+
+ skel = livepatch_trampoline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
+ if (!fexit_first) {
+ /* fentry program is loaded first by default */
+ err = livepatch_trampoline__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+ } else {
+ /* Manually load fexit program first. */
+ skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+ goto out;
+
+ skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+ goto out;
+ }
+
+ read_proc_cmdline();
+
+ ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+ ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+ livepatch_trampoline__destroy(skel);
+}
+
+void test_livepatch_trampoline(void)
+{
+ int retry_cnt = 0;
+
+retry:
+ if (load_livepatch()) {
+ if (retry_cnt) {
+ ASSERT_OK(1, "load_livepatch");
+ goto out;
+ }
+ /*
+ * Something else (previous run of the same test?) loaded
+ * the KLP module. Unload the KLP module and retry.
+ */
+ unload_livepatch();
+ retry_cnt++;
+ goto retry;
+ }
+
+ if (test__start_subtest("fentry_first"))
+ __test_livepatch_trampoline(false);
+
+ if (test__start_subtest("fexit_first"))
+ __test_livepatch_trampoline(true);
+out:
+ unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index 169ce689b97c..d6f14a232002 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -7,6 +7,10 @@
#include "test_log_buf.skel.h"
#include "bpf_util.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
static size_t libbpf_log_pos;
static char libbpf_log_buf[1024 * 1024];
static bool libbpf_log_error;
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index fb1eb8c67361..ccec0fcdabc1 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -5,7 +5,6 @@
#include <time.h>
#include <net/if.h>
-#include <linux/if_tun.h>
#include <linux/icmp.h>
#include "test_progs.h"
@@ -37,34 +36,6 @@ static inline int netns_delete(void)
return system("ip netns del " NETNS ">/dev/null 2>&1");
}
-static int open_tuntap(const char *dev_name, bool need_mac)
-{
- int err = 0;
- struct ifreq ifr;
- int fd = open("/dev/net/tun", O_RDWR);
-
- if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)"))
- return -1;
-
- ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
- strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
-
- err = ioctl(fd, TUNSETIFF, &ifr);
- if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
- close(fd);
- return -1;
- }
-
- err = fcntl(fd, F_SETFL, O_NONBLOCK);
- if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
#define ICMP_PAYLOAD_SIZE 100
/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
new file mode 100644
index 000000000000..b6391af5f6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <netinet/in.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define BPF_FILE "test_lwt_ip_encap.bpf.o"
+
+#define NETNS_NAME_SIZE 32
+#define NETNS_BASE "ns-lwt-ip-encap"
+
+#define IP4_ADDR_1 "172.16.1.100"
+#define IP4_ADDR_2 "172.16.2.100"
+#define IP4_ADDR_3 "172.16.3.100"
+#define IP4_ADDR_4 "172.16.4.100"
+#define IP4_ADDR_5 "172.16.5.100"
+#define IP4_ADDR_6 "172.16.6.100"
+#define IP4_ADDR_7 "172.16.7.100"
+#define IP4_ADDR_8 "172.16.8.100"
+#define IP4_ADDR_GRE "172.16.16.100"
+
+#define IP4_ADDR_SRC IP4_ADDR_1
+#define IP4_ADDR_DST IP4_ADDR_4
+
+#define IP6_ADDR_1 "fb01::1"
+#define IP6_ADDR_2 "fb02::1"
+#define IP6_ADDR_3 "fb03::1"
+#define IP6_ADDR_4 "fb04::1"
+#define IP6_ADDR_5 "fb05::1"
+#define IP6_ADDR_6 "fb06::1"
+#define IP6_ADDR_7 "fb07::1"
+#define IP6_ADDR_8 "fb08::1"
+#define IP6_ADDR_GRE "fb10::1"
+
+#define IP6_ADDR_SRC IP6_ADDR_1
+#define IP6_ADDR_DST IP6_ADDR_4
+
+/* Setup/topology:
+ *
+ * NS1 NS2 NS3
+ * veth1 <---> veth2 veth3 <---> veth4 (the top route)
+ * veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
+ *
+ * Each vethN gets IP[4|6]_ADDR_N address.
+ *
+ * IP*_ADDR_SRC = IP*_ADDR_1
+ * IP*_ADDR_DST = IP*_ADDR_4
+ *
+ * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST.
+ *
+ * By default, routes are configured to allow packets to go
+ * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route).
+ *
+ * A GRE device is installed in NS3 with IP*_ADDR_GRE, and
+ * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8
+ * (the bottom route).
+ *
+ * Tests:
+ *
+ * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping
+ * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE.
+ *
+ * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ *
+ * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ */
+
+static int create_ns(char *name, size_t name_sz)
+{
+ if (!name)
+ goto fail;
+
+ if (!ASSERT_OK(append_tid(name, name_sz), "append TID"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", name);
+
+ /* rp_filter gets confused by what these tests are doing, so disable it */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name);
+ /* Disable IPv6 DAD because it sometimes takes too long and fails tests */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_top_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1);
+ SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2);
+ SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3);
+ SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4);
+
+ SYS(fail, "ip -n %s link set dev veth1 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth2 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth3 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth4 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5);
+ SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6);
+ SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7);
+ SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8);
+
+ SYS(fail, "ip -n %s link set dev veth5 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth6 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth7 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth8 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int configure_vrf(const char *ns1, const char *ns2)
+{
+ if (!ns1 || !ns2)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns1);
+ SYS(fail, "ip -n %s link set red up", ns1);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s link set veth1 vrf red", ns1);
+ SYS(fail, "ip -n %s link set veth5 vrf red", ns1);
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns2);
+ SYS(fail, "ip -n %s link set red up", ns2);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s link set veth2 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth3 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth6 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth7 vrf red", ns2);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_ns1(const char *ns1, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns1 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns2(const char *ns2, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns2 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns2);
+ if (!ASSERT_OK_PTR(nstoken, "open ns2"))
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2);
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf);
+ SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf);
+ SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns3(const char *ns3)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns3)
+ goto fail;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7);
+
+ /* Configure IPv4 GRE device */
+ SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255",
+ IP4_ADDR_1, IP4_ADDR_GRE);
+ SYS(fail, "ip link set gre_dev up");
+ SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE);
+
+ /* Configure IPv6 GRE device */
+ SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255",
+ IP6_ADDR_1, IP6_ADDR_GRE);
+ SYS(fail, "ip link set gre6_dev up");
+ SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf)
+{
+ if (!ns1 || !ns2 || !ns3 || !vrf)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3);
+ SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3);
+
+ if (vrf[0]) {
+ if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf"))
+ goto fail;
+ }
+ if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes"))
+ goto fail;
+
+ /* Link bottom route to the GRE tunnels */
+ SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s",
+ ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s",
+ ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s",
+ ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s",
+ ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define GSO_SIZE 5000
+#define GSO_TCP_PORT 9000
+/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */
+static int test_gso_fix(const char *ns1, const char *ns3, int family)
+{
+ const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST;
+ char gso_packet[GSO_SIZE] = {};
+ struct nstoken *nstoken = NULL;
+ int sfd, cfd, afd;
+ ssize_t bytes;
+ int ret = -1;
+
+ if (!ns1 || !ns3)
+ return ret;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ return ret;
+
+ sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "connect to server"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ afd = accept(sfd, NULL, NULL);
+ if (!ASSERT_OK_FD(afd, "accept"))
+ goto close_client;
+
+ /* Send a packet larger than MTU */
+ bytes = send(cfd, gso_packet, GSO_SIZE, 0);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet"))
+ goto close_accept;
+
+ /* Verify we received all expected bytes */
+ bytes = read(afd, gso_packet, GSO_SIZE);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet"))
+ goto close_accept;
+
+ ret = 0;
+
+close_accept:
+ close(afd);
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+ return ret;
+}
+
+static int check_ping_ok(const char *ns1)
+{
+ SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST);
+ SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST);
+ return 0;
+fail:
+ return -1;
+}
+
+static int check_ping_fails(const char *ns1)
+{
+ int ret;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ return 0;
+}
+
+#define EGRESS true
+#define INGRESS false
+#define IPV4_ENCAP true
+#define IPV6_ENCAP false
+static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf)
+{
+ char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-";
+ char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-";
+ char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-";
+ char *sec = ipv4_encap ? "encap_gre" : "encap_gre6";
+
+ if (!vrf)
+ return;
+
+ if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3"))
+ goto out;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network"))
+ goto out;
+
+ /* By default, pings work */
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Remove NS2->DST routes, ping fails */
+ SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf);
+ SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf);
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Install replacement routes (LWT/eBPF), pings succeed */
+ if (egress) {
+ SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ } else {
+ SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ }
+
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Skip GSO tests with VRF: VRF routing needs properly assigned
+ * source IP/device, which is easy to do with ping but hard with TCP.
+ */
+ if (egress && !vrf[0]) {
+ if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO"))
+ goto out;
+ }
+
+ /* Negative test: remove routes to GRE devices: ping fails */
+ if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev"))
+ goto out;
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Another negative test */
+ if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf),
+ "add unreachable routes"))
+ goto out;
+ ASSERT_OK(check_ping_fails(ns1), "ping expected fail");
+
+out:
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+ SYS_NOFAIL("ip netns del %s", ns3);
+}
+
+void test_lwt_ip_encap_vrf_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_vrf_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "");
+}
+
+void test_lwt_ip_encap_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
new file mode 100644
index 000000000000..3bc730b7c7fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Connects 6 network namespaces through veths.
+ * Each NS may have different IPv6 global scope addresses :
+ *
+ * NS1 NS2 NS3 NS4 NS5 NS6
+ * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo
+ * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6
+ * fd00 ::4
+ * fc42 ::1
+ *
+ * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+ * IPv6 header with a Segment Routing Header, with segments :
+ * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+ *
+ * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+ * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+ * - fd00::2 : remove the TLV, change the flags, add a tag
+ * - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+ *
+ * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+ * Each End.BPF action will validate the operations applied on the SRH by the
+ * previous BPF program in the chain, otherwise the packet is dropped.
+ *
+ * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+ * datagram can be read on NS6 when binding to fb00::6.
+ */
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define NETNS_BASE "lwt-seg6local-"
+#define BPF_FILE "test_lwt_seg6local.bpf.o"
+
+static void cleanup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns);
+}
+
+static int setup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS(fail, "ip netns add %s%d", NETNS_BASE, ns);
+
+ SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++) {
+ int local_id = ns * 2 - 1;
+ int peer_id = ns * 2;
+ int next_ns = ns + 1;
+
+ SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d",
+ NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns);
+
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id);
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id);
+
+ /* All link scope addresses to veths */
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, ns, local_id, peer_id, local_id);
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, next_ns, peer_id, local_id, peer_id);
+ }
+
+
+ SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE);
+
+ SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail,
+ "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE);
+
+ SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++)
+ SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1",
+ NETNS_BASE, ns);
+
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define SERVER_PORT 7330
+#define CLIENT_PORT 2121
+void test_lwt_seg6local(void)
+{
+ struct sockaddr_in6 server_addr = {};
+ const char *ns1 = NETNS_BASE "1";
+ const char *ns6 = NETNS_BASE "6";
+ struct nstoken *nstoken = NULL;
+ const char *foobar = "foobar";
+ ssize_t bytes;
+ int sfd, cfd;
+ char buf[7];
+
+ if (!ASSERT_OK(setup(), "setup"))
+ goto out;
+
+ nstoken = open_netns(ns6);
+ if (!ASSERT_OK_PTR(nstoken, "open ns6"))
+ goto out;
+
+ sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "start client"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Send a packet larger than MTU */
+ server_addr.sin6_family = AF_INET6;
+ server_addr.sin6_port = htons(SERVER_PORT);
+ if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1,
+ "build target addr"))
+ goto close_client;
+
+ bytes = sendto(cfd, foobar, sizeof(foobar), 0,
+ (struct sockaddr *)&server_addr, sizeof(server_addr));
+ if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet"))
+ goto close_client;
+
+ /* Verify we received all expected bytes */
+ bytes = read(sfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet"))
+ goto close_client;
+ ASSERT_STREQ(buf, foobar, "check udp packet");
+
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+out:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__make_exclusive"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n");
+out:
+ map_excl__destroy(skel);
+
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..40d4f687bd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "mem_rdonly_untrusted.skel.h"
+
+void test_mem_rdonly_untrusted(void)
+{
+ RUN_TESTS(mem_rdonly_untrusted);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 6d391d95f96e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -90,7 +90,7 @@ void test_module_attach(void)
test_module_attach__detach(skel);
- /* attach fentry/fexit and make sure it get's module reference */
+ /* attach fentry/fexit and make sure it gets module reference */
link = bpf_program__attach(skel->progs.handle_fentry);
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index f8eb7f9d4fd2..8fade8bdc451 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,11 +6,13 @@
#include <netinet/in.h>
#include <test_progs.h>
#include <unistd.h>
+#include <errno.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
#define NS_TEST "mptcp_ns"
#define ADDR_1 "10.0.1.1"
@@ -436,6 +438,142 @@ close_cgroup:
close(cgroup_fd);
}
+/* Test sockmap on MPTCP server handling non-mp-capable clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+ int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+ char snd[9] = "123456789";
+ char rcv[10];
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client without MPTCP enabled */
+ client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd1 = accept(listen_fd, NULL, 0);
+ skel->bss->sk_index = 1;
+ client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd2 = accept(listen_fd, NULL, 0);
+ /* test normal redirect behavior: data sent by client_fd1 can be
+ * received by client_fd2
+ */
+ skel->bss->redirect_idx = 1;
+ sent = send(client_fd1, snd, sizeof(snd), 0);
+ if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+ goto end;
+
+ /* try to recv more bytes to avoid truncation check */
+ recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+ if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+ goto end;
+
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (client_fd2 >= 0)
+ close(client_fd2);
+ if (server_fd1 >= 0)
+ close(server_fd1);
+ if (server_fd2 >= 0)
+ close(server_fd2);
+ close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+ int err, zero = 0;
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client with MPTCP enabled */
+ client_fd1 = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+ goto end;
+
+ /* bpf_sock_map_update() called from sockops should reject MPTCP sk */
+ if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+ goto end;
+
+ server_fd = accept(listen_fd, NULL, 0);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &server_fd, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+ goto end;
+
+ /* MPTCP client should also be disallowed */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &client_fd1, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+ goto end;
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (server_fd >= 0)
+ close(server_fd);
+ close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+ struct mptcp_sockmap *skel;
+ struct netns_obj *netns;
+ int cgroup_fd, err;
+
+ cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+ return;
+
+ skel = mptcp_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+ goto close_cgroup;
+
+ skel->links.mptcp_sockmap_inject =
+ bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+ goto skel_destroy;
+
+ err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+ bpf_map__fd(skel->maps.sock_map),
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ test_sockmap_with_mptcp_fallback(skel);
+ test_sockmap_reject_mptcp(skel);
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_sockmap__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
void test_mptcp(void)
{
if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
test_mptcpify();
if (test__start_subtest("subflow"))
test_subflow();
+ if (test__start_subtest("sockmap"))
+ test_mptcp_sockmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
new file mode 100644
index 000000000000..dbfd87499b6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
@@ -0,0 +1,239 @@
+#include <linux/net_tstamp.h>
+#include <sys/time.h>
+#include <linux/errqueue.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "net_timestamping.skel.h"
+
+#define CG_NAME "/net-timestamping-test"
+#define NSEC_PER_SEC 1000000000LL
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct net_timestamping *skel;
+static const int cfg_payload_len = 30;
+static struct timespec usr_ts;
+static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */
+int SK_TS_SCHED;
+int SK_TS_TXSW;
+int SK_TS_ACK;
+
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+static void validate_key(int tskey, int tstype)
+{
+ static int expected_tskey = -1;
+
+ if (tstype == SCM_TSTAMP_SCHED)
+ expected_tskey = cfg_payload_len - 1;
+
+ ASSERT_EQ(expected_tskey, tskey, "tskey mismatch");
+
+ expected_tskey = tskey;
+}
+
+static void validate_timestamp(struct timespec *cur, struct timespec *prev)
+{
+ int64_t cur_ns, prev_ns;
+
+ cur_ns = timespec_to_ns64(cur);
+ prev_ns = timespec_to_ns64(prev);
+
+ ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency");
+}
+
+static void test_socket_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey)
+{
+ static struct timespec prev_ts;
+
+ validate_key(tskey, tstype);
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ validate_timestamp(&tss->ts[0], &usr_ts);
+ SK_TS_SCHED += 1;
+ break;
+ case SCM_TSTAMP_SND:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_TXSW += 1;
+ break;
+ case SCM_TSTAMP_ACK:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_ACK += 1;
+ break;
+ }
+
+ prev_ts = tss->ts[0];
+}
+
+static void test_recv_errmsg_cmsg(struct msghdr *msg)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *)CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET &&
+ cm->cmsg_type == PACKET_TX_TIMESTAMP)) {
+ serr = (void *)CMSG_DATA(cm);
+ ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING,
+ "cmsg type");
+ }
+
+ if (serr && tss)
+ test_socket_timestamp(tss, serr->ee_info,
+ serr->ee_data);
+ }
+}
+
+static bool socket_recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ char data[cfg_payload_len];
+ static struct msghdr msg;
+ struct iovec entry;
+ int n = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ n = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (n == -1)
+ ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE");
+
+ if (n >= 0)
+ test_recv_errmsg_cmsg(&msg);
+
+ return n == -1;
+}
+
+static void test_socket_timestamping(int fd)
+{
+ while (!socket_recv_errmsg(fd));
+
+ ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED");
+ ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND");
+ ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK");
+
+ SK_TS_SCHED = 0;
+ SK_TS_TXSW = 0;
+ SK_TS_ACK = 0;
+}
+
+static void test_tcp(int family, bool enable_socket_timestamping)
+{
+ struct net_timestamping__bss *bss;
+ char buf[cfg_payload_len];
+ int sfd = -1, cfd = -1;
+ unsigned int sock_opt;
+ struct netns_obj *ns;
+ int cg_fd;
+ int ret;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
+ return;
+
+ ns = netns_new("net_timestamping_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create ns"))
+ goto out;
+
+ skel = net_timestamping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ goto out;
+
+ if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel"))
+ goto out;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto out;
+
+ bss = skel->bss;
+ memset(bss, 0, sizeof(*bss));
+
+ skel->bss->monitored_pid = getpid();
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_OK_FD(sfd, "start_server"))
+ goto out;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_OK_FD(cfd, "connect_to_fd_server"))
+ goto out;
+
+ if (enable_socket_timestamping) {
+ sock_opt = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK;
+ ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &sock_opt, sizeof(sock_opt));
+ if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING"))
+ goto out;
+
+ ret = clock_gettime(CLOCK_REALTIME, &usr_ts);
+ if (!ASSERT_OK(ret, "get user time"))
+ goto out;
+ }
+
+ ret = write(cfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(ret, sizeof(buf), "send to server"))
+ goto out;
+
+ if (enable_socket_timestamping)
+ test_socket_timestamping(cfd);
+
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
+ ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
+ ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
+ ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
+
+out:
+ if (sfd >= 0)
+ close(sfd);
+ if (cfd >= 0)
+ close(cfd);
+ net_timestamping__destroy(skel);
+ netns_free(ns);
+ close(cg_fd);
+}
+
+void test_net_timestamping(void)
+{
+ if (test__start_subtest("INET4: bpf timestamping"))
+ test_tcp(AF_INET, false);
+ if (test__start_subtest("INET4: bpf and socket timestamping"))
+ test_tcp(AF_INET, true);
+ if (test__start_subtest("INET6: bpf timestamping"))
+ test_tcp(AF_INET6, false);
+ if (test__start_subtest("INET6: bpf and socket timestamping"))
+ test_tcp(AF_INET6, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
index ac3c3c097c0e..e00cd34586dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -33,20 +33,25 @@ void test_netns_cookie(void)
skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
skel->progs.get_netns_cookie_sockops, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops"))
goto done;
verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_sk_msg"))
goto done;
tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx);
err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_tcx"))
goto done;
+ skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb"))
+ goto cleanup_tc;
+
server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
goto cleanup_tc;
@@ -69,16 +74,18 @@ void test_netns_cookie(void)
if (!ASSERT_OK(err, "getsockopt"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops");
err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
&client_fd, &val);
if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg");
+ ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx");
+ ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx");
+ ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb");
+ ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb");
cleanup_tc:
err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 761ce24bce38..99c953f2be21 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
return;
}
-static void test_in_netns(int (*fn)(void *), void *arg)
-{
- struct nstoken *nstoken = NULL;
-
- SYS(cleanup, "ip netns add ns_current_pid_tgid");
- SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
-
- nstoken = open_netns("ns_current_pid_tgid");
- if (!ASSERT_OK_PTR(nstoken, "open_netns"))
- goto cleanup;
-
- test_ns_current_pid_tgid_new_ns(fn, arg);
-
-cleanup:
- if (nstoken)
- close_netns(nstoken);
- SYS_NOFAIL("ip netns del ns_current_pid_tgid");
-}
-
/* TODO: use a different tracepoint */
-void serial_test_ns_current_pid_tgid(void)
+void serial_test_current_pid_tgid(void)
{
if (test__start_subtest("root_ns_tp"))
test_current_pid_tgid_tp(NULL);
if (test__start_subtest("new_ns_tp"))
test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
- if (test__start_subtest("new_ns_cgrp")) {
- int cgroup_fd = -1;
-
- cgroup_fd = test__join_cgroup("/sock_addr");
- if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
- test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
- close(cgroup_fd);
- }
+}
+
+void test_ns_current_pid_tgid_cgrp(void)
+{
+ int cgroup_fd = test__join_cgroup("/sock_addr");
+
+ if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) {
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
}
- if (test__start_subtest("new_ns_sk_msg"))
- test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
+
+void test_ns_current_pid_tgid_sk_msg(void)
+{
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL);
+}
+
+
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index bc24f83339d6..0a7ef770c487 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
int pbe_size = sizeof(struct perf_branch_entry);
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
int written_stack = skel->bss->written_stack_out;
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
goto out_destroy;
- /* spin the loop for a while (random high number) */
- for (i = 0; i < 1000000; ++i)
+
+ /* Spin the loop for a while by using a high iteration count, and by
+ * checking whether the specific run count marker has been explicitly
+ * incremented at least once by the backing perf_event BPF program.
+ */
+ for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
++j;
test_perf_branches__detach(skel);
@@ -116,11 +128,11 @@ static void test_perf_branches_hw(void)
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
/*
- * Some setups don't support branch records (virtual machines, !x86),
- * so skip test in this case.
+ * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen
+ * 3 which only supports BRS), so skip test in this case.
*/
if (pfd < 0) {
- if (errno == ENOENT || errno == EOPNOTSUPP) {
+ if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again, this time the re-use should fail */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
new file mode 100644
index 000000000000..16bd74be3dbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_pinning_htab.skel.h"
+
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+ struct test_pinning_htab *skel;
+ struct bpf_map *map;
+ int err;
+
+ skel = test_pinning_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+ goto out;
+
+ err = bpf_map__pin(map, pin_path);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = bpf_map__unpin(map, pin_path);
+ ASSERT_OK(err, "bpf_map__unpin");
+out:
+ test_pinning_htab__destroy(skel);
+}
+
+void test_pinning_htab(void)
+{
+ if (test__start_subtest("timer_prealloc"))
+ unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+ if (test__start_subtest("timer_no_prealloc"))
+ unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c
new file mode 100644
index 000000000000..fb5cdad97116
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prepare.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "prepare.skel.h"
+
+static bool check_prepared(struct bpf_object *obj)
+{
+ bool is_prepared = true;
+ const struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__fd(map) < 0)
+ is_prepared = false;
+ }
+
+ return is_prepared;
+}
+
+static void test_prepare_no_load(void)
+{
+ struct prepare *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+static void test_prepare_load(void)
+{
+ struct prepare *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+ err = prepare__load(skel);
+ if (!ASSERT_OK(err, "prepare__load"))
+ goto cleanup;
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.program);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+void test_prepare(void)
+{
+ if (test__start_subtest("prepare_load"))
+ test_prepare_load();
+ if (test__start_subtest("prepare_no_load"))
+ test_prepare_no_load();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
index 509883e6823a..5d3c00a08a88 100644
--- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
@@ -6,6 +6,7 @@
#include "epilogue_tailcall.skel.h"
#include "pro_epilogue_goto_start.skel.h"
#include "epilogue_exit.skel.h"
+#include "pro_epilogue_with_kfunc.skel.h"
struct st_ops_args {
__u64 a;
@@ -55,6 +56,7 @@ void test_pro_epilogue(void)
RUN_TESTS(pro_epilogue);
RUN_TESTS(pro_epilogue_goto_start);
RUN_TESTS(epilogue_exit);
+ RUN_TESTS(pro_epilogue_with_kfunc);
if (test__start_subtest("tailcall"))
test_tailcall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
index 14f2796076e0..7607cfc2408c 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -54,3 +54,128 @@ void test_prog_tests_framework(void)
return;
clear_test_state(state);
}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index 9818f06c97c5..d8f3d7a45fe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
RUN_TESTS(rbtree_fail);
}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index ebe0c12b5536..246eb259c08a 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -28,6 +28,7 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,9 +79,13 @@ static const char * const inproper_region_tests[] = {
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
- "nested_rcu_region",
+ "nested_rcu_region_unbalanced_1",
+ "nested_rcu_region_unbalanced_2",
"rcu_read_lock_global_subprog_lock",
"rcu_read_lock_global_subprog_unlock",
+ "rcu_read_lock_sleepable_helper_global_subprog",
+ "rcu_read_lock_sleepable_kfunc_global_subprog",
+ "rcu_read_lock_sleepable_global_subprog_indirect",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
index c7b9ba8b1d06..a8d1eaa67020 100644
--- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -24,6 +24,7 @@ struct read_ret_desc {
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
{ .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
};
void test_read_vsyscall(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
index 8100509e561b..0ffa01d54ce2 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -149,3 +149,70 @@ close_prog:
fentry_recursive_target__destroy(target_skel);
fentry_recursive__destroy(tracing_skel);
}
+
+static void *fentry_target_test_run(void *arg)
+{
+ for (;;) {
+ int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ if (prog_fd == -1)
+ break;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "fentry_target test_run"))
+ break;
+ }
+
+ return NULL;
+}
+
+void test_fentry_attach_stress(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, i, tgt_prog_fd;
+ pthread_t thread;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel,
+ "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = pthread_create(&thread, NULL,
+ fentry_target_test_run, &tgt_prog_fd);
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ for (i = 0; i < 1000; i++) {
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto stop_thread;
+
+ prog = tracing_skel->progs.recursive_attach;
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd,
+ "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto stop_thread;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto stop_thread;
+
+ err = fentry_recursive__attach(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto stop_thread;
+
+ fentry_recursive__destroy(tracing_skel);
+ tracing_skel = NULL;
+ }
+
+stop_thread:
+ __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST);
+ err = pthread_join(thread, NULL);
+ ASSERT_OK(err, "pthread_join");
+close_prog:
+ fentry_recursive__destroy(tracing_skel);
+ fentry_recursive_target__destroy(target_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d6bd5e16e637..d2c0542716a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
refcounted_kptr__destroy(skel);
}
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+ struct refcounted_kptr *skel;
+ int cpu_nr, fd, err, key = 0;
+ struct bpf_map *map;
+ size_t values_sz;
+ u64 *values;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ cpu_nr = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ values = calloc(cpu_nr, sizeof(u64));
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+ free(values);
+ return;
+ }
+
+ values_sz = cpu_nr * sizeof(u64);
+ memset(values, 0, values_sz);
+
+ map = skel->maps.percpu_hash;
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+ goto out;
+
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+ refcounted_kptr__destroy(skel);
+ free(values);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 39d42271cc46..d93a0c7b1786 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t,
return range_improve(x_t, x, x_swap);
}
+ if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) {
+ if (x_t == S64 && x.a > x.b) {
+ if (x.b < y.a && x.a <= y.b)
+ return range(x_t, x.a, y.b);
+ if (x.a > y.b && x.b >= y.a)
+ return range(x_t, y.a, x.b);
+ } else if (x_t == U64 && y.a > y.b) {
+ if (y.b < x.a && y.a <= x.b)
+ return range(x_t, y.a, x.b);
+ if (y.a > x.b && y.b >= x.a)
+ return range(x_t, x.a, y.b);
+ }
+ }
+
/* otherwise, plain range cast and intersection works */
return range_improve(x_t, x, y_cast);
}
@@ -609,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a, x.b);
*newy = range(t, y.a + 1, y.b);
} else if (x.a == x.b && x.b == y.b) {
- /* X is a constant matching rigth side of Y */
+ /* X is a constant matching right side of Y */
*newx = range(t, x.a, x.b);
*newy = range(t, y.a, y.b - 1);
} else if (y.a == y.b && x.a == y.a) {
@@ -617,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a + 1, x.b);
*newy = range(t, y.a, y.b);
} else if (y.a == y.b && x.b == y.b) {
- /* Y is a constant matching rigth side of X */
+ /* Y is a constant matching right side of X */
*newx = range(t, x.a, x.b - 1);
*newy = range(t, y.a, y.b);
} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
new file mode 100644
index 000000000000..f0a8c828f8f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+#include "res_spin_lock.skel.h"
+#include "res_spin_lock_fail.skel.h"
+
+void test_res_spin_lock_failure(void)
+{
+ RUN_TESTS(res_spin_lock_fail);
+}
+
+static volatile int skip;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ while (!READ_ONCE(skip)) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval) {
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ break;
+ }
+ }
+ pthread_exit(arg);
+}
+
+void test_res_spin_lock_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct res_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = res_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load"))
+ return;
+ /* AA deadlock */
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ /* Multi-threaded ABBA deadlock. */
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB);
+ for (i = 0; i < 16; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ topts.retval = 0;
+ topts.repeat = 1000;
+ int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA);
+ while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) {
+ err = bpf_prog_test_run_opts(fd, &topts);
+ }
+
+ WRITE_ONCE(skip, true);
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+
+ ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err");
+ ASSERT_OK(err, "err");
+ ASSERT_EQ(topts.retval, -EDEADLK, "timeout");
+end:
+ res_spin_lock__destroy(skel);
+ return;
+}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+ /*
+ * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
+ * other cases (ABBA, ABBCCA).
+ */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..64520684d2cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
#define EDONE 7777
@@ -97,7 +98,7 @@ static void ringbuf_write_subtest(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- skel->maps.ringbuf.max_entries = 0x4000;
+ skel->maps.ringbuf.max_entries = 0x40000;
err = test_ringbuf_write_lskel__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -108,7 +109,7 @@ static void ringbuf_write_subtest(void)
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
goto cleanup;
- *mmap_ptr = 0x3000;
+ *mmap_ptr = 0x30000;
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
skel->bss->pid = getpid();
@@ -497,6 +498,68 @@ cleanup:
test_ringbuf_map_key_lskel__destroy(skel_map_key);
}
+static void ringbuf_overwrite_mode_subtest(void)
+{
+ unsigned long size, len1, len2, len3, len4, len5;
+ unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
+ struct test_ringbuf_overwrite_lskel *skel;
+ int page_size = getpagesize();
+ int err;
+
+ skel = test_ringbuf_overwrite_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ size = page_size;
+ len1 = page_size / 2;
+ len2 = page_size / 4;
+ len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
+ len4 = len3 - 8;
+ len5 = len3; /* retry with len3 */
+
+ skel->maps.ringbuf.max_entries = size;
+ skel->rodata->LEN1 = len1;
+ skel->rodata->LEN2 = len2;
+ skel->rodata->LEN3 = len3;
+ skel->rodata->LEN4 = len4;
+ skel->rodata->LEN5 = len5;
+
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_overwrite_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_ringbuf_overwrite_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+ ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+ ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+ ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+ ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+ ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
+
+ expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size");
+
+ ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
+
+ expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos");
+
+ expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos");
+
+ test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+ test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
@@ -507,4 +570,6 @@ void test_ringbuf(void)
ringbuf_map_key_subtest();
if (test__start_subtest("ringbuf_write"))
ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_overwrite_mode"))
+ ringbuf_overwrite_mode_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 036d4760d2c1..3dbcc091f16c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -41,11 +41,7 @@ static struct bpf_object *obj;
static __u32 index_zero;
static int epfd;
-static union sa46 {
- struct sockaddr_in6 v6;
- struct sockaddr_in v4;
- sa_family_t family;
-} srv_sa;
+static struct sockaddr_storage srv_sa;
#define RET_IF(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
@@ -135,24 +131,24 @@ static int prepare_bpf_obj(void)
return 0;
}
-static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_loopback;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
else
- sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
}
-static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_any;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
else
- sa->v4.sin_addr.s_addr = INADDR_ANY;
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
}
static int read_int_sysctl(const char *sysctl)
@@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
int cli_fd)
{
struct data_check expected = {}, result;
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
socklen_t addrlen;
int err;
@@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
}
if (family == AF_INET6) {
+ struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
+ struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IPV6);
- expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
- !srv_sa.v6.sin6_addr.s6_addr32[2] &&
- !srv_sa.v6.sin6_addr.s6_addr32[1] &&
- !srv_sa.v6.sin6_addr.s6_addr32[0];
+ expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
+ !srv_v6->sin6_addr.s6_addr32[2] &&
+ !srv_v6->sin6_addr.s6_addr32[1] &&
+ !srv_v6->sin6_addr.s6_addr32[0];
- memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
- sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
+ sizeof(cli_v6->sin6_addr));
memcpy(&expected.skb_addrs[4], &in6addr_loopback,
sizeof(in6addr_loopback));
- expected.skb_ports[0] = cli_sa.v6.sin6_port;
- expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ expected.skb_ports[0] = cli_v6->sin6_port;
+ expected.skb_ports[1] = srv_v6->sin6_port;
} else {
+ struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
+ struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IP);
- expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+ expected.bind_inany = !srv_v4->sin_addr.s_addr;
- expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
- expected.skb_ports[0] = cli_sa.v4.sin_port;
- expected.skb_ports[1] = srv_sa.v4.sin_port;
+ expected.skb_ports[0] = cli_v4->sin_port;
+ expected.skb_ports[1] = srv_v4->sin_port;
}
if (memcmp(&result, &expected, offsetof(struct data_check,
@@ -364,16 +366,15 @@ static void check_results(void)
static int send_data(int type, sa_family_t family, void *data, size_t len,
enum result expected)
{
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
int fd, err;
fd = socket(family, type, 0);
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
- sa46_init_loopback(&cli_sa, family);
+ ss_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
-
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
RET_ERR(err != len && expected >= PASS,
@@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
socklen_t addrlen;
if (inany)
- sa46_init_inany(&srv_sa, family);
+ ss_init_inany(&srv_sa, family);
else
- sa46_init_loopback(&srv_sa, family);
+ ss_init_loopback(&srv_sa, family);
addrlen = sizeof(srv_sa);
/*
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 1702aa592c2c..7ac4d5a488aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -206,6 +206,11 @@ destroy_skel:
skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
+ /*
+ * Child is either about to exit cleanly or stuck in case of errors.
+ * Nudge it to exit.
+ */
+ kill(pid, SIGKILL);
wait(NULL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index e12255121c15..e4dac529d424 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -202,7 +202,7 @@ err_out:
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
- if (cg_fd < 0)
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
return;
if (create_netns())
diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c
new file mode 100644
index 000000000000..604a0b1423d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sha256.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+
+#define MAX_LEN 4096
+
+/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */
+void test_sha256(void)
+{
+ /*
+ * The correctness of this value was verified by running this test with
+ * libbpf_sha256() replaced by OpenSSL's SHA256().
+ */
+ static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = {
+ 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74,
+ 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04,
+ 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90,
+ 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42,
+ };
+ __u64 seed = 0;
+ __u8 *data = NULL, *digests = NULL;
+ __u8 digest_of_digests[SHA256_DIGEST_LENGTH];
+ size_t i;
+
+ data = malloc(MAX_LEN);
+ if (!ASSERT_OK_PTR(data, "malloc"))
+ goto out;
+ digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH);
+ if (!ASSERT_OK_PTR(digests, "malloc"))
+ goto out;
+
+ /* Generate MAX_LEN bytes of "random" data deterministically. */
+ for (i = 0; i < MAX_LEN; i++) {
+ seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1);
+ data[i] = (__u8)(seed >> 16);
+ }
+
+ /* Calculate a digest for each length 0 through MAX_LEN inclusively. */
+ for (i = 0; i <= MAX_LEN; i++)
+ libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]);
+
+ /* Calculate and verify the digest of all the digests. */
+ libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH,
+ digest_of_digests);
+ ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests,
+ SHA256_DIGEST_LENGTH, "digest_of_digests");
+out:
+ free(data);
+ free(digests);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 0b9bd1d6f7cc..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -37,8 +37,10 @@ configure_stack(void)
tc = popen("tc -V", "r");
if (CHECK_FAIL(!tc))
return false;
- if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
return false;
+ }
if (strstr(tc_version, ", libbpf "))
prog = "test_sk_assign_libbpf.bpf.o";
else
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES 32
+#define NR_SOCKETS 2
+#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_SINGLE 1024
+#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
+
+struct test_case {
+ char name[8];
+ int family;
+ int type;
+ int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+ long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int server, i, err = 0;
+
+ server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (!ASSERT_GE(server, 0, "start_server_str"))
+ return server;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = connect_to_fd(server, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "connect_to_fd");
+ err = sk[i];
+ break;
+ }
+
+ sk[i + 1] = accept(server, NULL, NULL);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "accept");
+ err = sk[i + 1];
+ break;
+ }
+ }
+
+ close(server);
+
+ return err;
+}
+
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int i, j, err, rcvbuf = BUF_TOTAL;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "start_server");
+ return sk[i];
+ }
+
+ sk[i + 1] = connect_to_fd(sk[i], 0);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
+ return sk[i + 1];
+ }
+
+ err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
+ if (err) {
+ ASSERT_EQ(err, 0, "connect_fd_to_fd");
+ return err;
+ }
+
+ for (j = 0; j < 2; j++) {
+ err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
+ if (err) {
+ ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static long get_memory_allocated(struct test_case *test_case,
+ bool *activated, long *memory_allocated)
+{
+ int sk;
+
+ *activated = true;
+
+ /* AF_INET and AF_INET6 share the same memory_allocated.
+ * tcp_init_sock() is called by AF_INET and AF_INET6,
+ * but udp_lib_init_sock() is inline.
+ */
+ sk = socket(AF_INET, test_case->type, 0);
+ if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
+ return -1;
+
+ close(sk);
+
+ return *memory_allocated;
+}
+
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->tcp_activated,
+ &skel->bss->tcp_memory_allocated);
+}
+
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->udp_activated,
+ &skel->bss->udp_memory_allocated);
+}
+
+static int check_bypass(struct test_case *test_case,
+ struct sk_bypass_prot_mem *skel, bool bypass)
+{
+ char buf[BUF_SINGLE] = {};
+ long memory_allocated[2];
+ int sk[NR_SOCKETS];
+ int err, i, j;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++)
+ sk[i] = -1;
+
+ err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+ if (err)
+ goto close;
+
+ memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
+
+ /* allocate pages >= NR_PAGES */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+ /* Avoid too noisy logs when something failed. */
+ if (bytes != sizeof(buf)) {
+ ASSERT_EQ(bytes, sizeof(buf), "send");
+ if (bytes < 0) {
+ err = bytes;
+ goto drain;
+ }
+ }
+ }
+ }
+
+ memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
+
+ if (bypass)
+ ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
+ else
+ ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");
+
+drain:
+ if (test_case->type == SOCK_DGRAM) {
+ /* UDP starts purging sk->sk_receive_queue after one RCU
+ * grace period, then udp_memory_allocated goes down,
+ * so drain the queue before close().
+ */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+ if (bytes == sizeof(buf))
+ continue;
+ if (bytes != -1 || errno != EAGAIN)
+ PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+ break;
+ }
+ }
+ }
+
+close:
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ if (sk[i] < 0)
+ break;
+
+ close(sk[i]);
+ }
+
+ return err;
+}
+
+static void run_test(struct test_case *test_case)
+{
+ struct sk_bypass_prot_mem *skel;
+ struct nstoken *nstoken;
+ int cgroup, err;
+
+ skel = sk_bypass_prot_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = sk_bypass_prot_mem__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto destroy_skel;
+
+ cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+ if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+ goto destroy_skel;
+
+ err = make_netns("sk_bypass_prot_mem");
+ if (!ASSERT_EQ(err, 0, "make_netns"))
+ goto close_cgroup;
+
+ nstoken = open_netns("sk_bypass_prot_mem");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto remove_netns;
+
+ err = check_bypass(test_case, skel, false);
+ if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
+ goto close_netns;
+
+ skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+ if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ ASSERT_EQ(err, 0, "test_bypass(true by bpf)");
+
+close_netns:
+ close_netns(nstoken);
+remove_netns:
+ remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+ close(cgroup);
+destroy_skel:
+ sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "TCP ",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDP ",
+ .family = AF_INET,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+ {
+ .name = "TCPv6",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDPv6",
+ .family = AF_INET6,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+};
+
+void serial_test_sk_bypass_prot_mem(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (test__start_subtest(test_cases[i].name))
+ run_test(&test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 4be6fdb78c6a..594441acb707 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -116,6 +116,8 @@ static void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+ ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
+ ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
}
void test_snprintf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..27781df8f2fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -1,20 +1,875 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Meta
+#include <poll.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "sock_iter_batch.skel.h"
#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
+static const int init_batch_size = 16;
static const int nr_soreuse = 4;
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+ established_socks = malloc(sizeof(*established_socks) * nr_connects*2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int ehash_buckets;
+ int connections;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
+ static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
+ struct bpf_link *link = NULL;
+ int *established_fds = NULL;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
+ skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ established_fds, tc->connections*2, counts, tc->max_socks,
+ link, iter_fd);
+done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections*2);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description)) {
+ do_resume_test(&resume_tests[i]);
+ }
+ }
+}
+
static void do_test(int sock_type, bool onebyone)
{
int err, i, nread, to_read, total_read, iter_fd = -1;
- int first_idx, second_idx, indices[nr_soreuse];
+ struct iter_out outputs[nr_soreuse];
struct bpf_link *link = NULL;
struct sock_iter_batch *skel;
+ int first_idx, second_idx;
int *fds[2] = {};
skel = sock_iter_batch__open();
@@ -34,6 +889,9 @@ static void do_test(int sock_type, bool onebyone)
goto done;
skel->rodata->ports[i] = ntohs(local_port);
}
+ skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +913,38 @@ static void do_test(int sock_type, bool onebyone)
* from a bucket and leave one socket out from
* that bucket on purpose.
*/
- to_read = (nr_soreuse - 1) * sizeof(*indices);
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
total_read = 0;
first_idx = -1;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
if (first_idx == -1)
- first_idx = indices[0];
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], first_idx, "first_idx");
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
} while (total_read < to_read);
- ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
ASSERT_EQ(total_read, to_read, "total_read");
free_fds(fds[first_idx], nr_soreuse);
fds[first_idx] = NULL;
/* Read the "whole" second bucket */
- to_read = nr_soreuse * sizeof(*indices);
+ to_read = nr_soreuse * sizeof(*outputs);
total_read = 0;
second_idx = !first_idx;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], second_idx, "second_idx");
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
} while (total_read <= to_read);
ASSERT_EQ(nread, 0, "nread");
/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +986,7 @@ void test_sock_iter_batch(void)
do_test(SOCK_DGRAM, true);
do_test(SOCK_DGRAM, false);
}
+ do_resume_tests();
close_netns(nstoken);
done:
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
index 1bdfb79ef009..0d59503a0c73 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -3,6 +3,7 @@
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
+#include <sys/un.h>
#include <linux/vm_sockets.h>
/* include/linux/net.h */
@@ -16,11 +17,16 @@
#define VMADDR_CID_LOCAL 1
#endif
+/* include/linux/compiler_types.h */
+#if __STDC_VERSION__ < 202311L && !defined(auto)
+# define auto __auto_type
+#endif
+
/* include/linux/cleanup.h */
#define __get_and_null(p, nullvalue) \
({ \
- __auto_type __ptr = &(p); \
- __auto_type __val = *__ptr; \
+ auto __ptr = &(p); \
+ auto __val = *__ptr; \
*__ptr = nullvalue; \
__val; \
})
@@ -169,6 +175,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss,
*len = sizeof(*addr6);
}
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -190,6 +205,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
@@ -315,21 +333,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
+ socklen_t len;
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
@@ -391,4 +415,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
return err;
}
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 884ad87783d5..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -111,31 +111,35 @@ out:
static void test_sockmap_vsock_delete_on_close(void)
{
- int err, c, p, map;
- const int zero = 0;
-
- err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
- if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
- return;
+ int map, c, p, err, zero = 0;
map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
sizeof(int), 1, NULL);
- if (!ASSERT_GE(map, 0, "bpf_map_create")) {
- close(c);
- goto out;
- }
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
- err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
- close(c);
- if (!ASSERT_OK(err, "bpf_map_update"))
- goto out;
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
ASSERT_OK(err, "after close(), bpf_map_update");
-out:
- close(p);
- close(map);
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
}
static void test_skmsg_helpers(enum bpf_map_type map_type)
@@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
- n = recv(c1, &b, 1, SOCK_NONBLOCK);
- ASSERT_EQ(n, 0, "recv_timeout(fin)");
+ n = recv(c1, &b, 1, MSG_DONTWAIT);
+ ASSERT_EQ(n, 0, "recv(fin)");
out_close:
close(c1);
close(p1);
@@ -531,57 +535,6 @@ out:
test_sockmap_pass_prog__destroy(skel);
}
-static void test_sockmap_stream_pass(void)
-{
- int zero = 0, sent, recvd;
- int verdict, parser;
- int err, map;
- int c = -1, p = -1;
- struct test_sockmap_pass_prog *pass = NULL;
- char snd[256] = "0123456789";
- char rcv[256] = "0";
-
- pass = test_sockmap_pass_prog__open_and_load();
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- parser = bpf_program__fd(pass->progs.prog_skb_parser);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
- goto out;
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
- goto out;
-
- err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
- if (err)
- goto out;
-
- /* sk_data_ready of 'p' will be replaced by strparser handler */
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
- if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
- goto out_close;
-
- /*
- * as 'prog_skb_parser' return the original skb len and
- * 'prog_skb_verdict' return SK_PASS, the kernel will just
- * pass it through to original socket 'p'
- */
- sent = xsend(c, snd, sizeof(snd), 0);
- ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-
- recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
- IO_TIMEOUT_SEC);
- ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-
-out_close:
- close(c);
- close(p);
-
-out:
- test_sockmap_pass_prog__destroy(pass);
-}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
@@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
- recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
@@ -1061,6 +1014,34 @@ destroy:
test_sockmap_pass_prog__destroy(skel);
}
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1101,8 +1082,6 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
- if (test__start_subtest("sockmap stream parser and verdict pass"))
- test_sockmap_stream_pass();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
@@ -1127,4 +1106,6 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap skb_verdict vsock poll"))
test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 3e5571dd578d..d815efac52fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -5,12 +5,15 @@
#define MAX_TEST_NAME 80
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
+
#define __always_unused __attribute__((__unused__))
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -18,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -26,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -35,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -43,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -66,21 +69,15 @@
__ret; \
})
-static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
- u64 value;
- u32 key;
int err;
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,77 +3,62 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
-
- s = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(s, 0, "socket"))
- return -1;
-
- err = listen(s, SOMAXCONN);
- if (!ASSERT_OK(err, "listen"))
- return -1;
-
- return s;
-}
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
-static int disconnect(int fd)
-{
- struct sockaddr unspec = { AF_UNSPEC };
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
- return connect(fd, &unspec, sizeof(unspec));
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
-
- srv = tcp_server(family);
- if (srv == -1)
- return;
-
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (!ASSERT_OK(err, "getsockopt"))
- goto close_srv;
-
- cli = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(cli, 0, "socket"))
- goto close_srv;
+ int err;
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (!ASSERT_OK(err, "connect"))
- goto close_cli;
-
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (!ASSERT_OK(err, "bpf_map_update_elem"))
- goto close_cli;
-
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
- goto close_cli;
-
- err = bpf_map_delete_elem(map, &zero);
- if (!ASSERT_OK(err, "bpf_map_delete_elem"))
- goto close_cli;
-
- err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return -1;
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ return -1;
+ return 0;
}
static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
@@ -146,6 +131,278 @@ static const char *fmt_test_name(const char *subtest_name, int family,
return test_name;
}
+static void test_sockmap_ktls_offload(int family, int sotype)
+{
+ int err;
+ int c = 0, p = 0, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_OK(err, "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
+
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+}
+
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = 0, p = 0, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
+ }
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
+
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
+ }
+
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
+{
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -154,18 +411,32 @@ static void run_tests(int family, enum bpf_map_type map_type)
if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
- test_sockmap_ktls_disconnect_after_delete(family, map);
if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ee1148d22be..f1bdccc7e4e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
goto close;
n = xsend(c1, buf, sizeof(buf), 0);
+ if (n == -1)
+ goto close;
if (n < sizeof(buf))
FAIL("incomplete write");
@@ -1366,237 +1368,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode,
- int send_flags)
-{
- const char *log_prefix = redir_mode_str(mode);
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
-
- err = add_to_sockmap(sock_mapfd, peer0, peer1);
- if (err)
- return;
-
- if (nop_mapfd >= 0) {
- err = add_to_sockmap(nop_mapfd, cli0, cli1);
- if (err)
- return;
- }
-
- /* Last byte is OOB data when send_flags has MSG_OOB bit set */
- n = xsend(cli1, "ab", 2, send_flags);
- if (n >= 0 && n < 2)
- FAIL("%s: incomplete send", log_prefix);
- if (n < 2)
- return;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- return;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
- if (send_flags & MSG_OOB) {
- /* Check that we can't read OOB while in sockmap */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EOPNOTSUPP)
- FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
- log_prefix, n, errno);
-
- /* Remove peer1 from sockmap */
- xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
-
- /* Check that OOB was dropped on redirect */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EINVAL)
- FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
- log_prefix, n, errno);
- }
-}
-
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int sotype)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_UNIX);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- unix_skb_redir_to_connected(skel, map, sotype);
-}
-
-/* Returns two connected loopback vsock sockets */
-static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
-{
- return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
-}
-
-static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
- enum redir_mode mode, int sotype)
-{
- const char *log_prefix = redir_mode_str(mode);
- char a = 'a', b = 'b';
- int u0, u1, v0, v1;
- int sfd[2];
- unsigned int pass;
- int err, n;
- u32 key;
-
- zero_verdict_count(verd_mapfd);
-
- if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
- return;
-
- u0 = sfd[0];
- u1 = sfd[1];
-
- err = vsock_socketpair_connectible(sotype, &v0, &v1);
- if (err) {
- FAIL("vsock_socketpair_connectible() failed");
- goto close_uds;
- }
-
- err = add_to_sockmap(sock_mapfd, u0, v0);
- if (err) {
- FAIL("add_to_sockmap failed");
- goto close_vsock;
- }
-
- n = write(v1, &a, sizeof(a));
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto out;
-
- n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
- if (n < 0)
- FAIL("%s: recv() err, errno=%d", log_prefix, errno);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
- if (b != a)
- FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto out;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-out:
- key = 0;
- bpf_map_delete_elem(sock_mapfd, &key);
- key = 1;
- bpf_map_delete_elem(sock_mapfd, &key);
-
-close_vsock:
- close(v0);
- close(v1);
-
-close_uds:
- close(u0);
- close(u1);
-}
-
-static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map,
- int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
- skel->bss->test_ingress = true;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_VSOCK);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
- vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
-}
-
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1408,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int inet_socketpair(int family, int type, int *s, int *c)
-{
- return create_pair(family, type | SOCK_NONBLOCK, s, c);
-}
-
-static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
- enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int err;
-
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
- if (err)
- return;
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
- if (err)
- goto close_cli0;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- udp_skb_redir_to_connected(skel, map, family);
-}
-
-static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- err = inet_socketpair(family, type, &p1, &c1);
- if (err)
- goto close;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close:
- xclose(c0);
- xclose(p0);
-}
-
-static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_EGRESS);
- skel->bss->test_ingress = true;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
- int nop_mapfd, int verd_mapfd,
- enum redir_mode mode, int send_flags)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- err = inet_socketpair(family, type, &p0, &c0);
- if (err)
- return;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- goto close_cli0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
- verd_mapfd, mode, send_flags);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int nop_map = bpf_map__fd(skel->maps.nop_map);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, MSG_OOB);
-
- skel->bss->test_ingress = true;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, MSG_OOB);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- struct netns_obj *netns;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- netns = netns_new("sockmap_listen", true);
- if (!ASSERT_OK_PTR(netns, "netns_new"))
- return;
-
- inet_unix_skb_redir_to_connected(skel, map, family);
- unix_inet_skb_redir_to_connected(skel, map, family);
-
- netns_free(netns);
-}
-
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1416,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
- test_udp_redir(skel, map, family);
- test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1431,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress ->af_vsock:
+ *
+ * Peer socket may receive the packet some time after the return from sendmsg().
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or timeout if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place. Which means we may race the execution of the verdict logic and
+ * read map_verd before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if there was no packet dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been ran) just re-read the verdict
+ * map again.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (!n && !expect_success)
+ FAIL("%s: expected failure: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+ int recvd;
+ bool retried = false;
+ char rcv[10];
+
+retry:
+ errno = 0;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+ if (recvd < 0 && errno == EAGAIN && retried == false) {
+ /* On the first call, EAGAIN will certainly be returned.
+ * A 1-second wait is enough for the workqueue to finish.
+ */
+ sleep(1);
+ retried = true;
+ goto retry;
+ }
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "data mismatch"))
+ return -1;
+ return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+ bool need_parser)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int verdict, parser;
+ int err;
+
+ strp = test_sockmap_strp__open_and_load();
+ *out_map = bpf_map__fd(strp->maps.sock_map);
+
+ if (need_parser)
+ parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+ else
+ parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ if (pass)
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+ else
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+ err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+ goto err;
+
+ err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto err;
+
+ return strp;
+err:
+ test_sockmap_strp__destroy(strp);
+ return NULL;
+}
+
+/* Dispatch packets to different socket by packet size:
+ *
+ * ------ ------
+ * | pkt4 || pkt1 |... > remote socket
+ * ------ ------ / ------ ------
+ * | pkt8 | pkt7 |...
+ * ------ ------ \ ------ ------
+ * | pkt3 || pkt2 |... > local socket
+ * ------ ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+ int i, j, zero = 0, one = 1, recvd;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ int test_cnt = 6;
+ char rcv[10];
+ struct {
+ char data[7];
+ int data_len;
+ int send_cnt;
+ int *receiver;
+ } send_dir[2] = {
+ /* data expected to deliver to local */
+ {"llllll", 6, 0, &p0},
+ /* data expected to deliver to remote */
+ {"rrrrr", 5, 0, &c1}
+ };
+
+ strp = sockmap_strp_init(&map, false, false);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+ goto out_close;
+
+ /* deliver data with data size greater than 5 to local */
+ strp->data->verdict_max_size = 5;
+
+ for (i = 0; i < test_cnt; i++) {
+ int d = i % 2;
+
+ xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+ send_dir[d].send_cnt++;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < send_dir[i].send_cnt; j++) {
+ int expected = send_dir[i].data_len;
+
+ recvd = recv_timeout(*send_dir[i].receiver, rcv,
+ expected, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+ goto out_close;
+ if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+ "data mismatch"))
+ goto out_close;
+ }
+ }
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * | packet1 | packet2 | ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+ int i, zero = 0;
+ int sent, recvd, total;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char *snd = NULL, *rcv = NULL;
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* construct multiple packets in one buffer */
+ total = test_packet_num * STRP_PKT_FULL_LEN;
+ snd = malloc(total);
+ rcv = malloc(total + 1);
+ if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+ !ASSERT_TRUE(rcv, "malloc(rcv)"))
+ goto out_close;
+
+ for (i = 0; i < test_packet_num; i++) {
+ memcpy(snd + i * STRP_PKT_FULL_LEN,
+ packet, STRP_PKT_FULL_LEN);
+ }
+
+ sent = xsend(c, snd, total, 0);
+ if (!ASSERT_EQ(sent, total, "xsend(c)"))
+ goto out_close;
+
+ /* try to recv one more byte to avoid truncation check */
+ recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+ goto out_close;
+
+ /* we sent TCP segment with multiple encapsulation
+ * then check whether packets are handled correctly
+ */
+ if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+ if (snd)
+ free(snd);
+ if (rcv)
+ free(rcv);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+ int zero = 0, recvd, off;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* 1.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+ goto out_close;
+
+ /* 1.2 send remaining head and body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+ /* 2.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+
+ /* 2.2 send remaining head and partial body, 1 byte body left */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+ off = STRP_PKT_FULL_LEN - 1;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+ goto out_close;
+
+ /* 2.3 send remaining body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+ int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+ int err, map;
+ int c = -1, p = -1;
+ int test_cnt = 10, i;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* inject some data before bpf process, it should be read
+ * correctly because we check sk_receive_queue in
+ * tcp_bpf_recvmsg_parser().
+ */
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+ goto out_close;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out_close;
+
+ /* consume previous data we injected */
+ if (sockmap_strp_consume_pre_data(p))
+ goto out_close;
+
+ /* Previously, we encountered issues such as deadlocks and
+ * sequence errors that resulted in the inability to read
+ * continuously. Therefore, we perform multiple iterations
+ * of testing here.
+ */
+ for (i = 0; i < test_cnt; i++) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "memcmp, data mismatch"))
+ goto out_close;
+ }
+
+ if (fionread) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+ goto out_close;
+
+ err = ioctl(p, FIONREAD, &avail);
+ if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+ !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "second memcmp, data mismatch"))
+ goto out_close;
+ }
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+ int zero = 0, one = 1, sent, recvd, off;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, false, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ /* We simulate a reverse proxy server.
+ * When p0 receives data from c0, we forward it to c1.
+ * From c1's perspective, it will consider this data
+ * as being sent by p1.
+ */
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+ goto out_close;
+
+ recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "received data does not match the sent data"))
+ goto out_close;
+
+ /* send again to ensure the stream is functioning correctly. */
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+ goto out_close;
+
+ /* partial read */
+ off = STRP_PKT_FULL_LEN / 2;
+ recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "partial received data does not match the sent data"))
+ goto out_close;
+
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+ if (test__start_subtest("sockmap strp tcp pass"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp v6 pass"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp pass fionread"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp verdict"))
+ test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp v6 verdict"))
+ test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp partial read"))
+ test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp multiple packets"))
+ test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp dispatch"))
+ test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 2b0068742ef9..254fbfeab06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
- " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
" R0=map_value(id=2,ks=4,vs=8)"
- " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
@@ -50,6 +50,9 @@ static struct {
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
{ "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
{ "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index b7ba5cd47d96..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -39,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0832fd787457..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -66,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+ __u64 ips[PERF_MAX_STACK_DEPTH];
+ struct ksyms *ksyms = NULL;
+ int i, err = 0;
+ va_list args;
+
+ /* sorted by addr */
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return -1;
+
+ /* unlikely, but... */
+ if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+ return -1;
+
+ err = bpf_map_lookup_elem(fd, &key, ips);
+ if (err)
+ goto out;
+
+ /*
+ * Compare all symbols provided via arguments with stacktrace ips,
+ * and their related symbol addresses.t
+ */
+ va_start(args, cnt);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned long val;
+ struct ksym *ksym;
+
+ val = va_arg(args, unsigned long);
+ ksym = ksym_search_local(ksyms, ips[i]);
+ if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+ break;
+ ASSERT_EQ(ksym->addr, val, "stack_cmp");
+ }
+
+ va_end(args);
+
+out:
+ free_kallsyms_local(ksyms);
+ return err;
+}
+
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.kprobe_multi_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_raw_tp(void)
+{
+ __u32 info_len = sizeof(struct bpf_prog_info);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_info info = {};
+ struct stacktrace_ips *skel;
+ __u64 bpf_prog_ksym = 0;
+ int err;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+ skel->progs.rawtp_test,
+ "bpf_testmod_test_read");
+ if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+ goto cleanup;
+
+ /* get bpf program address */
+ info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+ info.nr_jited_ksyms = 1;
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+ &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+ bpf_prog_ksym,
+ ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)
+{
+ if (test__start_subtest("kprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(false);
+ if (test__start_subtest("kretprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(true);
+ if (test__start_subtest("raw_tp"))
+ test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+ test__skip();
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+ __test_stacktrace_ips();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index df59e4ae2951..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "oncpu";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.bpf.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (!ASSERT_OK_PTR(link, "attach_tp"))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index c6ef06f55cdb..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
{
const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.bpf.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
index 1932b1e0685c..dc2ccf6a14d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void)
skel->bss->control = 1;
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
new file mode 100644
index 000000000000..c3cce5c292bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+
+#include "stream.skel.h"
+#include "stream_fail.skel.h"
+
+void test_stream_failure(void)
+{
+ RUN_TESTS(stream_fail);
+}
+
+void test_stream_success(void)
+{
+ RUN_TESTS(stream);
+ return;
+}
+
+void test_stream_syscall(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ struct stream *skel;
+ int ret, prog_fd;
+ char buf[64];
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.stream_syscall);
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EINVAL, "bad prog_fd");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -ENOENT, "bad stream id");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EFAULT, "bad stream buf");
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 2, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 1, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stdout");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stderr");
+
+ stream__destroy(skel);
+}
+
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
new file mode 100644
index 000000000000..0f3bf594e7a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include <test_progs.h>
+#include "string_kfuncs_success.skel.h"
+#include "string_kfuncs_failure1.skel.h"
+#include "string_kfuncs_failure2.skel.h"
+#include <sys/mman.h>
+
+static const char * const test_cases[] = {
+ "strcmp",
+ "strcasecmp",
+ "strchr",
+ "strchrnul",
+ "strnchr",
+ "strrchr",
+ "strlen",
+ "strnlen",
+ "strspn_str",
+ "strspn_accept",
+ "strcspn_str",
+ "strcspn_reject",
+ "strstr",
+ "strcasestr",
+ "strnstr",
+ "strncasestr",
+};
+
+void run_too_long_tests(void)
+{
+ struct string_kfuncs_failure2 *skel;
+ struct bpf_program *prog;
+ char test_name[256];
+ int err, i;
+
+ skel = string_kfuncs_failure2__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load"))
+ return;
+
+ memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str));
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ sprintf(test_name, "test_%s_too_long", test_cases[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, test_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG");
+ }
+
+cleanup:
+ string_kfuncs_failure2__destroy(skel);
+}
+
+void test_string_kfuncs(void)
+{
+ RUN_TESTS(string_kfuncs_success);
+ RUN_TESTS(string_kfuncs_failure1);
+
+ run_too_long_tests();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
new file mode 100644
index 000000000000..5dd6c120a838
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "summarization_freplace.skel.h"
+#include "summarization.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name,
+ const char *to_be_replaced,
+ const char *replacement,
+ bool expect_load,
+ const char *err_msg)
+{
+ struct summarization_freplace *freplace = NULL;
+ struct bpf_program *freplace_prog = NULL;
+ struct bpf_program *main_prog = NULL;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct summarization *main = NULL;
+ char log[16*1024];
+ int err;
+
+ opts.kernel_log_buf = log;
+ opts.kernel_log_size = sizeof(log);
+ if (env.verbosity >= VERBOSE_SUPER)
+ opts.kernel_log_level = 1 | 2 | 4;
+ main = summarization__open_opts(&opts);
+ if (!ASSERT_OK_PTR(main, "summarization__open"))
+ goto out;
+ main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+ if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+ goto out;
+ bpf_program__set_autoload(main_prog, true);
+ err = summarization__load(main);
+ print_verifier_log(log);
+ if (!ASSERT_OK(err, "summarization__load"))
+ goto out;
+ freplace = summarization_freplace__open_opts(&opts);
+ if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open"))
+ goto out;
+ freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
+ if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+ goto out;
+ bpf_program__set_autoload(freplace_prog, true);
+ bpf_program__set_autoattach(freplace_prog, true);
+ bpf_program__set_attach_target(freplace_prog,
+ bpf_program__fd(main_prog),
+ to_be_replaced);
+ err = summarization_freplace__load(freplace);
+ print_verifier_log(log);
+
+ /* The might_sleep extension doesn't work yet as sleepable calls are not
+ * allowed, but preserve the check in case it's supported later and then
+ * this particular combination can be enabled.
+ */
+ if (!strcmp("might_sleep", replacement) && err) {
+ ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_EQ(err, -EINVAL, "err");
+ test__skip();
+ goto out;
+ }
+
+ if (expect_load) {
+ ASSERT_OK(err, "summarization_freplace__load");
+ } else {
+ ASSERT_ERR(err, "summarization_freplace__load");
+ ASSERT_HAS_SUBSTR(log, err_msg, "error log");
+ }
+
+out:
+ summarization_freplace__destroy(freplace);
+ summarization__destroy(main);
+}
+
+/* There are two global subprograms in both summarization.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms
+ * that do not change packet data with those that do not as well.
+ * The below tests check outcomes for each combination of such freplace.
+ * Also test a case when main subprogram itself is replaced and is a single
+ * subprogram in a program.
+ *
+ * This holds for might_sleep programs. It is ok to replace might_sleep with
+ * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced
+ * with might_sleep.
+ */
+void test_summarization_freplace(void)
+{
+ struct {
+ const char *main;
+ const char *to_be_replaced;
+ bool has_side_effect;
+ } mains[2][4] = {
+ {
+ { "main_changes_with_subprogs", "changes_pkt_data", true },
+ { "main_changes_with_subprogs", "does_not_change_pkt_data", false },
+ { "main_changes", "main_changes", true },
+ { "main_does_not_change", "main_does_not_change", false },
+ },
+ {
+ { "main_might_sleep_with_subprogs", "might_sleep", true },
+ { "main_might_sleep_with_subprogs", "does_not_sleep", false },
+ { "main_might_sleep", "main_might_sleep", true },
+ { "main_does_not_sleep", "main_does_not_sleep", false },
+ },
+ };
+ const char *pkt_err = "Extension program changes packet data";
+ const char *slp_err = "Extension program may sleep";
+ struct {
+ const char *func;
+ bool has_side_effect;
+ const char *err_msg;
+ } replacements[2][2] = {
+ {
+ { "changes_pkt_data", true, pkt_err },
+ { "does_not_change_pkt_data", false, pkt_err },
+ },
+ {
+ { "might_sleep", true, slp_err },
+ { "does_not_sleep", false, slp_err },
+ },
+ };
+ char buf[64];
+
+ for (int t = 0; t < 2; t++) {
+ for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
+ for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
+ snprintf(buf, sizeof(buf), "%s_with_%s",
+ mains[t][i].to_be_replaced, replacements[t][j].func);
+ if (!test__start_subtest(buf))
+ continue;
+ test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func,
+ mains[t][i].has_side_effect || !replacements[t][j].has_side_effect,
+ replacements[t][j].err_msg);
+ }
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 544144620ca6..0ab36503c3b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
bool test_fexit,
bool test_fentry_entry)
{
- int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val;
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val;
struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
struct bpf_program *prog, *fentry_prog;
@@ -1600,6 +1600,7 @@ static void test_tailcall_bpf2bpf_freplace(void)
goto out;
err = bpf_link__destroy(freplace_link);
+ freplace_link = NULL;
if (!ASSERT_OK(err, "destroy link"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
new file mode 100644
index 000000000000..2de38776a2d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_H
+#define __TASK_LOCAL_DATA_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+#include <pthread.h>
+#endif
+
+#include <bpf/bpf.h>
+
+/*
+ * OPTIONS
+ *
+ * Define the option before including the header
+ *
+ * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
+ *
+ * Thread-specific memory for storing TLD is allocated lazily on the first call to
+ * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
+ * to prevent memory leak. Pthread will be included if the option is defined. A pthread
+ * key will be registered with a destructor that calls tld_free().
+ *
+ *
+ * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
+ * (default: 64 bytes)
+ *
+ * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
+ * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be
+ * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
+ * for these TLDs. This additional memory is allocated for every thread that calls
+ * tld_get_data() even if no tld_create_key are actually called, so be mindful of
+ * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory
+ * will be allocated for TLDs created with it.
+ *
+ *
+ * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
+ *
+ * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
+ * TLD_MAX_DATA_CNT.
+ *
+ *
+ * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ *
+ * When allocating the memory for storing TLDs, we need to make sure there is a memory
+ * region of the X bytes within a page. This is due to the limit posed by UPTR: memory
+ * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The
+ * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
+ * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage
+ * as not all memory allocator can use the exact amount of memory requested to fulfill
+ * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
+ * option to always use aligned_alloc() if the implementation has low memory overhead.
+ */
+
+#define TLD_PAGE_SIZE getpagesize()
+#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#ifndef TLD_DYN_DATA_SIZE
+#define TLD_DYN_DATA_SIZE 64
+#endif
+
+#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ _Atomic __u16 size;
+};
+
+struct tld_meta_u {
+ _Atomic __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[];
+};
+
+struct tld_map_value {
+ void *data;
+ struct tld_meta_u *meta;
+};
+
+struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
+__thread struct tld_data_u *tld_data_p __attribute__((weak));
+__thread void *tld_data_alloc_p __attribute__((weak));
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+pthread_key_t tld_pthread_key __attribute__((weak));
+
+static void tld_free(void);
+
+static void __tld_thread_exit_handler(void *unused)
+{
+ tld_free();
+}
+#endif
+
+static int __tld_init_meta_p(void)
+{
+ struct tld_meta_u *meta, *uninit = NULL;
+ int err = 0;
+
+ meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
+ if (!meta) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(meta, 0, TLD_PAGE_SIZE);
+ meta->size = TLD_DYN_DATA_SIZE;
+
+ if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
+ free(meta);
+ goto out;
+ }
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+#endif
+out:
+ return err;
+}
+
+static int __tld_init_data_p(int map_fd)
+{
+ bool use_aligned_alloc = false;
+ struct tld_map_value map_val;
+ struct tld_data_u *data;
+ void *data_alloc = NULL;
+ int err, tid_fd = -1;
+
+ tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
+ if (tid_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+
+#ifdef TLD_DATA_USE_ALIGNED_ALLOC
+ use_aligned_alloc = true;
+#endif
+
+ /*
+ * tld_meta_p->size = TLD_DYN_DATA_SIZE +
+ * total size of TLDs defined via TLD_DEFINE_KEY()
+ */
+ data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
+ aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
+ malloc(tld_meta_p->size * 2);
+ if (!data_alloc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
+ * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
+ * enough memory.
+ */
+ if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
+ data = data_alloc;
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
+ offsetof(struct tld_data_u, data);
+ } else {
+ map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data->start = offsetof(struct tld_data_u, data);
+ }
+ map_val.meta = TLD_READ_ONCE(tld_meta_p);
+
+ err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
+ if (err) {
+ free(data_alloc);
+ goto out;
+ }
+
+ tld_data_p = data;
+ tld_data_alloc_p = data_alloc;
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_setspecific(tld_pthread_key, (void *)1);
+#endif
+out:
+ if (tid_fd >= 0)
+ close(tid_fd);
+ return err;
+}
+
+static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
+{
+ int err, i, sz, off = 0;
+ __u8 cnt;
+
+ if (!TLD_READ_ONCE(tld_meta_p)) {
+ err = __tld_init_meta_p();
+ if (err)
+ return (tld_key_t){err};
+ }
+
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+retry:
+ cnt = atomic_load(&tld_meta_p->cnt);
+ if (i < cnt) {
+ /* A metadata is not ready until size is updated with a non-zero value */
+ while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
+ sched_yield();
+
+ if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
+ return (tld_key_t){-EEXIST};
+
+ off += TLD_ROUND_UP(sz, 8);
+ continue;
+ }
+
+ /*
+ * TLD_DEFINE_KEY() is given memory upto a page while at most
+ * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
+ */
+ if (dyn_data) {
+ if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
+ return (tld_key_t){-E2BIG};
+ } else {
+ if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
+ return (tld_key_t){-E2BIG};
+ tld_meta_p->size += TLD_ROUND_UP(size, 8);
+ }
+
+ /*
+ * Only one tld_create_key() can increase the current cnt by one and
+ * takes the latest available slot. Other threads will check again if a new
+ * TLD can still be added, and then compete for the new slot after the
+ * succeeding thread update the size.
+ */
+ if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
+ goto retry;
+
+ strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ atomic_store(&tld_meta_p->metadata[i].size, size);
+ return (tld_key_t){(__s16)off};
+ }
+
+ return (tld_key_t){-ENOSPC};
+}
+
+/**
+ * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN
+ *
+ * The macro can only be used in file scope.
+ *
+ * A global variable key of opaque type, tld_key_t, will be declared and initialized before
+ * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key
+ * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD.
+ * bpf programs can also fetch the same key by name.
+ *
+ * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
+ * enough memory will be allocated for each thread on the first call to tld_get_data().
+ */
+#define TLD_DEFINE_KEY(key, name, size) \
+tld_key_t key; \
+ \
+__attribute__((constructor)) \
+void __tld_define_key_##key(void) \
+{ \
+ key = __tld_create_key(name, size, false); \
+}
+
+/**
+ * tld_create_key() - Create a TLD and return a key associated with the TLD.
+ *
+ * @name: The name the TLD
+ * @size: The size of the TLD
+ *
+ * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
+ * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
+ * locate the TLD. bpf programs can also fetch the same key by name.
+ *
+ * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
+ * not known statically or a TLD needs to be created conditionally)
+ *
+ * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
+ * created dynamically with tld_create_key(). Since only a user page is pinned to the
+ * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
+ * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
+ */
+__attribute__((unused))
+static tld_key_t tld_create_key(const char *name, size_t size)
+{
+ return __tld_create_key(name, size, true);
+}
+
+__attribute__((unused))
+static inline bool tld_key_is_err(tld_key_t key)
+{
+ return key.off < 0;
+}
+
+__attribute__((unused))
+static inline int tld_key_err_or_zero(tld_key_t key)
+{
+ return tld_key_is_err(key) ? key.off : 0;
+}
+
+/**
+ * tld_get_data() - Get a pointer to the TLD associated with the given key of the
+ * calling thread.
+ *
+ * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
+ * of task local data.
+ * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
+ *
+ * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD
+ * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
+ * aligned.
+ *
+ * Threads that call tld_get_data() must call tld_free() on exit to prevent
+ * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
+ */
+__attribute__((unused))
+static void *tld_get_data(int map_fd, tld_key_t key)
+{
+ if (!TLD_READ_ONCE(tld_meta_p))
+ return NULL;
+
+ /* tld_data_p is allocated on the first invocation of tld_get_data() */
+ if (!tld_data_p && __tld_init_data_p(map_fd))
+ return NULL;
+
+ return tld_data_p->data + key.off;
+}
+
+/**
+ * tld_free() - Free task local data memory of the calling thread
+ *
+ * For the calling thread, all pointers to TLDs acquired before will become invalid.
+ *
+ * Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
+ * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
+ * to free the memory automatically.
+ */
+__attribute__((unused))
+static void tld_free(void)
+{
+ if (tld_data_alloc_p) {
+ free(tld_data_alloc_p);
+ tld_data_alloc_p = NULL;
+ tld_data_p = NULL;
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __TASK_LOCAL_DATA_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
new file mode 100644
index 000000000000..450d17d91a56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work_stress.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdatomic.h>
+
+struct test_data {
+ int prog_fd;
+ atomic_int exit;
+};
+
+void *runner(void *test_data)
+{
+ struct test_data *td = test_data;
+ int err = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ while (!err && !atomic_load(&td->exit))
+ err = bpf_prog_test_run_opts(td->prog_fd, &opts);
+
+ return NULL;
+}
+
+static int get_env_int(const char *str, int def)
+{
+ const char *s = getenv(str);
+ char *end;
+ int retval;
+
+ if (!s || !*s)
+ return def;
+ errno = 0;
+ retval = strtol(s, &end, 10);
+ if (errno || *end || retval < 0)
+ return def;
+ return retval;
+}
+
+static void task_work_run(bool enable_delete)
+{
+ struct task_work_stress *skel;
+ struct bpf_program *scheduler, *deleter;
+ int nthreads = 16;
+ int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1);
+ pthread_t tid[nthreads], tid_del;
+ bool started[nthreads], started_del = false;
+ struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 };
+ int i, err;
+
+ skel = task_work_stress__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work");
+ bpf_program__set_autoload(scheduler, true);
+
+ deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work");
+ bpf_program__set_autoload(deleter, true);
+
+ err = task_work_stress__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ for (i = 0; i < nthreads; ++i)
+ started[i] = false;
+
+ td_sched.prog_fd = bpf_program__fd(scheduler);
+ for (i = 0; i < nthreads; ++i) {
+ if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started[i] = true;
+ }
+
+ if (enable_delete)
+ atomic_store(&td_del.exit, 0);
+
+ td_del.prog_fd = bpf_program__fd(deleter);
+ if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started_del = true;
+
+ /* Run stress test for some time */
+ sleep(test_time_s);
+
+cancel:
+ atomic_store(&td_sched.exit, 1);
+ atomic_store(&td_del.exit, 1);
+ for (i = 0; i < nthreads; ++i) {
+ if (started[i])
+ pthread_join(tid[i], NULL);
+ }
+
+ if (started_del)
+ pthread_join(tid_del, NULL);
+
+ ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled");
+ /* Some scheduling attempts should have failed due to contention */
+ ASSERT_GT(skel->bss->schedule_error, 0, "schedule error");
+
+ if (enable_delete) {
+ /* If delete thread is enabled, it has cancelled some callbacks */
+ ASSERT_GT(skel->bss->delete_success, 0, "delete success");
+ ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ } else {
+ /* Without delete thread number of scheduled callbacks is the same as fired */
+ ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ }
+
+cleanup:
+ task_work_stress__destroy(skel);
+}
+
+void test_task_work_stress(void)
+{
+ if (test__start_subtest("no_delete"))
+ task_work_run(false);
+ if (test__start_subtest("with_delete"))
+ task_work_run(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 924d0e25320c..d52a62af77bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -8,34 +8,6 @@
# define loopback 1
#endif
-static inline __u32 id_from_prog_fd(int fd)
-{
- struct bpf_prog_info prog_info = {};
- __u32 prog_info_len = sizeof(prog_info);
- int err;
-
- err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
- if (!ASSERT_OK(err, "id_from_prog_fd"))
- return 0;
-
- ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
- return prog_info.id;
-}
-
-static inline __u32 id_from_link_fd(int fd)
-{
- struct bpf_link_info link_info = {};
- __u32 link_info_len = sizeof(link_info);
- int err;
-
- err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
- if (!ASSERT_OK(err, "id_from_link_fd"))
- return 0;
-
- ASSERT_NEQ(link_info.id, 0, "link_info.id");
- return link_info.id;
-}
-
static inline __u32 ifindex_from_link_fd(int fd)
{
struct bpf_link_info link_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index 1af9ec1149aa..2186a24e7d8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -13,7 +13,7 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
-void serial_test_tc_links_basic(void)
+void test_ns_tc_links_basic(void)
{
LIBBPF_OPTS(bpf_prog_query_opts, optq);
LIBBPF_OPTS(bpf_tcx_opts, optl);
@@ -260,7 +260,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_before(void)
+void test_ns_tc_links_before(void)
{
test_tc_links_before_target(BPF_TCX_INGRESS);
test_tc_links_before_target(BPF_TCX_EGRESS);
@@ -414,7 +414,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_after(void)
+void test_ns_tc_links_after(void)
{
test_tc_links_after_target(BPF_TCX_INGRESS);
test_tc_links_after_target(BPF_TCX_EGRESS);
@@ -514,7 +514,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_revision(void)
+void test_ns_tc_links_revision(void)
{
test_tc_links_revision_target(BPF_TCX_INGRESS);
test_tc_links_revision_target(BPF_TCX_EGRESS);
@@ -618,7 +618,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_chain_classic(void)
+void test_ns_tc_links_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -846,7 +846,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_replace(void)
+void test_ns_tc_links_replace(void)
{
test_tc_links_replace_target(BPF_TCX_INGRESS);
test_tc_links_replace_target(BPF_TCX_EGRESS);
@@ -1158,7 +1158,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_invalid(void)
+void test_ns_tc_links_invalid(void)
{
test_tc_links_invalid_target(BPF_TCX_INGRESS);
test_tc_links_invalid_target(BPF_TCX_EGRESS);
@@ -1314,7 +1314,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_prepend(void)
+void test_ns_tc_links_prepend(void)
{
test_tc_links_prepend_target(BPF_TCX_INGRESS);
test_tc_links_prepend_target(BPF_TCX_EGRESS);
@@ -1470,7 +1470,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_append(void)
+void test_ns_tc_links_append(void)
{
test_tc_links_append_target(BPF_TCX_INGRESS);
test_tc_links_append_target(BPF_TCX_EGRESS);
@@ -1568,7 +1568,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_cleanup(void)
+void test_ns_tc_links_dev_cleanup(void)
{
test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1672,7 +1672,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_links_chain_mixed(void)
+void test_ns_tc_links_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -1782,7 +1782,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_ingress(void)
+void test_ns_tc_links_ingress(void)
{
test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
@@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block)
return err;
}
-void serial_test_tc_links_dev_chain0(void)
+void test_ns_tc_links_dev_chain0(void)
{
int err, ifindex;
@@ -1955,7 +1955,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_mixed(void)
+void test_ns_tc_links_dev_mixed(void)
{
test_tc_links_dev_mixed(BPF_TCX_INGRESS);
test_tc_links_dev_mixed(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index f77f604389aa..dd7a138d8c3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -10,7 +10,7 @@
#include "test_tc_link.skel.h"
#include "tc_helpers.h"
-void serial_test_tc_opts_basic(void)
+void test_ns_tc_opts_basic(void)
{
LIBBPF_OPTS(bpf_prog_attach_opts, opta);
LIBBPF_OPTS(bpf_prog_detach_opts, optd);
@@ -254,7 +254,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_before(void)
+void test_ns_tc_opts_before(void)
{
test_tc_opts_before_target(BPF_TCX_INGRESS);
test_tc_opts_before_target(BPF_TCX_EGRESS);
@@ -445,7 +445,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_after(void)
+void test_ns_tc_opts_after(void)
{
test_tc_opts_after_target(BPF_TCX_INGRESS);
test_tc_opts_after_target(BPF_TCX_EGRESS);
@@ -554,7 +554,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_revision(void)
+void test_ns_tc_opts_revision(void)
{
test_tc_opts_revision_target(BPF_TCX_INGRESS);
test_tc_opts_revision_target(BPF_TCX_EGRESS);
@@ -655,7 +655,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_chain_classic(void)
+void test_ns_tc_opts_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -864,7 +864,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_replace(void)
+void test_ns_tc_opts_replace(void)
{
test_tc_opts_replace_target(BPF_TCX_INGRESS);
test_tc_opts_replace_target(BPF_TCX_EGRESS);
@@ -1017,7 +1017,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_invalid(void)
+void test_ns_tc_opts_invalid(void)
{
test_tc_opts_invalid_target(BPF_TCX_INGRESS);
test_tc_opts_invalid_target(BPF_TCX_EGRESS);
@@ -1157,7 +1157,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_prepend(void)
+void test_ns_tc_opts_prepend(void)
{
test_tc_opts_prepend_target(BPF_TCX_INGRESS);
test_tc_opts_prepend_target(BPF_TCX_EGRESS);
@@ -1297,7 +1297,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_append(void)
+void test_ns_tc_opts_append(void)
{
test_tc_opts_append_target(BPF_TCX_INGRESS);
test_tc_opts_append_target(BPF_TCX_EGRESS);
@@ -1387,7 +1387,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_dev_cleanup(void)
+void test_ns_tc_opts_dev_cleanup(void)
{
test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1563,7 +1563,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_mixed(void)
+void test_ns_tc_opts_mixed(void)
{
test_tc_opts_mixed_target(BPF_TCX_INGRESS);
test_tc_opts_mixed_target(BPF_TCX_EGRESS);
@@ -1642,7 +1642,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_demixed(void)
+void test_ns_tc_opts_demixed(void)
{
test_tc_opts_demixed_target(BPF_TCX_INGRESS);
test_tc_opts_demixed_target(BPF_TCX_EGRESS);
@@ -1813,7 +1813,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach(void)
+void test_ns_tc_opts_detach(void)
{
test_tc_opts_detach_target(BPF_TCX_INGRESS);
test_tc_opts_detach_target(BPF_TCX_EGRESS);
@@ -2020,7 +2020,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_before(void)
+void test_ns_tc_opts_detach_before(void)
{
test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
@@ -2236,7 +2236,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_after(void)
+void test_ns_tc_opts_detach_after(void)
{
test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
@@ -2265,7 +2265,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_delete_empty(void)
+void test_ns_tc_opts_delete_empty(void)
{
test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
@@ -2372,7 +2372,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_chain_mixed(void)
+void test_ns_tc_opts_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -2446,7 +2446,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_max(void)
+void test_ns_tc_opts_max(void)
{
test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
@@ -2748,7 +2748,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query(void)
+void test_ns_tc_opts_query(void)
{
test_tc_opts_query_target(BPF_TCX_INGRESS);
test_tc_opts_query_target(BPF_TCX_EGRESS);
@@ -2807,7 +2807,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query_attach(void)
+void test_ns_tc_opts_query_attach(void)
{
test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..76d72a59365e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -56,6 +56,8 @@
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
@@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
int err;
if (result->dev_mode == MODE_VETH) {
- SYS(fail, "ip link add src type veth peer name src_fwd");
- SYS(fail, "ip link add dst type veth peer name dst_fwd");
-
- SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set dst address " MAC_DST);
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
new file mode 100644
index 000000000000..de22734abc4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/genetlink.h>
+#include "network_helpers.h"
+#include "bpf_smc.skel.h"
+
+#ifndef IPPROTO_SMC
+#define IPPROTO_SMC 256
+#endif
+
+#define CLIENT_IP "127.0.0.1"
+#define SERVER_IP "127.0.1.0"
+#define SERVER_IP_VIA_RISK_PATH "127.0.2.0"
+
+#define SERVICE_1 80
+#define SERVICE_2 443
+#define SERVICE_3 8443
+
+#define TEST_NS "bpf_smc_netns"
+
+static struct netns_obj *test_netns;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+#if defined(__s390x__)
+/* s390x has default seid */
+static bool setup_ueid(void) { return true; }
+static void cleanup_ueid(void) {}
+#else
+enum {
+ SMC_NETLINK_ADD_UEID = 10,
+ SMC_NETLINK_REMOVE_UEID
+};
+
+enum {
+ SMC_NLA_EID_TABLE_UNSPEC,
+ SMC_NLA_EID_TABLE_ENTRY, /* string */
+};
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[1024];
+};
+
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK"
+#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID"
+
+static uint16_t smc_nl_family_id = -1;
+
+static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg = {0};
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = nlmsg_flags;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 1;
+ na = (struct nlattr *)GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *)&msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static bool get_smc_nl_family_id(void)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlattr *nl;
+ int fd, ret;
+ pid_t pid;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "nl_family socket"))
+ return false;
+
+ pid = getpid();
+
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "nl_family bind"))
+ goto fail;
+
+ ret = send_cmd(fd, GENL_ID_CTRL, pid,
+ NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
+ strlen(SMC_GENL_FAMILY_NAME));
+ if (!ASSERT_OK(ret, "nl_family query"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "nl_family response"))
+ goto fail;
+
+ nl = (struct nlattr *)GENLMSG_DATA(&msg);
+ nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
+ if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
+ goto fail;
+
+ smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool smc_ueid(int op)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlmsgerr *err;
+ char test_ueid[32];
+ int fd, ret;
+ pid_t pid;
+
+ /* UEID required */
+ memset(test_ueid, '\x20', sizeof(test_ueid));
+ memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "ueid socket"))
+ return false;
+
+ pid = getpid();
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "ueid bind"))
+ goto fail;
+
+ ret = send_cmd(fd, smc_nl_family_id, pid,
+ NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
+ (void *)test_ueid, sizeof(test_ueid));
+ if (!ASSERT_OK(ret, "ueid cmd"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "ueid response"))
+ goto fail;
+
+ if (msg.n.nlmsg_type == NLMSG_ERROR) {
+ err = NLMSG_DATA(&msg);
+ switch (op) {
+ case SMC_NETLINK_REMOVE_UEID:
+ if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
+ "ueid remove"))
+ goto fail;
+ break;
+ case SMC_NETLINK_ADD_UEID:
+ if (!ASSERT_OK(err->error, "ueid add"))
+ goto fail;
+ break;
+ default:
+ break;
+ }
+ }
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool setup_ueid(void)
+{
+ /* get smc nl id */
+ if (!get_smc_nl_family_id())
+ return false;
+ /* clear old ueid for bpftest */
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+ /* smc-loopback required ueid */
+ return smc_ueid(SMC_NETLINK_ADD_UEID);
+}
+
+static void cleanup_ueid(void)
+{
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+}
+#endif /* __s390x__ */
+
+static bool setup_netns(void)
+{
+ test_netns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
+ goto fail_netns;
+
+ SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
+ SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
+
+ return true;
+fail_ip:
+ netns_free(test_netns);
+fail_netns:
+ return false;
+}
+
+static void cleanup_netns(void)
+{
+ netns_free(test_netns);
+}
+
+static bool setup_smc(void)
+{
+ if (!setup_ueid())
+ return false;
+
+ if (!setup_netns())
+ goto fail_netns;
+
+ return true;
+fail_netns:
+ cleanup_ueid();
+ return false;
+}
+
+static int set_client_addr_cb(int fd, void *opts)
+{
+ const char *src = (const char *)opts;
+ struct sockaddr_in localaddr;
+
+ localaddr.sin_family = AF_INET;
+ localaddr.sin_port = htons(0);
+ localaddr.sin_addr.s_addr = inet_addr(src);
+ return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind");
+}
+
+static void run_link(const char *src, const char *dst, int port)
+{
+ struct network_helper_opts opts = {0};
+ int server, client;
+
+ server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
+ if (!ASSERT_OK_FD(server, "start service_1"))
+ return;
+
+ opts.proto = IPPROTO_TCP;
+ opts.post_socket_cb = set_client_addr_cb;
+ opts.cb_opts = (void *)src;
+
+ client = connect_to_fd_opts(server, &opts);
+ if (!ASSERT_OK_FD(client, "start connect"))
+ goto fail_client;
+
+ close(client);
+fail_client:
+ close(server);
+}
+
+static void block_link(int map_fd, const char *src, const char *dst)
+{
+ struct smc_policy_ip_value val = { .mode = /* block */ 0 };
+ struct smc_policy_ip_key key = {
+ .sip = inet_addr(src),
+ .dip = inet_addr(dst),
+ };
+
+ bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ * +-------------> service_1
+ * link 1 | |
+ * +--------------------> server | link 2
+ * | | V
+ * | +-------------> service_2
+ * | link 3
+ * client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because the mode of this link is
+ * kind of short-link services.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ * unavailable and needs to go through a long timeout before it can fallback
+ * to TCP.
+ * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+ struct bpf_smc *skel;
+ int rc, map_fd;
+
+ skel = bpf_smc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+ return;
+
+ rc = bpf_smc__attach(skel);
+ if (!ASSERT_OK(rc, "bpf_smc__attach"))
+ goto fail;
+
+ map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+ if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+ goto fail;
+
+ /* Mock the process of transparent replacement, since we will modify
+ * protocol to ipproto_smc accropding to it via
+ * fmod_ret/update_socket_protocol.
+ */
+ write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+
+ /* Configure ip strat */
+ block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+ block_link(map_fd, SERVER_IP, SERVER_IP);
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+ /* should go with smc fallback */
+ run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc fallback */
+ run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+ bpf_smc__destroy(skel);
+}
+
+void test_bpf_smc(void)
+{
+ if (!setup_smc()) {
+ printf("setup for smc test failed, test SKIP:\n");
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("topo"))
+ test_topo();
+
+ cleanup_ueid();
+ cleanup_netns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = sizeof(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = sizeof(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2a27f3714f5c..bdc4fc06bc5a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -139,7 +139,7 @@ static void test_lsm_tailcall(void)
if (CHECK_FAIL(!err))
goto close_prog;
- prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+ prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
if (CHECK_FAIL(prog_fd < 0))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
new file mode 100644
index 000000000000..fd8762ba4b67
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_id_ops_mapping1.skel.h"
+#include "struct_ops_id_ops_mapping2.skel.h"
+
+static void test_st_ops_id_ops_mapping(void)
+{
+ struct struct_ops_id_ops_mapping1 *skel1 = NULL;
+ struct struct_ops_id_ops_mapping2 *skel2 = NULL;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err, pid, prog1_fd, prog2_fd;
+
+ skel1 = struct_ops_id_ops_mapping1__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open"))
+ goto out;
+
+ skel2 = struct_ops_id_ops_mapping2__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open"))
+ goto out;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel1->bss->st_ops_id = info.id;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel2->bss->st_ops_id = info.id;
+
+ err = struct_ops_id_ops_mapping1__attach(skel1);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach"))
+ goto out;
+
+ err = struct_ops_id_ops_mapping2__attach(skel2);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel1->bss->test_pid = pid;
+ skel2->bss->test_pid = pid;
+ sys_gettid();
+ skel1->bss->test_pid = 0;
+ skel2->bss->test_pid = 0;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ prog1_fd = bpf_program__fd(skel1->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog1_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ prog2_fd = bpf_program__fd(skel2->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog2_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err");
+ ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err");
+
+out:
+ struct_ops_id_ops_mapping1__destroy(skel1);
+ struct_ops_id_ops_mapping2__destroy(skel2);
+}
+
+void test_struct_ops_id_ops_mapping(void)
+{
+ if (test__start_subtest("st_ops_id_ops_mapping"))
+ test_st_ops_id_ops_mapping();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
new file mode 100644
index 000000000000..467cc72a3588
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
@@ -0,0 +1,16 @@
+#include <test_progs.h>
+
+#include "struct_ops_kptr_return.skel.h"
+#include "struct_ops_kptr_return_fail__wrong_type.skel.h"
+#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h"
+#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h"
+#include "struct_ops_kptr_return_fail__local_kptr.skel.h"
+
+void test_struct_ops_kptr_return(void)
+{
+ RUN_TESTS(struct_ops_kptr_return);
+ RUN_TESTS(struct_ops_kptr_return_fail__wrong_type);
+ RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar);
+ RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset);
+ RUN_TESTS(struct_ops_kptr_return_fail__local_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
new file mode 100644
index 000000000000..da60c715fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
@@ -0,0 +1,14 @@
+#include <test_progs.h>
+
+#include "struct_ops_refcounted.skel.h"
+#include "struct_ops_refcounted_fail__ref_leak.skel.h"
+#include "struct_ops_refcounted_fail__global_subprog.skel.h"
+#include "struct_ops_refcounted_fail__tail_call.skel.h"
+
+void test_struct_ops_refcounted(void)
+{
+ RUN_TESTS(struct_ops_refcounted);
+ RUN_TESTS(struct_ops_refcounted_fail__ref_leak);
+ RUN_TESTS(struct_ops_refcounted_fail__global_subprog);
+ RUN_TESTS(struct_ops_refcounted_fail__tail_call);
+}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index bcdbd27f22f0..273dd41ca09e 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -1,22 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include <bpf/bpf_endian.h>
-#include "bpf_util.h"
+#include "test_progs.h"
#include "cgroup_helpers.h"
-#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -1608,26 +1594,19 @@ static int run_tests(int cgfd)
return fails ? -1 : 0;
}
-int main(int argc, char **argv)
+void test_sysctl(void)
{
- int cgfd = -1;
- int err = 0;
+ int cgfd;
cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
+ if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup"))
+ goto out;
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ if (!ASSERT_OK(run_tests(cgfd), "run_tests"))
+ goto out;
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
- return err;
+ return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
new file mode 100644
index 000000000000..9fd6306b455c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#define TLD_FREE_DATA_ON_THREAD_EXIT
+#define TLD_DYN_DATA_SIZE 4096
+#include "task_local_data.h"
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+#include "test_task_local_data.skel.h"
+
+TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
+
+/*
+ * Reset task local data between subtests by clearing metadata other
+ * than the statically defined value0. This is safe as subtests run
+ * sequentially. Users of task local data library should not touch
+ * library internal.
+ */
+static void reset_tld(void)
+{
+ if (TLD_READ_ONCE(tld_meta_p)) {
+ /* Remove TLDs created by tld_create_key() */
+ tld_meta_p->cnt = 1;
+ tld_meta_p->size = TLD_DYN_DATA_SIZE;
+ memset(&tld_meta_p->metadata[1], 0,
+ (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata));
+ }
+}
+
+/* Serialize access to bpf program's global variables */
+static pthread_mutex_t global_mutex;
+
+static tld_key_t *tld_keys;
+
+#define TEST_BASIC_THREAD_NUM 32
+
+void *test_task_local_data_basic_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct test_task_local_data *skel = (struct test_task_local_data *)arg;
+ int fd, err, tid, *value0, *value1;
+ struct test_tld_struct *value2;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ value0 = tld_get_data(fd, value0_key);
+ if (!ASSERT_OK_PTR(value0, "tld_get_data"))
+ goto out;
+
+ value1 = tld_get_data(fd, tld_keys[1]);
+ if (!ASSERT_OK_PTR(value1, "tld_get_data"))
+ goto out;
+
+ value2 = tld_get_data(fd, tld_keys[2]);
+ if (!ASSERT_OK_PTR(value2, "tld_get_data"))
+ goto out;
+
+ tid = sys_gettid();
+
+ *value0 = tid + 0;
+ *value1 = tid + 1;
+ value2->a = tid + 2;
+ value2->b = tid + 3;
+ value2->c = tid + 4;
+ value2->d = tid + 5;
+
+ pthread_mutex_lock(&global_mutex);
+ /* Run task_main that read task local data and save to global variables */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+ /* Make sure valueX are indeed local to threads */
+ ASSERT_EQ(*value0, tid + 0, "value0");
+ ASSERT_EQ(*value1, tid + 1, "value1");
+ ASSERT_EQ(value2->a, tid + 2, "value2.a");
+ ASSERT_EQ(value2->b, tid + 3, "value2.b");
+ ASSERT_EQ(value2->c, tid + 4, "value2.c");
+ ASSERT_EQ(value2->d, tid + 5, "value2.d");
+
+ *value0 = tid + 5;
+ *value1 = tid + 4;
+ value2->a = tid + 3;
+ value2->b = tid + 2;
+ value2->c = tid + 1;
+ value2->d = tid + 0;
+
+ /* Run task_main again */
+ pthread_mutex_lock(&global_mutex);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+out:
+ pthread_exit(NULL);
+}
+
+static void test_task_local_data_basic(void)
+{
+ struct test_task_local_data *skel;
+ pthread_t thread[TEST_BASIC_THREAD_NUM];
+ char dummy_key_name[TLD_NAME_LEN];
+ tld_key_t key;
+ int i, err;
+
+ reset_tld();
+
+ ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init");
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[1] = tld_create_key("value1", sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key");
+ tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key");
+
+ /*
+ * Shouldn't be able to store data exceed a page. Create a TLD just big
+ * enough to exceed a page. TLDs already created are int value0, int
+ * value1, and struct test_tld_struct value2.
+ */
+ key = tld_create_key("value_not_exist",
+ TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1);
+ ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key");
+
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key");
+
+ /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */
+ for (i = 3; i < TLD_MAX_DATA_CNT; i++) {
+ snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i);
+ tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key");
+ }
+ key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key");
+
+ /* Access TLDs from multiple threads and check if they are thread-specific */
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) {
+ err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto out;
+ }
+
+out:
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++)
+ pthread_join(thread[i], NULL);
+
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3)
+
+void *test_task_local_data_race_thread(void *arg)
+{
+ int err = 0, id = (intptr_t)arg;
+ char key_name[32];
+ tld_key_t key;
+
+ key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1);
+ if (tld_key_err_or_zero(key) != -E2BIG) {
+ err = 1;
+ goto out;
+ }
+
+ /* Only one thread will succeed in creating value1 */
+ key = tld_create_key("value1", sizeof(int));
+ if (!tld_key_is_err(key))
+ tld_keys[1] = key;
+
+ /* Only one thread will succeed in creating value2 */
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ if (!tld_key_is_err(key))
+ tld_keys[2] = key;
+
+ snprintf(key_name, 32, "thread_%d", id);
+ tld_keys[id] = tld_create_key(key_name, sizeof(int));
+ if (tld_key_is_err(tld_keys[id]))
+ err = 2;
+out:
+ return (void *)(intptr_t)err;
+}
+
+static void test_task_local_data_race(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ pthread_t thread[TEST_RACE_THREAD_NUM];
+ struct test_task_local_data *skel;
+ int fd, i, j, err, *data;
+ void *ret = NULL;
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[0] = value0_key;
+
+ for (j = 0; j < 100; j++) {
+ reset_tld();
+
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ /*
+ * Try to make tld_create_key() race with each other. Call
+ * tld_create_key(), both valid and invalid, from different threads.
+ */
+ err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread,
+ (void *)(intptr_t)(i + 3));
+ if (CHECK_FAIL(err))
+ break;
+ }
+
+ /* Wait for all tld_create_key() to return */
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ pthread_join(thread[i], &ret);
+ if (CHECK_FAIL(ret))
+ break;
+ }
+
+ /* Write a unique number to each TLD */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(!data))
+ break;
+ *data = i;
+ }
+
+ /* Read TLDs and check the value to see if any address collides with another */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(*data != i))
+ break;
+ }
+
+ /* Run task_main to make sure no invalid TLDs are added */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+ }
+out:
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+void test_task_local_data(void)
+{
+ if (test__start_subtest("task_local_data_basic"))
+ test_task_local_data_basic();
+ if (test__start_subtest("task_local_data_race"))
+ test_task_local_data_race();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
new file mode 100644
index 000000000000..774b31a5f6ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work.skel.h"
+#include "task_work_fail.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+
+static int perf_event_open(__u32 type, __u64 config, int pid)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 100000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+}
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+static int verify_map(struct bpf_map *map, const char *expected_data)
+{
+ int err;
+ struct elem value;
+ int processed_values = 0;
+ int k, sz;
+
+ sz = bpf_map__max_entries(map);
+ for (k = 0; k < sz; ++k) {
+ err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0);
+ if (err)
+ continue;
+ if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) {
+ fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data,
+ value.data, bpf_map__name(map));
+ return 2;
+ }
+ processed_values++;
+ }
+
+ return processed_values == 0;
+}
+
+static void task_work_run(const char *prog_name, const char *map_name)
+{
+ struct task_work *skel;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ struct bpf_link *link = NULL;
+ int err, pe_fd = -1, pid, status, pipefd[2];
+ char user_string[] = "hello world";
+
+ if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe"))
+ return;
+
+ pid = fork();
+ if (pid == 0) {
+ __u64 num = 1;
+ int i;
+ char buf;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, sizeof(buf));
+ close(pipefd[0]);
+
+ for (i = 0; i < 10000; ++i)
+ num *= time(0) % 7;
+ (void)num;
+ exit(0);
+ }
+ if (!ASSERT_GT(pid, 0, "fork() failed")) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return;
+ }
+
+ skel = task_work__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, false);
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "prog_name"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
+ skel->bss->user_ptr = (char *)user_string;
+
+ err = task_work__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid);
+ if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) {
+ fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ /* perf event fd ownership is passed to bpf_link */
+ pe_fd = -1;
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ /* Wait to collect some samples */
+ waitpid(pid, &status, 0);
+ pid = 0;
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "find map_name"))
+ goto cleanup;
+ if (!ASSERT_OK(verify_map(map, user_string), "verify map"))
+ goto cleanup;
+cleanup:
+ if (pe_fd >= 0)
+ close(pe_fd);
+ bpf_link__destroy(link);
+ task_work__destroy(skel);
+ if (pid > 0) {
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ waitpid(pid, &status, 0);
+ }
+}
+
+void test_task_work(void)
+{
+ if (test__start_subtest("test_task_work_hash_map"))
+ task_work_run("oncpu_hash_map", "hmap");
+
+ if (test__start_subtest("test_task_work_array_map"))
+ task_work_run("oncpu_array_map", "arrmap");
+
+ if (test__start_subtest("test_task_work_lru_map"))
+ task_work_run("oncpu_lru_map", "lrumap");
+
+ RUN_TESTS(task_work_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
new file mode 100644
index 000000000000..462512fb191f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * BPF-based flow shaping
+ *
+ * The test brings up two veth in two isolated namespaces, attach some flow
+ * shaping program onto it, and ensures that a manual speedtest maximum
+ * value matches the rate set in the BPF shapers.
+ */
+
+#include <asm-generic/socket.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_edt.skel.h"
+
+#define SERVER_NS "tc-edt-server-ns"
+#define CLIENT_NS "tc-edt-client-ns"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP4_ADDR_VETH2_HEX 0xC0A80102
+
+#define TIMEOUT_MS 2000
+#define TEST_PORT 9000
+#define TARGET_RATE_MBPS 5.0
+#define TX_BYTES_COUNT (1 * 1000 * 1000)
+#define RATE_ERROR_PERCENT 2.0
+
+struct connection {
+ int server_listen_fd;
+ int server_conn_fd;
+ int client_conn_fd;
+};
+
+static int setup(struct test_tc_edt *skel)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret;
+
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
+ goto fail;
+ if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail_delete_client_ns;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_server_ns;
+ SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s",
+ "veth2 netns " SERVER_NS);
+ SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip link set veth1 up");
+
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "enter server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns, "ip link set veth2 up");
+ SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq");
+ ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog));
+ if (!ASSERT_OK(ret, "attach bpf prog"))
+ goto fail_close_server_ns;
+ skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000;
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+
+ return 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail_delete_server_ns:
+ remove_netns(SERVER_NS);
+fail_delete_client_ns:
+ remove_netns(CLIENT_NS);
+fail:
+ return -1;
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
+static void run_test(void)
+{
+ int server_fd, client_fd, err;
+ double rate_mbps, rate_error;
+ struct nstoken *nstoken;
+ __u64 ts_start, ts_end;
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2,
+ TEST_PORT, TIMEOUT_MS);
+ if (!ASSERT_OK_FD(server_fd, "start server"))
+ return;
+
+ close_netns(nstoken);
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ return;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_OK_FD(client_fd, "connect client"))
+ return;
+
+ ts_start = get_time_ns();
+ err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT);
+ ts_end = get_time_ns();
+ close_netns(nstoken);
+ ASSERT_OK(err, "send_recv_data");
+
+ rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0);
+ rate_error =
+ fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS);
+
+ ASSERT_LE(rate_error, RATE_ERROR_PERCENT,
+ "rate error is lower than threshold");
+}
+
+void test_tc_edt(void)
+{
+ struct test_tc_edt *skel;
+
+ skel = test_tc_edt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "global setup"))
+ return;
+
+ run_test();
+
+ cleanup();
+ test_tc_edt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
new file mode 100644
index 000000000000..0fe0a8f62486
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -0,0 +1,714 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ * The file tests BPF network tunnels implementation. For each tunnel
+ * type, the test validates that:
+ * - basic communication can first be established between the two veths
+ * - when adding a BPF-based encapsulation on client egress, it now fails
+ * to communicate with the server
+ * - when adding a kernel-based decapsulation on server ingress, client
+ * can now connect
+ * - when replacing the kernel-based decapsulation with a BPF-based one,
+ * the client can still connect
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_tunnel.skel.h"
+
+#define SERVER_NS "tc-tunnel-server-ns"
+#define CLIENT_NS "tc-tunnel-client-ns"
+#define MAC_ADDR_VETH1 "00:11:22:33:44:55"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP6_ADDR_VETH1 "fd::1"
+#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP6_ADDR_VETH2 "fd::2"
+
+#define TEST_NAME_MAX_LEN 64
+#define PROG_NAME_MAX_LEN 64
+#define TUNNEL_ARGS_MAX_LEN 128
+#define BUFFER_LEN 2000
+#define DEFAULT_TEST_DATA_SIZE 100
+#define GSO_TEST_DATA_SIZE BUFFER_LEN
+
+#define TIMEOUT_MS 1000
+#define TEST_PORT 8000
+#define UDP_PORT 5555
+#define MPLS_UDP_PORT 6635
+#define FOU_MPLS_PROTO 137
+#define VXLAN_ID 1
+#define VXLAN_PORT 8472
+#define MPLS_TABLE_ENTRIES_COUNT 65536
+
+static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN];
+
+struct subtest_cfg {
+ char *ebpf_tun_type;
+ char *iproute_tun_type;
+ char *mac_tun_type;
+ int ipproto;
+ void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
+ bool tunnel_need_veth_mac;
+ bool configure_fou_rx_port;
+ char *tmode;
+ bool expect_kern_decap_failure;
+ bool configure_mpls;
+ bool test_gso;
+ char *tunnel_client_addr;
+ char *tunnel_server_addr;
+ char name[TEST_NAME_MAX_LEN];
+ char *server_addr;
+ int client_egress_prog_fd;
+ int server_ingress_prog_fd;
+ char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN];
+ int server_fd;
+};
+
+struct connection {
+ int client_fd;
+ int server_fd;
+};
+
+static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
+{
+ int ret;
+
+ ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
+ cfg->mac_tun_type);
+
+ return ret < 0 ? ret : 0;
+}
+
+static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
+{
+ char prog_name[PROG_NAME_MAX_LEN];
+ struct bpf_program *prog;
+ int ret;
+
+ ret = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_");
+ if (ret < 0)
+ return ret;
+ ret = build_subtest_name(cfg, prog_name + ret, PROG_NAME_MAX_LEN - ret);
+ if (ret < 0)
+ return ret;
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog)
+ return -1;
+
+ cfg->client_egress_prog_fd = bpf_program__fd(prog);
+ cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);
+ return 0;
+}
+
+static void set_subtest_addresses(struct subtest_cfg *cfg)
+{
+ if (cfg->ipproto == 6)
+ cfg->server_addr = IP6_ADDR_VETH2;
+ else
+ cfg->server_addr = IP4_ADDR_VETH2;
+
+ /* Some specific tunnel types need specific addressing, it then
+ * has been already set in the configuration table. Otherwise,
+ * deduce the relevant addressing from the ipproto
+ */
+ if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
+ return;
+
+ if (cfg->ipproto == 6) {
+ cfg->tunnel_client_addr = IP6_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP6_ADDR_VETH2;
+ } else {
+ cfg->tunnel_client_addr = IP4_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP4_ADDR_VETH2;
+ }
+}
+
+static int run_server(struct subtest_cfg *cfg)
+{
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct nstoken *nstoken;
+ struct network_helper_opts opts = {
+ .timeout_ms = TIMEOUT_MS
+ };
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return -1;
+
+ cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+ close_netns(nstoken);
+ if (!ASSERT_OK_FD(cfg->server_fd, "start server"))
+ return -1;
+
+ return 0;
+}
+
+static int check_server_rx_data(struct subtest_cfg *cfg,
+ struct connection *conn, int len)
+{
+ int err;
+
+ memset(rx_buffer, 0, BUFFER_LEN);
+ err = recv(conn->server_fd, rx_buffer, len, 0);
+ if (!ASSERT_EQ(err, len, "check rx data len"))
+ return 1;
+ if (!ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data"))
+ return 1;
+ return 0;
+}
+
+static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
+{
+ struct network_helper_opts opts = {.timeout_ms = 500};
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct connection *conn = NULL;
+ int client_fd, server_fd;
+
+ conn = malloc(sizeof(struct connection));
+ if (!conn)
+ return conn;
+
+ client_fd = connect_to_addr_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+
+ if (client_fd < 0) {
+ free(conn);
+ return NULL;
+ }
+
+ server_fd = accept(cfg->server_fd, NULL, NULL);
+ if (server_fd < 0) {
+ close(client_fd);
+ free(conn);
+ return NULL;
+ }
+
+ conn->server_fd = server_fd;
+ conn->client_fd = client_fd;
+
+ return conn;
+}
+
+static void disconnect_client_from_server(struct subtest_cfg *cfg,
+ struct connection *conn)
+{
+ close(conn->server_fd);
+ close(conn->client_fd);
+ free(conn);
+}
+
+static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
+{
+ struct connection *conn;
+ int err, res = -1;
+
+ conn = connect_client_to_server(cfg);
+ if (!must_succeed && !ASSERT_ERR_PTR(conn, "connection that must fail"))
+ goto end;
+ else if (!must_succeed)
+ return 0;
+
+ if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
+ return -1;
+
+ err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, DEFAULT_TEST_DATA_SIZE, "send data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ if (!cfg->test_gso) {
+ res = 0;
+ goto end;
+ }
+
+ err = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, GSO_TEST_DATA_SIZE, "send (large) data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ res = 0;
+end:
+ disconnect_client_from_server(cfg, conn);
+ return res;
+}
+
+static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
+ VXLAN_ID, VXLAN_PORT);
+}
+
+static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ bool is_mpls = !strcmp(cfg->mac_tun_type, "mpls");
+
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN,
+ "encap fou encap-sport auto encap-dport %d",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT);
+}
+
+static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
+{
+ bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
+ int fou_proto;
+
+ if (is_mpls)
+ fou_proto = FOU_MPLS_PROTO;
+ else
+ fou_proto = cfg->ipproto == 6 ? 41 : 4;
+
+ SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
+ cfg->ipproto == 6 ? " -6" : "");
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, true);
+}
+
+static int del_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, false);
+}
+
+static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
+{
+ SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2);
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
+{
+ SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
+ MPLS_TABLE_ENTRIES_COUNT);
+ SYS(fail, "ip -f mpls route add 1000 dev lo");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1");
+ SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_encapsulation(struct subtest_cfg *cfg)
+{
+ int ret;
+
+ ret = tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
+
+ return ret;
+}
+
+static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (cfg->configure_fou_rx_port &&
+ !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
+ goto fail;
+ SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s",
+ cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
+ cfg->tunnel_client_addr, cfg->tunnel_server_addr,
+ cfg->extra_decap_mod_args);
+ if (cfg->tunnel_need_veth_mac &&
+ !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
+ goto fail;
+ if (cfg->configure_mpls &&
+ (!ASSERT_OK(configure_kernel_for_mpls(cfg),
+ "configure MPLS decap")))
+ goto fail;
+ SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
+ SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
+ SYS(fail, "ip link set dev testtun0 up");
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ SYS_NOFAIL("ip link del testtun0");
+ if (cfg->configure_mpls)
+ SYS_NOFAIL("ip -f mpls route del 1000 dev lo");
+ if (cfg->configure_fou_rx_port)
+ del_fou_rx_port(cfg);
+}
+
+static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (!cfg->expect_kern_decap_failure)
+ SYS(fail, "ip link del testtun0");
+
+ if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
+ "attach_program"))
+ goto fail;
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void run_test(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ if (!ASSERT_OK(run_server(cfg), "run server"))
+ return;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ goto fail;
+
+ /* Basic communication must work */
+ if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
+ goto fail;
+
+ /* Attach encapsulation program to client */
+ if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
+ goto fail;
+
+ /* If supported, insert kernel decap module, connection must succeed */
+ if (!cfg->expect_kern_decap_failure) {
+ if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
+ "configure kernel decapsulation"))
+ goto fail;
+ if (!ASSERT_OK(send_and_test_data(cfg, true),
+ "connect with encap prog and kern decap"))
+ goto fail;
+ }
+
+ /* Replace kernel decapsulation with BPF decapsulation, test must pass */
+ if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
+ goto fail;
+ ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+
+fail:
+ close_netns(nstoken);
+ close(cfg->server_fd);
+}
+
+static int setup(void)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int fd, err;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open urandom"))
+ goto fail;
+ err = read(fd, tx_buffer, BUFFER_LEN);
+ close(fd);
+
+ if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
+ goto fail;
+
+ /* Configure the testing network */
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") ||
+ !ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_ns;
+ SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
+ "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
+ "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
+ SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
+ SYS(fail_close_ns_client, "ip link set veth1 up");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_ns_client;
+ SYS(fail_close_ns_server, "ip link set veth2 up");
+
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+ return 0;
+
+fail_close_ns_server:
+ close_netns(nstoken_server);
+fail_close_ns_client:
+ close_netns(nstoken_client);
+fail_delete_ns:
+ SYS_NOFAIL("ip netns del " CLIENT_NS);
+ SYS_NOFAIL("ip netns del " SERVER_NS);
+fail:
+ return -1;
+}
+
+static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret = -1;
+
+ set_subtest_addresses(cfg);
+ if (!ASSERT_OK(set_subtest_progs(cfg, skel),
+ "find subtest progs"))
+ goto fail;
+ if (cfg->extra_decap_mod_args_cb)
+ cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail;
+ SYS(fail_close_client_ns,
+ "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip -4 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
+ SYS(fail_close_client_ns,
+ "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
+ SYS(fail_close_client_ns, "ip -6 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns,
+ "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns,
+ "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
+
+ ret = 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail:
+ return ret;
+}
+
+
+static void subtest_cleanup(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (ASSERT_OK_PTR(nstoken, "open clien ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth1");
+ close_netns(nstoken);
+ }
+ nstoken = open_netns(SERVER_NS);
+ if (ASSERT_OK_PTR(nstoken, "open clien ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth2");
+ if (!cfg->expect_kern_decap_failure)
+ remove_kernel_decapsulation(cfg);
+ close_netns(nstoken);
+ }
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
+static struct subtest_cfg subtests_cfg[] = {
+ {
+ .ebpf_tun_type = "ipip",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ },
+ {
+ .ebpf_tun_type = "ipip6",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 4,
+ .tunnel_client_addr = IP6_ADDR_VETH1,
+ .tunnel_server_addr = IP6_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "ip6tnl",
+ .iproute_tun_type = "ip6tnl",
+ .mac_tun_type = "none",
+ .ipproto = 6,
+ },
+ {
+ .mac_tun_type = "none",
+ .ebpf_tun_type = "sit",
+ .iproute_tun_type = "sit",
+ .ipproto = 6,
+ .tunnel_client_addr = IP4_ADDR_VETH1,
+ .tunnel_server_addr = IP4_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "ip6vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "gretap",
+ .ipproto = 4,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .test_gso = true,
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6gretap",
+ .ipproto = 6,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+};
+
+void test_tc_tunnel(void)
+{
+ struct test_tc_tunnel *skel;
+ struct subtest_cfg *cfg;
+ int i, ret;
+
+ skel = test_tc_tunnel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(), "global setup"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
+ cfg = &subtests_cfg[i];
+ ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
+ if (ret < 0 || !test__start_subtest(cfg->name))
+ continue;
+ if (subtest_setup(skel, cfg) == 0)
+ run_test(cfg);
+ subtest_cleanup(cfg);
+ }
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index cec746e77cd3..eb9309931272 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -71,6 +71,8 @@
#define IP4_ADDR2_VETH1 "172.16.1.20"
#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+#define IP6_ADDR_TUNL_DEV0 "fc80::100"
+#define IP6_ADDR_TUNL_DEV1 "fc80::200"
#define IP6_ADDR_VETH0 "::11"
#define IP6_ADDR1_VETH1 "::22"
@@ -98,6 +100,27 @@
#define XFRM_SPI_IN_TO_OUT 0x1
#define XFRM_SPI_OUT_TO_IN 0x2
+#define GRE_TUNL_DEV0 "gre00"
+#define GRE_TUNL_DEV1 "gre11"
+
+#define IP6GRE_TUNL_DEV0 "ip6gre00"
+#define IP6GRE_TUNL_DEV1 "ip6gre11"
+
+#define ERSPAN_TUNL_DEV0 "erspan00"
+#define ERSPAN_TUNL_DEV1 "erspan11"
+
+#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00"
+#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11"
+
+#define GENEVE_TUNL_DEV0 "geneve00"
+#define GENEVE_TUNL_DEV1 "geneve11"
+
+#define IP6GENEVE_TUNL_DEV0 "ip6geneve00"
+#define IP6GENEVE_TUNL_DEV1 "ip6geneve11"
+
+#define IP6TNL_TUNL_DEV0 "ip6tnl00"
+#define IP6TNL_TUNL_DEV1 "ip6tnl11"
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -216,6 +239,18 @@ fail:
return -1;
}
+static int set_ipv4_addr(const char *dev0, const char *dev1)
+{
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip link set dev %s up", dev1);
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return 1;
+}
+
static int add_ipip_tunnel(enum ipip_encap encap)
{
int err;
@@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void)
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
+static int add_ipv4_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static void delete_tunnel(const char *dev0, const char *dev1)
+{
+ if (!dev0 || !dev1)
+ return;
+
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0);
+ SYS_NOFAIL("ip link delete dev %s", dev1);
+}
+
+static int set_ipv6_addr(const char *dev0, const char *dev1)
+{
+ /* disable IPv6 DAD because it might take too long and fail tests */
+ SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0);
+ SYS(fail, "ip -n at_ns0 link set dev veth0 up");
+ SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", dev1);
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipv6_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s",
+ dev0, type, opt, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_ip6geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s",
+ dev0, type, opt, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -364,37 +492,46 @@ fail:
return -1;
}
-static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+static void ping_dev0(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
- .priority = 1, .prog_fd = igr_fd);
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
- .priority = 1, .prog_fd = egr_fd);
- int ret;
+ /* ping from root namespace test */
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+}
- ret = bpf_tc_hook_create(hook);
- if (!ASSERT_OK(ret, "create tc hook"))
- return ret;
+static void ping_dev1(void)
+{
+ struct nstoken *nstoken;
- if (igr_fd >= 0) {
- hook->attach_point = BPF_TC_INGRESS;
- ret = bpf_tc_attach(hook, &opts1);
- if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
- return ret;
- }
- }
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
- if (egr_fd >= 0) {
- hook->attach_point = BPF_TC_EGRESS;
- ret = bpf_tc_attach(hook, &opts2);
- if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
- return ret;
- }
- }
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
- return 0;
+static void ping6_veth0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_VETH0);
+}
+
+static void ping6_dev0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0);
+}
+
+static void ping6_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP6_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
}
static void test_vxlan_tunnel(void)
@@ -404,11 +541,9 @@ static void test_vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_vxlan_tunnel();
@@ -419,42 +554,22 @@ static void test_vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach bpf prog to veth dev tc hook point */
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ if (tc_prog_attach("veth1", set_dst_prog_fd, -1))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -468,9 +583,7 @@ static void test_vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete vxlan tunnel */
@@ -488,11 +601,9 @@ static void test_ip6vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_ip6vxlan_tunnel();
@@ -503,31 +614,17 @@ static void test_ip6vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -541,9 +638,7 @@ static void test_ip6vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete ipv6 vxlan tunnel */
@@ -557,12 +652,8 @@ done:
static void test_ipip_tunnel(enum ipip_encap encap)
{
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int set_src_prog_fd, get_src_prog_fd;
- int ifindex = -1;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add ipip tunnel */
err = add_ipip_tunnel(encap);
@@ -573,10 +664,6 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IPIP_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
switch (encap) {
case FOU:
@@ -598,26 +685,11 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel->progs.ipip_set_tunnel);
}
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
- goto done;
-
- /* ping from root namespace test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
+ if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
- close_netns(nstoken);
+ ping_dev0();
+ ping_dev1();
done:
/* delete ipip tunnel */
@@ -628,11 +700,8 @@ done:
static void test_xfrm_tunnel(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int xdp_prog_fd;
int tc_prog_fd;
int ifindex;
@@ -646,19 +715,16 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
/* attach tc prog to tunnel dev */
- tc_hook.ifindex = ifindex;
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
- if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ if (tc_prog_attach("veth1", tc_prog_fd, -1))
goto done;
/* attach xdp prog to tunnel dev */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
goto done;
@@ -666,14 +732,7 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK(err, "bpf_xdp_attach"))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- close_netns(nstoken);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev1();
if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
goto done;
@@ -690,6 +749,281 @@ done:
test_tunnel_kern__destroy(skel);
}
+enum gre_test {
+ GRE,
+ GRE_NOKEY,
+ GRETAP,
+ GRETAP_NOKEY,
+};
+
+static void test_gre_tunnel(enum gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case GRE:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRE_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6gre_test {
+ IP6GRE,
+ IP6GRETAP
+};
+
+static void test_ip6gre_tunnel(enum ip6gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case IP6GRE:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gre", "flowlabel 0xbcdef key 2");
+ break;
+ case IP6GRETAP:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gretap", "flowlabel 0xbcdef key 2");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
+ if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping6_dev1();
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum erspan_test {
+ V1,
+ V2
+};
+
+static void test_erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
+ if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
+ if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1,
+ "geneve", "dstport 6081");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
+ if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1,
+ "geneve", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
+ if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6tnl_test {
+ IPIP6,
+ IP6IP6
+};
+
+static void test_ip6tnl_tunnel(enum ip6tnl_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ switch (test) {
+ case IPIP6:
+ set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel);
+ break;
+ case IP6IP6:
+ set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
+ break;
+ }
+ if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ switch (test) {
+ case IPIP6:
+ ping_dev0();
+ ping_dev1();
+ break;
+ case IP6IP6:
+ ping6_dev0();
+ ping6_dev1();
+ break;
+ }
+
+done:
+ delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
@@ -707,6 +1041,20 @@ static void *test_tunnel_run_tests(void *arg)
RUN_TEST(ipip_tunnel, FOU);
RUN_TEST(ipip_tunnel, GUE);
RUN_TEST(xfrm_tunnel);
+ RUN_TEST(gre_tunnel, GRE);
+ RUN_TEST(gre_tunnel, GRE_NOKEY);
+ RUN_TEST(gre_tunnel, GRETAP);
+ RUN_TEST(gre_tunnel, GRETAP_NOKEY);
+ RUN_TEST(ip6gre_tunnel, IP6GRE);
+ RUN_TEST(ip6gre_tunnel, IP6GRETAP);
+ RUN_TEST(erspan_tunnel, V1);
+ RUN_TEST(erspan_tunnel, V2);
+ RUN_TEST(ip6erspan_tunnel, V1);
+ RUN_TEST(ip6erspan_tunnel, V2);
+ RUN_TEST(geneve_tunnel);
+ RUN_TEST(ip6geneve_tunnel);
+ RUN_TEST(ip6tnl_tunnel, IPIP6);
+ RUN_TEST(ip6tnl_tunnel, IP6IP6);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
new file mode 100644
index 000000000000..b38c16b4247f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+
+#define __CHECK_STR(str, name) \
+ do { \
+ if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \
+ goto out; \
+ } while (0)
+
+struct fixture {
+ char tmpfile[80];
+ int fd;
+ char *output;
+ size_t sz;
+ char veristat[80];
+};
+
+static struct fixture *init_fixture(void)
+{
+ struct fixture *fix = malloc(sizeof(struct fixture));
+
+ /* for no_alu32 and cpuv4 veristat is in parent folder */
+ if (access("./veristat", F_OK) == 0)
+ strcpy(fix->veristat, "./veristat");
+ else if (access("../veristat", F_OK) == 0)
+ strcpy(fix->veristat, "../veristat");
+ else
+ PRINT_FAIL("Can't find veristat binary");
+
+ snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX");
+ fix->fd = mkstemp(fix->tmpfile);
+ fix->sz = 1000000;
+ fix->output = malloc(fix->sz);
+ return fix;
+}
+
+static void teardown_fixture(struct fixture *fix)
+{
+ free(fix->output);
+ close(fix->fd);
+ remove(fix->tmpfile);
+ free(fix);
+}
+
+static void test_set_global_vars_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS(out,
+ "%s set_global_vars.bpf.o"\
+ " -G \"var_s64 = 0xf000000000000001\" "\
+ " -G \"var_u64 = 0xfedcba9876543210\" "\
+ " -G \"var_s32 = -0x80000000\" "\
+ " -G \"var_u32 = 0x76543210\" "\
+ " -G \"var_s16 = -32768\" "\
+ " -G \"var_u16 = 60652\" "\
+ " -G \"var_s8 = -128\" "\
+ " -G \"var_u8 = 255\" "\
+ " -G \"var_ea = EA2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec=EC2\" "\
+ " -G \"var_b = 1\" "\
+ " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
+ " -G \"arr[3]= 171\" " \
+ " -G \"arr[EA2] =172\" " \
+ " -G \"enum_arr[EC2]=EA3\" " \
+ " -G \"three_d[31][7][EA2]=173\"" \
+ " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \
+ " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \
+ " -vl2 > %s", fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=128 ", "var_s8 = -128");
+ __CHECK_STR("=255 ", "var_u8 = 255");
+ __CHECK_STR("=11 ", "var_ea = EA2");
+ __CHECK_STR("=12 ", "var_eb = EB2");
+ __CHECK_STR("=13 ", "var_ec = EC2");
+ __CHECK_STR("=1 ", "var_b = 1");
+ __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
+ __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("=171 ", "arr[3]= 171");
+ __CHECK_STR("=172 ", "arr[EA2] =172");
+ __CHECK_STR("=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_from_file_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+ char input_file[80];
+ const char *vars = "var_s16 = -32768\nvar_u16 = 60652";
+ int fd;
+
+ snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX");
+ fd = mkstemp(input_file);
+ if (!ASSERT_GE(fd, 0, "valid fd"))
+ goto out;
+
+ write(fd, vars, strlen(vars));
+ syncfs(fd);
+ SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
+ fix->veristat, input_file, fix->tmpfile);
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+
+out:
+ close(fd);
+ remove(input_file);
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_out_of_range(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_unsupported_ptr_array_type(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_out_of_bounds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Array index 99 is out of bounds", "arr[99]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_not_found(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_for_non_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_no_array_index_for_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+void test_veristat(void)
+{
+ if (test__start_subtest("set_global_vars_succeeds"))
+ test_set_global_vars_succeeds();
+
+ if (test__start_subtest("set_global_vars_out_of_range"))
+ test_set_global_vars_out_of_range();
+
+ if (test__start_subtest("set_global_vars_from_file_succeeds"))
+ test_set_global_vars_from_file_succeeds();
+
+ if (test__start_subtest("test_unsupported_ptr_array_type"))
+ test_unsupported_ptr_array_type();
+
+ if (test__start_subtest("test_array_out_of_bounds"))
+ test_array_out_of_bounds();
+
+ if (test__start_subtest("test_array_index_not_found"))
+ test_array_index_not_found();
+
+ if (test__start_subtest("test_array_index_for_non_array"))
+ test_array_index_for_non_array();
+
+ if (test__start_subtest("test_no_array_index_for_array"))
+ test_no_array_index_for_array();
+
+}
+
+#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 8d75424fe6bc..3e98a1665936 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -3,17 +3,50 @@
/* Create 3 namespaces with 3 veth peers, and forward packets in-between using
* native XDP
*
- * XDP_TX
- * NS1(veth11) NS2(veth22) NS3(veth33)
- * | | |
- * | | |
- * (veth1, (veth2, (veth3,
- * id:111) id:122) id:133)
- * ^ | ^ | ^ |
- * | | XDP_REDIRECT | | XDP_REDIRECT | |
- * | ------------------ ------------------ |
- * -----------------------------------------
- * XDP_REDIRECT
+ * Network topology:
+ * ---------- ---------- ----------
+ * | NS1 | | NS2 | | NS3 |
+ * | veth11 | | veth22 | | veth33 |
+ * ----|----- -----|---- -----|----
+ * | | |
+ * ----|------------------|----------------|----
+ * | veth1 veth2 veth3 |
+ * | |
+ * | NSO |
+ * ---------------------------------------------
+ *
+ * Test cases:
+ * - [test_xdp_veth_redirect] : ping veth33 from veth11
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_TX) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * ^ | ^ | ^ |
+ * | | | | | |
+ * | ------------------ ------------------ |
+ * -----------------------------------------
+ *
+ * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11
+ * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
+ * -> echo request received by all except veth11
+ * - IPv4 ping : BPF_F_BROADCAST
+ * -> echo request received by all veth
+ * - [test_xdp_veth_egress]:
+ * - all src mac should be the magic mac
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_PASS) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * | ^ ^
+ * | | |
+ * ----------------------------------------
+ *
*/
#define _GNU_SOURCE
@@ -22,192 +55,545 @@
#include "network_helpers.h"
#include "xdp_dummy.skel.h"
#include "xdp_redirect_map.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
#include "xdp_tx.skel.h"
+#include <uapi/linux/if_link.h>
#define VETH_PAIRS_COUNT 3
-#define NS_SUFFIX_LEN 6
-#define VETH_NAME_MAX_LEN 16
+#define VETH_NAME_MAX_LEN 32
+#define IP_MAX_LEN 16
#define IP_SRC "10.1.1.11"
#define IP_DST "10.1.1.33"
-#define IP_CMD_MAX_LEN 128
-
-struct skeletons {
- struct xdp_dummy *xdp_dummy;
- struct xdp_tx *xdp_tx;
- struct xdp_redirect_map *xdp_redirect_maps;
-};
+#define IP_NEIGH "10.1.1.253"
+#define PROG_NAME_MAX_LEN 128
+#define NS_NAME_MAX_LEN 32
struct veth_configuration {
char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */
char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/
- const char *namespace; /* Namespace for the remote veth */
- char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */
- char *remote_addr; /* IP address of the remote veth */
+ char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */
+ int next_veth; /* Local interface to redirect traffic to */
+ char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */
};
-static struct veth_configuration config[VETH_PAIRS_COUNT] = {
- {
- .local_veth = "veth1",
- .remote_veth = "veth11",
- .next_veth = "veth2",
- .remote_addr = IP_SRC,
- .namespace = "ns-veth11"
- },
- {
- .local_veth = "veth2",
- .remote_veth = "veth22",
- .next_veth = "veth3",
- .remote_addr = NULL,
- .namespace = "ns-veth22"
- },
+struct net_configuration {
+ char ns0_name[NS_NAME_MAX_LEN];
+ struct veth_configuration veth_cfg[VETH_PAIRS_COUNT];
+};
+
+static const struct net_configuration default_config = {
+ .ns0_name = "ns0-",
{
- .local_veth = "veth3",
- .remote_veth = "veth33",
- .next_veth = "veth1",
- .remote_addr = IP_DST,
- .namespace = "ns-veth33"
+ {
+ .local_veth = "veth1-",
+ .remote_veth = "veth11",
+ .next_veth = 1,
+ .remote_addr = IP_SRC,
+ .namespace = "ns-veth11-"
+ },
+ {
+ .local_veth = "veth2-",
+ .remote_veth = "veth22",
+ .next_veth = 2,
+ .remote_addr = "",
+ .namespace = "ns-veth22-"
+ },
+ {
+ .local_veth = "veth3-",
+ .remote_veth = "veth33",
+ .next_veth = 0,
+ .remote_addr = IP_DST,
+ .namespace = "ns-veth33-"
+ }
}
};
-static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index)
+struct prog_configuration {
+ char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */
+ char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */
+ u32 local_flags; /* XDP flags to use on local_veth */
+ u32 remote_flags; /* XDP flags to use on remote_veth */
+};
+
+static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj,
+ struct net_configuration *net_config,
+ struct prog_configuration *prog, int index)
{
struct bpf_program *local_prog, *remote_prog;
- struct bpf_link **local_link, **remote_link;
struct nstoken *nstoken;
- struct bpf_link *link;
- int interface;
-
- switch (index) {
- case 0:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
- case 1:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1;
- remote_prog = skeletons->xdp_tx->progs.xdp_tx;
- remote_link = &skeletons->xdp_tx->links.xdp_tx;
- break;
- case 2:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
+ int interface, ret, i;
+
+ for (i = 0; i < nb_obj; i++) {
+ local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name);
+ if (local_prog)
+ break;
}
- interface = if_nametoindex(config[index].local_veth);
+ if (!ASSERT_OK_PTR(local_prog, "find local program"))
+ return -1;
+
+ for (i = 0; i < nb_obj; i++) {
+ remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name);
+ if (remote_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(remote_prog, "find remote program"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].local_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index"))
return -1;
- link = bpf_program__attach_xdp(local_prog, interface);
- if (!ASSERT_OK_PTR(link, "attach xdp program to local veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog),
+ prog[index].local_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to local veth"))
return -1;
- *local_link = link;
- nstoken = open_netns(config[index].namespace);
+
+ nstoken = open_netns(net_config->veth_cfg[index].namespace);
if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace"))
return -1;
- interface = if_nametoindex(config[index].remote_veth);
+
+ interface = if_nametoindex(net_config->veth_cfg[index].remote_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index")) {
close_netns(nstoken);
return -1;
}
- link = bpf_program__attach_xdp(remote_prog, interface);
- *remote_link = link;
- close_netns(nstoken);
- if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog),
+ prog[index].remote_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to remote veth")) {
+ close_netns(nstoken);
return -1;
+ }
+ close_netns(nstoken);
return 0;
}
-static int configure_network(struct skeletons *skeletons)
+static int create_network(struct net_configuration *net_config)
{
- int interface_id;
- int map_fd;
- int err;
- int i = 0;
+ struct nstoken *nstoken = NULL;
+ int i, err;
+
+ memcpy(net_config, &default_config, sizeof(struct net_configuration));
+
+ /* Create unique namespaces */
+ err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns0 name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->ns0_name);
- /* First create and configure all interfaces */
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- SYS(fail, "ip netns add %s", config[i].namespace);
- SYS(fail, "ip link add %s type veth peer name %s netns %s",
- config[i].local_veth, config[i].remote_veth, config[i].namespace);
- SYS(fail, "ip link set dev %s up", config[i].local_veth);
- if (config[i].remote_addr)
- SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace,
- config[i].remote_addr, config[i].remote_veth);
- SYS(fail, "ip -n %s link set dev %s up", config[i].namespace,
- config[i].remote_veth);
+ err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace);
}
- /* Then configure the redirect map and attach programs to interfaces */
- map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port);
- if (!ASSERT_GE(map_fd, 0, "open redirect map"))
+ /* Create interfaces */
+ nstoken = open_netns(net_config->ns0_name);
+ if (!nstoken)
goto fail;
+
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- interface_id = if_nametoindex(config[i].next_veth);
- if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
- goto fail;
- err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
- if (!ASSERT_OK(err, "configure interface redirection through map"))
- goto fail;
- if (attach_programs_to_veth_pair(skeletons, i))
- goto fail;
+ SYS(fail, "ip link add %s type veth peer name %s netns %s",
+ net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth,
+ net_config->veth_cfg[i].namespace);
+ SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth);
+ if (net_config->veth_cfg[i].remote_addr[0])
+ SYS(fail, "ip -n %s addr add %s/24 dev %s",
+ net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_addr,
+ net_config->veth_cfg[i].remote_veth);
+ SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_veth);
}
+ close_netns(nstoken);
return 0;
fail:
+ close_netns(nstoken);
return -1;
}
-static void cleanup_network(void)
+static void cleanup_network(struct net_configuration *net_config)
{
int i;
- /* Deleting namespaces is enough to automatically remove veth pairs as well
- */
+ SYS_NOFAIL("ip netns del %s", net_config->ns0_name);
for (i = 0; i < VETH_PAIRS_COUNT; i++)
- SYS_NOFAIL("ip netns del %s", config[i].namespace);
+ SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace);
}
-static int check_ping(struct skeletons *skeletons)
+#define VETH_REDIRECT_SKEL_NB 3
+static void xdp_veth_redirect(u32 flags)
{
+ struct prog_configuration ping_config[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_0",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_1",
+ .remote_name = "xdp_tx",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_2",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ int map_fd;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_tx;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ /* Then configure the redirect map and attach programs to interfaces */
+ map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port);
+ if (!ASSERT_OK_FD(map_fd, "open redirect map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_tx->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int next_veth = net_config.veth_cfg[i].next_veth;
+ int interface_id;
+ int err;
+
+ interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth);
+ if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
+ goto destroy_xdp_redirect_map;
+ err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
+ if (!ASSERT_OK(err, "configure interface redirection through map"))
+ goto destroy_xdp_redirect_map;
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, ping_config, i))
+ goto destroy_xdp_redirect_map;
+ }
+
/* Test: if all interfaces are properly configured, we must be able to ping
* veth33 from veth11
*/
- return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
- config[0].namespace, IP_DST);
+ ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
+ net_config.veth_cfg[0].namespace, IP_DST), "ping");
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_tx:
+ xdp_tx__destroy(xdp_tx);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
}
-void test_xdp_veth_redirect(void)
+#define BROADCAST_REDIRECT_SKEL_NB 2
+static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags)
{
- struct skeletons skeletons = {};
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_0",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_1",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_2",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ }
+ };
+ struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB];
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ u16 protocol = ETH_P_IP;
+ int group_map;
+ int flags_map;
+ int cnt_map;
+ u64 cnt = 0;
+ int i, err;
- skeletons.xdp_dummy = xdp_dummy__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
return;
- skeletons.xdp_tx = xdp_tx__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "init IP count"))
+ goto destroy_xdp_redirect_map;
+
+ cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt);
+ if (!ASSERT_OK_FD(cnt_map, "open rxcnt map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_redirect_multi_kern->obj;
+ bpf_objs[1] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = bpf_map_lookup_elem(cnt_map, &i, &cnt);
+ if (!ASSERT_OK(err, "get IP cnt"))
+ goto destroy_xdp_redirect_map;
+
+ if (redirect_flags & BPF_F_EXCLUDE_INGRESS)
+ /* veth11 shouldn't receive the ICMP requests;
+ * others should
+ */
+ ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt");
+ else
+ /* All remote veth should receive the ICMP requests */
+ ASSERT_EQ(cnt, 4, "compare IP cnt");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+
+ cleanup_network(&net_config);
+}
+
+#define VETH_EGRESS_SKEL_NB 3
+static void xdp_veth_egress(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
goto destroy_xdp_dummy;
- skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load"))
- goto destroy_xdp_tx;
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
- if (configure_network(&skeletons))
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
goto destroy_xdp_redirect_map;
- ASSERT_OK(check_ping(&skeletons), "ping");
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < 2; i++) {
+ u32 key = i;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ }
destroy_xdp_redirect_map:
- xdp_redirect_map__destroy(skeletons.xdp_redirect_maps);
-destroy_xdp_tx:
- xdp_tx__destroy(skeletons.xdp_tx);
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
destroy_xdp_dummy:
- xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+void test_xdp_veth_redirect(void)
+{
+ if (test__start_subtest("0"))
+ xdp_veth_redirect(0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_SKB_MODE);
+}
+
+void test_xdp_veth_broadcast_redirect(void)
+{
+ if (test__start_subtest("0/BROADCAST"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST);
+
+ if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("DRV_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("SKB_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+void test_xdp_veth_egress(void)
+{
+ if (test__start_subtest("0/egress"))
+ xdp_veth_egress(0);
+
+ if (test__start_subtest("DRV_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_DRV_MODE);
- cleanup_network();
+ if (test__start_subtest("SKB_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_SKB_MODE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
new file mode 100644
index 000000000000..5af28f359cfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/if_link.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "network_helpers.h"
+#include "test_xsk.h"
+#include "xsk_xdp_common.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define DEFAULT_BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_JUMBO_SIZE 9000
+#define MAX_INTERFACES 2
+#define MAX_TEARDOWN_ITER 10
+#define MAX_TX_BUDGET_DEFAULT 32
+#define PKT_DUMP_NB_TO_PRINT 16
+/* Just to align the data in the packet */
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2)
+#define POLL_TMOUT 1000
+#define THREAD_TMOUT 3
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+bool opt_verbose;
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+ u32 *ptr = (u32 *)dest, i;
+
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
+}
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+ return umem->num_frames * umem->frame_size;
+}
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
+{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = umem->frame_size,
+ .frame_headroom = umem->frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int ret;
+
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
+ if (umem->unaligned_mode)
+ cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
+ if (ret)
+ return ret;
+
+ umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
+ return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
+static int enable_busy_poll(struct xsk_socket_info *xsk)
+{
+ int sock_opt;
+
+ sock_opt = 1;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = 20;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = xsk->batch_size;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+
+ xsk->umem = umem;
+ cfg.rx_size = xsk->rxqsize;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
+
+ txr = ifobject->tx_on ? &xsk->tx : NULL;
+ rxr = ifobject->rx_on ? &xsk->rx : NULL;
+ return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(file);
+ return max_skb_frags;
+}
+
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx)
+{
+ u32 i, j;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->xsk = &ifobj->xsk_arr[0];
+ ifobj->use_poll = false;
+ ifobj->use_fill_ring = true;
+ ifobj->release_rx = true;
+ ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
+
+ if (i == 0) {
+ ifobj->rx_on = false;
+ ifobj->tx_on = true;
+ } else {
+ ifobj->rx_on = true;
+ ifobj->tx_on = false;
+ }
+
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+ for (j = 0; j < MAX_SOCKETS; j++) {
+ memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+ ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
+ }
+ }
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ test->ifobj_tx = ifobj_tx;
+ test->ifobj_rx = ifobj_rx;
+ test->current_step = 0;
+ test->total_steps = 1;
+ test->nb_sockets = 1;
+ test->fail = false;
+ test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
+ test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+ test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
+{
+ struct pkt_stream *tx_pkt_stream;
+ struct pkt_stream *rx_pkt_stream;
+ u32 i;
+
+ tx_pkt_stream = test->tx_pkt_stream_default;
+ rx_pkt_stream = test->rx_pkt_stream_default;
+ memset(test, 0, sizeof(*test));
+ test->tx_pkt_stream_default = tx_pkt_stream;
+ test->rx_pkt_stream_default = rx_pkt_stream;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+ if (mode == TEST_MODE_ZC)
+ ifobj->bind_flags |= XDP_ZEROCOPY;
+ else
+ ifobj->bind_flags |= XDP_COPY;
+ }
+
+ memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
+ test->mode = mode;
+ __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+ __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+ struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+ struct bpf_map *xskmap_tx)
+{
+ test->xdp_prog_rx = xdp_prog_rx;
+ test->xdp_prog_tx = xdp_prog_tx;
+ test->xskmap_rx = xskmap_rx;
+ test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
+void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream) {
+ pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ (*pkts_sent)++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
+ }
+ return NULL;
+}
+
+void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+ free(pkt_stream->pkts);
+ free(pkt_stream);
+}
+
+void pkt_stream_restore_default(struct test_spec *test)
+{
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+
+ if (tx_pkt_stream != test->tx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
+ }
+
+ if (rx_pkt_stream != test->rx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
+ }
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = calloc(1, sizeof(*pkt_stream));
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts) {
+ free(pkt_stream);
+ return NULL;
+ }
+
+ pkt_stream->nb_pkts = nb_pkts;
+ return pkt_stream;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
+ return 1;
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options))
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ pkt->offset = offset;
+ pkt->len = len;
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = __pkt_stream_alloc(nb_pkts);
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
+ for (i = 0; i < nb_pkts; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
+ }
+
+ return pkt_stream;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
+{
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+
+ if (!ifobj->xsk->pkt_stream)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ int ret;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ if (ret)
+ return ret;
+
+ return pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
+}
+
+static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+ int offset)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ if (!pkt_stream)
+ return -ENOMEM;
+
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+
+ ifobj->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+ int ret = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+
+ if (ret)
+ return ret;
+
+ return __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+}
+
+static int pkt_stream_receive_half(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ u32 i;
+
+ if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default)
+ /* Packet stream has already been replaced so we have to release this one.
+ * The newly created one will be freed by the restore_default() at the
+ * end of the test
+ */
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ if (!test->ifobj_rx->xsk->pkt_stream)
+ return -ENOMEM;
+
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+ for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+ pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+
+ return 0;
+}
+
+static int pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
+
+ return 0;
+}
+
+static void release_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *later_free_tx = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *later_free_rx = test->ifobj_rx->xsk->pkt_stream;
+ int i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ /* later_free_{rx/tx} will be freed by restore_default() */
+ if (test->ifobj_tx->xsk_arr[i].pkt_stream != later_free_tx)
+ pkt_stream_delete(test->ifobj_tx->xsk_arr[i].pkt_stream);
+ if (test->ifobj_rx->xsk_arr[i].pkt_stream != later_free_rx)
+ pkt_stream_delete(test->ifobj_rx->xsk_arr[i].pkt_stream);
+ }
+
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ if (len < MIN_PKT_SIZE)
+ return;
+
+ if (!bytes_written) {
+ gen_eth_hdr(xsk, data);
+
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
+{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_alloc(nb_frames);
+ if (!pkt_stream)
+ return NULL;
+
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
+
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
+ pkt_nb++;
+ }
+
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
+}
+
+static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
+
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+ /*extract L2 frame */
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
+
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+ /*extract L5 frame */
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ ksft_print_msg("....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ ksft_print_msg("\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+ u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
+
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
+
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+ if (offset == expected_offset)
+ return true;
+
+ ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+ return false;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->pkt_nb) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
+ return false;
+ }
+
+ return true;
+}
+
+static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ return -EINVAL;
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ return ret;
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ *supported = adjust_value != -EOPNOTSUPP;
+
+ return 0;
+}
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
+{
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
+ }
+
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ return true;
+
+error:
+ pkt_dump(data, len, !bytes_processed);
+ return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
+int kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
+ if (ret >= 0)
+ return TEST_PASS;
+ if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+ usleep(100);
+ return TEST_PASS;
+ }
+ return TEST_FAILURE;
+}
+
+int kick_rx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+ int ret;
+
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ if (rcvd > xsk->outstanding_tx) {
+ u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+ ksft_print_msg("[%s] Too many packets completed\n", __func__);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
+ return TEST_FAILURE;
+ }
+
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ }
+
+ return TEST_PASS;
+}
+
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct ifobject *ifobj = test->ifobj_rx;
+ struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
+ struct pkt *pkt;
+ u64 first_addr = 0;
+ int ret;
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
+
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
+ }
+
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+ }
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ }
+ }
+
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
+
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+ }
+
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
+
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
+ }
+
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+ return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
+ }
+
+ return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct xsk_umem_info *umem = ifobject->umem;
+ bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
+ int ret;
+
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ return TEST_CONTINUE;
+ }
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ for (i = 0; i < xsk->batch_size; i++) {
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+ if (!pkt)
+ break;
+
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > xsk->batch_size - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
+ valid_pkts++;
+ valid_frags += nb_frags;
+ }
+ }
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight += valid_pkts;
+ pthread_mutex_unlock(&pacing_mutex);
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ xsk->outstanding_tx += valid_frags;
+
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+ int fd = xsk_socket__fd(xsk), err;
+ socklen_t optlen, expected_len;
+
+ optlen = sizeof(*stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ expected_len = sizeof(struct xdp_statistics);
+ if (optlen != expected_len) {
+ ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+ __func__, expected_len, optlen);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_ring_full)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_fill_ring_empty_descs)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
+ ifobject->xsk->pkt_stream->nb_pkts);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int xsk_configure(struct test_spec *test, struct ifobject *ifobject,
+ struct xsk_umem_info *umem, bool tx)
+{
+ int i, ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+ ifobject, shared);
+ if (!ret)
+ break;
+
+ /* Retry if it fails as xsk_socket__create() is asynchronous */
+ if (ctr >= SOCK_RECONF_CTR)
+ return ret;
+ usleep(USLEEP_MAX);
+ }
+ if (ifobject->busy_poll) {
+ ret = enable_busy_poll(&ifobject->xsk_arr[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+ int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true);
+
+ if (ret)
+ return ret;
+ ifobject->xsk = &ifobject->xsk_arr[0];
+ ifobject->xskmap = test->ifobj_rx->xskmap;
+ memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
+
+ return 0;
+}
+
+static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
+{
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+ int ret;
+
+ if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ buffers_to_fill = umem->num_frames;
+ else
+ buffers_to_fill = umem->fill_size;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+ if (ret != buffers_to_fill)
+ return -ENOSPC;
+
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
+
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
+ break;
+ }
+ }
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
+
+ return 0;
+}
+
+static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int mmap_flags;
+ u64 umem_sz;
+ void *bufs;
+ int ret;
+ u32 i;
+
+ umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+ mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+
+ if (ifobject->umem->unaligned_mode)
+ mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+ if (ifobject->shared_umem)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ return -errno;
+
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+ if (ret)
+ return ret;
+
+ ret = xsk_configure(test, ifobject, ifobject->umem, false);
+ if (ret)
+ return ret;
+
+ ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return 0;
+
+ ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream,
+ ifobject->use_fill_ring);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void *worker_testapp_validate_tx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_tx;
+ int err;
+
+ if (test->current_step == 1) {
+ if (!ifobject->shared_umem) {
+ if (thread_common_ops(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ } else {
+ if (thread_common_ops_tx(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ }
+ }
+
+ err = send_pkts(test, ifobject);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ test->fail = true;
+
+ pthread_exit(NULL);
+}
+
+void *worker_testapp_validate_rx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_rx;
+ int err;
+
+ if (test->current_step == 1) {
+ err = thread_common_ops(test, ifobject);
+ } else {
+ xsk_clear_xskmap(ifobject->xskmap);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+ if (err)
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
+ }
+
+ pthread_barrier_wait(&barr);
+
+ /* We leave only now in case of error to avoid getting stuck in the barrier */
+ if (err) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+
+ err = receive_pkts(test);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+
+ if (err) {
+ if (!test->adjust_tail) {
+ test->fail = true;
+ } else {
+ bool supported;
+
+ if (is_adjust_tail_supported(ifobject->xdp_progs, &supported))
+ test->fail = true;
+ else if (!supported)
+ test->adjust_tail_support = false;
+ else
+ test->fail = true;
+ }
+ }
+
+ pthread_exit(NULL);
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+ u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+ if (ifobj->shared_umem)
+ umem_sz *= 2;
+
+ umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ xsk_umem__delete(ifobj->umem->umem);
+ munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+ pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_rx;
+
+ return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+ struct bpf_map *xskmap, enum test_mode mode)
+{
+ int err;
+
+ xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+ err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+ if (err) {
+ ksft_print_msg("Error attaching XDP program\n");
+ return err;
+ }
+
+ if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+ if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+ ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+ return -EINVAL;
+ }
+
+ ifobj->xdp_prog = xdp_prog;
+ ifobj->xskmap = xskmap;
+ ifobj->mode = mode;
+
+ return 0;
+}
+
+static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+ struct ifobject *ifobj_tx)
+{
+ int err = 0;
+
+ if (xdp_prog_changed_rx(test)) {
+ err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+ if (err)
+ return err;
+ }
+
+ if (!ifobj_tx || ifobj_tx->shared_umem)
+ return 0;
+
+ if (xdp_prog_changed_tx(test))
+ err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+
+ return err;
+}
+
+static void clean_sockets(struct test_spec *test, struct ifobject *ifobj)
+{
+ u32 i;
+
+ if (!ifobj || !test)
+ return;
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj->xsk_arr[i].xsk);
+}
+
+static void clean_umem(struct test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+ if (!ifobj1)
+ return;
+
+ testapp_clean_xsk_umem(ifobj1);
+ if (ifobj2 && !ifobj2->shared_umem)
+ testapp_clean_xsk_umem(ifobj2);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+ struct ifobject *ifobj2)
+{
+ pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_print_msg("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_print_msg("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ return TEST_FAILURE;
+ }
+
+ if (ifobj2) {
+ if (pthread_barrier_init(&barr, NULL, 2))
+ return TEST_FAILURE;
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
+ }
+
+ test->current_step++;
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
+ pkts_in_flight = 0;
+
+ signal(SIGUSR1, handler);
+ /*Spawn RX thread */
+ pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+ if (ifobj2) {
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr)) {
+ pthread_kill(t0, SIGUSR1);
+ clean_sockets(test, ifobj1);
+ clean_umem(test, ifobj1, NULL);
+ return TEST_FAILURE;
+ }
+
+ /*Spawn TX thread */
+ pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+ pthread_join(t1, NULL);
+ }
+
+ if (!ifobj2)
+ pthread_kill(t0, SIGUSR1);
+ else
+ pthread_join(t0, NULL);
+
+ if (test->total_steps == test->current_step || test->fail) {
+ clean_sockets(test, ifobj1);
+ clean_sockets(test, ifobj2);
+ clean_umem(test, ifobj1, ifobj2);
+ }
+
+ if (test->fail)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+ struct ifobject *ifobj_rx = test->ifobj_rx;
+ struct ifobject *ifobj_tx = test->ifobj_tx;
+
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_print_msg("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_print_msg("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_print_msg("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+
+ if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx))
+ return TEST_FAILURE;
+ return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+ return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+int testapp_teardown(struct test_spec *test)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+ test_spec_reset(test);
+ }
+
+ return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+ thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+ struct ifobject *tmp_ifobj = (*ifobj1);
+
+ (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+ (*ifobj2)->func_ptr = tmp_func_ptr;
+
+ *ifobj1 = *ifobj2;
+ *ifobj2 = tmp_ifobj;
+}
+
+int testapp_bidirectional(struct test_spec *test)
+{
+ int res;
+
+ test->ifobj_tx->rx_on = true;
+ test->ifobj_rx->tx_on = true;
+ test->total_steps = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ print_verbose("Switching Tx/Rx direction\n");
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+ int ret;
+
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+ if (ret)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+ test->total_steps = 2;
+ test->nb_sockets = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ if (swap_xsk_resources(test)) {
+ clean_sockets(test, test->ifobj_rx);
+ clean_sockets(test, test->ifobj_tx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_headroom(struct test_spec *test)
+{
+ test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_dropped(struct test_spec *test)
+{
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0))
+ return TEST_FAILURE;
+ test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+ XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ test->ifobj_rx->validation_func = validate_rx_dropped;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+ if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0))
+ return TEST_FAILURE;
+ test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_full(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+ test->ifobj_rx->release_rx = false;
+ test->ifobj_rx->validation_func = validate_rx_full;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_fill_empty(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->use_fill_ring = false;
+ test->ifobj_rx->validation_func = validate_fill_empty;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* Let half of the packets straddle a 4K buffer boundary */
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_single_pkt(struct test_spec *test)
+{
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Zero packet address allowed */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Allowed packet */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Straddling the start of umem */
+ {-2, MIN_PKT_SIZE, 0, false},
+ /* Packet too large */
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+ /* After umem ends */
+ {umem_size, MIN_PKT_SIZE, 0, false},
+ /* Straddle the end of umem */
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
+ pkts[7].valid = true;
+ }
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
+ pkts[8].valid = false;
+ }
+
+ if (test->ifobj_tx->shared_umem) {
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
+ }
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_drop(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ skel_rx->bss->count = 0;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_shared_umem(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+ int ret;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ if (pkt_stream_even_odd_sequence(test))
+ return TEST_FAILURE;
+
+ ret = testapp_validate_traffic(test);
+
+ release_even_odd_sequence(test);
+
+ return ret;
+}
+
+int testapp_poll_txq_tmout(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048))
+ return TEST_FAILURE;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt *pkts;
+ u32 max_frags, i;
+ int ret = TEST_FAILURE;
+
+ if (test->mode == TEST_MODE_ZC) {
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) {
+ free(pkts);
+ return TEST_FAILURE;
+ }
+
+ ret = testapp_validate_traffic(test);
+ free(pkts);
+ return ret;
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+ ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+ if (libbpf_get_error(ifobj->xdp_progs))
+ return libbpf_get_error(ifobj->xdp_progs);
+
+ return 0;
+}
+
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int err;
+
+ ifobj->func_ptr = func_ptr;
+
+ err = xsk_load_xdp_programs(ifobj);
+ if (err) {
+ ksft_print_msg("Error loading XDP program\n");
+ return err;
+ }
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querying XDP capabilities\n");
+ return err;
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
+
+ return 0;
+}
+
+int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* This test exists to test descriptors that staddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) {
+ clean_sockets(test, test->ifobj_tx);
+ clean_sockets(test, test->ifobj_rx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
+struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
+
+ return ifobj;
+
+out_umem:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
new file mode 100644
index 000000000000..8fc78a057de0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TEST_XSK_H_
+#define TEST_XSK_H_
+
+#include <linux/ethtool.h>
+#include <linux/if_xdp.h>
+
+#include "../kselftest.h"
+#include "xsk.h"
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_TEST_NAME_SIZE 48
+#define SOCK_RECONF_CTR 10
+#define USLEEP_MAX 10000
+
+extern bool opt_verbose;
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+
+static inline u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static inline u64 ceil_u64(u64 a, u64 b)
+{
+ return (a + b - 1) / b;
+}
+
+/* Simple test */
+enum test_mode {
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_ZC,
+ TEST_MODE_ALL
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
+ u32 outstanding_tx;
+ u32 rxqsize;
+ u32 batch_size;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ bool check_consumer;
+};
+
+int kick_rx(struct xsk_socket_info *xsk);
+int kick_tx(struct xsk_socket_info *xsk);
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ u64 next_buffer;
+ u32 num_frames;
+ u32 frame_headroom;
+ void *buffer;
+ u32 frame_size;
+ u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
+ bool unaligned_mode;
+};
+
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
+int hw_ring_size_reset(struct ifobject *ifobj);
+
+struct ifobject {
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ struct xsk_socket_info *xsk;
+ struct xsk_socket_info *xsk_arr;
+ struct xsk_umem_info *umem;
+ thread_func_t func_ptr;
+ validation_func_t validation_func;
+ struct xsk_xdp_progs *xdp_progs;
+ struct bpf_map *xskmap;
+ struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
+ enum test_mode mode;
+ int ifindex;
+ int mtu;
+ u32 bind_flags;
+ u32 xdp_zc_max_segs;
+ bool tx_on;
+ bool rx_on;
+ bool use_poll;
+ bool busy_poll;
+ bool use_fill_ring;
+ bool release_rx;
+ bool shared_umem;
+ bool use_metadata;
+ bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
+};
+struct ifobject *ifobject_create(void);
+void ifobject_delete(struct ifobject *ifobj);
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr);
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size);
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared);
+
+
+struct pkt {
+ int offset;
+ u32 len;
+ u32 pkt_nb;
+ bool valid;
+ u16 options;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ u32 current_pkt_nb;
+ struct pkt *pkts;
+ u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
+ bool verbatim;
+};
+
+static inline bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len);
+void pkt_stream_delete(struct pkt_stream *pkt_stream);
+void pkt_stream_reset(struct pkt_stream *pkt_stream);
+void pkt_stream_restore_default(struct test_spec *test);
+
+struct test_spec {
+ struct ifobject *ifobj_tx;
+ struct ifobject *ifobj_rx;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct pkt_stream *rx_pkt_stream_default;
+ struct bpf_program *xdp_prog_rx;
+ struct bpf_program *xdp_prog_tx;
+ struct bpf_map *xskmap_rx;
+ struct bpf_map *xskmap_tx;
+ test_func_t test_func;
+ int mtu;
+ u16 total_steps;
+ u16 current_step;
+ u16 nb_sockets;
+ bool fail;
+ bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
+ enum test_mode mode;
+ char name[MAX_TEST_NAME_SIZE];
+};
+
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static inline char *mode_string(struct test_spec *test)
+{
+ switch (test->mode) {
+ case TEST_MODE_SKB:
+ return "SKB";
+ case TEST_MODE_DRV:
+ return "DRV";
+ case TEST_MODE_ZC:
+ return "ZC";
+ default:
+ return "BOGUS";
+ }
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run);
+
+int testapp_adjust_tail_grow(struct test_spec *test);
+int testapp_adjust_tail_grow_mb(struct test_spec *test);
+int testapp_adjust_tail_shrink(struct test_spec *test);
+int testapp_adjust_tail_shrink_mb(struct test_spec *test);
+int testapp_aligned_inv_desc(struct test_spec *test);
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test);
+int testapp_aligned_inv_desc_mb(struct test_spec *test);
+int testapp_bidirectional(struct test_spec *test);
+int testapp_headroom(struct test_spec *test);
+int testapp_hw_sw_max_ring_size(struct test_spec *test);
+int testapp_hw_sw_min_ring_size(struct test_spec *test);
+int testapp_poll_rx(struct test_spec *test);
+int testapp_poll_rxq_tmout(struct test_spec *test);
+int testapp_poll_tx(struct test_spec *test);
+int testapp_poll_txq_tmout(struct test_spec *test);
+int testapp_send_receive(struct test_spec *test);
+int testapp_send_receive_2k_frame(struct test_spec *test);
+int testapp_send_receive_mb(struct test_spec *test);
+int testapp_send_receive_unaligned(struct test_spec *test);
+int testapp_send_receive_unaligned_mb(struct test_spec *test);
+int testapp_single_pkt(struct test_spec *test);
+int testapp_stats_fill_empty(struct test_spec *test);
+int testapp_stats_rx_dropped(struct test_spec *test);
+int testapp_stats_tx_invalid_descs(struct test_spec *test);
+int testapp_stats_rx_full(struct test_spec *test);
+int testapp_teardown(struct test_spec *test);
+int testapp_too_many_frags(struct test_spec *test);
+int testapp_tx_queue_consumer(struct test_spec *test);
+int testapp_unaligned_inv_desc(struct test_spec *test);
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test);
+int testapp_unaligned_inv_desc_mb(struct test_spec *test);
+int testapp_xdp_drop(struct test_spec *test);
+int testapp_xdp_metadata(struct test_spec *test);
+int testapp_xdp_metadata_mb(struct test_spec *test);
+int testapp_xdp_prog_cleanup(struct test_spec *test);
+int testapp_xdp_shared_umem(struct test_spec *test);
+
+void *worker_testapp_validate_rx(void *arg);
+void *worker_testapp_validate_tx(void *arg);
+
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
+ };
+
+static const struct test_spec ci_skip_tests[] = {
+ /* Flaky tests */
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ /* Tests with huge page dependency */
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ /* Test with HW ring size dependency */
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ /* Too long test */
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+};
+
+
+#endif /* TEST_XSK_H_ */
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index d66687f1ee6a..34f9ccce2602 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -3,6 +3,7 @@
#include <test_progs.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
+#include "timer_interrupt.skel.h"
#define NUM_THR 8
@@ -86,6 +87,10 @@ void serial_test_timer(void)
int err;
timer_skel = timer__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
@@ -95,3 +100,36 @@ void serial_test_timer(void)
RUN_TESTS(timer_failure);
}
+
+void test_timer_interrupt(void)
+{
+ struct timer_interrupt *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_interrupt__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load"))
+ return;
+
+ err = timer_interrupt__attach(skel);
+ if (!ASSERT_OK(err, "timer_interrupt__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+
+ usleep(50);
+
+ ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt");
+ if (skel->bss->preempt_count)
+ ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb");
+
+out:
+ timer_interrupt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
index f74b82305da8..b841597c8a3a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode)
struct timer_crash *skel;
skel = timer_crash__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
return;
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
index 1a2f99596916..eb303fa1e09a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -59,6 +59,10 @@ void test_timer_lockup(void)
}
skel = timer_lockup__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 9ff7843909e7..c930c7d7105b 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -65,6 +65,10 @@ void serial_test_timer_mim(void)
goto cleanup;
timer_skel = timer_mim__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index c3ab9b6fb069..b81dde283052 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -19,6 +19,7 @@
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
#include "token_lsm.skel.h"
+#include "priv_freplace_prog.skel.h"
static inline int sys_mount(const char *dev_name, const char *dir_name,
const char *type, unsigned long flags,
@@ -114,7 +115,7 @@ static int create_bpffs_fd(void)
static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
- int mnt_fd, err;
+ int err;
/* set up token delegation mount options */
err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
@@ -135,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
if (err < 0)
return -errno;
- /* create O_PATH fd for detached mount */
- mnt_fd = sys_fsmount(fs_fd, 0, 0);
- if (err < 0)
- return -errno;
-
- return mnt_fd;
+ return 0;
}
/* send FD over Unix domain (AF_UNIX) socket */
@@ -286,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
{
int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
struct token_lsm *lsm_skel = NULL;
+ char one;
/* load and attach LSM "policy" before we go into unpriv userns */
lsm_skel = token_lsm__open_and_load();
@@ -332,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
err = sendfd(sock_fd, fs_fd);
if (!ASSERT_OK(err, "send_fs_fd"))
goto cleanup;
- zclose(fs_fd);
+
+ /* wait that the parent reads the fd, does the fsconfig() calls
+ * and send us a signal that it is done
+ */
+ err = read(sock_fd, &one, sizeof(one));
+ if (!ASSERT_GE(err, 0, "read_one"))
+ goto cleanup;
/* avoid mucking around with mount namespaces and mounting at
- * well-known path, just get detach-mounted BPF FS fd back from parent
+ * well-known path, just create O_PATH fd for detached mount
*/
- err = recvfd(sock_fd, &mnt_fd);
- if (!ASSERT_OK(err, "recv_mnt_fd"))
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_OK_FD(mnt_fd, "mnt_fd"))
goto cleanup;
/* try to fspick() BPF FS and try to add some delegation options */
@@ -428,24 +431,24 @@ again:
static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
- int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+ int fs_fd = -1, token_fd = -1, err;
+ char one = 1;
err = recvfd(sock_fd, &fs_fd);
if (!ASSERT_OK(err, "recv_bpffs_fd"))
goto cleanup;
- mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
- if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) {
err = -EINVAL;
goto cleanup;
}
- zclose(fs_fd);
- /* pass BPF FS context object to parent */
- err = sendfd(sock_fd, mnt_fd);
- if (!ASSERT_OK(err, "send_mnt_fd"))
+ /* notify the child that we did the fsconfig() calls and it can proceed. */
+ err = write(sock_fd, &one, sizeof(one));
+ if (!ASSERT_EQ(err, sizeof(one), "send_one"))
goto cleanup;
- zclose(mnt_fd);
+ zclose(fs_fd);
/* receive BPF token FD back from child for some extra tests */
err = recvfd(sock_fd, &token_fd);
@@ -458,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
cleanup:
zclose(sock_fd);
zclose(fs_fd);
- zclose(mnt_fd);
zclose(token_fd);
if (child_pid > 0)
@@ -788,6 +790,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
return 0;
}
+static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel,
+ struct priv_prog **skel, int *tgt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err;
+ char buf[256];
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ *skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts"))
+ return -EINVAL;
+ err = priv_prog__load(*skel);
+ if (!ASSERT_OK(err, "priv_prog__load"))
+ return -EINVAL;
+
+ *fr_skel = priv_freplace_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_freplace_prog__open_opts"))
+ return -EINVAL;
+
+ *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1);
+ return 0;
+}
+
+/* Verify that freplace works from user namespace, because bpf token is loaded
+ * in bpf_object__prepare
+ */
+static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_object__prepare(fr_skel->obj);
+ if (!ASSERT_OK(err, "freplace__prepare"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = priv_freplace_prog__load(fr_skel);
+ ASSERT_OK(err, "priv_freplace_prog__load");
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* Verify that replace fails to set attach target from user namespace without bpf token */
+static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (ASSERT_ERR(err, "attach fails"))
+ err = 0;
+ else
+ err = -EINVAL;
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
* which should cause struct_ops application to fail, as BTF won't be uploaded
* into the kernel, even if STRUCT_OPS programs themselves are allowed
@@ -967,6 +1047,41 @@ err_out:
#define bit(n) (1ULL << (n))
+static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ int err, token_fd = -1;
+ struct bpf_token_info info;
+ u32 len = sizeof(struct bpf_token_info);
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ memset(&info, 0, len);
+ err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
+ if (!ASSERT_ERR(err, "bpf_obj_get_token_info"))
+ goto cleanup;
+ if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* The BPF_PROG_TYPE_EXT is not set in token */
+ if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext"))
+ err = -EINVAL;
+
+cleanup:
+ zclose(token_fd);
+ return err;
+}
+
void test_token(void)
{
if (test__start_subtest("map_token")) {
@@ -1004,12 +1119,28 @@ void test_token(void)
if (test__start_subtest("obj_priv_prog")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_PROG_LOAD),
- .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_prog);
}
+ if (test__start_subtest("obj_priv_freplace_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog_fail")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog_fail);
+ }
if (test__start_subtest("obj_priv_btf_fail")) {
struct bpffs_opts opts = {
/* disallow BTF loading */
@@ -1054,4 +1185,13 @@ void test_token(void)
subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
}
+ if (test__start_subtest("bpf_token_info")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_bpf_token_info);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index a222df765bc3..10e231965589 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -28,10 +28,62 @@ out:
tracing_failure__destroy(skel);
}
+static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg)
+{
+ struct tracing_failure *skel;
+ struct bpf_program *prog;
+ char log_buf[256];
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+
+ bpf_program__set_autoload(prog, true);
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_ERR(err, "tracing_failure__load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf");
+out:
+ tracing_failure__destroy(skel);
+}
+
+static void test_tracing_deny(void)
+{
+ int btf_id;
+
+ /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */
+ btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY);
+ if (btf_id <= 0) {
+ test__skip();
+ return;
+ }
+
+ test_tracing_fail_prog("tracing_deny",
+ "Attaching tracing programs to function '__rcu_read_lock' is rejected.");
+}
+
+static void test_fexit_noreturns(void)
+{
+ test_tracing_fail_prog("fexit_noreturns",
+ "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+}
+
void test_tracing_failure(void)
{
if (test__start_subtest("bpf_spin_lock"))
test_bpf_spin_lock(true);
if (test__start_subtest("bpf_spin_unlock"))
test_bpf_spin_lock(false);
+ if (test__start_subtest("tracing_deny"))
+ test_tracing_deny();
+ if (test__start_subtest("fexit_noreturns"))
+ test_fexit_noreturns();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index 19e68d4b3532..6f8c0bfb0415 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -112,10 +112,39 @@ destroy_skel:
tracing_struct_many_args__destroy(skel);
}
+static void test_union_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a");
+ ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b");
+ ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c");
+
+ ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a");
+ ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a");
+ ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b");
+
+out:
+ tracing_struct__destroy(skel);
+}
+
void test_tracing_struct(void)
{
if (test__start_subtest("struct_args"))
test_struct_args();
if (test__start_subtest("struct_many_args"))
test_struct_many_args();
+ if (test__start_subtest("union_args"))
+ test_union_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
index cf3e0e7a64fa..86404476c1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023 Hengqi Chen */
#include <test_progs.h>
+#include <asm/ptrace.h>
#include "test_uprobe.skel.h"
static FILE *urand_spawn(int *pid)
@@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe)
return exit_code;
}
-void test_uprobe(void)
+static void test_uprobe_attach(void)
{
LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
struct test_uprobe *skel;
@@ -93,3 +94,156 @@ cleanup:
pclose(urand_pipe);
test_uprobe__destroy(skel);
}
+
+#ifdef __x86_64__
+__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void)
+{
+ asm volatile (
+ "ret\n"
+ );
+}
+
+static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_change_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+ "ret\n"
+ );
+}
+
+static void regs_common(void)
+{
+ struct pt_regs before = {}, after = {}, expected = {
+ .rax = 0xc0ffe,
+ .rcx = 0xbad,
+ .rdx = 0xdead,
+ .r8 = 0x8,
+ .r9 = 0x9,
+ .r10 = 0x10,
+ .r11 = 0x11,
+ .rdi = 0x12,
+ .rsi = 0x13,
+ };
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->regs = expected;
+
+ uprobe_opts.func_name = "uprobe_regs_change_trigger";
+ skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ uprobe_regs_change(&before, &after);
+
+ ASSERT_EQ(after.rax, expected.rax, "ax");
+ ASSERT_EQ(after.rcx, expected.rcx, "cx");
+ ASSERT_EQ(after.rdx, expected.rdx, "dx");
+ ASSERT_EQ(after.r8, expected.r8, "r8");
+ ASSERT_EQ(after.r9, expected.r9, "r9");
+ ASSERT_EQ(after.r10, expected.r10, "r10");
+ ASSERT_EQ(after.r11, expected.r11, "r11");
+ ASSERT_EQ(after.rdi, expected.rdi, "rdi");
+ ASSERT_EQ(after.rsi, expected.rsi, "rsi");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static noinline unsigned long uprobe_regs_change_ip_1(void)
+{
+ return 0xc0ffee;
+}
+
+static noinline unsigned long uprobe_regs_change_ip_2(void)
+{
+ return 0xdeadbeef;
+}
+
+static void regs_ip(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ unsigned long ret;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2;
+
+ uprobe_opts.func_name = "uprobe_regs_change_ip_1";
+ skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts(
+ skel->progs.test_regs_change_ip,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ ret = uprobe_regs_change_ip_1();
+ ASSERT_EQ(ret, 0xdeadbeef, "ret");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static void test_uprobe_regs_change(void)
+{
+ if (test__start_subtest("regs_change_common"))
+ regs_common();
+ if (test__start_subtest("regs_change_ip"))
+ regs_ip();
+}
+#else
+static void test_uprobe_regs_change(void) { }
+#endif
+
+void test_uprobe(void)
+{
+ if (test__start_subtest("attach"))
+ test_uprobe_attach();
+ test_uprobe_regs_change();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index c397336fe1ed..955a37751b52 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -8,22 +8,31 @@
#include <asm/ptrace.h>
#include <linux/compiler.h>
#include <linux/stringify.h>
+#include <linux/kernel.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include <asm/prctl.h>
#include "uprobe_syscall.skel.h"
#include "uprobe_syscall_executed.skel.h"
+#include "bpf/libbpf_internal.h"
-__naked unsigned long uretprobe_regs_trigger(void)
+#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00
+#include "usdt.h"
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void)
{
asm volatile (
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */
"movq $0xdeadbeef, %rax\n"
"ret\n"
);
}
-__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
+__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after)
{
asm volatile (
"movq %r15, 0(%rdi)\n"
@@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
"movq $0, 120(%rdi)\n" /* orig_rax */
"movq $0, 128(%rdi)\n" /* rip */
"movq $0, 136(%rdi)\n" /* cs */
+ "pushq %rax\n"
"pushf\n"
"pop %rax\n"
"movq %rax, 144(%rdi)\n" /* eflags */
+ "pop %rax\n"
"movq %rsp, 152(%rdi)\n" /* rsp */
"movq $0, 160(%rdi)\n" /* ss */
/* save 2nd argument */
"pushq %rsi\n"
- "call uretprobe_regs_trigger\n"
+ "call uprobe_regs_trigger\n"
/* save return value and load 2nd argument pointer to rax */
"pushq %rax\n"
@@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
);
}
-static void test_uretprobe_regs_equal(void)
+static void test_uprobe_regs_equal(bool retprobe)
{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = retprobe,
+ );
struct uprobe_syscall *skel = NULL;
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
unsigned long *pa = (unsigned long *) &after;
unsigned long *pp;
+ unsigned long offset;
unsigned int i, cnt;
- int err;
+
+ offset = get_uprobe_offset(&uprobe_regs_trigger);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return;
skel = uprobe_syscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load"))
goto cleanup;
- err = uprobe_syscall__attach(skel);
- if (!ASSERT_OK(err, "uprobe_syscall__attach"))
+ skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts"))
goto cleanup;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ if (!retprobe)
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
pp = (unsigned long *) &skel->bss->regs;
cnt = sizeof(before)/sizeof(*pb);
@@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void)
unsigned int offset = i * sizeof(unsigned long);
/*
- * Check register before and after uretprobe_regs_trigger call
+ * Check register before and after uprobe_regs_trigger call
* that triggers the uretprobe.
*/
switch (offset) {
@@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void)
/*
* Check register seen from bpf program and register after
- * uretprobe_regs_trigger call
+ * uprobe_regs_trigger call (with rax exception, check below).
*/
switch (offset) {
/*
@@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void)
case offsetof(struct pt_regs, rsp):
case offsetof(struct pt_regs, ss):
break;
+ /*
+ * uprobe does not see return value in rax, it needs to see the
+ * original (before) rax value
+ */
+ case offsetof(struct pt_regs, rax):
+ if (!retprobe) {
+ ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check");
+ break;
+ }
default:
if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check"))
fprintf(stdout, "failed register offset %u\n", offset);
@@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset)
return ret != n ? (int) ret : 0;
}
-static void test_uretprobe_regs_change(void)
+static void test_regs_change(void)
{
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
@@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void)
unsigned long cnt = sizeof(before)/sizeof(*pb);
unsigned int i, err, offset;
- offset = get_uprobe_offset(uretprobe_regs_trigger);
+ offset = get_uprobe_offset(uprobe_regs_trigger);
err = write_bpf_testmod_uprobe(offset);
if (!ASSERT_OK(err, "register_uprobe"))
return;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
err = write_bpf_testmod_uprobe(0);
if (!ASSERT_OK(err, "unregister_uprobe"))
@@ -251,7 +286,8 @@ static void test_uretprobe_syscall_call(void)
.retprobe = true,
);
struct uprobe_syscall_executed *skel;
- int pid, status, err, go[2], c;
+ int pid, status, err, go[2], c = 0;
+ struct bpf_link *link;
if (!ASSERT_OK(pipe(go), "pipe"))
return;
@@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void)
_exit(0);
}
- skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid,
- "/proc/self/exe",
- "uretprobe_syscall_call", &opts);
- if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi"))
+ skel->bss->pid = pid;
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ pid, "/proc/self/exe",
+ "uretprobe_syscall_call", &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ skel->links.test_uretprobe_multi = link;
/* kick the child */
write(go[1], &c, 1);
@@ -301,6 +340,256 @@ cleanup:
close(go[0]);
}
+#define TRAMP "[uprobes-trampoline]"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked void uprobe_test(void)
+{
+ asm volatile (" \n"
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+
+__attribute__((aligned(16)))
+__nocf_check __weak void usdt_test(void)
+{
+ USDT(optimized_uprobe, usdt);
+}
+
+static int find_uprobes_trampoline(void *tramp_addr)
+{
+ void *start, *end;
+ char line[128];
+ int ret = -1;
+ FILE *maps;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "cannot open maps\n");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2)
+ continue;
+ if (m < 0)
+ continue;
+ if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ fclose(maps);
+ return ret;
+}
+
+static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_nop5(void *fn)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(nop5, fn + i, 5))
+ return fn + i;
+ }
+ return NULL;
+}
+
+typedef void (__attribute__((nocf_check)) *trigger_t)(void);
+
+static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger,
+ void *addr, int executed)
+{
+ struct __arch_relative_insn {
+ __u8 op;
+ __s32 raddr;
+ } __packed *call;
+ void *tramp = NULL;
+
+ /* Uprobe gets optimized after first trigger, so let's press twice. */
+ trigger();
+ trigger();
+
+ /* Make sure bpf program got executed.. */
+ ASSERT_EQ(skel->bss->executed, executed, "executed");
+
+ /* .. and check the trampoline is as expected. */
+ call = (struct __arch_relative_insn *) addr;
+ tramp = (void *) (call + 1) + call->raddr;
+ ASSERT_EQ(call->op, 0xe8, "call");
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+
+ return tramp;
+}
+
+static void check_detach(void *addr, void *tramp)
+{
+ /* [uprobes_trampoline] stays after detach */
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+ ASSERT_OK(memcmp(addr, nop5, 5), "nop5");
+}
+
+static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
+ trigger_t trigger, void *addr, int executed)
+{
+ void *tramp;
+
+ tramp = check_attach(skel, trigger, addr, executed);
+ bpf_link__destroy(link);
+ check_detach(addr, tramp);
+}
+
+static void test_uprobe_legacy(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ /* uprobe */
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe */
+ skel->bss->executed = 0;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_multi(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ /* uprobe.multi */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe.multi */
+ skel->bss->executed = 0;
+ opts.retprobe = true;
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_session(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .session = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_usdt(void)
+{
+ struct uprobe_syscall_executed *skel;
+ struct bpf_link *link;
+ void *addr;
+
+ errno = 0;
+ addr = find_nop5(usdt_test);
+ if (!ASSERT_OK_PTR(addr, "find_nop5"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ check(skel, link, usdt_test, addr, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
/*
* Borrowed from tools/testing/selftests/x86/test_shadow_stack.c.
*
@@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void)
return;
}
- /* Run all of the uretprobe tests. */
- test_uretprobe_regs_equal();
- test_uretprobe_regs_change();
+ /* Run all the tests with shadow stack in place. */
+
+ test_uprobe_regs_equal(false);
+ test_uprobe_regs_equal(true);
test_uretprobe_syscall_call();
+ test_uprobe_legacy();
+ test_uprobe_multi();
+ test_uprobe_session();
+ test_uprobe_usdt();
+
+ test_regs_change();
+
ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
}
-#else
-static void test_uretprobe_regs_equal(void)
+
+static volatile bool race_stop;
+
+static USDT_DEFINE_SEMA(race);
+
+static void *worker_trigger(void *arg)
{
- test__skip();
+ unsigned long rounds = 0;
+
+ while (!race_stop) {
+ uprobe_test();
+ rounds++;
+ }
+
+ printf("tid %ld trigger rounds: %lu\n", sys_gettid(), rounds);
+ return NULL;
}
-static void test_uretprobe_regs_change(void)
+static void *worker_attach(void *arg)
{
- test__skip();
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct uprobe_syscall_executed *skel;
+ unsigned long rounds = 0, offset;
+ const char *sema[2] = {
+ __stringify(USDT_SEMA(race)),
+ NULL,
+ };
+ unsigned long *ref;
+ int err;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return NULL;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema"))
+ return NULL;
+
+ opts.ref_ctr_offset = *ref;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ while (!race_stop) {
+ skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts"))
+ break;
+
+ bpf_link__destroy(skel->links.test_uprobe);
+ skel->links.test_uprobe = NULL;
+ rounds++;
+ }
+
+ printf("tid %ld attach rounds: %lu hits: %d\n", sys_gettid(), rounds, skel->bss->executed);
+ uprobe_syscall_executed__destroy(skel);
+ free(ref);
+ return NULL;
}
-static void test_uretprobe_syscall_call(void)
+static useconds_t race_msec(void)
{
- test__skip();
+ char *env;
+
+ env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC");
+ if (env)
+ return atoi(env);
+
+ /* default duration is 500ms */
+ return 500;
}
-static void test_uretprobe_shadow_stack(void)
+static void test_uprobe_race(void)
{
- test__skip();
+ int err, i, nr_threads;
+ pthread_t *threads;
+
+ nr_threads = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus"))
+ return;
+ nr_threads = max(2, nr_threads);
+
+ threads = alloca(sizeof(*threads) * nr_threads);
+ if (!ASSERT_OK_PTR(threads, "malloc"))
+ return;
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach,
+ NULL);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto cleanup;
+ }
+
+ usleep(race_msec() * 1000);
+
+cleanup:
+ race_stop = true;
+ for (nr_threads = i, i = 0; i < nr_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore");
}
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
#endif
-void test_uprobe_syscall(void)
+static void test_uprobe_error(void)
+{
+ long err = syscall(__NR_uprobe);
+
+ ASSERT_EQ(err, -1, "error");
+ ASSERT_EQ(errno, ENXIO, "errno");
+}
+
+static void __test_uprobe_syscall(void)
{
if (test__start_subtest("uretprobe_regs_equal"))
- test_uretprobe_regs_equal();
- if (test__start_subtest("uretprobe_regs_change"))
- test_uretprobe_regs_change();
+ test_uprobe_regs_equal(true);
if (test__start_subtest("uretprobe_syscall_call"))
test_uretprobe_syscall_call();
if (test__start_subtest("uretprobe_shadow_stack"))
test_uretprobe_shadow_stack();
+ if (test__start_subtest("uprobe_legacy"))
+ test_uprobe_legacy();
+ if (test__start_subtest("uprobe_multi"))
+ test_uprobe_multi();
+ if (test__start_subtest("uprobe_session"))
+ test_uprobe_session();
+ if (test__start_subtest("uprobe_usdt"))
+ test_uprobe_usdt();
+ if (test__start_subtest("uprobe_race"))
+ test_uprobe_race();
+ if (test__start_subtest("uprobe_error"))
+ test_uprobe_error();
+ if (test__start_subtest("uprobe_regs_equal"))
+ test_uprobe_regs_equal(false);
+ if (test__start_subtest("regs_change"))
+ test_regs_change();
+}
+#else
+static void __test_uprobe_syscall(void)
+{
+ test__skip();
+}
+#endif
+
+void test_uprobe_syscall(void)
+{
+ __test_uprobe_syscall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 56ed1eb9b527..f4be5269fa90 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) {
}
}
-static void subtest_basic_usdt(void)
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
+ * - 'size' is the size in bytes of the array element, and its sign indicates
+ * whether the type is signed (negative) or unsigned (positive).
+ * - 'base_reg' is the register holding the base address, normally rdx or edx
+ * - 'index_reg' is the register holding the index, normally rax or eax
+ * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
+ * size of the element type.
+ *
+ * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
+ * - size: -2 (negative because 'short' is signed)
+ * - scale: 2 (since sizeof(short) == 2)
+ *
+ * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
+ */
+static volatile short array[] = {-1, -2, -3, -4};
+
+#if defined(__x86_64__)
+#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
+#else
+#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
+#endif
+
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+
+static void trigger_sib_spec(void)
+{
+ /*
+ * Force SIB addressing with inline assembly.
+ *
+ * You must compile with -std=gnu99 or -std=c99 to use the
+ * STAP_PROBE_ASM macro.
+ *
+ * The STAP_PROBE_ASM macro generates a quoted string that gets
+ * inserted between the surrounding assembly instructions. In this
+ * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
+ * stream, creating a probe point between the asm statement boundaries.
+ * It works fine with gcc/clang.
+ *
+ * Register constraints:
+ * - "d"(array): Binds the 'array' variable to %rdx or %edx register
+ * - "a"(0): Binds the constant 0 to %rax or %eax register
+ * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
+ * %%rax(%eax), they contain the expected values for SIB addressing.
+ *
+ * The "memory" clobber prevents the compiler from reordering memory
+ * accesses around the probe point, ensuring that the probe behavior
+ * is predictable and consistent.
+ */
+ asm volatile(
+ STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
+ :
+ : "d"(array), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
+static void subtest_basic_usdt(bool optimized)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
- int err;
+ int err, i, called;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
+
+#define TRIGGER(x) ({ \
+ trigger_func(x); \
+ if (optimized) \
+ trigger_func(x); \
+ optimized ? 2 : 1; \
+ })
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -59,22 +126,32 @@ static void subtest_basic_usdt(void)
goto cleanup;
/* usdt0 won't be auto-attached */
- opts.usdt_cookie = 0xcafedeadbeeffeed;
+ opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
- trigger_func(1);
+#if defined(__x86_64__) || defined(__i386__)
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
+ called = TRIGGER(1);
- ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+ ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
/* auto-attached usdt3 gets default zero cookie value */
ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
@@ -86,6 +163,9 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+ ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");
/* auto-attached usdt12 gets default zero cookie value */
ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
@@ -104,17 +184,22 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+ int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };
+
+ for (i = 0; i < 12; i++)
+ ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");
+
/* trigger_func() is marked __always_inline, so USDT invocations will be
* inlined in two different places, meaning that each USDT will have
* at least 2 different places to be attached to. This verifies that
* bpf_program__attach_usdt() handles this properly and attaches to
* all possible places of USDT invocation.
*/
- trigger_func(2);
+ called += TRIGGER(2);
- ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
/* only check values that depend on trigger_func()'s input value */
ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
@@ -133,9 +218,9 @@ static void subtest_basic_usdt(void)
if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
goto cleanup;
- trigger_func(3);
+ called += TRIGGER(3);
- ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
/* this time usdt3 has custom cookie */
ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
@@ -147,8 +232,19 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if defined(__x86_64__) || defined(__i386__)
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
cleanup:
test_usdt__destroy(skel);
+#undef TRIGGER
}
unsigned short test_usdt_100_semaphore SEC(".probes");
@@ -261,8 +357,16 @@ static void subtest_multispec_usdt(void)
*/
trigger_300_usdts();
- /* we'll reuse usdt_100 BPF program for usdt_300 test */
bpf_link__destroy(skel->links.usdt_100);
+
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+ /* If built with arm64/clang, there will be much less number of specs
+ * for usdt_300 call sites.
+ */
+#if !defined(__aarch64__) || !defined(__clang__)
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
"test", "usdt_300", NULL);
err = -errno;
@@ -273,13 +377,11 @@ static void subtest_multispec_usdt(void)
/* let's check that there are no "dangling" BPF programs attached due
* to partial success of the above test:usdt_300 attachment
*/
- bss->usdt_100_called = 0;
- bss->usdt_100_sum = 0;
-
f300(777); /* this is 301st instance of usdt_300 */
ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+#endif
/* This time we have USDT with 400 inlined invocations, but arg specs
* should be the same across all sites, so libbpf will only need to
@@ -410,7 +512,11 @@ cleanup:
void test_usdt(void)
{
if (test__start_subtest("basic"))
- subtest_basic_usdt();
+ subtest_basic_usdt(false);
+#ifdef __x86_64__
+ if (test__start_subtest("basic_optimized"))
+ subtest_basic_usdt(true);
+#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
if (test__start_subtest("urand_auto_attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index d424e7ecbd12..9fd3ae987321 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -21,8 +21,7 @@
#include "../progs/test_user_ringbuf.h"
static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
-static const long c_ringbuf_size = 1 << 12; /* 1 small page */
-static const long c_max_entries = c_ringbuf_size / c_sample_size;
+static long c_ringbuf_size, c_max_entries;
static void drain_current_samples(void)
{
@@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void)
uint32_t remaining_samples = total_samples;
int err;
- BUILD_BUG_ON(total_samples <= c_max_entries);
+ if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries"))
+ return;
+
err = load_skel_create_user_ringbuf(&skel, &ringbuf);
if (err)
return;
@@ -686,6 +687,9 @@ void test_user_ringbuf(void)
{
int i;
+ c_ringbuf_size = getpagesize(); /* 1 page */
+ c_max_entries = c_ringbuf_size / c_sample_size;
+
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
if (!test__start_subtest(success_tests[i].test_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 8a0e1ff8a2dc..4b4b081b46cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -7,6 +7,7 @@
#include "verifier_arena.skel.h"
#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
+#include "verifier_async_cb_context.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
#include "verifier_bounds.skel.h"
@@ -14,6 +15,7 @@
#include "verifier_bounds_deduction_non_const.skel.h"
#include "verifier_bounds_mix_sign_unsign.skel.h"
#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
#include "verifier_bswap.skel.h"
#include "verifier_btf_ctx_access.skel.h"
#include "verifier_btf_unreliable_prog.skel.h"
@@ -33,6 +35,7 @@
#include "verifier_global_subprogs.skel.h"
#include "verifier_global_ptr_args.skel.h"
#include "verifier_gotol.skel.h"
+#include "verifier_gotox.skel.h"
#include "verifier_helper_access_var_len.skel.h"
#include "verifier_helper_packet_access.skel.h"
#include "verifier_helper_restricted.skel.h"
@@ -45,6 +48,8 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_live_stack.skel.h"
+#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
#include "verifier_map_in_map.skel.h"
@@ -57,6 +62,7 @@
#include "verifier_meta_access.skel.h"
#include "verifier_movsx.skel.h"
#include "verifier_mtu.skel.h"
+#include "verifier_mul.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
#include "verifier_bpf_fastcall.skel.h"
@@ -80,8 +86,10 @@
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
+#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
+#include "verifier_tailcall.skel.h"
#include "verifier_tailcall_jit.skel.h"
#include "verifier_typedef.skel.h"
#include "verifier_uninit.skel.h"
@@ -121,7 +129,7 @@ static void run_tests_aux(const char *skel_name,
/* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
if (err) {
- PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
return;
}
@@ -131,7 +139,7 @@ static void run_tests_aux(const char *skel_name,
err = cap_enable_effective(old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
}
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
@@ -146,6 +154,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction);
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
void test_verifier_bswap(void) { RUN(verifier_bswap); }
void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
@@ -165,6 +174,7 @@ void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
void test_verifier_gotol(void) { RUN(verifier_gotol); }
+void test_verifier_gotox(void) { RUN(verifier_gotox); }
void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
@@ -173,10 +183,12 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); }
+void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
+void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
void test_verifier_loops1(void) { RUN(verifier_loops1); }
void test_verifier_lwt(void) { RUN(verifier_lwt); }
void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
@@ -188,6 +200,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); }
void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); }
void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_mul(void) { RUN(verifier_mul); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
@@ -211,8 +224,10 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_tailcall(void) { RUN(verifier_tailcall); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
void test_verifier_typedef(void) { RUN(verifier_typedef); }
void test_verifier_uninit(void) { RUN(verifier_uninit); }
@@ -268,6 +283,7 @@ void test_verifier_array_access(void)
verifier_array_access__elf_bytes,
init_array_access_maps);
}
+void test_verifier_async_cb_context(void) { RUN(verifier_async_cb_context); }
static int init_value_ptr_arith_maps(struct bpf_object *obj)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
index ab0f02faa80c..4d69d9d55e17 100644
--- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void)
char *tmp_dir;
struct test_verify_pkcs7_sig *skel = NULL;
struct bpf_map *map;
- struct data data;
+ struct data data = {};
int ret, zero = 0;
/* Trigger creation of session keyring. */
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 99e438fe12ac..15c67d23128b 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -38,3 +38,59 @@ void serial_test_failures_wq(void)
{
RUN_TESTS(wq_failures);
}
+
+static void test_failure_map_no_btf(void)
+{
+ struct wq *skel = NULL;
+ char log[8192];
+ const struct bpf_insn *insns;
+ size_t insn_cnt;
+ int ret, err, map_fd;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log,
+ .log_level = 2);
+
+ skel = wq__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "skel__prepare"))
+ goto out;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100,
+ NULL);
+ if (!ASSERT_GT(map_fd, -1, "map create"))
+ goto out;
+
+ err = bpf_map__reuse_fd(skel->maps.array, map_fd);
+ if (!ASSERT_OK(err, "map reuse fd")) {
+ close(map_fd);
+ goto out;
+ }
+
+ insns = bpf_program__insns(skel->progs.test_map_no_btf);
+ if (!ASSERT_OK_PTR(insns, "insns ptr"))
+ goto out;
+
+ insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf);
+ if (!ASSERT_GT(insn_cnt, 0u, "insn cnt"))
+ goto out;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_LT(ret, 0, "prog load failed")) {
+ if (ret > 0)
+ close(ret);
+ goto out;
+ }
+
+ ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq",
+ "log complains no map BTF");
+out:
+ wq__destroy(skel);
+}
+
+void test_wq_custom(void)
+{
+ if (test__start_subtest("test_failure_map_no_btf"))
+ test_failure_map_no_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index b2b2d85dbb1b..43264347e7d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void)
bpf_object__close(obj);
}
-static void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(bool is_64k_pagesize)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
- char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ char buf[8192]; /* avoid segfault: large buf to hold grow results */
__u32 expect_sz;
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
+ /* topts.data_size_in as a special signal to bpf prog */
+ if (is_64k_pagesize)
+ topts.data_size_in = sizeof(pkt_v4) - 1;
+ else
+ topts.data_size_in = sizeof(pkt_v4);
+
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
@@ -208,7 +213,7 @@ out:
bpf_object__close(obj);
}
-static void test_xdp_adjust_frags_tail_grow(void)
+static void test_xdp_adjust_frags_tail_grow_4k(void)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
__u32 exp_size;
@@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void)
ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
- for (i = 0; i < 9000; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ for (i = 0; i < 9000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ }
- for (i = 9000; i < 9010; i++)
- ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ for (i = 9000; i < 9010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ }
- for (i = 9010; i < 16384; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ for (i = 9010; i < 16384; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ }
/* Test a too large grow */
memset(buf, 1, 16384);
@@ -273,16 +284,93 @@ out:
bpf_object__close(obj);
}
+static void test_xdp_adjust_frags_tail_grow_64k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(262144);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 262144);
+ exp_size = 90000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 90000;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ for (i = 0; i < 90000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-old");
+ }
+
+ for (i = 90000; i < 90010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "90Kb+10b-new");
+ }
+
+ for (i = 90010; i < 262144; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 262144);
+ exp_size = 90001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 90001;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
void test_xdp_adjust_tail(void)
{
+ int page_size = getpagesize();
+
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
- test_xdp_adjust_tail_grow();
+ test_xdp_adjust_tail_grow(page_size == 65536);
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
test_xdp_adjust_frags_tail_shrink();
- if (test__start_subtest("xdp_adjust_frags_tail_grow"))
- test_xdp_adjust_frags_tail_grow();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow")) {
+ if (page_size == 65536)
+ test_xdp_adjust_frags_tail_grow_64k();
+ else
+ test_xdp_adjust_frags_tail_grow_4k();
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 937da9b7532a..ee94c281888a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -4,12 +4,21 @@
#include "test_xdp_context_test_run.skel.h"
#include "test_xdp_meta.skel.h"
-#define TX_ADDR "10.0.0.1"
-#define RX_ADDR "10.0.0.2"
#define RX_NAME "veth0"
#define TX_NAME "veth1"
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
+#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
+#define TAP_NETNS "xdp_context_tuntap"
+
+#define TEST_PAYLOAD_LEN 32
+static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
@@ -88,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
- /* Total size of data must match data_end - data_meta */
- test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
- sizeof(data) - 1, 0, 0, 0);
+ /* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);
@@ -112,7 +119,82 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
-void test_xdp_context_functional(void)
+static int send_test_packet(int ifindex)
+{
+ int n, sock = -1;
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+
+ /* We use the Ethernet header only to identify the test packet */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+
+ sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(sock, 0, "socket"))
+ goto err;
+
+ struct sockaddr_ll saddr = {
+ .sll_family = PF_PACKET,
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN
+ };
+ n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr,
+ sizeof(saddr));
+ if (!ASSERT_EQ(n, sizeof(packet), "sendto"))
+ goto err;
+
+ close(sock);
+ return 0;
+
+err:
+ if (sock >= 0)
+ close(sock);
+ return -1;
+}
+
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+ /* The Ethernet header is mostly not relevant. We use it to identify the
+ * test packet and some BPF helpers we exercise expect to operate on
+ * Ethernet frames carrying IP packets. Pretend that's the case.
+ */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ .h_proto = htons(ETH_P_IP),
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void dump_err_stream(const struct bpf_program *prog)
+{
+ char buf[512];
+ int ret;
+
+ ret = 0;
+ do {
+ ret = bpf_prog_stream_read(bpf_program__fd(prog),
+ BPF_STREAM_STDERR, buf, sizeof(buf),
+ NULL);
+ if (ret > 0)
+ fwrite(buf, sizeof(buf[0]), ret, stderr);
+ } while (ret > 0);
+}
+
+void test_xdp_context_veth(void)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
@@ -120,7 +202,7 @@ void test_xdp_context_functional(void)
struct bpf_program *tc_prog, *xdp_prog;
struct test_xdp_meta *skel = NULL;
struct nstoken *nstoken = NULL;
- int rx_ifindex;
+ int rx_ifindex, tx_ifindex;
int ret;
tx_ns = netns_new(TX_NETNS, false);
@@ -138,7 +220,6 @@ void test_xdp_context_functional(void)
if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
goto close;
- SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
SYS(close, "ip link set dev " RX_NAME " up");
skel = test_xdp_meta__open_and_load();
@@ -179,9 +260,20 @@ void test_xdp_context_functional(void)
if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
goto close;
- SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
SYS(close, "ip link set dev " TX_NAME " up");
- ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
+
+ tx_ifindex = if_nametoindex(TX_NAME);
+ if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
+ goto close;
+
+ skel->bss->test_pass = false;
+
+ ret = send_test_packet(tx_ifindex);
+ if (!ASSERT_OK(ret, "send_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
close:
close_netns(nstoken);
@@ -190,3 +282,231 @@ close:
netns_free(tx_ns);
}
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "create and open ns"))
+ return;
+
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = tap_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
+
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+ /* Tests for BPF helpers which touch headroom */
+ if (test__start_subtest("helper_skb_vlan_push_pop"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_vlan_push_pop,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_adjust_room"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_adjust_room,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_head_tail"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_head_tail,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_proto"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_proto,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index c7f74f068e78..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* send a packet to trigger any potential bugs in there */
- char data[10] = {};
+ char data[ETH_HLEN] = {};
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 27ffed17d4be..a8ab05216c38 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -7,6 +7,7 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_devmap_tailcall.skel.h"
#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
@@ -23,7 +24,7 @@ static void test_xdp_with_devmap_helpers(void)
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd;
struct nstoken *nstoken = NULL;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +59,7 @@ static void test_xdp_with_devmap_helpers(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void)
}
}
+static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev,
+ enum bpf_attach_type prog_tail,
+ bool expect_reject)
+{
+ struct test_xdp_devmap_tailcall *skel;
+ int err;
+
+ skel = test_xdp_devmap_tailcall__open();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open"))
+ return;
+
+ bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev);
+ bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail);
+
+ err = test_xdp_devmap_tailcall__load(skel);
+ if (expect_reject)
+ ASSERT_ERR(err, "test_xdp_devmap_tailcall__load");
+ else
+ ASSERT_OK(err, "test_xdp_devmap_tailcall__load");
+
+ test_xdp_devmap_tailcall__destroy(skel);
+}
+
static void test_xdp_with_devmap_frags_helpers(void)
{
struct test_xdp_with_devmap_frags_helpers *skel;
@@ -158,7 +182,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
struct nstoken *nstoken = NULL;
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +232,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void)
if (test__start_subtest("DEVMAP with frags programs in entries"))
test_xdp_with_devmap_frags_helpers();
- if (test__start_subtest("Verifier check of DEVMAP programs"))
+ if (test__start_subtest("Verifier check of DEVMAP programs")) {
test_neg_xdp_devmap_helpers();
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false);
+ test_xdp_devmap_tailcall(0, 0, true);
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true);
+ test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true);
+ }
if (test__start_subtest("DEVMAP with programs in entries on veth"))
test_xdp_with_devmap_helpers_veth();
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7dac044664ac..dd34b0cc4b4e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
#else
#define MAX_PKT_SIZE 3408
#endif
+
+#define PAGE_SIZE_4K 4096
+#define PAGE_SIZE_64K 65536
+
static void test_max_pkt_size(int fd)
{
- char data[MAX_PKT_SIZE + 1] = {};
+ char data[PAGE_SIZE_64K + 1] = {};
int err;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = MAX_PKT_SIZE,
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K;
+ else
+ opts.data_size_in = MAX_PKT_SIZE;
+
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_OK(err, "prog_run_max_size");
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 3d47878ef6bf..19f92affc2da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -351,9 +351,10 @@ void test_xdp_metadata(void)
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
struct nstoken *tok = NULL;
__u32 queue_id = QUEUE_ID;
- struct bpf_map *prog_arr;
struct xsk tx_xsk = {};
struct xsk rx_xsk = {};
__u32 val, key = 0;
@@ -409,6 +410,13 @@ void test_xdp_metadata(void)
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
@@ -423,6 +431,18 @@ void test_xdp_metadata(void)
"update prog_arr"))
goto out;
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
/* Attach BPF program to RX interface. */
ret = bpf_xdp_attach(rx_ifindex,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
new file mode 100644
index 000000000000..efa350d04ec5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_pull_data.skel.h"
+
+#define PULL_MAX (1 << 31)
+#define PULL_PLUS_ONE (1 << 30)
+
+#define XDP_PACKET_HEADROOM 256
+
+/* Find headroom and tailroom occupied by struct xdp_frame and struct
+ * skb_shared_info so that we can calculate the maximum pull lengths for
+ * test cases. They might not be the real size of the structures due to
+ * cache alignment.
+ */
+static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(frame_sz, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return -ENOMEM;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = frame_sz;
+ topts.data_size_out = frame_sz;
+ /* Pass a data_end larger than the linear space available to make sure
+ * bpf_prog_test_run_xdp() will fill the linear data area so that
+ * xdp_find_sizes can infer the size of struct skb_shared_info
+ */
+ ctx.data_end = frame_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ free(buf);
+
+ return err;
+}
+
+/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024]
+ * so caller expecting XDP_PASS should always pass pull_len no less than 1024
+ */
+static void run_test(struct test_xdp_pull_data *skel, int retval,
+ int frame_sz, int buff_len, int meta_len, int data_len,
+ int pull_len)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(buff_len, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return;
+
+ buf[meta_len + 1023] = 0xaa;
+ buf[meta_len + 1024] = 0xbb;
+ buf[meta_len + 1025] = 0xcc;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buff_len;
+ topts.data_size_out = buff_len;
+ ctx.data = meta_len;
+ ctx.data_end = meta_len + data_len;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ skel->bss->data_len = data_len;
+ if (pull_len & PULL_MAX) {
+ int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
+ int tailroom = frame_sz - XDP_PACKET_HEADROOM -
+ data_len - skel->bss->sinfo_sz;
+
+ pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
+ pull_len += headroom + tailroom + data_len;
+ }
+ skel->bss->pull_len = pull_len;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
+
+ if (retval == XDP_DROP)
+ goto out;
+
+ ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
+ ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
+ /* Make sure data around xdp->data_end was not messed up by
+ * bpf_xdp_pull_data()
+ */
+ ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
+ ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
+ ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
+out:
+ free(buf);
+}
+
+static void test_xdp_pull_data_basic(void)
+{
+ u32 pg_sz, max_meta_len, max_data_len;
+ struct test_xdp_pull_data *skel;
+
+ skel = test_xdp_pull_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
+ return;
+
+ pg_sz = sysconf(_SC_PAGE_SIZE);
+
+ if (find_xdp_sizes(skel, pg_sz))
+ goto out;
+
+ max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
+ max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
+
+ /* linear xdp pkt, pull 0 byte */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
+
+ /* multi-buf pkt, pull results in linear xdp pkt */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
+
+ /* multi-buf pkt, pull 1 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
+
+ /* multi-buf pkt, pull 0 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
+
+ /* multi-buf pkt, empty linear data area, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+
+ /* multi-buf pkt, no tailroom, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+
+ /* Test cases with invalid pull length */
+
+ /* linear xdp pkt, pull more than total data len */
+ run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
+
+ /* multi-buf pkt with no space left in linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, empty linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no tailroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+out:
+ test_xdp_pull_data__destroy(skel);
+}
+
+void test_xdp_pull_data(void)
+{
+ if (test__start_subtest("xdp_pull_data"))
+ test_xdp_pull_data_basic();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
new file mode 100644
index 000000000000..18dd25344de7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Network topology:
+ * ----------- -----------
+ * | NS1 | | NS2 |
+ * | veth0 -|--------|- veth0 |
+ * ----------- -----------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include <uapi/linux/if_link.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xdp_vlan.skel.h"
+
+
+#define VETH_NAME "veth0"
+#define NS_MAX_SIZE 32
+#define NS1_NAME "ns-xdp-vlan-1-"
+#define NS2_NAME "ns-xdp-vlan-2-"
+#define NS1_IP_ADDR "100.64.10.1"
+#define NS2_IP_ADDR "100.64.10.2"
+#define VLAN_ID 4011
+
+static int setup_network(char *ns1, char *ns2)
+{
+ if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name"))
+ goto fail;
+ if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", ns1);
+ SYS(fail, "ip netns add %s", ns2);
+ SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s",
+ ns1, VETH_NAME, VETH_NAME, ns2);
+
+ /* NOTICE: XDP require VLAN header inside packet payload
+ * - Thus, disable VLAN offloading driver features
+ */
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME);
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME);
+
+ /* NS1 configuration */
+ SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME);
+
+ /* NS2 configuration */
+ SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d",
+ ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID);
+ SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID);
+ SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID);
+
+ /* At this point ping should fail because VLAN tags are only used by NS2 */
+ return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR);
+
+fail:
+ return -1;
+}
+
+static void cleanup_network(const char *ns1, const char *ns2)
+{
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+}
+
+static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ char ns1[NS_MAX_SIZE] = NS1_NAME;
+ char ns2[NS_MAX_SIZE] = NS2_NAME;
+ struct nstoken *nstoken = NULL;
+ int interface;
+ int ret;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2), "setup network"))
+ goto cleanup;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto cleanup;
+
+ interface = if_nametoindex(VETH_NAME);
+ if (!ASSERT_NEQ(interface, 0, "get interface index"))
+ goto cleanup;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp_vlan_change"))
+ goto cleanup;
+
+ tc_hook.ifindex = interface;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto detach_xdp;
+
+ /* Now we'll use BPF programs to pop/push the VLAN tags */
+ tc_opts.prog_fd = bpf_program__fd(tc);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto detach_xdp;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Now the namespaces can reach each-other, test with pings */
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR);
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR);
+
+
+detach_tc:
+ bpf_tc_detach(&tc_hook, &tc_opts);
+detach_xdp:
+ bpf_xdp_detach(interface, flags, NULL);
+cleanup:
+ close_netns(nstoken);
+ cleanup_network(ns1, ns2);
+}
+
+/* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+ * egress use TC to add back VLAN tag 4011
+ */
+void test_xdp_vlan_change(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
+
+/* Second test: XDP prog fully remove vlan header
+ *
+ * Catch kernel bug for generic-XDP, that doesn't allow us to
+ * remove a VLAN header, because skb->protocol still contain VLAN
+ * ETH_P_8021Q indication, and this cause overwriting of our changes.
+ */
+void test_xdp_vlan_remove(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c
new file mode 100644
index 000000000000..dd4c35c0e428
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xsk.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <stdarg.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xsk.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define VETH_RX "veth0"
+#define VETH_TX "veth1"
+#define MTU 1500
+
+int setup_veth(bool busy_poll)
+{
+ SYS(fail,
+ "ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4",
+ VETH_RX, VETH_TX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX);
+
+ if (busy_poll) {
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX);
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX);
+ }
+
+ SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU);
+ SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU);
+ SYS(fail, "ip link set %s up", VETH_RX);
+ SYS(fail, "ip link set %s up", VETH_TX);
+
+ return 0;
+
+fail:
+ return -1;
+}
+
+void delete_veth(void)
+{
+ SYS_NOFAIL("ip link del %s", VETH_RX);
+ SYS_NOFAIL("ip link del %s", VETH_TX);
+}
+
+int configure_ifobj(struct ifobject *tx, struct ifobject *rx)
+{
+ rx->ifindex = if_nametoindex(VETH_RX);
+ if (!ASSERT_OK_FD(rx->ifindex, "get RX ifindex"))
+ return -1;
+
+ tx->ifindex = if_nametoindex(VETH_TX);
+ if (!ASSERT_OK_FD(tx->ifindex, "get TX ifindex"))
+ return -1;
+
+ tx->shared_umem = false;
+ rx->shared_umem = false;
+
+
+ return 0;
+}
+
+static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode)
+{
+ struct ifobject *ifobj_tx, *ifobj_rx;
+ struct test_spec test;
+ int ret;
+
+ ifobj_tx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx"))
+ return;
+
+ ifobj_rx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx"))
+ goto delete_tx;
+
+ if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "conigure ifobj"))
+ goto delete_rx;
+
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
+ if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX"))
+ goto delete_rx;
+ if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX"))
+ goto delete_rx;
+
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+
+ test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation"))
+ goto delete_rx;
+ test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation"))
+ goto delete_rx;
+
+
+ test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run);
+ ret = test.test_func(&test);
+ if (ret != TEST_SKIP)
+ ASSERT_OK(ret, "Run test");
+ pkt_stream_restore_default(&test);
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ pkt_stream_delete(test.tx_pkt_stream_default);
+ pkt_stream_delete(test.rx_pkt_stream_default);
+ xsk_xdp_progs__destroy(ifobj_tx->xdp_progs);
+ xsk_xdp_progs__destroy(ifobj_rx->xdp_progs);
+
+delete_rx:
+ ifobject_delete(ifobj_rx);
+delete_tx:
+ ifobject_delete(ifobj_tx);
+}
+
+void test_ns_xsk_skb(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_SKB);
+ }
+
+ delete_veth();
+}
+
+void test_ns_xsk_drv(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_DRV);
+ }
+
+ delete_veth();
+}
+
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index 40dd57fca5cc..d1841aac94a2 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -6,6 +6,8 @@
#include <stdbool.h>
#include <stdatomic.h>
#include "bpf_arena_common.h"
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
@@ -19,9 +21,18 @@ struct {
} arena SEC(".maps");
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
-bool skip_tests __attribute((__section__(".data"))) = false;
+bool skip_all_tests __attribute((__section__(".data"))) = false;
#else
-bool skip_tests = true;
+bool skip_all_tests = true;
+#endif
+
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_lacq_srel_tests = true;
#endif
__u32 pid = 0;
@@ -274,4 +285,113 @@ int uaf(const void *ctx)
return 0;
}
+#if __clang_major__ >= 18
+__u8 __arena_global load_acquire8_value = 0x12;
+__u16 __arena_global load_acquire16_value = 0x1234;
+__u32 __arena_global load_acquire32_value = 0x12345678;
+__u64 __arena_global load_acquire64_value = 0x1234567890abcdef;
+
+__u8 __arena_global load_acquire8_result = 0;
+__u16 __arena_global load_acquire16_result = 0;
+__u32 __arena_global load_acquire32_result = 0;
+__u64 __arena_global load_acquire64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global load_acquire8_value;
+__u64 __arena_global load_acquire16_value;
+__u64 __arena_global load_acquire32_value;
+__u64 __arena_global load_acquire64_value;
+
+__u64 __arena_global load_acquire8_result;
+__u64 __arena_global load_acquire16_result;
+__u64 __arena_global load_acquire32_result;
+__u64 __arena_global load_acquire64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int load_acquire(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
+#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
+ { asm volatile ( \
+ "r1 = %[" #SRC "] ll;" \
+ "r1 = addr_space_cast(r1, 0x0, 0x1);" \
+ ".8byte %[load_acquire_insn];" \
+ "r3 = %[" #DST "] ll;" \
+ "r3 = addr_space_cast(r3, 0x0, 0x1);" \
+ "*(" #SIZE " *)(r3 + 0) = r2;" \
+ : \
+ : __imm_addr(SRC), \
+ __imm_insn(load_acquire_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \
+ BPF_REG_2, BPF_REG_1, 0)), \
+ __imm_addr(DST) \
+ : __clobber_all); } \
+
+ LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result)
+ LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value,
+ load_acquire16_result)
+ LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value,
+ load_acquire32_result)
+ LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value,
+ load_acquire64_result)
+#undef LOAD_ACQUIRE_ARENA
+
+#endif
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global store_release8_result = 0;
+__u16 __arena_global store_release16_result = 0;
+__u32 __arena_global store_release32_result = 0;
+__u64 __arena_global store_release64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global store_release8_result;
+__u64 __arena_global store_release16_result;
+__u64 __arena_global store_release32_result;
+__u64 __arena_global store_release64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int store_release(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
+#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
+ { asm volatile ( \
+ "r1 = " VAL ";" \
+ "r2 = %[" #DST "] ll;" \
+ "r2 = addr_space_cast(r2, 0x0, 0x1);" \
+ ".8byte %[store_release_insn];" \
+ : \
+ : __imm_addr(DST), \
+ __imm_insn(store_release_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \
+ BPF_REG_2, BPF_REG_1, 0)) \
+ : __clobber_all); } \
+
+ STORE_RELEASE_ARENA(B, store_release8_result, "0x12")
+ STORE_RELEASE_ARENA(H, store_release16_result, "0x1234")
+ STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678")
+ STORE_RELEASE_ARENA(DW, store_release64_result,
+ "0x1234567890abcdef ll")
+#undef STORE_RELEASE_ARENA
+
+#endif
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
new file mode 100644
index 000000000000..086b57a426cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_spin_lock.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+int cs_count;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+arena_spinlock_t __arena lock;
+int test_skip = 1;
+#else
+int test_skip = 2;
+#endif
+
+int counter;
+int limit;
+
+SEC("tc")
+int prog(void *ctx)
+{
+ int ret = -2;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ unsigned long flags;
+
+ if ((ret = arena_spin_lock_irqsave(&lock, flags))) {
+ if (ret == -EOPNOTSUPP)
+ test_skip = 3;
+ return ret;
+ }
+ if (counter != limit)
+ counter++;
+ bpf_repeat(cs_count);
+ ret = 0;
+ arena_spin_unlock_irqrestore(&lock, flags);
+#endif
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_strsearch.c b/tools/testing/selftests/bpf/progs/arena_strsearch.c
new file mode 100644
index 000000000000..ef6b76658f7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_strsearch.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_strsearch.h"
+
+struct glob_test {
+ char const __arena *pat, *str;
+ bool expected;
+};
+
+static bool test(char const __arena *pat, char const __arena *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* bpf_printk("glob_match %s %s res %d ok %d", pat, str, match, success); */
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static const char __arena glob_tests[] =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+bool skip = false;
+
+SEC("syscall")
+int arena_strsearch(void *ctx)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const __arena *p = glob_tests;
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const __arena *pat = p;
+
+ cond_break;
+ p += bpf_arena_strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += bpf_arena_strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ /* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */
+
+ return n ? -1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
new file mode 100644
index 000000000000..f90531cf3ee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ARENA_SPIN_LOCK_H
+#define BPF_ARENA_SPIN_LOCK_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_atomic.h"
+
+#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label)
+#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1)
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+#define EBUSY 16
+#define EOPNOTSUPP 95
+#define ETIMEDOUT 110
+
+#ifndef __arena
+#define __arena __attribute__((address_space(1)))
+#endif
+
+extern unsigned long CONFIG_NR_CPUS __kconfig;
+
+/*
+ * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but
+ * PowerPC overrides the definition to define lock->val as u32 instead of
+ * atomic_t, leading to compilation errors. Import a local definition below so
+ * that we don't depend on the vmlinux.h version.
+ */
+
+struct __qspinlock {
+ union {
+ atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
+ };
+};
+
+#define arena_spinlock_t struct __qspinlock
+/* FIXME: Using typedef causes CO-RE relocation error */
+/* typedef struct qspinlock arena_spinlock_t; */
+
+struct arena_mcs_spinlock {
+ struct arena_mcs_spinlock __arena *next;
+ int locked;
+ int count;
+};
+
+struct arena_qnode {
+ struct arena_mcs_spinlock mcs;
+};
+
+#define _Q_MAX_NODES 4
+#define _Q_PENDING_LOOPS 1
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * 0- 7: locked byte
+ * 8: pending
+ * 9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ */
+#define _Q_MAX_CPUS 1024
+
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 8
+#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 8
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS 2
+#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
+
+struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail)
+{
+ u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return &qnodes[cpu][idx].mcs;
+}
+
+static inline
+struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx)
+{
+ return &((struct arena_qnode __arena *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ */
+ /* These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+static __always_inline
+u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = old | _Q_PENDING_VAL;
+ /*
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * arena_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ if (unlikely(val))
+ return 0;
+
+ return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
+}
+
+__noinline
+int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val)
+{
+ struct arena_mcs_spinlock __arena *prev, *next, *node0, *node;
+ int ret = -ETIMEDOUT;
+ u32 old, tail;
+ int idx;
+
+ /*
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ if (val == _Q_PENDING_VAL) {
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed_label(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--,
+ release_err);
+ }
+
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
+ * trylock || pending
+ *
+ * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+ */
+ val = arena_fetch_set_pending_acquire(lock);
+
+ /*
+ * If we observe contention, there is a concurrent locker.
+ *
+ * Undo and queue; our setting of PENDING might have made the
+ * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+ * on @next to become !NULL.
+ */
+ if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+ /* Undo PENDING if we set it. */
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
+
+ goto queue;
+ }
+
+ /*
+ * We're pending, wait for the owner to go away.
+ *
+ * 0,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
+ */
+ if (val & _Q_LOCKED_MASK)
+ (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ clear_pending_set_locked(lock);
+ return 0;
+
+ /*
+ * End of pending bit optimistic spinning and beginning of MCS
+ * queuing.
+ */
+queue:
+ node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs;
+ idx = node0->count++;
+ tail = encode_tail(bpf_get_smp_processor_id(), idx);
+
+ /*
+ * 4 nodes are allocated based on the assumption that there will not be
+ * nested NMIs taking spinlocks. That may not be true in some
+ * architectures even though the chance of needing more than 4 nodes
+ * will still be extremely unlikely. When that happens, we simply return
+ * an error. Original qspinlock has a trylock fallback in this case.
+ */
+ if (unlikely(idx >= _Q_MAX_NODES)) {
+ ret = -EBUSY;
+ goto release_node_err;
+ }
+
+ node = grab_mcs_node(node0, idx);
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
+ node->locked = 0;
+ node->next = NULL;
+
+ /*
+ * We touched a (possibly) cold cacheline in the per-cpu queue node;
+ * attempt the trylock once more in the hope someone let go while we
+ * weren't watching.
+ */
+ if (arena_spin_trylock(lock))
+ goto release;
+
+ /*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
+ * We have already touched the queueing cacheline; don't bother with
+ * pending stuff.
+ *
+ * p,*,* -> n,*,*
+ */
+ old = xchg_tail(lock, tail);
+ next = NULL;
+
+ /*
+ * if there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_MASK) {
+ prev = decode_tail(old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We cannot prefetch here
+ * due to lack of equivalent instruction in BPF ISA.
+ */
+ next = READ_ONCE(node->next);
+ }
+
+ /*
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
+ *
+ * *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ */
+ val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ release_node_err);
+
+ /*
+ * claim the lock:
+ *
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,*,0 -> *,*,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
+ */
+
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set, because
+ * of lock stealing; therefore we must also allow:
+ *
+ * n,0,1 -> 0,0,1
+ *
+ * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+ * above wait condition, therefore any concurrent setting of
+ * PENDING will make the uncontended transition fail.
+ */
+ if ((val & _Q_TAIL_MASK) == tail) {
+ if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+ }
+
+ /*
+ * Either somebody is queued behind us or _Q_PENDING_VAL got set
+ * which will then detect the remaining tail and queue behind us
+ * ensuring we'll see a @next.
+ */
+ set_locked(lock);
+
+ /*
+ * contended path; wait for next if not observed yet, release.
+ */
+ if (!next)
+ next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err);
+
+ arch_mcs_spin_unlock_contended(&next->locked);
+
+release:;
+ /*
+ * release the node
+ *
+ * Doing a normal dec vs this_cpu_dec is fine. An upper context always
+ * decrements count it incremented before returning, thus we're fine.
+ * For contexts interrupting us, they either observe our dec or not.
+ * Just ensure the compiler doesn't reorder this statement, as a
+ * this_cpu_dec implicitly implied that.
+ */
+ barrier();
+ node0->count--;
+ return 0;
+release_node_err:
+ barrier();
+ node0->count--;
+ goto release_err;
+release_err:
+ return ret;
+}
+
+/**
+ * arena_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * On error, returned value will be negative.
+ * On success, zero is returned.
+ *
+ * The return value _must_ be tested against zero for success,
+ * instead of checking it against negative, for passing the
+ * BPF verifier.
+ *
+ * The user should do:
+ * if (arena_spin_lock(...) != 0) // failure
+ * or
+ * if (arena_spin_lock(...) == 0) // success
+ * or
+ * if (arena_spin_lock(...)) // failure
+ * or
+ * if (!arena_spin_lock(...)) // success
+ * instead of:
+ * if (arena_spin_lock(...) < 0) // failure
+ *
+ * The return value can still be inspected later.
+ */
+static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock)
+{
+ int val = 0;
+
+ if (CONFIG_NR_CPUS > 1024)
+ return -EOPNOTSUPP;
+
+ bpf_preempt_disable();
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return 0;
+
+ val = arena_spin_lock_slowpath(lock, val);
+ /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */
+ if (val)
+ bpf_preempt_enable();
+ return val;
+}
+
+/**
+ * arena_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+ bpf_preempt_enable();
+}
+
+#define arena_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ bpf_local_irq_save(&(flags)); \
+ __ret = arena_spin_lock((lock)); \
+ if (__ret) \
+ bpf_local_irq_restore(&(flags)); \
+ (__ret); \
+ })
+
+#define arena_spin_unlock_irqrestore(lock, flags) \
+ ({ \
+ arena_spin_unlock((lock)); \
+ bpf_local_irq_restore(&(flags)); \
+ })
+
+#endif
+
+#endif /* BPF_ARENA_SPIN_LOCK_H */
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 1654a530aa3d..9af19dfe4e80 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -22,10 +22,6 @@
#define TCP_PACING_CA_RATIO (120)
#define TCP_REORDERING (12)
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define after(seq2, seq1) before(seq1, seq2)
-
extern void cubictcp_init(struct sock *sk) __ksym;
extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
@@ -34,11 +30,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-
static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
@@ -101,7 +92,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
tp->snd_cwnd = pkts_in_flight + sndcnt;
}
-/* Decide wheather to run the increase function of congestion control. */
+/* Decide whether to run the increase function of congestion control. */
static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
if (tcp_sk(sk)->reordering > TCP_REORDERING)
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index f089faa97ae6..46fb2b37d3a7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -20,13 +20,6 @@
char _license[] SEC("license") = "GPL";
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) before(seq1, seq2)
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 7cd73e75f52a..1cc83140849f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_dctcp.c. The purpose is mainly for testing
* the kernel BPF logic.
*/
@@ -13,16 +13,10 @@
#ifndef EBUSY
#define EBUSY 16
#endif
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c
new file mode 100644
index 000000000000..216c71b94c64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+__u64 in_user;
+__u64 ret_user;
+
+int pid;
+
+/*
+ * Skip all the tests if compiler doesn't support indirect jumps.
+ *
+ * If tests are skipped, then all functions below are compiled as
+ * dummy, such that the skeleton looks the same, and the userspace
+ * program can avoid any checks rather than if data->skip is set.
+ */
+#ifdef __BPF_FEATURE_GOTOX
+__u64 skip SEC(".data") = 0;
+#else
+__u64 skip = 1;
+#endif
+
+struct simple_ctx {
+ __u64 x;
+};
+
+#ifdef __BPF_FEATURE_GOTOX
+__u64 some_var;
+
+/*
+ * This function adds code which will be replaced by a different
+ * number of instructions by the verifier. This adds additional
+ * stress on testing the insn_array maps corresponding to indirect jumps.
+ */
+static __always_inline void adjust_insns(__u64 x)
+{
+ some_var ^= x + bpf_jiffies64();
+}
+
+SEC("syscall")
+int one_switch(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int one_switch_non_zero_sec_off(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int simple_test_other_sec(struct pt_regs *ctx)
+{
+ __u64 x = in_user;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int two_switches(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ switch (ctx->x + !!ret_user) {
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 103;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 104;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 107;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 11);
+ ret_user = 205;
+ break;
+ case 5:
+ adjust_insns(ctx->x + 11);
+ ret_user = 115;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 1019;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int big_jump_table(struct simple_ctx *ctx __attribute__((unused)))
+{
+ const void *const jt[256] = {
+ [0 ... 255] = &&default_label,
+ [0] = &&l0,
+ [11] = &&l11,
+ [27] = &&l27,
+ [31] = &&l31,
+ };
+
+ goto *jt[ctx->x & 0xff];
+
+l0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ return 0;
+
+l11:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ return 0;
+
+l27:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ return 0;
+
+l31:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ return 0;
+
+default_label:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ return 0;
+}
+
+SEC("syscall")
+int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3, l4;
+ void *jt1[2] = { &&l1, &&l2 };
+ void *jt2[2] = { &&l3, &&l4 };
+ unsigned int a = ctx->x % 2;
+ unsigned int b = (ctx->x / 2) % 2;
+ volatile int ret = 0;
+
+ if (!(a < 2 && b < 2))
+ return 19;
+
+ if (ctx->x % 2)
+ goto *jt1[a];
+ else
+ goto *jt2[b];
+
+ l1: ret += 1;
+ l2: ret += 3;
+ l3: ret += 5;
+ l4: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+SEC("syscall")
+int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3;
+ void *jt[3] = { &&l1, &&l2, &&l3 };
+ unsigned int a = (ctx->x >> 2) & 1;
+ unsigned int b = (ctx->x >> 3) & 1;
+ volatile int ret = 0;
+
+ if (ctx->x % 2)
+ goto *jt[a];
+
+ if (ctx->x % 3)
+ goto *jt[a + b];
+
+ l1: ret += 3;
+ l2: ret += 5;
+ l3: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+/* Just to introduce some non-zero offsets in .text */
+static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx)
+{
+ if (ctx)
+ return 1;
+ else
+ return 13;
+}
+
+SEC("syscall") int f1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return f0(ctx);
+}
+
+static __noinline int __static_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_static_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __static_global(ctx->x);
+}
+
+SEC("syscall")
+int use_static_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __static_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_static_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __static_global(in_user);
+}
+
+__noinline int __nonstatic_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_nonstatic_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("syscall")
+int use_nonstatic_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_nonstatic_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __nonstatic_global(in_user);
+}
+
+#else /* __BPF_FEATURE_GOTOX */
+
+#define SKIP_TEST(TEST_NAME) \
+ SEC("syscall") int TEST_NAME(void *ctx) \
+ { \
+ return 0; \
+ }
+
+SKIP_TEST(one_switch);
+SKIP_TEST(one_switch_non_zero_sec_off);
+SKIP_TEST(simple_test_other_sec);
+SKIP_TEST(two_switches);
+SKIP_TEST(big_jump_table);
+SKIP_TEST(one_jump_two_maps);
+SKIP_TEST(one_map_two_jumps);
+SKIP_TEST(use_static_global1);
+SKIP_TEST(use_static_global2);
+SKIP_TEST(use_static_global_other_sec);
+SKIP_TEST(use_nonstatic_global1);
+SKIP_TEST(use_nonstatic_global2);
+SKIP_TEST(use_nonstatic_global_other_sec);
+
+#endif /* __BPF_FEATURE_GOTOX */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
new file mode 100644
index 000000000000..2f20485e0de3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 value = 0;
+
+ if (ctx->value == (void *)0)
+ return 0;
+
+ bpf_probe_read_kernel(&value, sizeof(value), ctx->value);
+ value_sum += value;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
index 774d4dbe8189..a8aa5a71d846 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -18,23 +18,10 @@
unsigned short reuse_listen_hport = 0;
unsigned short listen_hport = 0;
-char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+const char cubic_cc[] = "bpf_cubic";
char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
bool random_retry = false;
-static bool tcp_cc_eq(const char *a, const char *b)
-{
- int i;
-
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (a[i] != b[i])
- return false;
- if (!a[i])
- break;
- }
-
- return true;
-}
SEC("iter/tcp")
int change_tcp_cc(struct bpf_iter__tcp *ctx)
@@ -58,7 +45,7 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx)
cur_cc, sizeof(cur_cc)))
return 0;
- if (!tcp_cc_eq(cur_cc, cubic_cc))
+ if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc))
return 0;
if (random_retry && bpf_get_prandom_u32() % 4 == 1)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index bc10c4e4b4fa..966ee5a7b066 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -9,6 +9,13 @@ char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
+void *user_ptr = 0;
+void *user_ptr_long = 0;
+uint32_t pid = 0;
+
+static char big_str1[5000];
+static char big_str2[5005];
+static char big_str3[4996];
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
@@ -35,7 +42,9 @@ int dump_task(struct bpf_iter__task *ctx)
}
int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
@@ -44,6 +53,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
void *ptr;
uint32_t user_data = 0;
int ret;
@@ -78,8 +90,106 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
+
++num_success_copy_from_user_task;
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1),
+ ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ /* only need to do the task pid check once */
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string past a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0);
+ if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* String that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ for (int i = 0; i < 4999; ++i) {
+ if (i % 2 == 0) {
+ if (big_str1[i] != 'b') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ } else {
+ if (big_str1[i] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ }
+ }
+
+ /* Longer length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0);
+ if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996
+ || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index d22449c69363..b1e509b231cd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 8b072666f9d9..dbc7166aee91 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index ffbd4b116d17..23b2aa2604de 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 47ff7754f4fd..c48b05aa2a4b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
-
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index f45f4352feeb..c9bfbe1bafc1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -33,7 +33,20 @@
* e.g. "foo{{[0-9]+}}" matches strings like "foo007".
* Extended POSIX regular expression syntax is allowed
* inside the brackets.
+ * __not_msg Message not expected to be found in verifier log.
+ * If __msg_not is situated between __msg tags
+ * framework matches __msg tags first, and then
+ * checks that __msg_not is not present in a portion of
+ * a log between bracketing __msg tags.
+ * Same regex syntax as for __msg is supported.
* __msg_unpriv Same as __msg but for unprivileged mode.
+ * __not_msg_unpriv Same as __not_msg but for unprivileged mode.
+ *
+ * __stderr Message expected to be found in bpf stderr stream. The
+ * same regex rules apply like __msg.
+ * __stderr_unpriv Same as __stderr but for unpriveleged mode.
+ * __stdout Same as __stderr but for stdout stream.
+ * __stdout_unpriv Same as __stdout but for unpriveleged mode.
*
* __xlated Expect a line in a disassembly log after verifier applies rewrites.
* Multiple __xlated attributes could be specified.
@@ -83,9 +96,11 @@
* expect return value to match passed parameter:
* - a decimal number
* - a hexadecimal number, when starts from 0x
- * - literal INT_MIN
- * - literal POINTER_VALUE (see definition below)
- * - literal TEST_DATA_LEN (see definition below)
+ * - a macro which expands to one of the above
+ * - literal _INT_MIN (expands to INT_MIN)
+ * In addition, two special macros are defined below:
+ * - POINTER_VALUE
+ * - TEST_DATA_LEN
* __retval_unpriv Same, but load program in unprivileged mode.
*
* __description Text to be used instead of a program name for display
@@ -111,22 +126,27 @@
* Several __arch_* annotations could be specified at once.
* When test case is not run on current arch it is marked as skipped.
* __caps_unpriv Specify the capabilities that should be set when running the test.
+ *
+ * __linear_size Specify the size of the linear area of non-linear skbs, or
+ * 0 for linear skbs.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
-#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
-#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
@@ -134,7 +154,15 @@
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
+#define __arch_s390x __arch("s390x")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
+#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
+#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
+#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -152,8 +180,12 @@
#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
/* Magic constants used with __retval() */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifndef __used
@@ -172,6 +204,9 @@
#elif defined(__TARGET_ARCH_riscv)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__riscv_"
+#elif defined(__TARGET_ARCH_powerpc)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX ""
#else
#define SYSCALL_WRAPPER 0
#define SYS_PREFIX "__se_"
@@ -208,4 +243,31 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+#define CAN_USE_GOTOL
+#endif
+
+#if __clang_major__ >= 18
+#define CAN_USE_BPF_ST
+#endif
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \
+ (defined(__TARGET_ARCH_powerpc))
+#define CAN_USE_LOAD_ACQ_STORE_REL
+#endif
+
+#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)
+#define SPEC_V1
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SPEC_V4
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum,
+ flow->qlen = 0,
+ flow->age = 1,
+ flow->time_next_packet = 0,
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
new file mode 100644
index 000000000000..70d8b08f5914
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+enum {
+ BPF_SMC_LISTEN = 10,
+};
+
+struct smc_sock___local {
+ struct sock sk;
+ struct smc_sock *listen_smc;
+ bool use_fallback;
+} __attribute__((preserve_access_index));
+
+int smc_cnt = 0;
+int fallback_cnt = 0;
+
+SEC("fentry/smc_release")
+int BPF_PROG(bpf_smc_release, struct socket *sock)
+{
+ /* only count from one side (client) */
+ if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
+ return 0;
+ smc_cnt++;
+ return 0;
+}
+
+SEC("fentry/smc_switch_to_fallback")
+int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
+{
+ /* only count from one side (client) */
+ if (smc && !smc->listen_smc)
+ fallback_cnt++;
+ return 0;
+}
+
+/* go with default value if no strat was found */
+bool default_ip_strat_value = true;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct smc_policy_ip_key));
+ __uint(value_size, sizeof(struct smc_policy_ip_value));
+ __uint(max_entries, 128);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} smc_policy_ip SEC(".maps");
+
+static bool smc_check(__u32 src, __u32 dst)
+{
+ struct smc_policy_ip_value *value;
+ struct smc_policy_ip_key key = {
+ .sip = src,
+ .dip = dst,
+ };
+
+ value = bpf_map_lookup_elem(&smc_policy_ip, &key);
+ return value ? value->mode : default_ip_strat_value;
+}
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(smc_run, int family, int type, int protocol)
+{
+ struct task_struct *task;
+
+ if (family != AF_INET && family != AF_INET6)
+ return protocol;
+
+ if ((type & 0xf) != SOCK_STREAM)
+ return protocol;
+
+ if (protocol != 0 && protocol != IPPROTO_TCP)
+ return protocol;
+
+ task = bpf_get_current_task_btf();
+ /* Prevent from affecting other tests */
+ if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+ return protocol;
+
+ return IPPROTO_SMC;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
+ struct inet_request_sock *ireq)
+{
+ return smc_check(ireq->req.__req_common.skc_daddr,
+ ireq->req.__req_common.skc_rcv_saddr);
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
+{
+ return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
+ tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
+}
+
+SEC(".struct_ops")
+struct smc_hs_ctrl linkcheck = {
+ .name = "linkcheck",
+ .syn_option = (void *)bpf_smc_set_tcp_option,
+ .synack_option = (void *)bpf_smc_set_tcp_option_cond,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
new file mode 100644
index 000000000000..f4e67b492dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TEST_UTILS_H__
+#define __BPF_TEST_UTILS_H__
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Clobber as many native registers and stack slots as possible. */
+static __always_inline void clobber_regs_stack(void)
+{
+ char tmp_str[] = "123456789";
+ unsigned long tmp;
+
+ bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp);
+ __sink(tmp);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 59843b430f76..d8dacef37c16 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -15,7 +15,11 @@
#define SO_KEEPALIVE 9
#define SO_PRIORITY 12
#define SO_REUSEPORT 15
+#if defined(__TARGET_ARCH_powerpc)
+#define SO_RCVLOWAT 16
+#else
#define SO_RCVLOWAT 18
+#endif
#define SO_BINDTODEVICE 25
#define SO_MARK 36
#define SO_MAX_PACING_RATE 47
@@ -49,6 +53,7 @@
#define TCP_SAVED_SYN 28
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
+#define TCP_RTO_MAX_MS 44
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
@@ -123,6 +128,7 @@
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
@@ -140,6 +146,20 @@
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+static inline bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1 - seq2) < 0;
+}
+
+#define after(seq2, seq1) before(seq1, seq2)
+
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
return (struct inet_connection_sock *)sk;
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
new file mode 100644
index 000000000000..21a560427b10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 38f78d9345de..d93f68024cc6 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -30,36 +30,42 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
*
- * struct cgroup_rstat_cpu {
+ * struct css_rstat_cpu {
* ...
- * struct cgroup *updated_children;
+ * struct cgroup_subsys_state *updated_children;
* ...
* };
*
- * struct cgroup {
+ * struct cgroup_subsys_state {
+ * ...
+ * struct css_rstat_cpu __percpu *rstat_cpu;
* ...
- * struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ * };
+ *
+ * struct cgroup {
+ * struct cgroup_subsys_state self;
* ...
* };
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->rstat_cpu->updated_children;
+ g = (__u64)cgrp->self.rstat_cpu->updated_children;
return 0;
}
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
{
- struct cgroup_rstat_cpu *rstat;
+ struct css_rstat_cpu *rstat;
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ rstat = (struct css_rstat_cpu *)bpf_per_cpu_ptr(
+ cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
- *(volatile int *)rstat;
+ *(volatile long *)rstat;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index c74362854948..ff189a736ad8 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,9 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
new file mode 100644
index 000000000000..6a0ea02c4de2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/getsockopt")
+int getsockopt_1(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_2(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_3(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_4(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
new file mode 100644
index 000000000000..4ef6202baa0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int idx;
+__u8 result[4];
+
+SEC("cgroup/getsockopt")
+int child(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 1;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int child_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 2;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 3;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 4;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
new file mode 100644
index 000000000000..88e13e17ec9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+char value[16];
+
+static __always_inline void read_xattr(struct cgroup *cgroup)
+{
+ struct bpf_dynptr value_ptr;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+ bpf_cgroup_read_xattr(cgroup, "user.bpf_test",
+ &value_ptr);
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_css_iter_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
+int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(use_css_iter_sleepable_with_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ goto out;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_bpf_cgroup_ancestor)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int BPF_PROG(cgroup_skb)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 5354455a01be..02d8f160ca0e 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("syscall")
+int test_cgrp_from_id_ns(void *ctx)
+{
+ struct cgroup *cg;
+
+ cg = bpf_cgroup_from_id(1);
+ if (!cg)
+ return 42;
+ bpf_cgroup_release(cg);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
deleted file mode 100644
index 43cada48b28a..000000000000
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-__noinline
-long changes_pkt_data(struct __sk_buff *sk)
-{
- return bpf_skb_pull_data(sk, 0);
-}
-
-__noinline __weak
-long does_not_change_pkt_data(struct __sk_buff *sk)
-{
- return 0;
-}
-
-SEC("?tc")
-int main_with_subprogs(struct __sk_buff *sk)
-{
- changes_pkt_data(sk);
- does_not_change_pkt_data(sk);
- return 0;
-}
-
-SEC("?tc")
-int main_changes(struct __sk_buff *sk)
-{
- bpf_skb_pull_data(sk, 0);
- return 0;
-}
-
-SEC("?tc")
-int main_does_not_change(struct __sk_buff *sk)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
new file mode 100644
index 000000000000..6884ab99a421
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg(" 0: .......... (b7) r0 = 42")
+__msg(" 1: 0......... (bf) r1 = r0")
+__msg(" 2: .1........ (bf) r2 = r1")
+__msg(" 3: ..2....... (bf) r3 = r2")
+__msg(" 4: ...3...... (bf) r4 = r3")
+__msg(" 5: ....4..... (bf) r5 = r4")
+__msg(" 6: .....5.... (bf) r6 = r5")
+__msg(" 7: ......6... (bf) r7 = r6")
+__msg(" 8: .......7.. (bf) r8 = r7")
+__msg(" 9: ........8. (bf) r9 = r8")
+__msg("10: .........9 (bf) r0 = r9")
+__msg("11: 0......... (95) exit")
+__naked void assign_chain(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "r1 = r0;"
+ "r2 = r1;"
+ "r3 = r2;"
+ "r4 = r3;"
+ "r5 = r4;"
+ "r6 = r5;"
+ "r7 = r6;"
+ "r8 = r7;"
+ "r9 = r8;"
+ "r0 = r9;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 7")
+__msg("1: .1........ (07) r1 += 7")
+__msg("2: .......... (b7) r2 = 7")
+__msg("3: ..2....... (b7) r3 = 42")
+__msg("4: ..23...... (0f) r2 += r3")
+__msg("5: .......... (b7) r0 = 0")
+__msg("6: 0......... (95) exit")
+__naked void arithmetics(void)
+{
+ asm volatile (
+ "r1 = 7;"
+ "r1 += 7;"
+ "r2 = 7;"
+ "r3 = 42;"
+ "r2 += r3;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_BPF_ST
+SEC("socket")
+__log_level(2)
+__msg(" 1: .1........ (07) r1 += -8")
+__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7")
+__msg(" 3: .1........ (b7) r2 = 42")
+__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 6: .......... (b7) r0 = 0")
+__naked void store(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 +0) = 7;"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("1: ....4..... (07) r4 += -8")
+__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)")
+__msg("3: ....45.... (07) r4 += -8")
+__naked void load(void)
+{
+ asm volatile (
+ "r4 = r10;"
+ "r4 += -8;"
+ "r5 = *(u64 *)(r4 +0);"
+ "r4 += -8;"
+ "r0 = r5;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)")
+__msg("1: ..2....... (d4) r2 = le64 r2")
+__msg("2: ..2....... (bf) r0 = r2")
+__naked void endian(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 +0);"
+ "r2 = le64 r2;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg(" 8: 0......... (b7) r1 = 1")
+__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1")
+__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)")
+__msg("12: 01........ (bf) r2 = r0")
+__msg("13: .12....... (bf) r0 = r1")
+__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)")
+__naked void atomic(void)
+{
+ asm volatile (
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = 0;"
+ "*(u64 *)(r2 +0) = r1;"
+ "r1 = %[test_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 1;"
+ "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);"
+ ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */
+ "r1 = xchg_64(r0 + 0, r1);"
+ "r2 = r0;"
+ "r0 = r1;"
+ "r0 = cmpxchg_64(r2 + 0, r0, r1);"
+ "1: exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(test_map),
+ __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)")
+__msg("3: .......... (bf) r3 = r10")
+__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))")
+__naked void atomic_load_acq_store_rel(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "r2 = r10;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */
+ "r3 = r10;"
+ ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */
+ "r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__log_level(2)
+__msg("4: .12....7.. (85) call bpf_trace_printk#6")
+__msg("5: 0......7.. (0f) r0 += r7")
+__naked void regular_call(void)
+{
+ asm volatile (
+ "r7 = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "call %[bpf_trace_printk];"
+ "r0 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (25) if r1 > 0x7 goto pc+1")
+__msg("3: ..2....... (bf) r0 = r2")
+__naked void if1(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 > 0x7 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("3: 0123...... (2d) if r1 > r3 goto pc+1")
+__msg("4: ..2....... (bf) r0 = r2")
+__naked void if2(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "r3 = 7;"
+ "if r1 > r3 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Verifier misses that r2 is alive if jset is not handled properly */
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (45) if r1 & 0x7 goto pc+1")
+__naked void if3_jset_bug(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 & 0x7 goto +1;"
+ "exit;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 0")
+__msg("1: .1........ (b7) r2 = 7")
+__msg("2: .12....... (25) if r1 > 0x7 goto pc+4")
+__msg("3: .12....... (07) r1 += 1")
+__msg("4: .12....... (27) r2 *= 2")
+__msg("5: .12....... (05) goto pc+0")
+__msg("6: .12....... (05) goto pc-5")
+__msg("7: .......... (b7) r0 = 0")
+__msg("8: 0......... (95) exit")
+__naked void loop(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "r2 = 7;"
+ "if r1 > 0x7 goto +4;"
+ "r1 += 1;"
+ "r2 *= 2;"
+ "goto +0;"
+ "goto -5;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__log_level(2)
+__msg("2: .123...... (25) if r1 > 0x7 goto pc+2")
+__msg("3: ..2....... (bf) r0 = r2")
+__msg("4: 0......... (06) gotol pc+1")
+__msg("5: ...3...... (bf) r0 = r3")
+__msg("6: 0......... (95) exit")
+__naked void gotol(void)
+{
+ asm volatile (
+ "r2 = 42;"
+ "r3 = 24;"
+ "if r1 > 0x7 goto +2;"
+ "r0 = r2;"
+ "gotol +1;"
+ "r0 = r3;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 1")
+__msg("1: .1........ (e5) may_goto pc+1")
+__msg("2: .......... (05) goto pc-3")
+__msg("3: .1........ (bf) r0 = r1")
+__msg("4: 0......... (95) exit")
+__naked void may_goto(void)
+{
+ asm volatile (
+ "1: r1 = 1;"
+ ".8byte %[may_goto];"
+ "goto 1b;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("1: 0......... (18) r2 = 0x7")
+__msg("3: 0.2....... (0f) r0 += r2")
+__naked void ldimm64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 0x7 ll;"
+ "r0 += r2;"
+ "exit;"
+ :
+ :: __clobber_all);
+}
+
+/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */
+SEC("socket")
+__log_level(2)
+__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]")
+__msg("3: 012.456789 (0f) r7 += r0")
+__msg("4: 012.456789 (b7) r3 = 42")
+__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]")
+__msg("6: 0......7.. (0f) r7 += r0")
+__naked void ldabs(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ "r0 = *(u8 *)skb[42];"
+ "r7 += r0;"
+ "r3 = 42;"
+ ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */
+ "r7 += r0;"
+ "r0 = r7;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0))
+ : __clobber_all);
+}
+
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__log_level(2)
+__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages")
+__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)")
+__msg(" 8: .1........ (b7) r2 = 42")
+__naked void addr_space_cast(void)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = 0;"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r1 = addr_space_cast(r0, 0, 1);"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena)
+ : __clobber_all);
+}
+#endif
+
+static __used __naked int aux1(void)
+{
+ asm volatile (
+ "r0 = r1;"
+ "r0 += r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: ....45.... (b7) r1 = 1")
+__msg("1: .1..45.... (b7) r2 = 2")
+__msg("2: .12.45.... (b7) r3 = 3")
+/* Conservative liveness for subprog parameters. */
+__msg("3: .12345.... (85) call pc+2")
+__msg("4: .......... (b7) r0 = 0")
+__msg("5: 0......... (95) exit")
+__msg("6: .12....... (bf) r0 = r1")
+__msg("7: 0.2....... (0f) r0 += r2")
+/* Conservative liveness for subprog return value. */
+__msg("8: 0......... (95) exit")
+__naked void subprog1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "call aux1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
index d3f4c5e4fb69..a3819a5d09c8 100644
--- a/tools/testing/selftests/bpf/progs/connect4_dropper.c
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -13,12 +13,14 @@
#define VERDICT_REJECT 0
#define VERDICT_PROCEED 1
+int port;
+
SEC("cgroup/connect4")
int connect_v4_dropper(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_STREAM)
return VERDICT_PROCEED;
- if (ctx->user_port == bpf_htons(60120))
+ if (ctx->user_port == bpf_htons(port))
return VERDICT_REJECT;
return VERDICT_PROCEED;
}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 9e9ebf27b878..9d158cfad981 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -34,6 +34,9 @@
#define SOL_TCP 6
#endif
+const char reno[] = "reno";
+const char cubic[] = "cubic";
+
__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
@@ -50,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx)
}
static __inline int verify_cc(struct bpf_sock_addr *ctx,
- char expected[TCP_CA_NAME_MAX])
+ const char expected[])
{
char buf[TCP_CA_NAME_MAX];
- int i;
if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 1;
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (buf[i] != expected[i])
- return 1;
- if (buf[i] == 0)
- break;
- }
+ if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected))
+ return 1;
return 0;
}
static __inline int set_cc(struct bpf_sock_addr *ctx)
{
- char reno[TCP_CA_NAME_MAX] = "reno";
- char cubic[TCP_CA_NAME_MAX] = "cubic";
-
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno)))
return 1;
if (verify_cc(ctx, reno))
return 1;
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic)))
return 1;
if (verify_cc(ctx, cubic))
return 1;
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index fd8e1b4c6762..5760ae015e09 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -347,6 +347,7 @@ struct core_reloc_nesting___err_too_deep {
*/
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -455,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr {
struct core_reloc_arrays_substruct d[1][2];
};
+struct core_reloc_arrays___err_bad_signed_arr_elem_sz {
+ /* int -> short (signed!): not supported case */
+ short a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
/*
* PRIMITIVES
*/
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 4ece7873ba60..86085b79f5ca 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -61,6 +61,7 @@ u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
const struct cpumask *src2) __ksym __weak;
u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak;
void bpf_rcu_read_lock(void) __ksym __weak;
void bpf_rcu_read_unlock(void) __ksym __weak;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index b40b52548ffb..8a2fd596c8a3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -222,3 +222,41 @@ int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flag
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("type=scalar expected=fp")
+int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456;
+ u64 bits;
+ int ret;
+
+ ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits));
+ if (!ret)
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("leads to invalid memory access")
+int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags)
+{
+ void *garbage = (void *)0x123456;
+ struct bpf_cpumask *local;
+ int ret;
+
+ local = create_cpumask();
+ if (!local) {
+ err = 1;
+ return 0;
+ }
+
+ ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8);
+ if (!ret)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 80ee469b0b60..0e04c31b91c0 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -749,7 +749,6 @@ out:
}
SEC("tp_btf/task_newtask")
-__success
int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *mask1, *mask2;
@@ -770,3 +769,122 @@ free_masks_return:
bpf_cpumask_release(mask2);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+ u8 toofewbits;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ /* The kfunc should prevent this operation */
+ ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits));
+ if (ret != -EACCES)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
+
+/* Mask is guaranteed to be large enough for bpf_cpumask_t. */
+#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t))
+
+/* Add an extra word for the test_populate_reject_unaligned test. */
+u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1];
+extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ char *src;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Skip if unaligned accesses are fine for this arch. */
+ if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return 0;
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Misalign the source array by a byte. */
+ src = &((char *)bits)[1];
+
+ ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN);
+ if (ret != -EINVAL)
+ err = 2;
+
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ bool bit;
+ int ret;
+ int i;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Set only odd bits. */
+ __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN);
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Pass the entire bits array, the kfunc will only copy the valid bits. */
+ ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN);
+ if (ret) {
+ err = 2;
+ goto out;
+ }
+
+ /*
+ * Test is there to appease the verifier. We cannot directly
+ * access NR_CPUS, the upper bound for nr_cpus, so we infer
+ * it from the size of cpumask_t.
+ */
+ if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_for(i, 0, nr_cpus) {
+ /* Odd-numbered bits should be set, even ones unset. */
+ bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask);
+ if (bit == (i % 2 != 0))
+ continue;
+
+ err = 4;
+ break;
+ }
+
+out:
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+#undef CPUMASK_TEST_MASKLEN
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
index 645be6cddf36..dfd8a258f14a 100644
--- a/tools/testing/selftests/bpf/progs/crypto_sanity.c
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -14,7 +14,7 @@ unsigned char key[256] = {};
u16 udp_test_port = 7777;
u32 authsize, key_len;
char algo[128] = {};
-char dst[16] = {};
+char dst[16] = {}, dst_bad[8] = {};
int status;
static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
@@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx)
.authsize = authsize,
};
struct bpf_crypto_ctx *cctx;
- int err = 0;
+ int err;
status = 0;
-
if (key_len > 256) {
status = -EINVAL;
return 0;
@@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx)
__builtin_memcpy(&params.algo, algo, sizeof(algo));
__builtin_memcpy(&params.key, key, sizeof(key));
- cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
if (!cctx) {
status = err;
return 0;
@@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx)
err = crypto_ctx_insert(cctx);
if (err && err != -EEXIST)
status = err;
-
return 0;
}
@@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb)
struct bpf_dynptr psrc, pdst;
int err;
+ status = 0;
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
@@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb)
int err;
status = 0;
-
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index bd8f15229f5c..dda6a8dada82 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
return SK_PASS;
}
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb)
return SK_PASS;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
@@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb)
return SK_PASS;
}
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+/* Only supported prog type can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
SEC("fentry/skb_tx_error")
__failure __msg("must be referenced or trusted")
int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
@@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb)
return 0;
}
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
/* A xdp clone's data slices should be invalid anytime packet data changes */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index bfcc85686cf0..e0d672d93adf 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -1,20 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Facebook */
+#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
-#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_kfuncs.h"
#include "errno.h"
+#define PAGE_SIZE_64K 65536
+
char _license[] SEC("license") = "GPL";
int pid, err, val;
-struct sample {
+struct ringbuf_sample {
int pid;
int seq;
long value;
@@ -121,7 +122,7 @@ int test_dynptr_data(void *ctx)
static int ringbuf_callback(__u32 index, void *data)
{
- struct sample *sample;
+ struct ringbuf_sample *sample;
struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
@@ -138,7 +139,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
- struct sample *sample;
+ struct ringbuf_sample *sample;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
@@ -210,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
SEC("tp/syscalls/sys_enter_nanosleep")
int test_adjust(void *ctx)
{
@@ -567,3 +623,515 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
return 1;
}
+
+static inline int bpf_memcmp(const char *a, const char *b, u32 size)
+{
+ int i;
+
+ bpf_for(i, 0, size) {
+ if (a[i] != b[i])
+ return a[i] < b[i] ? -1 : 1;
+ }
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_copy(void *ctx)
+{
+ char data[] = "hello there, world!!";
+ char buf[32] = {'\0'};
+ __u32 sz = sizeof(data);
+ struct bpf_dynptr src, dst;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);
+
+ /* Test basic case of copying contiguous memory backed dynptrs */
+ err = bpf_dynptr_write(&src, 0, data, sz, 0);
+ err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
+ err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);
+ err = err ?: bpf_memcmp(data, buf, sz);
+
+ /* Test that offsets are handled correctly */
+ err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5);
+ err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0);
+ err = err ?: bpf_memcmp(data + 5, buf, sz - 5);
+
+ bpf_ringbuf_discard_dynptr(&src, 0);
+ bpf_ringbuf_discard_dynptr(&dst, 0);
+ return 0;
+}
+
+SEC("xdp")
+int test_dynptr_copy_xdp(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr_buf, ptr_xdp;
+ char data[] = "qwertyuiopasdfghjkl";
+ char buf[32] = {'\0'};
+ __u32 len = sizeof(data), xdp_data_size;
+ int i, chunks = 200;
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ xdp_data_size = bpf_dynptr_size(&ptr_xdp);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
+
+ /* Destination dynptr is backed by non-contiguous memory */
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Source dynptr is backed by non-contiguous memory */
+ __builtin_memset(buf, 0, sizeof(buf));
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Both source and destination dynptrs are backed by non-contiguous memory */
+ err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1));
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks - 1) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ err = 1;
+
+out:
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+ return XDP_DROP;
+}
+
+char memset_zero_data[] = "data to be zeroed";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_zero_data);
+ char zeroes[32] = {'\0'};
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0);
+ err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz);
+
+ return 0;
+}
+
+#define DYNPTR_MEMSET_VAL 42
+
+char memset_notzero_data[] = "data to be overwritten";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_notzero(void *ctx)
+{
+ u32 data_sz = sizeof(memset_notzero_data);
+ struct bpf_dynptr ptr;
+ char expected[32];
+
+ __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz);
+
+ err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL);
+ err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_offset_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_offset(void *ctx)
+{
+ char expected[] = "data to \0\0\0\0eroed partially";
+ __u32 data_sz = sizeof(memset_zero_offset_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_adjusted_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_adjusted(void *ctx)
+{
+ char expected[] = "data\0\0\0\0be zeroed partially";
+ __u32 data_sz = sizeof(memset_zero_adjusted_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_adjust(&ptr, 4, 8);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz);
+
+ return 0;
+}
+
+char memset_overflow_data[] = "memset overflow data";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow_offset(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_memset_readonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* cgroup skbs are read only, memset should fail */
+ ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ if (ret != -EINVAL)
+ err = 1;
+
+ return 0;
+}
+
+#define min_t(type, x, y) ({ \
+ type __x = (x); \
+ type __y = (y); \
+ __x < __y ? __x : __y; })
+
+SEC("xdp")
+int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp)
+{
+ u32 data_sz, chunk_sz, offset = 0;
+ const int max_chunks = 200;
+ struct bpf_dynptr ptr_xdp;
+ char expected_buf[32];
+ char buf[32];
+ int i;
+
+ __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf));
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ data_sz = bpf_dynptr_size(&ptr_xdp);
+
+ err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL);
+ if (err) {
+ /* bpf_dynptr_memset() eventually called bpf_xdp_pointer()
+ * where if data_sz is greater than 0xffff, -EFAULT will be
+ * returned. For 64K page size, data_sz is greater than
+ * 64K, so error is expected and let us zero out error and
+ * return success.
+ */
+ if (data_sz >= PAGE_SIZE_64K)
+ err = 0;
+ goto out;
+ }
+
+ bpf_for(i, 0, max_chunks) {
+ offset = i * sizeof(buf);
+ if (offset >= data_sz)
+ goto out;
+ chunk_sz = min_t(u32, sizeof(buf), data_sz - offset);
+ err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0);
+ if (err)
+ goto out;
+ err = bpf_memcmp(buf, expected_buf, sizeof(buf));
+ if (err)
+ goto out;
+ }
+out:
+ return XDP_DROP;
+}
+
+void *user_ptr;
+/* Contains the copy of the data pointed by user_ptr.
+ * Size 384 to make it not fit into a single kernel chunk when copying
+ * but less than the maximum bpf stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent compiler from generating
+ * indirect calls that make program fail to load with "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 5e0a1ca96d4e..a01c2736890f 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,43 +18,43 @@
return *(u64 *)num; \
}
-__msg(": R0_w=0xffffffff80000000")
+__msg(": R0=0xffffffff80000000")
check_assert(s64, ==, eq_int_min, INT_MIN);
-__msg(": R0_w=0x7fffffff")
+__msg(": R0=0x7fffffff")
check_assert(s64, ==, eq_int_max, INT_MAX);
-__msg(": R0_w=0")
+__msg(": R0=0")
check_assert(s64, ==, eq_zero, 0);
-__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000")
+__msg(": R0=0x8000000000000000 R1=0x8000000000000000")
check_assert(s64, ==, eq_llong_min, LLONG_MIN);
-__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff")
+__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff")
check_assert(s64, ==, eq_llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)")
+__msg(": R0=scalar(id=1,smax=0x7ffffffe)")
check_assert(s64, <, lt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, <, lt_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff")
+__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff")
check_assert(s64, <, lt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smax=0x7fffffff)")
+__msg(": R0=scalar(id=1,smax=0x7fffffff)")
check_assert(s64, <=, le_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=0)")
+__msg(": R0=scalar(id=1,smax=0)")
check_assert(s64, <=, le_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smax=0xffffffff80000000")
check_assert(s64, <=, le_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000001")
check_assert(s64, >, gt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000000")
check_assert(s64, >=, ge_neg, INT_MIN);
SEC("?tc")
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
new file mode 100644
index 000000000000..4d756b623557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct elem {
+ struct file *file;
+ struct bpf_task_work tw;
+};
+
+char user_buf[256000];
+char tmp_buf[256000];
+
+int pid = 0;
+int err, run_success = 0;
+
+static int validate_file_read(struct file *file);
+static int task_work_callback(struct bpf_map *map, void *key, void *value);
+
+SEC("lsm/file_open")
+int on_open_expect_fault(void *c)
+{
+ struct bpf_dynptr dynptr;
+ struct file *file;
+ int local_err = 1;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ file = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!file)
+ return 0;
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto out;
+
+ local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0);
+ if (local_err == -EFAULT) { /* Expect page fault */
+ local_err = 0;
+ run_success = 1;
+ }
+out:
+ bpf_dynptr_file_discard(&dynptr);
+ if (local_err)
+ err = local_err;
+ bpf_put_file(file);
+ return 0;
+}
+
+SEC("lsm/file_open")
+int on_open_validate_file_read(void *c)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct elem *work;
+ int key = 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work) {
+ err = 1;
+ return 0;
+ }
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+ return 0;
+}
+
+/* Called in a sleepable context, read 256K bytes, cross check with user space read data */
+static int task_work_callback(struct bpf_map *map, void *key, void *value)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+
+ if (!file)
+ return 0;
+
+ err = validate_file_read(file);
+ if (!err)
+ run_success = 1;
+ bpf_put_file(file);
+ return 0;
+}
+
+static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len)
+{
+ int i;
+
+ if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0))
+ return 1;
+
+ /* Verify file contents read from BPF is the same as the one read from userspace */
+ bpf_for(i, 0, len)
+ {
+ if (tmp_buf[i] != user_buf[i])
+ return 1;
+ }
+ return 0;
+}
+
+static int validate_file_read(struct file *file)
+{
+ struct bpf_dynptr dynptr;
+ int loc_err = 1, off;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto cleanup;
+
+ loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz);
+ off = 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ off = user_buf_sz - 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ /* Read file with random offset and length */
+ off = 4097;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100);
+
+ /* Adjust dynptr, verify read */
+ loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1);
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1);
+ /* Can't read more than 1 byte */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0;
+ /* Can't read with far offset */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0;
+
+cleanup:
+ bpf_dynptr_file_discard(&dynptr);
+ return loc_err;
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c
new file mode 100644
index 000000000000..32fe28ed2439
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+void *user_ptr;
+
+SEC("lsm/file_open")
+__failure
+__msg("Unreleased reference id=")
+int on_nanosleep_unreleased_ref(void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+ struct bpf_dynptr dynptr;
+
+ if (!file)
+ return 0;
+
+ err = bpf_dynptr_from_file(file, 0, &dynptr);
+ return err ? 1 : 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected a dynptr of type file as arg #0")
+int xdp_wrong_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &dynptr);
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected an initialized dynptr as arg #0")
+int xdp_no_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
index 544e5ac90461..d09bbd8ae8a8 100644
--- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -12,7 +12,7 @@
SEC("freplace/connect_v4_prog")
int new_connect_v4_prog(struct bpf_sock_addr *ctx)
{
- // return value thats in invalid range
+ // return value that's in invalid range
return 255;
}
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
index 7481bb30b29b..195d3b2fba00 100644
--- a/tools/testing/selftests/bpf/progs/htab_update.c
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -6,24 +6,31 @@
char _license[] SEC("license") = "GPL";
+/* Map value type: has BTF-managed field (bpf_timer) */
+struct val {
+ struct bpf_timer t;
+ __u64 payload;
+};
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, struct val);
} htab SEC(".maps");
int pid = 0;
int update_err = 0;
-SEC("?fentry/lookup_elem_raw")
-int lookup_elem_raw(void *ctx)
+SEC("?fentry/bpf_obj_free_fields")
+int bpf_obj_free_fields(void *ctx)
{
- __u32 key = 0, value = 1;
+ __u32 key = 0;
+ struct val value = { .payload = 1 };
if ((bpf_get_current_pid_tgid() >> 32) != pid)
return 0;
- update_err = bpf_map_update_elem(&htab, &key, &value, 0);
+ update_err = bpf_map_update_elem(&htab, &key, &value, BPF_ANY);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 645b2c9f7867..0e87ad1ebcfa 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,11 +12,6 @@
#define IP_OFFSET 0x1FFF
#define NEXTHDR_FRAGMENT 44
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
- void *buffer, uint32_t buffer__sz) __ksym;
-
volatile int shootdowns = 0;
static bool is_frag_v4(struct iphdr *iph)
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
index b0b53d980964..74d912b22de9 100644
--- a/tools/testing/selftests/bpf/progs/irq.c
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -11,6 +11,9 @@ extern void bpf_local_irq_save(unsigned long *) __weak __ksym;
extern void bpf_local_irq_restore(unsigned long *) __weak __ksym;
extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
SEC("?tc")
__failure __msg("arg#0 doesn't point to an irq flag on stack")
int irq_save_bad_arg(struct __sk_buff *ctx)
@@ -222,7 +225,7 @@ int __noinline global_local_irq_balance(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with IRQs disabled")
+__success
int irq_global_subprog(struct __sk_buff *ctx)
{
unsigned long flags;
@@ -441,4 +444,123 @@ int irq_ooo_refs_array(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__success
+int irq_non_sleepable_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_helper_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_sleepable_helper_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_global_subprog_indirect(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog_calling_sleepable_global(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_lock_cond_inv(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&lockB, &flags2)) {
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+ }
+
+ bpf_res_spin_unlock_irqrestore(&lockB, &flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_1(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ /* For now, bpf_local_irq_restore is not allowed in critical section,
+ * but this test ensures error will be caught with kfunc_class when it's
+ * opened up. Tested by temporarily permitting this kfunc in critical
+ * section.
+ */
+ bpf_local_irq_restore(&flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ bpf_local_irq_save(&flags1);
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags2))
+ return 0;
+ bpf_local_irq_restore(&flags2);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 190822b2f08b..7dd92a303bf6 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1175,6 +1175,122 @@ __naked int loop_state_deps2(void)
}
SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+ /* This is equivalent to a C program below.
+ *
+ * if (random() != 24) { // assume false branch is placed first
+ * i = iter_new(); // fp[-8]
+ * while (iter_next(i));
+ * iter_destroy(i);
+ * return;
+ * }
+ *
+ * for (i = 10; i > 0; i--); // increase dfs_depth for child states
+ *
+ * i = iter_new(); // fp[-8]
+ * b = -24; // r8
+ * for (;;) { // checkpoint (L)
+ * if (iter_next(i)) // checkpoint (N)
+ * break;
+ * if (random() == 77) { // assume false branch is placed first
+ * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25
+ * iter_destroy(i);
+ * return;
+ * }
+ * if (random() == 42) { // assume false branch is placed first
+ * b = -25;
+ * }
+ * }
+ * iter_destroy(i);
+ *
+ * In case of a buggy verifier first loop might poison
+ * env->cur_state->loop_entry with a state having 0 branches
+ * and small dfs_depth. This would trigger NOT_EXACT states
+ * comparison for some states within second loop.
+ * Specifically, checkpoint (L) might be problematic if:
+ * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+ * - checkpoint (L) is first reached in state {b=-24};
+ * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+ * branch count to 0, thus making it eligible for use in pruning;
+ * - checkpoint (L) is next reached in state {b=-25},
+ * this would cause NOT_EXACT comparison with a state {b=-24}
+ * while 'b' is not marked precise yet.
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 24 goto 2f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 5;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 != 0 goto 1b;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "2:"
+ /* loop to increase dfs_depth */
+ "r0 = 10;"
+ "3:"
+ "r0 -= 1;"
+ "if r0 != 0 goto 3b;"
+ /* end of loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r8 = -24;"
+ "main_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto main_loop_end_%=;"
+ /* first if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 77 goto unsafe_write_%=;"
+ /* second if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto poison_r8_%=;"
+ /* iterate */
+ "goto main_loop_%=;"
+ "main_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+
+ "unsafe_write_%=:"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto main_loop_end_%=;"
+
+ "poison_r8_%=:"
+ "r8 = -25;"
+ "goto main_loop_%=;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
__success
__naked int triple_continue(void)
{
@@ -1512,4 +1628,302 @@ int iter_destroy_bad_arg(const void *ctx)
return 0;
}
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+ char buf[1];
+ int i, j, k, l, m, n, o;
+
+ bpf_for(i, 0, 10)
+ bpf_for(j, 0, 10)
+ bpf_for(k, 0, 10)
+ bpf_for(l, 0, 10)
+ bpf_for(m, 0, 10)
+ bpf_for(n, 0, 10)
+ bpf_for(o, 0, 10) {
+ if (unlikely(bpf_get_prandom_u32()))
+ buf[0] = 42;
+ bpf_printk("%s", buf);
+ }
+ return 0;
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "call noop;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int noop(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32())) {
+ * r6 = -31;
+ * jump_into_loop:
+ * goto +0;
+ * goto loop;
+ * }
+ * if (unlikely(bpf_get_prandom_u32()))
+ * goto jump_into_loop;
+ * loop:
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto jump_into_loop_%=;"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "jump_into_loop_%=: "
+ "goto +0;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state3(void)
+{
+ /*
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * loop1(-32, &fp[-8])
+ * loop1_wrapper(&fp[-8])
+ * bpf_iter_num_destroy(&fp[-8])
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ /* call #1 */
+ "r1 = -32;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call loop1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ /* call #2 */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call loop1_wrapper;"
+ /* return */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1(void)
+{
+ /*
+ * int loop1(num, iter) {
+ * r6 = num;
+ * r7 = iter;
+ * while (bpf_iter_num_next(r7)) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * return 0
+ * }
+ */
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "loop_%=:"
+ "r1 = r7;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1_wrapper(void)
+{
+ /*
+ * int loop1_wrapper(iter) {
+ * r6 = -32;
+ * r7 = iter;
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * loop1(r6, r7);
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "r6 = -32;"
+ "r7 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "r1 = r6;"
+ "r2 = r7;"
+ "call loop1;"
+ "r0 = 0;"
+ "exit;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
index 05fa5ce7fc59..d00fd570255a 100644
--- a/tools/testing/selftests/bpf/progs/iters_looping.c
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -161,3 +161,56 @@ int simplest_loop(void *ctx)
return 0;
}
+
+__used
+static void iterator_with_diff_stack_depth(int x)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "if r1 == 42 goto 0f;"
+ "*(u64 *)(r10 - 128) = 0;"
+ "0:"
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "goto 1b;"
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("socket")
+__success
+__naked int widening_stack_size_bug(void *ctx)
+{
+ /*
+ * Depending on iterator_with_diff_stack_depth() parameter value,
+ * subprogram stack depth is either 8 or 128 bytes. Arrange values so
+ * that it is 128 on a first call and 8 on a second. This triggered a
+ * bug in verifier's widen_imprecise_scalars() logic.
+ */
+ asm volatile (
+ "r6 = 0;"
+ "r1 = 0;"
+ "1:"
+ "call iterator_with_diff_stack_depth;"
+ "r1 = 42;"
+ "r6 += 1;"
+ "if r6 < 2 goto 1b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
index f41257eadbb2..d273b46dfc7c 100644
--- a/tools/testing/selftests/bpf/progs/iters_state_safety.c
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int create_and_destroy(void *ctx)
{
struct bpf_iter_num iter;
@@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int valid_stack_reuse(void *ctx)
{
struct bpf_iter_num iter;
@@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void)
"r3 = 1000;"
"call %[bpf_iter_num_new];"
- /* attemp to leak bpf_iter_num state */
+ /* attempt to leak bpf_iter_num state */
"r7 = *(u64 *)(r6 + 0);"
"r8 = *(u64 *)(r6 + 8);"
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index 6b1588d70652..fe3663dedbe1 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
int BPF_PROG(iter_tasks_without_lock)
{
struct task_struct *pos;
@@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock)
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(iter_css_without_lock)
{
u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
index 9e4b45201e69..5379e9960ffd 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -123,3 +123,49 @@ out:
bpf_iter_num_destroy(&num_it);
return 0;
}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
+int iter_ret_rcu_test_protected(const void *ctx)
+{
+ struct task_struct *p;
+
+ p = bpf_kfunc_ret_rcu_test();
+ return p->pid;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
+int iter_ret_rcu_test_type(const void *ctx)
+{
+ struct task_struct *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test();
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
+int iter_ret_rcu_test_protected_nostruct(const void *ctx)
+{
+ void *p;
+
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ return *(int *)p;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
+int iter_ret_rcu_test_type_nostruct(const void *ctx)
+{
+ void *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
index 6543d5b6e0a9..83791348bed5 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -20,7 +20,7 @@ __s64 res_empty;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_empty(const void *ctx)
@@ -38,7 +38,7 @@ __s64 res_full;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_full(const void *ctx)
@@ -58,7 +58,7 @@ static volatile int zero = 0;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_truncated(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
new file mode 100644
index 000000000000..f77aef0474d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+SEC("kprobe")
+int kprobe_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+
+SEC("kprobe.multi")
+int kprobe_multi_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 6438982b928b..ddd26d1a083f 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx)
SEC("?tc")
int obj_new_no_struct(void *ctx)
{
-
- bpf_obj_new(union { int data; unsigned udata; });
+ (void)bpf_obj_new(union { int data; unsigned udata; });
return 0;
}
@@ -252,7 +251,7 @@ int new_null_ret(void *ctx)
SEC("?tc")
int obj_new_acq(void *ctx)
{
- bpf_obj_new(struct foo);
+ (void)bpf_obj_new(struct foo);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
new file mode 100644
index 000000000000..15579d5bcd91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int fentry_hit;
+int fexit_hit;
+int my_pid;
+
+SEC("fentry/cmdline_proc_show")
+int BPF_PROG(fentry_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fentry_hit = 1;
+ return 0;
+}
+
+SEC("fexit/cmdline_proc_show")
+int BPF_PROG(fexit_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fexit_hit = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 50e66772c046..b0fa26fb4760 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 947bb7e988c2..0227409d4b0e 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 717dab14322b..5d1c9a775e6b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index e4ff97fbcce1..dd36aff4fba3 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <stddef.h>
-#include <linux/bpf.h>
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL";
#define SG_CHAIN 0x01UL
#define SG_END 0x02UL
-struct scatterlist {
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
-};
-
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg) ((sg)->page_link & SG_END)
#define sg_chain_ptr(sg) \
@@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
return sgp;
}
-int config = 0;
+int run_once = 0;
int result = 0;
SEC("kprobe/virtqueue_add_sgs")
@@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__u64 length1 = 0, length2 = 0;
unsigned int i, n, len;
- if (config != 0)
+ if (run_once != 0)
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
__sink(out_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length1 += len;
n++;
}
@@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__sink(in_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length2 += len;
n++;
}
}
- config = 1;
+ run_once = 1;
result = length2 - length1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h
new file mode 100644
index 000000000000..76aa5821807f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_LPM_TRIE_H
+#define __PROGS_LPM_TRIE_H
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+/* Benchmark operations */
+enum {
+ LPM_OP_NOOP = 0,
+ LPM_OP_BASELINE,
+ LPM_OP_LOOKUP,
+ LPM_OP_INSERT,
+ LPM_OP_UPDATE,
+ LPM_OP_DELETE,
+ LPM_OP_FREE
+};
+
+/*
+ * Return values from run_bench.
+ *
+ * Negative values are also allowed and represent kernel error codes.
+ */
+#define LPM_BENCH_SUCCESS 0
+#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
new file mode 100644
index 000000000000..a0e6ebd5507a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ /* Avoid compiler optimizing out the modulo */
+ barrier_var(blackbox);
+ blackbox = READ_ONCE(key.data);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * Atomicity isn't required because DELETE only supports
+ * one producer running concurrently. What we need is a
+ * way to track how many entries have been deleted from
+ * the trie between consecutive invocations of the BPF
+ * prog because a single bpf_loop() call might not
+ * delete all entries, e.g. when NR_LOOPS < nr_entries.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
new file mode 100644
index 000000000000..6e60d686b664
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_ENTRIES 100000000
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_free_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 0c13b7409947..7de173daf27b 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -89,14 +89,16 @@ SEC("lsm/file_mprotect")
int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
unsigned long reqprot, unsigned long prot, int ret)
{
- if (ret != 0)
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (ret != 0 || !mm)
return ret;
__s32 pid = bpf_get_current_pid_tgid() >> 32;
int is_stack = 0;
- is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ is_stack = (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack);
if (is_stack && monitored_pid == pid) {
mprotect_count++;
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
index 49c075ce2d4c..6e7e58051e64 100644
--- a/tools/testing/selftests/bpf/progs/lsm_tailcall.c
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -20,14 +20,14 @@ int lsm_file_permission_prog(void *ctx)
return 0;
}
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_prog(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_prog(void *ctx)
{
return 0;
}
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_entry(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_entry(void *ctx)
{
bpf_tail_call_static(ctx, &jmp_table, 0);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c
new file mode 100644
index 000000000000..d461684728e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_excl.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} excl_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_not_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..3b984b6ae7c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
+int btf_id_to_ptr_mem(void *ctx)
+{
+ struct task_struct *task;
+ struct nameidata *idata;
+ u64 ret, off;
+
+ task = bpf_get_current_task_btf();
+ idata = task->nameidata;
+ off = bpf_core_field_offset(struct nameidata, pathname);
+ /*
+ * asm block to have reliable match target for __msg, equivalent of:
+ * ret = task->nameidata->pathname[0];
+ */
+ asm volatile (
+ "r7 = %[idata];"
+ "r7 += %[off];"
+ "r8 = *(u64 *)(r7 + 0);"
+ "r9 = *(u8 *)(r8 + 0);"
+ "%[ret] = r9;"
+ : [ret]"=r"(ret)
+ : [idata]"r"(idata),
+ [off]"r"(off)
+ : "r7", "r8", "r9");
+ return ret;
+}
+
+SEC("socket")
+__success
+__retval(0)
+int ldx_is_ok_bad_addr(void *ctx)
+{
+ char *p;
+
+ if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
+ return 42;
+
+ p = bpf_rdonly_cast(0, 0);
+ return p[0x7fff];
+}
+
+SEC("socket")
+__success
+__retval(1)
+int ldx_is_ok_good_addr(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return *p;
+}
+
+SEC("socket")
+__success
+int offset_not_tracked(void *ctx)
+{
+ int *p, i, s;
+
+ p = bpf_rdonly_cast(0, 0);
+ s = 0;
+ bpf_for(i, 0, 1000 * 1000 * 1000) {
+ p++;
+ s += *p;
+ }
+ return s;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int stx_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ *p = 1;
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ __sync_fetch_and_add(p, 1);
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_rmw_not_ok(void *ctx)
+{
+ long v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return __sync_val_compare_and_swap(p, 0, 42);
+}
+
+SEC("socket")
+__failure
+__msg("invalid access to memory, mem_size=0 off=0 size=4")
+__msg("R1 min value is outside of the allowed memory range")
+int kfunc_param_not_ok(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ bpf_kfunc_trusted_num_test(p);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure
+__msg("R1 type=rdonly_untrusted_mem expected=")
+int helper_param_not_ok(void *ctx)
+{
+ char *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ /*
+ * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do,
+ * the most permissive constraint
+ */
+ bpf_copy_from_user(p, 0, (void *)42);
+ return 0;
+}
+
+static __noinline u64 *get_some_addr(void)
+{
+ if (bpf_get_prandom_u32())
+ return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
+ else
+ return bpf_rdonly_cast(0, 0);
+}
+
+SEC("socket")
+__success
+__retval(0)
+int mixed_mem_type(void *ctx)
+{
+ u64 *p;
+
+ /* Try to avoid compiler hoisting load to if branches by using __noinline func. */
+ p = get_some_addr();
+ return *p;
+}
+
+__attribute__((__aligned__(8)))
+u8 global[] = {
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, 0x77, 0x88,
+ 0x99
+};
+
+__always_inline
+static u64 combine(void *p)
+{
+ u64 acc;
+
+ acc = 0;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ acc |= (*(u64 *)p >> 56) << 24;
+ acc |= (*(u32 *)p >> 24) << 16;
+ acc |= (*(u16 *)p >> 8) << 8;
+ acc |= *(u8 *)p;
+#else
+ acc |= (*(u64 *)p & 0xff) << 24;
+ acc |= (*(u32 *)p & 0xff) << 16;
+ acc |= (*(u16 *)p & 0xff) << 8;
+ acc |= *(u8 *)p;
+#endif
+ return acc;
+}
+
+SEC("socket")
+__retval(0x88442211)
+int diff_size_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0));
+}
+
+SEC("socket")
+__retval(0x99553322)
+int misaligned_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0) + 1);
+}
+
+__weak int return_one(void)
+{
+ return 1;
+}
+
+SEC("socket")
+__success
+__retval(1)
+int null_check(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ if (p == 0)
+ /* make this a function call to avoid compiler
+ * moving r0 assignment before check.
+ */
+ return return_one();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
new file mode 100644
index 000000000000..d4eef0cbadb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+int sk_index;
+int redirect_idx;
+int trace_port;
+int helper_ret;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 100);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *sk;
+
+ /* only accept specified connection */
+ if (skops->local_port != trace_port ||
+ skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+ return 1;
+
+ sk = skops->sk;
+ if (!sk)
+ return 1;
+
+ /* update sk handler */
+ helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
+
+ return 1;
+}
+
+SEC("sk_skb/stream_verdict")
+int mptcp_sockmap_redirect(struct __sk_buff *skb)
+{
+ /* redirect skb to the sk under sock_map[redirect_idx] */
+ return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 70302477e326..41389e579578 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -117,7 +117,7 @@ int _getsockopt_subflow(struct bpf_sockopt *ctx)
return 1;
msk = bpf_core_cast(sk, struct mptcp_sock);
- if (msk->pm.subflows != 1) {
+ if (msk->pm.extra_subflows != 1) {
ctx->retval = -1;
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c
new file mode 100644
index 000000000000..b4c2f0f2be11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/net_timestamping.c
@@ -0,0 +1,248 @@
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include <errno.h>
+
+__u32 monitored_pid = 0;
+
+int nr_active;
+int nr_snd;
+int nr_passive;
+int nr_sched;
+int nr_txsw;
+int nr_ack;
+
+struct sk_stg {
+ __u64 sendmsg_ns; /* record ts when sendmsg is called */
+};
+
+struct sk_tskey {
+ u64 cookie;
+ u32 tskey;
+};
+
+struct delay_info {
+ u64 sendmsg_ns; /* record ts when sendmsg is called */
+ u32 sched_delay; /* SCHED_CB - sendmsg_ns */
+ u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
+ u32 ack_delay; /* ACK_CB - SND_SW_CB */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sk_tskey);
+ __type(value, struct delay_info);
+ __uint(max_entries, 1024);
+} time_map SEC(".maps");
+
+static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */
+
+extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
+
+static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
+{
+ int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
+ int opt = SK_BPF_CB_FLAGS;
+ int level = SOL_SOCKET;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
+ return 1;
+
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
+ (!expected && tmp != new))
+ return 1;
+
+ return 0;
+}
+
+static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
+{
+ if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
+ return true;
+ return false;
+}
+
+static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
+{
+ u8 opt[3] = {0};
+ int load_flags = 0;
+ int ret;
+
+ ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
+{
+ int ret;
+
+ ret = bpf_sock_ops_cb_flags_set(skops, 0);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+/* In the timestamping callbacks, we're not allowed to call the following
+ * BPF CALLs for the safety concern. Return false if expected.
+ */
+static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
+ const struct sock *sk)
+{
+ if (bpf_test_access_sockopt(skops, sk))
+ return true;
+
+ if (bpf_test_access_load_hdr_opt(skops))
+ return true;
+
+ if (bpf_test_access_cb_flags_set(skops))
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
+{
+ struct bpf_sock_ops_kern *skops_kern;
+ u64 timestamp = bpf_ktime_get_ns();
+ struct skb_shared_info *shinfo;
+ struct delay_info dinfo = {0};
+ struct sk_tskey key = {0};
+ struct delay_info *val;
+ struct sk_buff *skb;
+ struct sk_stg *stg;
+ u64 prior_ts, delay;
+
+ if (bpf_test_access_bpf_calls(skops, sk))
+ return false;
+
+ skops_kern = bpf_cast_to_kern_ctx(skops);
+ skb = skops_kern->skb;
+ shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
+
+ key.cookie = bpf_get_socket_cookie(skops);
+ if (!key.cookie)
+ return false;
+
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
+ if (!stg)
+ return false;
+ dinfo.sendmsg_ns = stg->sendmsg_ns;
+ bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+ bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
+ return true;
+ }
+
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+
+ val = bpf_map_lookup_elem(&time_map, &key);
+ if (!val)
+ return false;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ val->sched_delay = timestamp - val->sendmsg_ns;
+ delay = val->sched_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ prior_ts = val->sched_delay + val->sendmsg_ns;
+ val->snd_sw_delay = timestamp - prior_ts;
+ delay = val->snd_sw_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
+ val->ack_delay = timestamp - prior_ts;
+ delay = val->ack_delay;
+ break;
+ }
+
+ if (delay >= delay_tolerance_nsec)
+ return false;
+
+ /* Since it's the last one, remove from the map after latency check */
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
+ bpf_map_delete_elem(&time_map, &key);
+
+ return true;
+}
+
+SEC("fentry/tcp_sendmsg_locked")
+int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
+ size_t size)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ u64 timestamp = bpf_ktime_get_ns();
+ u32 flag = sk->sk_bpf_cb_flags;
+ struct sk_stg *stg;
+
+ if (pid != monitored_pid || !flag)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return 0;
+
+ stg->sendmsg_ns = timestamp;
+ nr_snd += 1;
+ return 0;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ const struct sock *sk;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !bpf_test_sockopt(skops, sk, 0);
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_snd += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_sched += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_txsw += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_ack += 1;
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
index c6edf8dbefeb..94040714af18 100644
--- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -28,6 +28,7 @@ struct {
} sock_map SEC(".maps");
int tcx_init_netns_cookie, tcx_netns_cookie;
+int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie;
SEC("sockops")
int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
@@ -91,4 +92,12 @@ int get_netns_cookie_tcx(struct __sk_buff *skb)
return TCX_PASS;
}
+SEC("cgroup_skb/ingress")
+int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
+{
+ cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 6c5797bf0ead..7d04254e61f1 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -134,7 +134,7 @@ int __noinline preempt_global_subprog(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with preemption disabled")
+__success
int preempt_global_subprog_test(struct __sk_buff *ctx)
{
preempt_disable();
@@ -143,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_helper_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_kfunc_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_subprog_calling_sleepable_global(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
new file mode 100644
index 000000000000..cfc1f48e0d28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int program(struct __sk_buff *skb)
+{
+ err = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
new file mode 100644
index 000000000000..ccf1b04010ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("freplace/xdp_prog1")
+int new_xdp_prog2(struct xdp_md *xd)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
index 3c7b2b618c8a..725e29595079 100644
--- a/tools/testing/selftests/bpf/progs/priv_prog.c
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -6,8 +6,8 @@
char _license[] SEC("license") = "GPL";
-SEC("kprobe")
-int kprobe_prog(void *ctx)
+SEC("xdp")
+int xdp_prog1(struct xdp_md *xdp)
{
- return 1;
+ return XDP_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
new file mode 100644
index 000000000000..a5a8f08ac8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r8 = r1")
+__xlated("1: r1 = 0")
+__xlated("2: call kernel-function")
+__xlated("3: if r0 != 0x0 goto pc+5")
+__xlated("4: r6 = *(u64 *)(r8 +0)")
+__xlated("5: r7 = *(u64 *)(r6 +0)")
+__xlated("6: r7 += 1000")
+__xlated("7: *(u64 *)(r6 +0) = r7")
+__xlated("8: goto pc+2")
+__xlated("9: r1 = r0")
+__xlated("10: call kernel-function")
+__xlated("11: r1 = r8")
+/* save __u64 *ctx to stack */
+__xlated("12: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = r1")
+__xlated("15: call kernel-function")
+__xlated("16: r1 = r6")
+__xlated("17: call pc+")
+/* epilogue */
+__xlated("18: r1 = 0")
+__xlated("19: r6 = 0")
+__xlated("20: call kernel-function")
+__xlated("21: if r0 != 0x0 goto pc+6")
+__xlated("22: r1 = *(u64 *)(r10 -8)")
+__xlated("23: r1 = *(u64 *)(r1 +0)")
+__xlated("24: r6 = *(u64 *)(r1 +0)")
+__xlated("25: r6 += 10000")
+__xlated("26: *(u64 *)(r1 +0) = r6")
+__xlated("27: goto pc+2")
+__xlated("28: r1 = r0")
+__xlated("29: call kernel-function")
+__xlated("30: r0 = r6")
+__xlated("31: r0 *= 2")
+__xlated("32: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_kfunc_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue_with_kfunc = {
+ .test_pro_epilogue = (void *)test_kfunc_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 8bd1ebd7d6af..813143b4985d 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
payload += filepart_length;
}
- cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ cgroup_node = BPF_CORE_READ(cgroup_node, __parent);
}
return payload;
}
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
index 5927054b6dd9..efa416f53968 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
@@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
int tid;
int i;
-SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
{
struct task_struct *task = bpf_get_current_task_btf();
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
index 38d669957bf1..0d58114a4955 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
@@ -8,7 +8,7 @@
char _license[] SEC("license") = "GPL";
/* Ensure module parameter has PTR_MAYBE_NULL */
-SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) {
asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index a3620c15c136..49fe93d7e059 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
}
m->key = 1;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_rbtree_add(&groot, &m->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_rbtree_add(root, &m->node, less);
+ bpf_spin_unlock(lock);
n = bpf_obj_new(typeof(*n));
if (!n)
return 3;
n->key = 3;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_spin_unlock(lock);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index dbd5eee8e25e..4acb6af2dfe3 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
long rbtree_api_remove_unadded_node(void *ctx)
{
struct node_data *n, *m;
- struct bpf_rb_node *res;
+ struct bpf_rb_node *res_n, *res_m;
n = bpf_obj_new(typeof(*n));
if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
bpf_spin_lock(&glock);
bpf_rbtree_add(&groot, &n->node, less);
- /* This remove should pass verifier */
- res = bpf_rbtree_remove(&groot, &n->node);
- n = container_of(res, struct node_data, node);
+ res_n = bpf_rbtree_remove(&groot, &n->node);
- /* This remove shouldn't, m isn't in an rbtree */
- res = bpf_rbtree_remove(&groot, &m->node);
- m = container_of(res, struct node_data, node);
+ res_m = bpf_rbtree_remove(&groot, &m->node);
bpf_spin_unlock(&glock);
- if (n)
- bpf_obj_drop(n);
- if (m)
- bpf_obj_drop(m);
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+ /* m was not added to the rbtree */
+ return 2;
+ }
+
return 0;
}
@@ -178,7 +179,7 @@ err_out:
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_add_release_unlock_escape(void *ctx)
{
struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_first_release_unlock_escape(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..b05565d1db0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * to ensure the message itself is not in the bpf prog lineinfo
+ * which the verifier includes in its log.
+ * Otherwise, the test_loader will incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index ab3a532b7dd6..d70c28824bbe 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -16,10 +16,11 @@ struct {
__type(value, long);
} map_a SEC(".maps");
-__u32 user_data, key_serial, target_pid;
+__u32 user_data, target_pid;
+__s32 key_serial;
__u64 flags, task_storage_val, cgroup_id;
-struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@@ -242,7 +243,8 @@ out:
}
SEC("?lsm.s/bpf")
-int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size,
+ bool kernel)
{
struct bpf_key *bkey;
@@ -276,6 +278,46 @@ out:
return 0;
}
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int task_trusted_non_rcuptr(void *ctx)
{
@@ -439,3 +481,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx)
ret += global_subprog_unlock(ret);
return 0;
}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_helper_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_helper_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_kfunc_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_calling_sleepable_global(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
new file mode 100644
index 000000000000..405adbe5e8b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid = 0;
+
+char xattr_value[64];
+static const char expected_value_a[] = "bpf_selftest_value_a";
+static const char expected_value_b[] = "bpf_selftest_value_b";
+bool found_value_a;
+bool found_value_b;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css, *tmp;
+ struct bpf_dynptr value_ptr;
+ struct cgroup *cgrp;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ css = &cgrp->self;
+ bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr);
+ bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) {
+ int ret;
+
+ ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test",
+ &value_ptr);
+ if (ret < 0)
+ continue;
+
+ if (ret == sizeof(expected_value_a) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a))
+ found_value_a = true;
+ if (ret == sizeof(expected_value_b) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b))
+ found_value_b = true;
+ }
+
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
index 39ebef430059..395591374d4f 100644
--- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -8,14 +8,16 @@
int target_pid = 0;
void *user_ptr = 0;
-int read_ret[9];
+int read_ret[10];
char _license[] SEC("license") = "GPL";
/*
- * This is the only kfunc, the others are helpers
+ * These are the kfuncs, the others are helpers
*/
int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *,
+ struct task_struct *, u64) __weak __ksym;
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
@@ -47,6 +49,11 @@ int do_copy_from_user(void *ctx)
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf,
+ sizeof(buf),
+ user_ptr,
+ bpf_get_current_task_btf(),
+ 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 893a4fdb4b6e..1aca85d86aeb 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -568,4 +568,64 @@ err_out:
return 0;
}
+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+ u32 refcount;
+
+ bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
+ return refcount;
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct node_data __kptr **node)
+{
+ struct node_data *node_new, *node_ref, *node_old;
+
+ node_new = bpf_obj_new(typeof(*node_new));
+ if (!node_new)
+ return -1;
+
+ node_ref = bpf_refcount_acquire(node_new);
+ node_old = bpf_kptr_xchg(node, node_new);
+ if (node_old) {
+ bpf_obj_drop(node_old);
+ bpf_obj_drop(node_ref);
+ return -2;
+ }
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &node_ref->l);
+ ref = (u64)(void *) &node_ref->ref;
+ bpf_spin_unlock(lock);
+ return probe_read_refcount();
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&percpu_hash, &key);
+ if (!v)
+ return 0;
+
+ return __insert_in_list(&head, &lock, &v->node);
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+ return probe_read_refcount();
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c
new file mode 100644
index 000000000000..22c4fb8b9266
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define EDEADLK 35
+#define ETIMEDOUT 110
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("tc")
+int res_spin_lock_test(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem1, *elem2;
+ int r;
+
+ elem1 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem1)
+ return -1;
+ elem2 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem2)
+ return -1;
+
+ r = bpf_res_spin_lock(&elem1->lock);
+ if (r)
+ return r;
+ r = bpf_res_spin_lock(&elem2->lock);
+ if (!r) {
+ bpf_res_spin_unlock(&elem2->lock);
+ bpf_res_spin_unlock(&elem1->lock);
+ return -1;
+ }
+ bpf_res_spin_unlock(&elem1->lock);
+ return r != -EDEADLK;
+}
+
+SEC("tc")
+int res_spin_lock_test_AB(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockA);
+ if (r)
+ return !r;
+ /* Only unlock if we took the lock. */
+ if (!bpf_res_spin_lock(&lockB))
+ bpf_res_spin_unlock(&lockB);
+ bpf_res_spin_unlock(&lockA);
+ return 0;
+}
+
+int err;
+
+SEC("tc")
+int res_spin_lock_test_BA(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockB);
+ if (r)
+ return !r;
+ if (!bpf_res_spin_lock(&lockA))
+ bpf_res_spin_unlock(&lockA);
+ else
+ err = -EDEADLK;
+ bpf_res_spin_unlock(&lockB);
+ return err ?: 0;
+}
+
+SEC("tc")
+int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
+{
+ struct bpf_res_spin_lock *locks[48] = {};
+ struct arr_elem *e;
+ u64 time_beg, time;
+ int ret = 0, i;
+
+ _Static_assert(ARRAY_SIZE(((struct rqspinlock_held){}).locks) == 31,
+ "RES_NR_HELD assumed to be 31");
+
+ for (i = 0; i < 34; i++) {
+ int key = i;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ for (; i < 48; i++) {
+ int key = i - 2;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+ * loses bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ time_beg = bpf_ktime_get_ns();
+ for (i = 0; i < 34; i++) {
+ if (bpf_res_spin_lock(locks[i]))
+ goto end;
+ }
+
+ /* Trigger AA, after exhausting entries in the held lock table. This
+ * time, only the timeout can save us, as AA detection won't succeed.
+ */
+ ret = bpf_res_spin_lock(locks[34]);
+ if (!ret) {
+ bpf_res_spin_unlock(locks[34]);
+ ret = 1;
+ goto end;
+ }
+
+ ret = ret != -ETIMEDOUT ? 2 : 0;
+
+end:
+ for (i = i - 1; i >= 0; i--)
+ bpf_res_spin_unlock(locks[i]);
+ time = bpf_ktime_get_ns() - time_beg;
+ /* Time spent should be easily above our limit (1/4 s), since AA
+ * detection won't be expedited due to lack of held lock entry.
+ */
+ return ret ?: (time > 1000000000 / 4 ? 0 : 1);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
new file mode 100644
index 000000000000..330682a88c16
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+long value;
+
+struct bpf_spin_lock lock __hidden SEC(".data.A");
+struct bpf_res_spin_lock res_lock __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("point to map value or allocated object")
+int res_spin_lock_arg(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((struct bpf_res_spin_lock *)bpf_core_cast(&elem->lock, struct __sk_buff));
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_cond_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_local_irq_save(&f1);
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock)) {
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(&elem->lock);
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo_irq(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1, f2;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&elem->lock, &f2)) {
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ /* We won't have a unreleased IRQ flag error here. */
+ return 0;
+ }
+ bpf_res_spin_unlock_irqrestore(&elem->lock, &f2);
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+struct bpf_res_spin_lock lock1 __hidden SEC(".data.OO1");
+struct bpf_res_spin_lock lock2 __hidden SEC(".data.OO2");
+
+SEC("?tc")
+__failure __msg("bpf_res_spin_unlock cannot be out of order")
+int res_spin_lock_ooo_unlock(struct __sk_buff *ctx)
+{
+ if (bpf_res_spin_lock(&lock1))
+ return 0;
+ if (bpf_res_spin_lock(&lock2)) {
+ bpf_res_spin_unlock(&lock1);
+ return 0;
+ }
+ bpf_res_spin_unlock(&lock1);
+ bpf_res_spin_unlock(&lock2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off 1 doesn't point to 'struct bpf_res_spin_lock' that is at 0")
+int res_spin_lock_bad_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((void *)&elem->lock + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. bpf_res_spin_lock has to be at the constant offset")
+int res_spin_lock_var_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ u64 val = value;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem) {
+ // FIXME: Only inline assembly use in assert macro doesn't emit
+ // BTF definition.
+ bpf_throw(0);
+ return 0;
+ }
+ bpf_assert_range(val, 0, 40);
+ bpf_res_spin_lock((void *)&value + val);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("map 'res_spin.bss' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_map(struct __sk_buff *ctx)
+{
+ bpf_res_spin_lock((void *)&value + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("local 'kptr' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_kptr(struct __sk_buff *ctx)
+{
+ struct { int i; } *p = bpf_obj_new(typeof(*p));
+
+ if (!p)
+ return 0;
+ bpf_res_spin_lock((void *)p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 6a468496f539..d96c7d1e8fc2 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
+#include <stdbool.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
@@ -14,9 +15,11 @@ struct {
const volatile int batch_cnt = 0;
const volatile long use_output = 0;
+const volatile bool bench_producer = false;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
+long hits __attribute__((aligned(128))) = 0;
const volatile long wakeup_data_size = 0;
@@ -24,6 +27,9 @@ static __always_inline long get_flags()
{
long sz;
+ if (bench_producer)
+ return BPF_RB_NO_WAKEUP;
+
if (!wakeup_data_size)
return 0;
@@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx)
*sample = sample_val;
flags = get_flags();
bpf_ringbuf_submit(sample, flags);
+ if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
}
}
} else {
@@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx)
if (bpf_ringbuf_output(&ringbuf, &sample_val,
sizeof(sample_val), flags))
__sync_add_and_fetch(&dropped, 1);
+ else if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
+
}
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c
new file mode 100644
index 000000000000..7216b3450e96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define EPERM 1 /* Operation not permitted */
+
+/* From include/linux/mm.h. */
+#define FMODE_WRITE 0x2
+
+struct map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} prot_status_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} prot_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} not_prot_map SEC(".maps");
+
+SEC("fmod_ret/security_bpf_map")
+int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode)
+{
+ __u32 key = 0;
+ __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key);
+
+ if (!status_ptr || !*status_ptr)
+ return 0;
+
+ if (map == &prot_map) {
+ /* Allow read-only access */
+ if (fmode & FMODE_WRITE)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * This program keeps references to maps. This is needed to prevent
+ * optimizing them out.
+ */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_dummy1, int a)
+{
+ __u32 key = 0;
+ __u32 val1 = a;
+ __u32 val2 = a + 1;
+
+ bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY);
+ bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
new file mode 100644
index 000000000000..ebaef28b2cb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+typedef __s32 s32;
+typedef s32 i32;
+typedef __u8 u8;
+
+enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 };
+enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
+enum Enums64 { EC1 = 0ll, EC2 = 13ll };
+
+const volatile __s64 var_s64 = -1;
+const volatile __u64 var_u64 = 0;
+const volatile i32 var_s32 = -1;
+const volatile __u32 var_u32 = 0;
+const volatile __s16 var_s16 = -1;
+const volatile __u16 var_u16 = 0;
+const volatile __s8 var_s8 = -1;
+const volatile u8 var_u8 = 0;
+const volatile enum Enum var_ea = EA1;
+const volatile enum Enumu64 var_eb = EB1;
+const volatile enum Enums64 var_ec = EC1;
+const volatile bool var_b = false;
+const volatile i32 arr[32];
+const volatile enum Enum enum_arr[32];
+const volatile i32 three_d[47][19][17];
+const volatile i32 *ptr_arr[32];
+
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile u8 var_u8[3];
+ const volatile __s16 filler3;
+ const int:1;
+ s32 mat[7][5];
+ } u;
+ };
+ } struct2[2][4];
+};
+
+const volatile __u32 stru = 0; /* same prefix as below */
+const volatile struct Struct struct1[3];
+const volatile struct Struct struct11[11][7];
+
+struct Struct3 {
+ struct {
+ u8 var_u8_l;
+ };
+ struct {
+ struct {
+ u8 var_u8_h;
+ };
+ };
+};
+
+typedef struct Struct3 Struct3_t;
+
+union Union {
+ __u16 var_u16;
+ Struct3_t struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
+SEC("socket")
+int test_set_globals(void *ctx)
+{
+ volatile __s8 a;
+
+ a = var_s64;
+ a = var_u64;
+ a = var_s32;
+ a = var_u32;
+ a = var_s16;
+ a = var_u16;
+ a = var_s8;
+ a = var_u8;
+ a = var_ea;
+ a = var_eb;
+ a = var_ec;
+ a = var_b;
+ a = struct1[2].struct2[1][2].u.var_u8[2];
+ a = union1.var_u16;
+ a = arr[3];
+ a = arr[EA2];
+ a = enum_arr[EC2];
+ a = three_d[31][7][EA2];
+ a = struct1[2].struct2[1][2].u.mat[5][3];
+ a = struct11[7][5].struct2[0][1].u.mat[3][0];
+
+ return a;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 6dd4318debbf..d330b1511979 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -61,6 +61,9 @@ static const struct sockopt_test sol_tcp_tests[] = {
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
.expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
+ { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, },
+ { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, },
+ { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, },
{ .opt = 0, },
};
@@ -80,6 +83,14 @@ struct loop_ctx {
struct sock *sk;
};
+static bool sk_is_tcp(struct sock *sk)
+{
+ return (sk->__sk_common.skc_family == AF_INET ||
+ sk->__sk_common.skc_family == AF_INET6) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
const struct sockopt_test *t,
int level)
@@ -88,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
opt = t->opt;
+ if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+ return 0;
+
if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
return 1;
/* kernel initialized txrehash to 255 */
diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..09a00d11ffcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+extern int tcp_memory_per_cpu_fw_alloc __ksym;
+extern int udp_memory_per_cpu_fw_alloc __ksym;
+
+int nr_cpus;
+bool tcp_activated, udp_activated;
+long tcp_memory_allocated, udp_memory_allocated;
+
+struct sk_prot {
+ long *memory_allocated;
+ int *memory_per_cpu_fw_alloc;
+};
+
+static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
+{
+ int *memory_per_cpu_fw_alloc;
+
+ memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
+ if (memory_per_cpu_fw_alloc)
+ *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;
+
+ return 0;
+}
+
+static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
+{
+ struct sock *sk = bpf_core_cast(_sk, struct sock);
+ struct sk_prot sk_prot_ctx;
+ long memory_allocated;
+
+ /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
+ memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
+
+ sk_prot_ctx.memory_allocated = &memory_allocated;
+ sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
+
+ bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
+
+ return memory_allocated;
+}
+
+static void fentry_init_sock(struct sock *sk, bool *activated,
+ long *memory_allocated, int *memory_per_cpu_fw_alloc)
+{
+ if (!*activated)
+ return;
+
+ *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
+ *activated = false;
+}
+
+SEC("fentry/tcp_init_sock")
+int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &tcp_activated,
+ &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("fentry/udp_init_sock")
+int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &udp_activated,
+ &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("cgroup/sock_create")
+int sock_create(struct bpf_sock *ctx)
+{
+ int err, val = 1;
+
+ err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ val = 0;
+
+ err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ if (val != 1) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 1;
+
+err:
+ bpf_set_retval(err);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..77966ded5467 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,13 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
}
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
+volatile const unsigned int ss;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -26,22 +33,28 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
struct sock *sk = (struct sock *)ctx->sk_common;
struct inet_hashinfo *hinfo;
unsigned int hash;
+ __u64 sock_cookie;
struct net *net;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- sk->sk_state != TCP_LISTEN ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -52,6 +65,28 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
hinfo = net->ipv4.tcp_death_row.hashinfo;
bucket[idx] = hash & hinfo->lhash2_mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
@@ -63,20 +98,26 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
{
struct sock *sk = (struct sock *)ctx->udp_sk;
struct udp_table *udptable;
+ __u64 sock_cookie;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -84,6 +125,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
udptable = sk->sk_net.net->ipv4.udp_table;
bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
new file mode 100644
index 000000000000..a96c8150d7f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+extern bool CONFIG_UNWINDER_ORC __kconfig __weak;
+
+/*
+ * This function is here to have CONFIG_UNWINDER_ORC
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_UNWINDER_ORC ? 0 : 1;
+}
+
+__u32 stack_key;
+
+SEC("kprobe.multi")
+int kprobe_multi_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+SEC("raw_tp/bpf_testmod_test_read")
+int rawtp_test(void *ctx)
+{
+ /* Skip ebpf program entry in the stack. */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 47568007b668..0c77df05be7f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -50,6 +50,7 @@ struct sched_switch_args {
int next_prio;
};
+__u32 stack_id;
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
/* The size of stackmap and stackid_hmap should be the same */
key = bpf_get_stackid(ctx, &stackmap, 0);
if ((int)key >= 0) {
+ stack_id = key;
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
if (stack_p)
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
new file mode 100644
index 000000000000..4a5bd852f10c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1); /* number of pages */
+} arena SEC(".maps");
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+#define ENOSPC 28
+#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+int size;
+u64 fault_addr;
+void *arena_ptr;
+
+SEC("syscall")
+__success __retval(0)
+int stream_exhaust(void *ctx)
+{
+ /* Use global variable for loop convergence. */
+ size = 0;
+ bpf_repeat(BPF_MAX_LOOPS) {
+ if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954)
+ return 0;
+ size += sizeof(_STR) - 1;
+ }
+ return 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__arch_s390x
+__success __retval(0)
+__stderr("ERROR: Timeout detected for may_goto instruction")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_cond_break(void *ctx)
+{
+ while (can_loop)
+ ;
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
+__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
+"Total held locks = 1\n"
+"Held lock\\[ 0\\] = \\1}}")
+__stderr("...")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_deadlock(void *ctx)
+{
+ struct bpf_res_spin_lock *lock, *nlock;
+
+ lock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!lock)
+ return 1;
+ nlock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!nlock)
+ return 1;
+ if (bpf_res_spin_lock(lock))
+ return 1;
+ if (bpf_res_spin_lock(nlock)) {
+ bpf_res_spin_unlock(lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(nlock);
+ bpf_res_spin_unlock(lock);
+ return 1;
+}
+
+SEC("syscall")
+__success __retval(0)
+int stream_syscall(void *ctx)
+{
+ bpf_stream_printk(BPF_STDOUT, "foo");
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_write_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r2 = 1;"
+ "*(u32 *)(r1 + 0x7fff) = r2;"
+ :
+ : "r" (user_vm_start)
+ : "r1", "r2"
+ );
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_read_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r1 = *(u32 *)(r1 + 0x7fff);"
+ :
+ : "r" (user_vm_start)
+ : "r1"
+ );
+ return 0;
+}
+
+static __noinline void subprog(void)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_subprog_fault(void *ctx)
+{
+ subprog();
+ return 0;
+}
+
+static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_callback_fault(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+
+ arr_timer = bpf_map_lookup_elem(&array, &(int){0});
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb);
+ bpf_timer_start(arr_timer, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
new file mode 100644
index 000000000000..3662515f0107
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+SEC("syscall")
+__failure __msg("Possibly NULL pointer passed")
+int stream_vprintk_null_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("R3 type=scalar expected=")
+int stream_vprintk_scalar_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("arg#1 doesn't point to a const string")
+int stream_vprintk_string_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
new file mode 100644
index 000000000000..826e6b6aff7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char *user_ptr = (char *)1;
+char *invalid_kern_ptr = (char *)-1;
+
+/*
+ * When passing userspace pointers, the error code differs based on arch:
+ * -ERANGE on arches with non-overlapping address spaces
+ * -EFAULT on other arches
+ */
+#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \
+ defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86)
+#define USER_PTR_ERR -ERANGE
+#else
+#define USER_PTR_ERR -EFAULT
+#endif
+
+/*
+ * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and
+ * user_ptr (instead of causing an exception) so the below two groups of tests
+ * are not applicable.
+ */
+#ifndef __TARGET_ARCH_s390
+
+/* Passing NULL to string kfuncs (treated as a userspace ptr) */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null1(void *ctx) { return bpf_strcasestr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null2(void *ctx) { return bpf_strcasestr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null1(void *ctx) { return bpf_strncasestr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null2(void *ctx) { return bpf_strncasestr("hello", NULL, 1); }
+
+/* Passing userspace ptr to string kfuncs */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr1(void *ctx) { return bpf_strcasestr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr2(void *ctx) { return bpf_strcasestr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr1(void *ctx) { return bpf_strncasestr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr2(void *ctx) { return bpf_strncasestr("hello", user_ptr, 1); }
+
+#endif /* __TARGET_ARCH_s390 */
+
+/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault1(void *ctx) { return bpf_strcasestr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault2(void *ctx) { return bpf_strcasestr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault1(void *ctx) { return bpf_strncasestr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault2(void *ctx) { return bpf_strncasestr("hello", invalid_kern_ptr, 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
new file mode 100644
index 000000000000..05e1da1f250f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+char long_str[XATTR_SIZE_MAX + 1];
+
+SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
+SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
+SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
+SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
+SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); }
+SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); }
+SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); }
+SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); }
+SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); }
+SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
+SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
+SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strcasestr_too_long(void *ctx) { return bpf_strcasestr(long_str, "hello"); }
+SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+SEC("syscall") int test_strncasestr_too_long(void *ctx) { return bpf_strncasestr(long_str, "hello", sizeof(long_str)); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
new file mode 100644
index 000000000000..a8513964516b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char str[] = "hello world";
+
+#define __test(retval) SEC("syscall") __success __retval(retval)
+
+/* Functional tests */
+__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
+__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); }
+__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); }
+__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
+__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
+__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
+__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
+__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
+__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
+__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); }
+__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); }
+__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); }
+__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); }
+__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); }
+__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); }
+__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); }
+__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); }
+__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); }
+__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
+__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
+__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
+__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(6) int test_strcasestr_found(void *ctx) { return bpf_strcasestr(str, "woRLD"); }
+__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(-ENOENT) int test_strcasestr_notfound(void *ctx) { return bpf_strcasestr(str, "hi"); }
+__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strcasestr_empty(void *ctx) { return bpf_strcasestr(str, ""); }
+__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
+__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
+__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
+__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
+__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+__test(0) int test_strncasestr_found1(void *ctx) { return bpf_strncasestr("", "", 0); }
+__test(0) int test_strncasestr_found2(void *ctx) { return bpf_strncasestr(str, "heLLO", 5); }
+__test(0) int test_strncasestr_found3(void *ctx) { return bpf_strncasestr(str, "heLLO", 6); }
+__test(-ENOENT) int test_strncasestr_notfound1(void *ctx) { return bpf_strncasestr(str, "hi", 10); }
+__test(-ENOENT) int test_strncasestr_notfound2(void *ctx) { return bpf_strncasestr(str, "hello", 4); }
+__test(-ENOENT) int test_strncasestr_notfound3(void *ctx) { return bpf_strncasestr("", "a", 0); }
+__test(0) int test_strncasestr_empty(void *ctx) { return bpf_strncasestr(str, "", 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..f47bf88f8d2a 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz,
SEC("tp/syscalls/sys_enter_getpgid")
int strncmp_no_helper(void *ctx)
{
- if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ const char *target_str = target;
+
+ barrier_var(target_str);
+ if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
__sync_add_and_fetch(&hits, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index a5c74d31a244..6e1918deaf26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -330,9 +330,9 @@ static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
}
bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
- return tls_ptr && tls_ptr != (void *)-1
- ? tls_ptr + tls_index.offset
- : NULL;
+ if (!tls_ptr || tls_ptr == (void *)-1)
+ return NULL;
+ return tls_ptr + tls_index.offset;
}
#ifdef SUBPROGS
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
new file mode 100644
index 000000000000..ad8bb546c9bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP1_MAGIC 1234
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP1_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
new file mode 100644
index 000000000000..cea1a2f4b62f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP2_MAGIC 4567
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP2_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
new file mode 100644
index 000000000000..2b98b7710816
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task
+ * here is acquired automatically as the task argument is tagged with "__ref".
+ */
+SEC("struct_ops/test_return_ref_kptr")
+struct task_struct *BPF_PROG(kptr_return, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ if (dummy % 2) {
+ bpf_task_release(task);
+ return NULL;
+ }
+ return task;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
new file mode 100644
index 000000000000..caeea158ef69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
@@ -0,0 +1,26 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a non-zero scalar value.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]")
+struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ bpf_task_release(task);
+ return (struct task_struct *)1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
new file mode 100644
index 000000000000..b8b4f05c3d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
@@ -0,0 +1,34 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a local kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *t;
+
+ bpf_task_release(task);
+
+ t = bpf_obj_new(typeof(*task));
+ if (!t)
+ return NULL;
+
+ return t;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
new file mode 100644
index 000000000000..7ddeb28c2329
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
@@ -0,0 +1,25 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a modified referenced kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed")
+struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ return (struct task_struct *)&task->jobctl;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
new file mode 100644
index 000000000000..6a2dd5367802
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This test struct_ops BPF programs returning referenced kptr. The verifier should
+ * reject programs returning a referenced kptr of the wrong type.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *ret;
+
+ ret = (struct task_struct *)bpf_cgroup_acquire(cgrp);
+ bpf_task_release(task);
+
+ return ret;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index 0e4d2ff63ab8..dbe646013811 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 58d5d8dc2235..3d89ad7cbe2a 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index 31e58389bb8b..b1f6d7e5a8e5 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
new file mode 100644
index 000000000000..9c0a65466356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -0,0 +1,31 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This is a test BPF program that uses struct_ops to access a referenced
+ * kptr argument. This is a test for the verifier to ensure that it
+ * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 2) the same reference can be acquired from multiple paths as long as it
+ * has not been released.
+ */
+SEC("struct_ops/test_refcounted")
+int BPF_PROG(refcounted, int dummy, struct task_struct *task)
+{
+ if (dummy == 1)
+ bpf_task_release(task);
+ else
+ bpf_task_release(task);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_refcounted = {
+ .test_refcounted = (void *)refcounted,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
new file mode 100644
index 000000000000..ae074aa62852
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
@@ -0,0 +1,39 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+__noinline int subprog_release(__u64 *ctx __arg_ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+ int dummy = (int)ctx[0];
+
+ bpf_task_release(task);
+
+ return dummy + 1;
+}
+
+/* Test that the verifier rejects a program that contains a global
+ * subprogram with referenced kptr arguments
+ */
+SEC("struct_ops/test_refcounted")
+__failure __log_level(2)
+__msg("Validating subprog_release() func#1...")
+__msg("invalid bpf_context access off=8. Reference may already be released")
+int refcounted_fail__global_subprog(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+
+ return subprog_release(ctx);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__global_subprog,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
new file mode 100644
index 000000000000..e945b1a04294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
@@ -0,0 +1,22 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Test that the verifier rejects a program that acquires a referenced
+ * kptr through context without releasing the reference
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("Unreleased reference id=1 alloc_insn=0")
+int BPF_PROG(refcounted_fail__ref_leak, int dummy,
+ struct task_struct *task)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__ref_leak,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
new file mode 100644
index 000000000000..3b125025a1f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+/* Test that the verifier rejects a program with referenced kptr arguments
+ * that tail call
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("program with __ref argument cannot tail call")
+int refcounted_fail__tail_call(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__tail_call,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c
new file mode 100644
index 000000000000..f89effe82c9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?tc")
+int main_changes_with_subprogs(struct __sk_buff *sk)
+{
+ changes_pkt_data(sk);
+ does_not_change_pkt_data(sk);
+ return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+ bpf_skb_pull_data(sk, 0);
+ return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+__noinline
+long might_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+__noinline __weak
+long does_not_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep_with_subprogs(struct pt_regs *ctx)
+{
+ might_sleep(ctx);
+ does_not_sleep(ctx);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?uprobe.s")
+int main_does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c
index f9a622705f1b..935f00e0e9ea 100644
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
+++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
SEC("?freplace")
@@ -15,4 +15,19 @@ long does_not_change_pkt_data(struct __sk_buff *sk)
return 0;
}
+SEC("?freplace")
+long might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?freplace")
+long does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
index 327ca395e860..d556b19413d7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb)
{
int ret = 1;
+ clobber_regs_stack();
+
count++;
subprog_tail(skb);
subprog_tail(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
index 72fd0d577506..ae94c9c70ab7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
int classifier_1(struct __sk_buff *skb);
@@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
subprog_tail0(skb);
subprog_tail1(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
index a7fb91cb05b7..56b6b0099840 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
@@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
bpf_tail_call_static(skb, &jmp_table0, 0);
__sink(ret);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
index c87f9ca982d3..5261395713cd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int subprog_tail(void *ctx)
SEC("fentry/dummy")
int BPF_PROG(fentry, struct sk_buff *skb)
{
+ clobber_regs_stack();
+
count++;
subprog_tail(ctx);
subprog_tail(ctx);
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
new file mode 100644
index 000000000000..432fff2af844
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_BPF_H
+#define __TASK_LOCAL_DATA_BPF_H
+
+/*
+ * Task local data is a library that facilitates sharing per-task data
+ * between user space and bpf programs.
+ *
+ *
+ * USAGE
+ *
+ * A TLD, an entry of data in task local data, first needs to be created by the
+ * user space. This is done by calling user space API, TLD_DEFINE_KEY() or
+ * tld_create_key(), with the name of the TLD and the size.
+ *
+ * TLD_DEFINE_KEY(prio, "priority", sizeof(int));
+ *
+ * or
+ *
+ * void func_call(...) {
+ * tld_key_t prio, in_cs;
+ *
+ * prio = tld_create_key("priority", sizeof(int));
+ * in_cs = tld_create_key("in_critical_section", sizeof(bool));
+ * ...
+ *
+ * A key associated with the TLD, which has an opaque type tld_key_t, will be
+ * initialized or returned. It can be used to get a pointer to the TLD in the
+ * user space by calling tld_get_data().
+ *
+ * In a bpf program, tld_object_init() first needs to be called to initialized a
+ * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data().
+ * The API will try to fetch the key by the name and use it to locate the data.
+ * A pointer to the TLD will be returned. It also caches the key in a task local
+ * storage map, tld_key_map, whose value type, struct tld_keys, must be defined
+ * by the developer.
+ *
+ * struct tld_keys {
+ * tld_key_t prio;
+ * tld_key_t in_cs;
+ * };
+ *
+ * SEC("struct_ops")
+ * void prog(struct task_struct task, ...)
+ * {
+ * struct tld_object tld_obj;
+ * int err, *p;
+ *
+ * err = tld_object_init(task, &tld_obj);
+ * if (err)
+ * return;
+ *
+ * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
+ * if (p)
+ * // do something depending on *p
+ */
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifndef TLD_KEY_MAP_CREATE_RETRY
+#define TLD_KEY_MAP_CREATE_RETRY 10
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ __u16 size;
+};
+
+struct tld_meta_u {
+ __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[TLD_MAX_DATA_CNT];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[__PAGE_SIZE - sizeof(__u64)];
+};
+
+struct tld_map_value {
+ struct tld_data_u __uptr *data;
+ struct tld_meta_u __uptr *meta;
+};
+
+typedef struct tld_uptr_dummy {
+ struct tld_data_u data[0];
+ struct tld_meta_u meta[0];
+} *tld_uptr_dummy_t;
+
+struct tld_object {
+ struct tld_map_value *data_map;
+ struct tld_keys *key_map;
+ /*
+ * Force the compiler to generate the actual definition of tld_meta_u
+ * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will
+ * be BTF_KIND_FWD.
+ */
+ tld_uptr_dummy_t dummy[0];
+};
+
+/*
+ * Map value of tld_key_map for caching keys. Must be defined by the developer.
+ * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key().
+ */
+struct tld_keys;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_map_value);
+} tld_data_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_keys);
+} tld_key_map SEC(".maps");
+
+/**
+ * tld_object_init() - Initialize a tld_object.
+ *
+ * @task: The task_struct of the target task
+ * @tld_obj: A pointer to a tld_object to be initialized
+ *
+ * Return 0 on success; -ENODATA if the user space did not initialize task local data
+ * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map
+ * fails
+ */
+__attribute__((unused))
+static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj)
+{
+ int i;
+
+ tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0);
+ if (!tld_obj->data_map)
+ return -ENODATA;
+
+ bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) {
+ tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (likely(tld_obj->key_map))
+ break;
+ }
+ if (!tld_obj->key_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Return the offset of TLD if @name is found. Otherwise, return the current TLD count
+ * using the nonpositive range so that the next tld_get_data() can skip fetching key if
+ * no new TLD is added or start comparing name from the first newly added TLD.
+ */
+__attribute__((unused))
+static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start)
+{
+ struct tld_metadata *metadata;
+ int i, cnt, start, off = 0;
+
+ if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta)
+ return 0;
+
+ start = tld_obj->data_map->data->start;
+ cnt = tld_obj->data_map->meta->cnt;
+ metadata = tld_obj->data_map->meta->metadata;
+
+ bpf_for(i, 0, cnt) {
+ if (i >= TLD_MAX_DATA_CNT)
+ break;
+
+ if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name))
+ return start + off;
+
+ off += TLD_ROUND_UP(metadata[i].size, 8);
+ }
+
+ return -cnt;
+}
+
+/**
+ * tld_get_data() - Retrieve a pointer to the TLD associated with the name.
+ *
+ * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init()
+ * @key: The cached key of the TLD in tld_key_map
+ * @name: The name of the key associated with a TLD
+ * @size: The size of the TLD. Must be a known constant value
+ *
+ * Return a pointer to the TLD associated with @name; NULL if not found or @size is too
+ * big. @key is used to cache the key if the TLD is found to speed up subsequent calls.
+ * It should be defined as an member of tld_keys of tld_key_t type by the developer.
+ */
+#define tld_get_data(tld_obj, key, name, size) \
+ ({ \
+ void *data = NULL, *_data = (tld_obj)->data_map->data; \
+ long off = (tld_obj)->key_map->key.off; \
+ int cnt; \
+ \
+ if (likely(_data)) { \
+ if (likely(off > 0)) { \
+ barrier_var(off); \
+ if (likely(off < __PAGE_SIZE - size)) \
+ data = _data + off; \
+ } else { \
+ cnt = -(off); \
+ if (likely((tld_obj)->data_map->meta) && \
+ cnt < (tld_obj)->data_map->meta->cnt) { \
+ off = __tld_fetch_key(tld_obj, name, cnt); \
+ (tld_obj)->key_map->key.off = off; \
+ \
+ if (likely(off < __PAGE_SIZE - size)) { \
+ barrier_var(off); \
+ if (off > 0) \
+ data = _data + off; \
+ } \
+ } \
+ } \
+ } \
+ data; \
+ })
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
new file mode 100644
index 000000000000..663a80990f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} lrumap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+int oncpu_hash_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_array_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_lru_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (work)
+ return 0;
+ err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (!work || work->data[0])
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
new file mode 100644
index 000000000000..1270953fd092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+__failure __msg("doesn't match map pointer in R3")
+int mismatch_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("arg#1 doesn't point to a map value")
+int no_map_task_work(struct pt_regs *args)
+{
+ struct task_struct *task;
+ struct bpf_task_work tw;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int task_work_null(struct pt_regs *args)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg2")
+int map_null(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
new file mode 100644
index 000000000000..55e555f7f41b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+#define ENTRIES 128
+
+char _license[] SEC("license") = "GPL";
+
+__u64 callback_scheduled = 0;
+__u64 callback_success = 0;
+__u64 schedule_error = 0;
+__u64 delete_success = 0;
+
+struct elem {
+ __u32 count;
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, ENTRIES);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ __sync_fetch_and_add(&callback_success, 1);
+ return 0;
+}
+
+SEC("syscall")
+int schedule_task_work(void *ctx)
+{
+ struct elem empty_work = {.count = 0};
+ struct elem *work;
+ int key = 0, err;
+
+ key = bpf_ktime_get_ns() % ENTRIES;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work) {
+ bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+ }
+ err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
+ if (err)
+ __sync_fetch_and_add(&schedule_error, 1);
+ else
+ __sync_fetch_and_add(&callback_scheduled, 1);
+ return 0;
+}
+
+SEC("syscall")
+int delete_task_work(void *ctx)
+{
+ int key = 0, err;
+
+ key = bpf_get_prandom_u32() % ENTRIES;
+ err = bpf_map_delete_elem(&hmap, &key);
+ if (!err)
+ __sync_fetch_and_add(&delete_success, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index a58b5194fc89..022291f21dfb 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -8,8 +8,6 @@ char _license[] SEC("license") = "GPL";
#define USEC_PER_SEC 1000000UL
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
static unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
index 44628865fe1d..4fee0fdc7607 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -51,13 +51,13 @@ out:
}
SEC("lsm/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 2ec1de11a3ae..7b6b2b342c1d 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <errno.h>
char _license[] SEC("license") = "GPL";
@@ -288,3 +289,14 @@ int tc_input_len_exceed(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("tc")
+int tc_chk_segs_flag(struct __sk_buff *ctx)
+{
+ __u32 mtu_len = 0;
+ int err;
+
+ err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS);
+
+ return err == -EINVAL ? BPF_OK : BPF_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f344c6835e84..26a53e54b8fa 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,7 @@
#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#include "bpf_misc.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -31,9 +32,6 @@
#define INLINING __always_inline
#endif
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
-
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)
@@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8)));
typedef struct buf {
struct __sk_buff *skb;
net_ptr head;
- /* NB: tail musn't have alignment other than 1, otherwise
+ /* NB: tail mustn't have alignment other than 1, otherwise
* LLVM will go and eliminate code, e.g. when checking packet lengths.
*/
uint8_t *const tail;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index d0f7670351e5..dfd4a2710391 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header
*offset += sizeof(*next_hop);
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 51b3f79df523..448403634eea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -15,6 +15,7 @@ struct {
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -41,6 +42,7 @@ int test_core_arrays(void *ctx)
{
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
+ int *a;
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
@@ -53,6 +55,9 @@ int test_core_arrays(void *ctx)
if (CORE_READ(&out->f01c, &in->f[0][1].c))
return 1;
+ a = __builtin_preserve_access_index(({ in->a; }));
+ out->a3 = a[0] + a[1] + a[2] + a[3];
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
index 66e737720f7c..54305f4c9f2d 100644
--- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -17,12 +18,23 @@ static const char expected_value[] = "hello";
char value1[32];
char value2[32];
+/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */
+static const char xattr_names[][64] = {
+ /* The following work. */
+ "user.kfuncs",
+ "security.bpf.xxx",
+
+ /* The following do not work. */
+ "security.bpf",
+ "security.selinux"
+};
+
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -30,7 +42,11 @@ int BPF_PROG(test_file_open, struct file *f)
bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr);
- ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value1, ret, expected_value))
@@ -44,7 +60,7 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -52,7 +68,11 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr);
- ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value2, ret, expected_value))
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
index a3f220ba7025..ee65bad0436d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last");
int percpu_arr[1] SEC(".data.percpu_arr");
+/* at least one extern is included, to ensure that a specific
+ * regression is tested whereby resizing resulted in a free-after-use
+ * bug after type information is invalidated by the resize operation.
+ *
+ * There isn't a particularly good API to test for this specific condition,
+ * but by having externs for the resizing tests it will cover this path.
+ */
+extern int LINUX_KERNEL_VERSION __kconfig;
+long version_sink;
+
SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
@@ -44,6 +54,9 @@ int bss_array_sum(void *ctx)
for (size_t i = 0; i < bss_array_len; ++i)
sum += array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
@@ -59,6 +72,9 @@ int data_array_sum(void *ctx)
for (size_t i = 0; i < data_array_len; ++i)
sum += my_array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
new file mode 100644
index 000000000000..b45fab3be352
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Microsoft Corporation
+ *
+ * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_tid;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ __u32 tid;
+
+ tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ if (!kernel || tid != monitored_tid)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index cd4d752bd089..061befb004c2 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -36,7 +36,7 @@ char _license[] SEC("license") = "GPL";
SEC("?lsm.s/bpf")
__failure __msg("cannot pass in dynptr at an offset=-8")
-int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val;
@@ -46,7 +46,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
-int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val = 0;
@@ -55,7 +55,7 @@ int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("lsm.s/bpf")
-int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *trusted_keyring;
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index c73776990ae3..1f7e1e59b073 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -14,16 +14,16 @@
char _license[] SEC("license") = "GPL";
__u32 monitored_pid;
-__u32 key_serial;
+__s32 key_serial;
__u32 key_id;
__u64 flags;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *bkey;
__u32 pid;
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index fb07f5773888..03d7f89787a1 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp,
__u32 raw_tp_bare_write_sz = 0;
-SEC("raw_tp/bpf_testmod_test_write_bare")
+SEC("raw_tp/bpf_testmod_test_write_bare_tp")
int BPF_PROG(handle_raw_tp_bare,
struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
{
@@ -31,7 +31,7 @@ int raw_tp_writable_bare_in_val = 0;
int raw_tp_writable_bare_early_ret = 0;
int raw_tp_writable_bare_out_val = 0;
-SEC("raw_tp.w/bpf_testmod_test_writable_bare")
+SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp")
int BPF_PROG(handle_raw_tp_writable_bare,
struct bpf_testmod_test_writable_ctx *writable)
{
@@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
bpf_probe_read_kernel(&buf, 8, ret);
bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
- *(volatile long long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..5edf3cdc213d 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-#include <stdbool.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
index a1ccc831c882..05ac9410cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -8,6 +8,7 @@
#include <bpf/bpf_tracing.h>
int valid = 0;
+int run_cnt = 0;
int required_size_out = 0;
int written_stack_out = 0;
int written_global_out = 0;
@@ -24,6 +25,8 @@ int perf_branches(void *ctx)
__u64 entries[4 * 3] = {0};
int required_size, written_stack, written_global;
+ ++run_cnt;
+
/* write to stack */
written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
/* ignore spurious events */
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
new file mode 100644
index 000000000000..c855f8f87eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
new file mode 100644
index 000000000000..ae227930c73c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct timer_val {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+} timer_prealloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} timer_no_prealloc SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 2fdc44e76624..89b0cd5a3e06 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -7,7 +7,7 @@
char tp_name[128];
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
new file mode 100644
index 000000000000..ff4aa67ddacc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(map_flags, BPF_F_RB_OVERWRITE);
+} ringbuf SEC(".maps");
+
+int pid;
+
+const volatile unsigned long LEN1;
+const volatile unsigned long LEN2;
+const volatile unsigned long LEN3;
+const volatile unsigned long LEN4;
+const volatile unsigned long LEN5;
+
+long reserve1_fail = 0;
+long reserve2_fail = 0;
+long reserve3_fail = 0;
+long reserve4_fail = 0;
+long reserve5_fail = 0;
+
+unsigned long avail_data = 0;
+unsigned long ring_size = 0;
+unsigned long cons_pos = 0;
+unsigned long prod_pos = 0;
+unsigned long over_pos = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_overwrite_ringbuf(void *ctx)
+{
+ char *rec1, *rec2, *rec3, *rec4, *rec5;
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0);
+ if (!rec1) {
+ reserve1_fail = 1;
+ return 0;
+ }
+
+ rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0);
+ if (!rec2) {
+ bpf_ringbuf_discard(rec1, 0);
+ reserve2_fail = 1;
+ return 0;
+ }
+
+ rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0);
+ /* expect failure */
+ if (!rec3) {
+ reserve3_fail = 1;
+ } else {
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ bpf_ringbuf_discard(rec3, 0);
+ return 0;
+ }
+
+ rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0);
+ if (!rec4) {
+ reserve4_fail = 1;
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ return 0;
+ }
+
+ bpf_ringbuf_submit(rec1, 0);
+ bpf_ringbuf_submit(rec2, 0);
+ bpf_ringbuf_submit(rec4, 0);
+
+ rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0);
+ if (!rec5) {
+ reserve5_fail = 1;
+ return 0;
+ }
+
+ for (int i = 0; i < LEN3; i++)
+ rec5[i] = 0xdd;
+
+ bpf_ringbuf_submit(rec5, 0);
+
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+ over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVERWRITE_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
index 350513c0e4c9..f063a0013f85 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx)
if (cur_pid != pid)
return 0;
- sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample1)
return 0;
/* first one can pass */
- sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample2) {
bpf_ringbuf_discard(sample1, 0);
__sync_fetch_and_add(&discarded, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5eb25c6ad75b..a5be3267dbb0 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
-#include <stdlib.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
new file mode 100644
index 000000000000..6a612cf168d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+
+const char xattr_foo[] = "security.bpf.foo";
+const char xattr_bar[] = "security.bpf.bar";
+static const char xattr_selinux[] = "security.selinux";
+char value_bar[] = "world";
+char read_value[32];
+
+bool set_security_bpf_bar_success;
+bool remove_security_bpf_bar_success;
+bool set_security_selinux_fail;
+bool remove_security_selinux_fail;
+
+char name_buf[32];
+
+static inline bool name_match_foo(const char *name)
+{
+ bpf_probe_read_kernel(name_buf, sizeof(name_buf), name);
+
+ return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo);
+}
+
+/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
+
+bool locked_set_security_bpf_bar_success;
+bool locked_remove_security_bpf_bar_success;
+bool locked_set_security_selinux_fail;
+bool locked_remove_security_selinux_fail;
+
+/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked.
+ * It not necessary to differentiate the _locked version and the
+ * not-_locked version in the BPF program. The verifier will fix them up
+ * properly.
+ */
+SEC("lsm.s/inode_setxattr")
+int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ locked_set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ locked_set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ locked_remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ locked_remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
index 8ef6b39335b6..34b30e2603f0 100644
--- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
__u32 monitored_pid;
char sig[MAX_SIG_SIZE];
__u32 sig_size;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
index 2796dd8545eb..1c7941a4ad00 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -1,8 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 ByteDance */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
@@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb)
change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
return SK_PASS;
} else if (data[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return SK_PASS;
}
return SK_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..83df4919c224
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+int pop_start;
+int pop_end;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ __u32 one = 1;
+
+ if (skb->len > verdict_max_size)
+ return SK_PASS;
+
+ return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+ /* agreement with the test program on a 4-byte size header
+ * and 6-byte body.
+ */
+ if (skb->len < 4) {
+ /* need more header to determine full length */
+ return 0;
+ }
+ /* return full length decoded from header.
+ * the return value may be larger than skb->len which
+ * means framework must wait body coming.
+ */
+ return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 1c8b678e2e9a..f678ee6bd7ea 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx)
return ret;
}
+int __noinline
+global_subprog_int(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog_int(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_helper_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_kfunc_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog_calling_sleepable_global(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
index e2a21fbd4e44..dcac69f5928a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c
new file mode 100644
index 000000000000..fffafc013044
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_local_data.bpf.h"
+
+struct tld_keys {
+ tld_key_t value0;
+ tld_key_t value1;
+ tld_key_t value2;
+ tld_key_t value_not_exist;
+};
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+int test_value0;
+int test_value1;
+struct test_tld_struct test_value2;
+
+SEC("syscall")
+int task_main(void *ctx)
+{
+ struct tld_object tld_obj;
+ struct test_tld_struct *struct_p;
+ struct task_struct *task;
+ int err, *int_p;
+
+ task = bpf_get_current_task_btf();
+ err = tld_object_init(task, &tld_obj);
+ if (err)
+ return 1;
+
+ int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int));
+ if (int_p)
+ test_value0 = *int_p;
+ else
+ return 2;
+
+ int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int));
+ if (int_p)
+ test_value1 = *int_p;
+ else
+ return 3;
+
+ struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct));
+ if (struct_p)
+ test_value2 = *struct_p;
+ else
+ return 4;
+
+ int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int));
+ if (int_p)
+ return 5;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 7e750309ce27..0b74b8bd22e8 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -49,7 +49,7 @@ out:
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct cgroup *cgrp = NULL;
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
index 28edafe803f0..fcba8299f0bc 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
long change_tail_ret = 1;
@@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb)
bpf_skb_change_tail(skb, len, 0);
return TCX_PASS;
} else if (payload[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return TCX_PASS;
} else if (payload[0] == 'Z') { /* Zero */
change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index 950a70b61e74..4f6f03122d61 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -14,7 +14,6 @@
#define TIME_HORIZON_NS (2000 * 1000 * 1000)
#define NS_PER_SEC 1000000000
#define ECN_HORIZON_NS 5000000
-#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
struct {
@@ -24,12 +23,13 @@ struct {
__uint(max_entries, 1);
} flow_map SEC(".maps");
+__uint64_t target_rate;
+
static inline int throttle_flow(struct __sk_buff *skb)
{
int key = 0;
uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
- uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
- THROTTLE_RATE_BPS;
+ uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / target_rate;
uint64_t now = bpf_ktime_get_ns();
uint64_t tstamp, next_tstamp = 0;
@@ -70,7 +70,7 @@ static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
if ((void *)(tcp + 1) > data_end)
return TC_ACT_SHOT;
- if (tcp->dest == bpf_htons(9000))
+ if (tcp->source == bpf_htons(9000))
return throttle_flow(skb);
return TC_ACT_OK;
@@ -99,7 +99,8 @@ static inline int handle_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+SEC("tc")
+int tc_prog(struct __sk_buff *skb)
{
if (skb->protocol == bpf_htons(ETH_P_IP))
return handle_ipv4(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 404124a93892..7330c61b5730 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -2,23 +2,11 @@
/* In-place tunneling */
-#include <stdbool.h>
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/mpls.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
-#include <linux/types.h>
+#include <vmlinux.h>
-#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
#include "bpf_compiler.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -27,6 +15,14 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define ETH_P_MPLS_UC 0x8847
+#define ETH_P_TEB 0x6558
+
+#define MPLS_LS_S_MASK 0x00000100
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) \
+ (((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
#define UDP_PORT 5555
@@ -36,10 +32,9 @@ static const int cfg_udp_src = 20000;
#define EXTPROTO_VXLAN 0x1
-#define VXLAN_N_VID (1u << 24)
-#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
-#define VXLAN_FLAGS 0x8
-#define VXLAN_VNI 1
+#define VXLAN_FLAGS bpf_htonl(1<<27)
+#define VNI_ID 1
+#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST 60
@@ -48,12 +43,6 @@ static const int cfg_udp_src = 20000;
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
-
-struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
-} __attribute__((packed));
-
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -94,8 +83,8 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto, __u16 ext_proto)
{
+ struct iphdr iph_inner = {0};
__u16 udp_dst = UDP_PORT;
- struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
@@ -122,7 +111,6 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
/* Derive the IPv4 header fields from the IPv6 header */
- memset(&iph_inner, 0, sizeof(iph_inner));
iph_inner.version = 4;
iph_inner.ihl = 5;
iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
@@ -210,7 +198,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
- vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+ vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@@ -340,7 +328,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
- vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+ vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@@ -372,8 +360,8 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
+ struct v6hdr h_outer = {0};
struct iphdr iph_inner;
- struct v6hdr h_outer;
struct tcphdr tcph;
struct ethhdr eth;
__u64 flags;
@@ -400,13 +388,12 @@ static int encap_ipv6_ipip6(struct __sk_buff *skb)
return TC_ACT_SHOT;
/* prepare new outer network header */
- memset(&h_outer.ip, 0, sizeof(h_outer.ip));
h_outer.ip.version = 6;
h_outer.ip.hop_limit = iph_inner.ttl;
- h_outer.ip.saddr.s6_addr[1] = 0xfd;
- h_outer.ip.saddr.s6_addr[15] = 1;
- h_outer.ip.daddr.s6_addr[1] = 0xfd;
- h_outer.ip.daddr.s6_addr[15] = 2;
+ h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
+ h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
h_outer.ip.payload_len = iph_inner.tot_len;
h_outer.ip.nexthdr = IPPROTO_IPIP;
@@ -431,7 +418,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}
-SEC("encap_ipip_none")
+SEC("tc")
int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -440,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_none")
+SEC("tc")
int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -449,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_mpls")
+SEC("tc")
int __encap_gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -458,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_eth")
+SEC("tc")
int __encap_gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -467,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_none")
+SEC("tc")
int __encap_udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -476,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_mpls")
+SEC("tc")
int __encap_udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -485,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_eth")
+SEC("tc")
int __encap_udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -494,7 +481,7 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_vxlan_eth")
+SEC("tc")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -505,7 +492,7 @@ int __encap_vxlan_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_sit_none")
+SEC("tc")
int __encap_sit_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -514,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6tnl_none")
+SEC("tc")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -523,7 +510,7 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ipip6_none")
+SEC("tc")
int __encap_ipip6_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -532,7 +519,7 @@ int __encap_ipip6_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_none")
+SEC("tc")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -541,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_mpls")
+SEC("tc")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -550,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_eth")
+SEC("tc")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -559,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_none")
+SEC("tc")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -568,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_mpls")
+SEC("tc")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -577,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_eth")
+SEC("tc")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -586,7 +573,7 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6vxlan_eth")
+SEC("tc")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -693,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb)
iph_outer.nexthdr);
}
-SEC("decap")
+SEC("tc")
int decap_f(struct __sk_buff *skb)
{
switch (skb->protocol) {
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index eb5cca1fce16..7d5293de1952 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
(ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
goto err;
- if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
goto err;
if (!ctx->attrs.tstamp_ok)
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 5f4e87ee949a..1ecdf4c54de4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -14,10 +14,7 @@
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
-
-#ifndef sizeof_field
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#endif
+#include "bpf_misc.h"
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index 540181c115a8..ef00d38b0a8d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -23,7 +23,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(max_entries, 2);
__type(key, int);
__type(value, __u32);
} perf_event_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
index 39ff06f2c834..cf0547a613ff 100644
--- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -6,14 +6,14 @@
#include "../test_kmods/bpf_testmod.h"
#include "bpf_misc.h"
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
return nullable_ctx->len;
}
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
if (nullable_ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
index 896c88a4960d..12f4065fca20 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -59,3 +59,41 @@ int BPF_UPROBE(test4)
test4_result = 1;
return 0;
}
+
+#if defined(__TARGET_ARCH_x86)
+struct pt_regs regs;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ax = regs.ax;
+ ctx->cx = regs.cx;
+ ctx->dx = regs.dx;
+ ctx->r8 = regs.r8;
+ ctx->r9 = regs.r9;
+ ctx->r10 = regs.r10;
+ ctx->r11 = regs.r11;
+ ctx->di = regs.di;
+ ctx->si = regs.si;
+ return 0;
+}
+
+unsigned long ip;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change_ip)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ip = ip;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 505aab9a5234..a78c87537b07 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -11,6 +11,7 @@ int usdt0_called;
u64 usdt0_cookie;
int usdt0_arg_cnt;
int usdt0_arg_ret;
+int usdt0_arg_size;
SEC("usdt")
int usdt0(struct pt_regs *ctx)
@@ -26,6 +27,7 @@ int usdt0(struct pt_regs *ctx)
usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
/* should return -ENOENT for any arg_num */
usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32());
return 0;
}
@@ -34,6 +36,7 @@ u64 usdt3_cookie;
int usdt3_arg_cnt;
int usdt3_arg_rets[3];
u64 usdt3_args[3];
+int usdt3_arg_sizes[3];
SEC("usdt//proc/self/exe:test:usdt3")
int usdt3(struct pt_regs *ctx)
@@ -50,12 +53,15 @@ int usdt3(struct pt_regs *ctx)
usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
usdt3_args[0] = (int)tmp;
+ usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0);
usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
usdt3_args[1] = (long)tmp;
+ usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1);
usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
usdt3_args[2] = (uintptr_t)tmp;
+ usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2);
return 0;
}
@@ -64,12 +70,15 @@ int usdt12_called;
u64 usdt12_cookie;
int usdt12_arg_cnt;
u64 usdt12_args[12];
+int usdt12_arg_sizes[12];
SEC("usdt//proc/self/exe:test:usdt12")
int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
long a6, __u64 a7, uintptr_t a8, int a9, short a10,
short a11, signed char a12)
{
+ int i;
+
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
@@ -90,6 +99,42 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
usdt12_args[9] = a10;
usdt12_args[10] = a11;
usdt12_args[11] = a12;
+
+ bpf_for(i, 0, 12) {
+ usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i);
+ }
+
+ return 0;
+}
+
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+short usdt_sib_arg;
+int usdt_sib_arg_size;
+
+/*
+ * usdt_sib is only tested on x86-related architectures, so it requires
+ * manual attach since auto-attach will panic tests under other architectures
+ */
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index 12034a73ee2d..ff8d755548b9 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -17,7 +17,7 @@
#define MAX_SIG_SIZE 1024
__u32 monitored_pid;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
__u64 system_keyring_id;
struct data {
@@ -37,7 +37,7 @@ struct {
char _license[] SEC("license") = "GPL";
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_dynptr data_ptr, sig_ptr;
struct data *data_val;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index dc74d8cf9e3f..5904f45cfbc4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
/* Data length determine test case */
if (data_len == 54) { /* sizeof(pkt_v4) */
- offset = 4096; /* test too large offset */
+ offset = 4096; /* test too large offset, 4k page size */
+ } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */
+ offset = 65536; /* test too large offset, 64k page size */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
@@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
offset = 10;
} else if (data_len == 9001) {
offset = 4096;
+ } else if (data_len == 90000) {
+ offset = 10; /* test a small offset, 64k page size */
+ } else if (data_len == 90001) {
+ offset = 65536; /* test too large offset, 64k page size */
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
new file mode 100644
index 000000000000..814e2a980e97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("xdp")
+int xdp_devmap(struct xdp_md *ctx)
+{
+ return ctx->egress_ifindex;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} xdp_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&xdp_devmap,
+ },
+};
+
+SEC("xdp")
+int xdp_entry(struct xdp_md *ctx)
+{
+ bpf_tail_call(ctx, &xdp_map, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fe2d71ae0e71..0a0f371a2dec 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,40 +1,320 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
+
+#define META_SIZE 32
-#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
-#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
+ *
+ * The XDP program extracts a fixed-size payload following the Ethernet header
+ * and stores it as packet metadata to test the driver's metadata support. The
+ * TC program then verifies if the passed metadata is correct.
+ */
+
+bool test_pass;
+
+static const __u8 smac_want[ETH_ALEN] = {
+ 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
+};
+
+static const __u8 meta_want[META_SIZE] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+static bool check_smac(const struct ethhdr *eth)
+{
+ return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
+}
+
+static bool check_metadata(const char *file, int line, __u8 *meta_have)
+{
+ if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return true;
+
+ bpf_stream_printk(BPF_STREAM_STDERR,
+ "FAIL:%s:%d: metadata mismatch\n"
+ " have:\n %pI6\n %pI6\n"
+ " want:\n %pI6\n %pI6\n",
+ file, line,
+ &meta_have[0x00], &meta_have[0x10],
+ &meta_want[0x00], &meta_want[0x10]);
+ return false;
+}
+
+#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
+
+static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
+{
+ __u8 *data_meta = ctx_ptr(skb, data_meta);
+ __u8 *data = ctx_ptr(skb, data);
+
+ return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
+}
+
+#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
- __u8 *data, *data_meta, *data_end;
- __u32 diff = 0;
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ __u8 *data = ctx_ptr(ctx, data);
- data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
- data = ctx_ptr(ctx, data);
+ if (meta_have + META_SIZE > data)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+
+ if (!check_metadata(meta_have))
+ goto out;
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
return TC_ACT_SHOT;
- diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
- diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
- return diff ? TC_ACT_SHOT : TC_ACT_OK;
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writeable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ dst = meta_have;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ goto out;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (!check_smac(eth))
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
}
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
- __u8 *data, *data_meta, *data_end;
+ __u8 *data, *data_meta, *data_end, *payload;
+ struct ethhdr *eth;
int ret;
- ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
if (ret < 0)
return XDP_DROP;
@@ -42,12 +322,352 @@ int ing_xdp(struct xdp_md *ctx)
data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ eth = (struct ethhdr *)data;
+ payload = data + sizeof(struct ethhdr);
+
+ if (payload + META_SIZE > data_end ||
+ data_meta + META_SIZE > data)
return XDP_DROP;
- __builtin_memcpy(data_meta, data, ETH_ALEN);
+ /* The Linux networking stack may send other packets on the test
+ * interface that interfere with the test. Just drop them.
+ * The test packets can be recognized by their source MAC address.
+ */
+ if (!check_smac(eth))
+ return XDP_DROP;
+
+ __builtin_memcpy(data_meta, payload, META_SIZE);
return XDP_PASS;
}
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _payload_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _metadata_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ *meta_have = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates a r/w slice to packet _payload_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _metadata_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _payload_ using dynptr_write helper and metadata
+ * remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _metadata_ using dynptr_write helper and
+ * metadata remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to metadata will unclone the packet */
+ bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
+ * secondary tag push triggers an actual MAC header modification.
+ */
+ err = bpf_skb_vlan_push(ctx, 0, 42);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_push(ctx, 0, 207);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_adjust_room(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Grow a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Shrink a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Grow a 256 byte hole to trigger head reallocation */
+ err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_head_tail(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Reserve 1 extra in the front for packet data */
+ err = bpf_skb_change_head(ctx, 1, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 256 extra bytes in the front to trigger head reallocation */
+ err = bpf_skb_change_head(ctx, 256, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 4k extra bytes in the back to trigger head reallocation */
+ err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_proto(struct __sk_buff *ctx)
+{
+ int err;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
new file mode 100644
index 000000000000..c41a21413eaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int xdpf_sz;
+int sinfo_sz;
+int data_len;
+int pull_len;
+
+#define XDP_PACKET_HEADROOM 256
+
+SEC("xdp.frags")
+int xdp_find_sizes(struct xdp_md *ctx)
+{
+ xdpf_sz = sizeof(struct xdp_frame);
+ sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
+ (ctx->data_end - ctx->data);
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_pull_data_prog(struct xdp_md *ctx)
+{
+ __u8 *data_end = (void *)(long)ctx->data_end;
+ __u8 *data = (void *)(long)ctx->data;
+ __u8 *val_p;
+ int err;
+
+ if (data_len != data_end - data)
+ return XDP_DROP;
+
+ err = bpf_xdp_pull_data(ctx, pull_len);
+ if (err)
+ return XDP_DROP;
+
+ val_p = (void *)(long)ctx->data + 1024;
+ if (val_p + 1 > (void *)(long)ctx->data_end)
+ return XDP_DROP;
+
+ if (*val_p != 0xbb)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index a7588302268d..a80cc5f2f4f2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -102,8 +102,8 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_drop_vlan_4011(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -144,8 +144,8 @@ Load prog with ip tool:
/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
#define TO_VLAN 0
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_change(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx)
#endif
#define VLAN_HDR_SZ 4 /* bytes */
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -224,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data)
p[1] = p[0];
}
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer2(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -254,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx)
* The TC-clsact eBPF programs (currently) need to be attach via TC commands
*/
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
+SEC("tc")
+int tc_vlan_push(struct __sk_buff *ctx)
{
bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c
new file mode 100644
index 000000000000..19180a455f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define CLOCK_MONOTONIC 1
+
+int preempt_count;
+int in_interrupt;
+int in_interrupt_cb;
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer)
+{
+ preempt_count = get_preempt_count();
+ in_interrupt_cb = bpf_in_interrupt();
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_timer_interrupt)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&array, &key);
+ if (!timer)
+ return 0;
+
+ in_interrupt = bpf_in_interrupt();
+ bpf_timer_init(timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_in_interrupt);
+ bpf_timer_start(timer, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
index d41665d2ec8c..65e485c4468c 100644
--- a/tools/testing/selftests/bpf/progs/tracing_failure.c
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -18,3 +18,15 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
{
return 0;
}
+
+SEC("?fentry/__rcu_read_lock")
+int BPF_PROG(tracing_deny)
+{
+ return 0;
+}
+
+SEC("?fexit/do_exit")
+int BPF_PROG(fexit_noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index c435a3a8328a..d460732e2023 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
+long ut1_a_a, ut1_b, ut1_c;
+long ut2_a, ut2_b_a, ut2_b_b;
+
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
{
@@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
+SEC("fexit/bpf_testmod_test_union_arg_1")
+int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c)
+{
+ ut1_a_a = a.arg.a;
+ ut1_b = b;
+ ut1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_2")
+int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b)
+{
+ ut2_a = a;
+ ut2_b_a = b.arg.a;
+ ut2_b_b = b.arg.b;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 044a6d78923e..2898b3749d07 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -42,12 +42,14 @@ int bench_trigger_uprobe_multi(void *ctx)
const volatile int batch_iters = 0;
SEC("?raw_tp")
-int trigger_count(void *ctx)
+int trigger_kernel_count(void *ctx)
{
int i;
- for (i = 0; i < batch_iters; i++)
+ for (i = 0; i < batch_iters; i++) {
inc_counter();
+ bpf_get_numa_node_id();
+ }
return 0;
}
@@ -97,6 +99,12 @@ int bench_trigger_kprobe_multi(void *ctx)
return 0;
}
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
@@ -104,6 +112,12 @@ int bench_trigger_kretprobe_multi(void *ctx)
return 0;
}
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
index 8a4fa6c7ef59..e08c31669e5a 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
@@ -7,8 +7,8 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
-SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger")
-int uretprobe(struct pt_regs *ctx)
+SEC("uprobe")
+int probe(struct pt_regs *ctx)
{
__builtin_memcpy(&regs, ctx, sizeof(regs));
return 0;
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
index 0d7f1a7db2e2..915d38591bf6 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
#include <string.h>
struct pt_regs regs;
@@ -8,10 +10,64 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
int executed = 0;
+int pid;
+
+SEC("uprobe")
+int BPF_UPROBE(test_uprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(test_uretprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.multi")
+int test_uprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
SEC("uretprobe.multi")
-int test(struct pt_regs *regs)
+int test_uretprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int test_uprobe_session(struct pt_regs *ctx)
{
- executed = 1;
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("usdt")
+int test_usdt(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
index 9fdcf396b8f4..a2951e2f1711 100644
--- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -26,8 +26,8 @@ int usdt_len;
SEC("uprobe//proc/self/exe:target_1")
int BPF_UPROBE(uprobe_1)
{
- /* target_1 is recursive wit depth of 2, so we capture two separate
- * stack traces, depending on which occurence it is
+ /* target_1 is recursive with depth of 2, so we capture two separate
+ * stack traces, depending on which occurrence it is
*/
static bool recur = false;
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
index e97e518516b6..2b4fdca162be 100644
--- a/tools/testing/selftests/bpf/progs/verifier_and.c
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -85,8 +85,14 @@ l0_%=: r0 = r0; \
SEC("socket")
__description("check known subreg with unknown reg")
-__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w0 < 0x1 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */
+__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void known_subreg_with_unknown_reg(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 67509c5d3982..7f4827eede3c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -3,6 +3,7 @@
#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
return 0;
}
+SEC("syscall")
+__success __retval(0)
+int basic_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there's only 2 of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if ((u64)page)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Reserve the same page twice, should return -EBUSY. */
+SEC("syscall")
+__success __retval(0)
+int reserve_twice(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret != -EBUSY)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Try to reserve past the end of the arena. */
+SEC("syscall")
+__success __retval(0)
+int reserve_invalid_region(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index f94f30cf1bb8..f19e15400b3e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
return 0;
}
+/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */
+SEC("syscall")
+__success __retval(0)
+int access_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ const size_t len = 4;
+ int ret, i;
+
+ /* Get a separate region of the arena. */
+ page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base, len);
+ if (ret)
+ return 1;
+
+ /* Try to dirty reserved memory. */
+ for (i = 0; i < len && can_loop; i++)
+ *page = 0x5a;
+
+ for (i = 0; i < len && can_loop; i++) {
+ page = (volatile char __arena *)(base + i * PAGE_SIZE);
+
+ /*
+ * Error out in case either the write went through,
+ * or the address has random garbage.
+ */
+ if (*page == 0x5a)
+ return 2 + 2 * i;
+
+ if (*page)
+ return 2 + 2 * i + 1;
+ }
+#endif
+ return 0;
+}
+
+/* Try to allocate a region overlapping with a reservation. */
+SEC("syscall")
+__success __retval(0)
+int request_partially_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
+ if ((u64)page != 0ULL)
+ return 2;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int free_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *addr;
+ char __arena *page;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
+
+ page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
+ if (ret)
+ return 2;
+
+ /*
+ * Reserved and allocated pages should be interchangeable for
+ * bpf_arena_free_pages(). Free a reserved and an allocated
+ * page with a single call.
+ */
+ bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2);
+
+ /* The free call above should have succeeded, so this allocation should too. */
+ page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 3;
+#endif
+ return 0;
+}
+
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
#define PAGE_CNT 100
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
@@ -142,6 +240,7 @@ int big_alloc2(void *ctx)
return 5;
bpf_arena_free_pages(&arena, (void __arena *)pg, 2);
page[i] = NULL;
+ barrier();
page[i + 1] = NULL;
cond_break;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
index 29eb9568633f..0a187ff725cc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_array_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void)
return val->index;
}
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+ __u64 key = 0xFFFFFFFFFFFFFFFF;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_hash_48b, &key);
+ if (val)
+ return val->index;
+
+ return 42;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
new file mode 100644
index 000000000000..7efa9521105e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Timer tests */
+
+struct timer_elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer_elem);
+} timer_map SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ u32 data;
+ /* Timer callbacks are never sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_non_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+/* Workqueue tests */
+
+struct wq_elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct wq_elem);
+} wq_map SEC(".maps");
+
+static int wq_cb(void *map, int *key, void *value)
+{
+ u32 data;
+ /* Workqueue callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int wq_non_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int wq_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+/* Task work tests */
+
+struct task_work_elem {
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct task_work_elem);
+} task_work_map SEC(".maps");
+
+static int task_work_cb(struct bpf_map *map, void *key, void *value)
+{
+ u32 data;
+ /* Task work callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int task_work_non_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int task_work_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 0eb33bb801b5..411a18437d7e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -2,6 +2,7 @@
/* Converted from tools/testing/selftests/bpf/verifier/bounds.c */
#include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -620,8 +621,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test1")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test1(void)
{
asm volatile (" \
@@ -643,8 +650,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test2(void)
{
asm volatile (" \
@@ -691,9 +704,14 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 0, reg xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_1(void)
{
asm volatile (" \
@@ -719,9 +737,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 = 0, reg32 xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_1(void)
{
asm volatile (" \
@@ -747,9 +770,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 2, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 > 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_2_reg_xor_3(void)
{
asm volatile (" \
@@ -829,9 +857,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg > 0, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_3(void)
{
asm volatile (" \
@@ -858,9 +891,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 > 0, reg32 xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_3(void)
{
asm volatile (" \
@@ -888,7 +926,7 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for non const xor src dst")
__success __log_level(2)
-__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_xor_src_dst(void)
{
asm volatile (" \
@@ -909,7 +947,7 @@ __naked void non_const_xor_src_dst(void)
SEC("socket")
__description("bounds check for non const or src dst")
__success __log_level(2)
-__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_or_src_dst(void)
{
asm volatile (" \
@@ -930,7 +968,7 @@ __naked void non_const_or_src_dst(void)
SEC("socket")
__description("bounds check for non const mul regs")
__success __log_level(2)
-__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
__naked void non_const_mul_regs(void)
{
asm volatile (" \
@@ -1028,7 +1066,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
-__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1203,7 +1241,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("multiply mixed sign bounds. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
__naked void mult_mixed0_sign(void)
{
asm volatile (
@@ -1226,7 +1264,7 @@ __naked void mult_mixed0_sign(void)
SEC("tc")
__description("multiply mixed sign bounds. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)")
__naked void mult_mixed1_sign(void)
{
asm volatile (
@@ -1249,7 +1287,7 @@ __naked void mult_mixed1_sign(void)
SEC("tc")
__description("multiply negative bounds")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
__naked void mult_sign_bounds(void)
{
asm volatile (
@@ -1273,7 +1311,7 @@ __naked void mult_sign_bounds(void)
SEC("tc")
__description("multiply bounds that don't cross signed boundary")
__success __log_level(2)
-__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
__naked void mult_no_sign_crossing(void)
{
asm volatile (
@@ -1293,7 +1331,7 @@ __naked void mult_no_sign_crossing(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_unsign_ovf(void)
{
asm volatile (
@@ -1315,7 +1353,7 @@ __naked void mult_unsign_ovf(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_sign_ovf(void)
{
asm volatile (
@@ -1334,4 +1372,495 @@ __naked void mult_sign_ovf(void)
__imm(bpf_skb_store_bytes)
: __clobber_all);
}
+
+SEC("socket")
+__description("64-bit addition, all outcomes overflow")
+__success __log_level(2)
+__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__retval(0)
+__naked void add64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 0xa000000000000000 ll;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()")
+__retval(0)
+__naked void add64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 2;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition overflow, all outcomes overflow")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 0xa0000000;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 2;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, all outcomes underflow")
+__success __log_level(2)
+__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)")
+__retval(0)
+__naked void sub64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 0x8000000000000000 ll;"
+ "r1 |= r2;"
+ "r3 = 0;"
+ "r3 -= r1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
+__retval(0)
+__naked void sub64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r3 = r0;"
+ "r2 = 1;"
+ "r3 -= r2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction overflow, all outcomes underflow")
+__success __log_level(2)
+__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w1 = w0;"
+ "w2 = 0x80000000;"
+ "w1 |= w2;"
+ "w3 = 0;"
+ "w3 -= w1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w3 = w0;"
+ "w2 = 1;"
+ "w3 -= w2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead branch on jset, does not result in invariants violation error")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_range_analysis(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 & 0xffffffff goto +0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the negative side. At instruction 7, the ranges look as follows:
+ *
+ * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX
+ * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxxxx] [xxxxxxxxxxxx|
+ * 0 smax=0xeffffeee smin=-655 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX
+ * | [xxx] |
+ * |----------------------------|------------------------------|
+ * | [xxx] |
+ * 0 s64=[-655;-146] -1
+ *
+ * Without the deduction cross sign boundary, we end up with an invariant
+ * violation error.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, negative overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))")
+__retval(0)
+__naked void bounds_deduct_negative_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w3 = w0; \
+ w6 = (s8)w0; \
+ r0 = (s8)r0; \
+ if w6 >= 0xf0000000 goto l0_%=; \
+ r0 += r6; \
+ r6 += 400; \
+ r0 -= r6; \
+ if r3 < r0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the positive side. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff00 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0;127] U64_MAX
+ * [xxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * [xxxxxxxx] |
+ * 0 s64=[0;127] -1
+ *
+ * Without the deduction cross sign boundary, the program is rejected due to
+ * the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, positive overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__retval(0)
+__naked void bounds_deduct_positive_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff00; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* This test is the same as above, but the s64 and u64 ranges overlap in two
+ * places. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff80 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and
+ * the program should fail due to the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, two overlaps")
+__failure __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("frame pointer is read only")
+__naked void bounds_deduct_two_overlaps(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff80; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jne branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jne_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 != r0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jeq branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jeq_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 == r0 goto +1; \
+ exit; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("conditional jump on same register, branch taken")
+__not_msg("20: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void condition_jump_on_same_register(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w8 = 0x80000000; \
+ r0 &= r8; \
+ if r0 == r0 goto +1; \
+ goto l1_%=; \
+ if r0 >= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s>= r0 goto +1; \
+ goto l1_%=; \
+ if r0 <= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s<= r0 goto +1; \
+ goto l1_%=; \
+ if r0 != r0 goto l1_%=; \
+ if r0 > r0 goto l1_%=; \
+ if r0 s> r0 goto l1_%=; \
+ if r0 < r0 goto l1_%=; \
+ if r0 s< r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, constant value branch taken")
+__not_msg("7: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_1(void *ctx)
+{
+ asm volatile(" \
+ r0 = 0; \
+ if r0 & r0 goto l1_%=; \
+ r0 = 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value branch taken")
+__not_msg("12: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_2(void *ctx)
+{
+ asm volatile(" \
+ /* range [1;2] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 += 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+ /* range [-2;-1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 2; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 1")
+__msg("3: (b7) r0 = 0 {{.*}} R0=0")
+__msg("5: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_3(void *ctx)
+{
+ asm volatile(" \
+ /* range [0;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 2")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_4(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;0] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 3")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_5(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x2; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
index c506afbdd936..260a6df264e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -159,13 +159,16 @@ __failure_unpriv
__naked void deducing_bounds_from_const_10(void)
{
asm volatile (" \
+ r6 = r1; \
r0 = 0; \
if r0 s<= 0 goto l0_%=; \
-l0_%=: /* Marks reg as unknown. */ \
- r0 = -r0; \
- r0 -= r1; \
+l0_%=: /* Marks r0 as unknown. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 -= r6; \
exit; \
-" ::: __clobber_all);
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index 5094c288cfd7..fb4fa465d67c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -12,7 +12,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 8")
__xlated("4: r5 = 5")
-__xlated("5: w0 = ")
+__xlated("5: r0 = ")
__xlated("6: r0 = &(void __percpu *)(r0)")
__xlated("7: r0 = *(u32 *)(r0 +0)")
__xlated("8: exit")
@@ -620,23 +620,66 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void)
SEC("raw_tp")
__arch_x86_64
-__log_level(4) __msg("stack depth 16")
-/* may_goto counter at -16 */
-__xlated("0: *(u64 *)(r10 -16) =")
-__xlated("1: r1 = 1")
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
__xlated("...")
-__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("...")
/* may_goto expansion starts */
-__xlated("5: r11 = *(u64 *)(r10 -16)")
-__xlated("6: if r11 == 0x0 goto pc+3")
-__xlated("7: r11 -= 1")
-__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("6: r11 = *(u64 *)(r10 -24)")
+__xlated("7: if r11 == 0x0 goto pc+6")
+__xlated("8: r11 -= 1")
+__xlated("9: if r11 != 0x0 goto pc+2")
+__xlated("10: r11 = -24")
+__xlated("11: call unknown")
+__xlated("12: *(u64 *)(r10 -24) = r11")
/* may_goto expansion ends */
-__xlated("9: *(u64 *)(r10 -8) = r1")
-__xlated("10: exit")
+__xlated("13: *(u64 *)(r10 -8) = r1")
+__xlated("14: exit")
+__success
+__naked void may_goto_interaction_x86_64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_arm64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("3: call bpf_get_smp_processor_id")
+/* may_goto expansion starts */
+__xlated("4: r11 = *(u64 *)(r10 -24)")
+__xlated("5: if r11 == 0x0 goto pc+6")
+__xlated("6: r11 -= 1")
+__xlated("7: if r11 != 0x0 goto pc+2")
+__xlated("8: r11 = -24")
+__xlated("9: call unknown")
+__xlated("10: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("11: *(u64 *)(r10 -8) = r1")
+__xlated("12: exit")
__success
-__naked void may_goto_interaction(void)
+__naked void may_goto_interaction_arm64(void)
{
asm volatile (
"r1 = 1;"
@@ -666,7 +709,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 32+0")
__xlated("2: r1 = 1")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* bpf_loop params setup */
@@ -715,7 +758,7 @@ __arch_x86_64
__log_level(4) __msg("stack depth 40+0")
/* call bpf_get_smp_processor_id */
__xlated("2: r1 = 42")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* call bpf_get_prandom_u32 */
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index 28b939572cda..03942cec07e5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void)
" ::: __clobber_all);
}
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index a83809a1dbbf..5ebf7d9bcc55 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/ctx.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -218,4 +218,78 @@ __naked void null_check_8_null_bind(void)
: __clobber_all);
}
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
+#define unaligned_access(type, ctx, field) \
+ SEC(type) \
+ __description("unaligned access on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void unaligned_ctx_access_##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sizeof_field(struct ctx, field) * 8), \
+ __imm_const(off, offsetof(struct ctx, field) + 1) \
+ : __clobber_all); \
+ }
+
+unaligned_access("flow_dissector", __sk_buff, data);
+unaligned_access("netfilter", bpf_nf_ctx, skb);
+
+#define padding_access(type, ctx, prev_field, sz) \
+ SEC(type) \
+ __description("access on " #ctx " padding after " #prev_field) \
+ __naked void padding_ctx_access_##ctx(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sz * 8), \
+ __imm_const(off, offsetofend(struct ctx, prev_field)) \
+ : __clobber_all); \
+ }
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4);
+
+__success
+padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("tc", __sk_buff, tstamp_type, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 28b602ac9cbe..911caa8fd1b7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
+#include <linux/if_ether.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \
: __clobber_all);
}
+#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
+ SEC(type) \
+ __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
+ __success __retval(retval) \
+ __linear_size(linear_sz) \
+ __naked void access_non_linear_##name(void) \
+ { \
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[skb_data]); \
+ r3 = *(u32*)(r1 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += %[offset]; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ r0 = 0; \
+ exit; \
+ l0_%=: r0 = 1; \
+ exit; \
+ " : \
+ : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
+ __imm_const(offset, off) \
+ : __clobber_all); \
+ }
+
+access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
+access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
+access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
+access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
+access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
+access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
+
+SEC("tc")
+__description("direct packet access: test37 (non-linear, linearized)")
+__success __retval(0)
+__linear_size(ETH_HLEN)
+__naked void access_non_linear_linearized(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 22; \
+ call %[bpf_skb_pull_data]; \
+ r2 = *(u32*)(r6 + %[skb_data]); \
+ r3 = *(u32*)(r6 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ exit; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_skb_pull_data),
+ __imm_const(skb_data, offsetof(struct __sk_buff, data)),
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
index 458984da804c..34e0c012ee76 100644
--- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -77,7 +77,7 @@ l0_%=: exit; \
SEC("tc")
__description("MOD32 overflow, check 1")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_1(void)
{
asm volatile (" \
@@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void)
SEC("tc")
__description("MOD32 overflow, check 2")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 4ab0ef18d7eb..1204fbc58178 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -179,4 +179,132 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
return subprog_trusted_acq_rel(task);
}
+__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 untrusted cannot be combined with any other tags")
+int untrusted_bad_tags(void *ctx)
+{
+ return subprog_untrusted_bad_tags(0);
+}
+
+struct local_type_wont_be_accepted {};
+
+__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted)
+{
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches")
+int untrusted_bad_type(void *ctx)
+{
+ return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0));
+}
+
+__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
+__msg("Validating subprog_untrusted() func#1...")
+__msg(": R1=untrusted_ptr_task_struct")
+int trusted_to_untrusted(void *ctx)
+{
+ return subprog_untrusted(bpf_get_current_task_btf());
+}
+
+char mem[16];
+u32 offset;
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted(void *ctx)
+{
+ /* untrusted to untrusted */
+ subprog_untrusted(bpf_core_cast(0, struct task_struct));
+ /* wrong type to untrusted */
+ subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env));
+ /* map value to untrusted */
+ subprog_untrusted((void *)mem);
+ /* scalar to untrusted */
+ subprog_untrusted(0);
+ /* variable offset to untrusted (map) */
+ subprog_untrusted((void *)mem + offset);
+ /* variable offset to untrusted (trusted) */
+ subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
+ return 0;
+}
+
+__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted)
+{
+ return subprog_trusted_task_nullable(task);
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')")
+int untrusted_to_trusted(void *ctx)
+{
+ return subprog_untrusted2(bpf_get_current_task_btf());
+}
+
+__weak int subprog_void_untrusted(void *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_char_untrusted(char *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
+__msg("Validating subprog_void_untrusted() func#1...")
+__msg(": R1=rdonly_untrusted_mem(sz=0)")
+int trusted_to_untrusted_mem(void *ctx)
+{
+ return subprog_void_untrusted(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted_mem(void *ctx)
+{
+ /* untrusted to untrusted mem */
+ subprog_void_untrusted(bpf_core_cast(0, struct task_struct));
+ /* map value to untrusted mem */
+ subprog_void_untrusted(mem);
+ /* scalar to untrusted mem */
+ subprog_void_untrusted(0);
+ /* variable offset to untrusted mem (map) */
+ subprog_void_untrusted((void *)mem + offset);
+ /* variable offset to untrusted mem (trusted) */
+ subprog_void_untrusted(bpf_get_current_task_btf() + offset);
+ /* variable offset to untrusted char/enum (map) */
+ subprog_char_untrusted(mem + offset);
+ subprog_enum_untrusted((void *)mem + offset);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 05a329ee45ee..d5d8f24df394 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -4,11 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
new file mode 100644
index 000000000000..607dad058ca1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Isovalent */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../../../include/linux/filter.h"
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_ ## NAME(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \
+ : __clobber_all); \
+ }
+
+/*
+ * The first program which doesn't use reserved fields
+ * loads and works properly. The rest fail to load.
+ */
+DEFINE_SIMPLE_JUMP_TABLE_PROG(ok, BPF_REG_0, 0, 0, __success __retval(1))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg, BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+
+/*
+ * Gotox is forbidden when there is no jump table loaded
+ * which points to the sub-function where the gotox is used
+ */
+SEC("socket")
+__failure __msg("no jump tables found for subprog starting at 0")
+__naked void jump_table_no_jump_table(void)
+{
+ asm volatile (" \
+ .8byte %[gotox_r0]; \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+/*
+ * Incorrect type of the target register, only PTR_TO_INSN allowed
+ */
+SEC("socket")
+__failure __msg("R1 has type scalar, expected PTR_TO_INSN")
+__naked void jump_table_incorrect_dst_reg_type(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ r1 = 42; \
+ .8byte %[gotox_r1]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_invalid_read_size_ ## READ_SIZE(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(" #READ_SIZE " *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array"))
+
+SEC("socket")
+__failure __msg("misaligned value access off 0+1+0 size 8")
+__naked void jump_table_misaligned_access(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 1; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=24 size=8")
+__naked void jump_table_invalid_mem_acceess_pos(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__naked void jump_table_invalid_mem_acceess_neg(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_add_sub_ok(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 += 32; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("write into map forbidden, value_size=16 off=8 size=8")
+__naked void jump_table_no_writes(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r1 = 0xbeef; \
+ *(u64 *)(r0 + 0) = r1; \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_JUMP_TABLE_USE_REG(REG) \
+ SEC("socket") \
+ __success __retval(1) \
+ __naked void jump_table_use_reg_r ## REG(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r" #REG " = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_rX]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_rX, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_ ## REG, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_JUMP_TABLE_USE_REG(0)
+DEFINE_JUMP_TABLE_USE_REG(1)
+DEFINE_JUMP_TABLE_USE_REG(2)
+DEFINE_JUMP_TABLE_USE_REG(3)
+DEFINE_JUMP_TABLE_USE_REG(4)
+DEFINE_JUMP_TABLE_USE_REG(5)
+DEFINE_JUMP_TABLE_USE_REG(6)
+DEFINE_JUMP_TABLE_USE_REG(7)
+DEFINE_JUMP_TABLE_USE_REG(8)
+DEFINE_JUMP_TABLE_USE_REG(9)
+
+__used static int test_subprog(void)
+{
+ return 0;
+}
+
+SEC("socket")
+__failure __msg("jump table for insn 4 points outside of the subprog [0,10]")
+__naked void jump_table_outside_subprog(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret_out_%= - socket; \
+ .size jt0_%=, 24; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ call test_subprog; \
+ exit; \
+ ret_out_%=: \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_contains_non_unique_values(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 80; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index e54bb5385bc1..75dd922e4e9f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \
: __clobber_all);
}
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__success __retval(0)
__naked void gotol_and_may_goto(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 52edee41caf6..c8494b682c31 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_arena_common.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
@@ -10,6 +11,12 @@
defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
SEC("socket")
__description("LDSX, S8")
__success __success_unpriv __retval(-2)
@@ -65,7 +72,7 @@ __naked void ldsx_s32(void)
SEC("socket")
__description("LDSX, S8 range checking, privileged")
__log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
+__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)")
__naked void ldsx_s8_range_priv(void)
{
asm volatile (
@@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void)
: __clobber_all);
}
+SEC("syscall")
+__description("Arena LDSX Disasm")
+__success
+__arch_x86_64
+__jited("movslq 0x10(%rax,%r12), %r14")
+__jited("movswq 0x18(%rax,%r12), %r14")
+__jited("movsbq 0x20(%rax,%r12), %r14")
+__jited("movslq 0x10(%rdi,%r12), %r15")
+__jited("movswq 0x18(%rdi,%r12), %r15")
+__jited("movsbq 0x20(%rdi,%r12), %r15")
+__arch_arm64
+__jited("add x11, x7, x28")
+__jited("ldrsw x21, [x11, #0x10]")
+__jited("add x11, x7, x28")
+__jited("ldrsh x21, [x11, #0x18]")
+__jited("add x11, x7, x28")
+__jited("ldrsb x21, [x11, #0x20]")
+__jited("add x11, x0, x28")
+__jited("ldrsw x22, [x11, #0x10]")
+__jited("add x11, x0, x28")
+__jited("ldrsh x22, [x11, #0x18]")
+__jited("add x11, x0, x28")
+__jited("ldrsb x22, [x11, #0x20]")
+__naked void arena_ldsx_disasm(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = r0;"
+ "r8 = *(s32 *)(r0 + 16);"
+ "r8 = *(s16 *)(r0 + 24);"
+ "r8 = *(s8 *)(r0 + 32);"
+ "r9 = *(s32 *)(r1 + 16);"
+ "r9 = *(s16 *)(r1 + 24);"
+ "r9 = *(s8 *)(r1 + 32);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX Exception")
+__success __retval(0)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_exception(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r0 = 0xdeadbeef;"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r0 = *(s8 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm_addr(arena)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S8")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s8(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r0 + 0);"
+#else
+ "r0 = *(s8 *)(r0 + 7);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S16")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s16(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r0 + 0);"
+#else
+ "r0 = *(s16 *)(r0 + 6);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S32")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s32(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r0 + 0);"
+#else
+ "r0 = *(s32 *)(r0 + 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
new file mode 100644
index 000000000000..2de105057bbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long long);
+} map SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) frame 0 insn 1 +live -24")
+__msg("(0) frame 0 insn 1 +written -8")
+__msg("(0) frame 0 insn 0 +live -8,-24")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void simple_read_simple_write(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 1 +live -8")
+__not_msg("(0) frame 0 insn 1 +written")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 1 +live -16")
+__msg("(0) frame 0 insn 1 +written -32")
+__msg("(0) live stack update done in 2 iterations")
+__naked void read_write_join(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 42 goto 1f;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "*(u64 *)(r10 - 40) = r0;"
+ "exit;"
+"1:"
+ "r0 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: (25) if r0 > 0x2a goto pc+1")
+__msg("7: (95) exit")
+__msg("(0) frame 0 insn 2 +written -16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("7: (95) exit")
+__not_msg("(0) frame 0 insn 2")
+__msg("(0) live stack update done in 1 iterations")
+__naked void must_write_not_same_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = -8;"
+ "if r0 > 42 goto 1f;"
+ "r1 = -16;"
+"1:"
+ "r2 = r10;"
+ "r2 += r1;"
+ "*(u64 *)(r2 + 0) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 0 +written -8,-16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void must_write_not_same_type(void)
+{
+ asm volatile (
+ "*(u64*)(r10 - 8) = 0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 != 0 goto 1f;"
+ "r0 = r10;"
+ "r0 += -16;"
+"1:"
+ "*(u64 *)(r0 + 0) = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(2,4) frame 0 insn 4 +written -8")
+__msg("(2,4) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_write(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void write_first_param(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 7;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+/* caller_stack_read() function */
+__msg("2: .12345.... (85) call pc+4")
+__msg("5: .12345.... (85) call pc+1")
+__msg("6: 0......... (95) exit")
+/* read_first_param() function */
+__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("8: 0......... (95) exit")
+/* update for callsite at (2) */
+__msg("(2,7) frame 0 insn 7 +live -8")
+__msg("(2,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +live -8")
+__msg("(0) live stack update done in 2 iterations")
+/* update for callsite at (5) */
+__msg("(5,7) frame 0 insn 7 +live -16")
+__msg("(5,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 5 +live -16")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void read_first_param(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+/* read_first_param2() function */
+__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("10: .......... (b7) r0 = 0")
+__msg("11: 0......... (05) goto pc+0")
+__msg("12: 0......... (95) exit")
+/*
+ * The purpose of the test is to check that checkpoint in
+ * read_first_param2() stops path traversal. This will only happen if
+ * verifier understands that fp[0]-8 at insn (12) is not alive.
+ */
+__msg("12: safe")
+__msg("processed 20 insns")
+__naked void caller_stack_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call read_first_param2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void read_first_param2(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ /*
+ * Checkpoint at goto +0 should fire,
+ * as caller stack fp[0]-8 is not alive at this point.
+ */
+ "goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg("R1 type=scalar expected=map_ptr")
+__naked void caller_stack_pruning_callback(void)
+{
+ asm volatile (
+ "r0 = %[map] ll;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = 2;"
+ "r2 = loop_cb ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call %[bpf_loop];"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void loop_cb(void)
+{
+ asm volatile (
+ /*
+ * Checkpoint at function entry should not fire, as caller
+ * stack fp[0]-8 is alive at this point.
+ */
+ "r6 = r2;"
+ "r1 = *(u64 *)(r6 + 0);"
+ "*(u64*)(r10 - 8) = 7;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ /*
+ * This should stop verifier on a second loop iteration,
+ * but only if verifier correctly maintains that fp[0]-8
+ * is still alive.
+ */
+ "*(u64 *)(r6 + 0) = 0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Because of a bug in verifier.c:compute_postorder()
+ * the program below overflowed traversal queue in that function.
+ */
+SEC("socket")
+__naked void syzbot_postorder_bug1(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 != 0 goto -1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=-1024 size=8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked unsigned long caller_stack_write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "*(u64 *)(r10 - 8) = -8;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto 1f;"
+ "goto 2f;"
+ "1:"
+ "*(u64 *)(r10 - 8) = -1024;"
+ "2:"
+ "r1 = r6;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call write_tail_call;"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = *(u64 *)(r2 + 0);"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __used __naked unsigned long write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "*(u64 *)(r6 + 0) = -16;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
new file mode 100644
index 000000000000..74f4f19c10b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("load-acquire, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfe;"
+ "*(u8 *)(r10 - 1) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedc;"
+ "*(u16 *)(r10 - 2) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
+ "*(u32 *)(r10 - 4) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void load_acquire_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with non-pointer src_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void load_acquire_with_non_pointer_src_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned load-acquire")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_acquire_misaligned(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire from ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed")
+__naked void load_acquire_from_ctx_pointer(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("load-acquire from pkt pointer")
+__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed")
+__naked void load_acquire_from_pkt_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("load-acquire from flow_keys pointer")
+__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed")
+__naked void load_acquire_from_flow_keys_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("load-acquire from sock pointer")
+__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed")
+__naked void load_acquire_from_sock_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family)));
+ ".8byte %[load_acquire_insn];"
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2,
+ offsetof(struct bpf_sock, family)))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void load_acquire_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index e07b43b78fd2..fbdde80e7b90 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -283,4 +283,25 @@ exit_%=: \
: __clobber_all);
}
+/*
+ * This test case triggered a bug in verifier.c:maybe_exit_scc().
+ * Speculative execution path reaches stack access instruction,
+ * stops and triggers maybe_exit_scc() w/o accompanying maybe_enter_scc() call.
+ */
+SEC("socket")
+__arch_x86_64
+__caps_unpriv(CAP_BPF)
+__naked void maybe_exit_scc_bug1(void)
+{
+ asm volatile (
+ "r0 = 100;"
+"1:"
+ /* Speculative execution path reaches and stops here. */
+ "*(u64 *)(r10 - 512) = r0;"
+ /* Condition is always false, but verifier speculatively executes the true branch. */
+ "if r0 <= 0x0 goto 1b;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 32e5e779cb96..6af9100a37ff 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
__description("lsm bpf prog with -4095~0 retval. test 1")
__success
__naked int errno_zero_retval_test1(void *ctx)
@@ -15,7 +15,7 @@ __naked int errno_zero_retval_test1(void *ctx)
::: __clobber_all);
}
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
__description("lsm bpf prog with -4095~0 retval. test 2")
__success
__naked int errno_zero_retval_test2(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 7d088ba99ea5..16b761e510f0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void)
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr is never null")
+__success
+__naked void map_ptr_is_never_null(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_in_map] ll; \
+ if r1 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner")
+__success
+__naked void map_ptr_is_never_null_inner(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner spill fill")
+__success
+__naked void map_ptr_is_never_null_inner_spill_fill(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64 *)(r10 -16) = r0; \
+ r1 = *(u64 *)(r10 -16); \
+ if r1 == 0 goto l1_%=; \
+ exit; \
+l1_%=: r10 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 64 * 1024);
+ });
+} rb_in_map SEC(".maps");
+
+struct rb_ctx {
+ void *rb;
+ struct bpf_dynptr dptr;
+};
+
+static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz)
+{
+ struct rb_ctx rb_ctx = {};
+ void *rb;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 rb_slot = cpu & 1;
+
+ rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot);
+ if (!rb)
+ return rb_ctx;
+
+ rb_ctx.rb = rb;
+ bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr);
+
+ return rb_ctx;
+}
+
+static __noinline void __rb_event_submit(struct rb_ctx *ctx)
+{
+ if (!ctx->rb)
+ return;
+
+ /* If the verifier (incorrectly) concludes that ctx->rb can be
+ * NULL at this point, we'll get "BPF_EXIT instruction in main
+ * prog would lead to reference leak" error
+ */
+ bpf_ringbuf_submit_dynptr(&ctx->dptr, 0);
+}
+
+SEC("socket")
+int map_ptr_is_never_null_rb(void *ctx)
+{
+ struct rb_ctx event_ctx = __rb_event_reserve(256);
+ __rb_event_submit(&event_ctx);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
index 11a079145966..e2767d27d8aa 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void)
: __clobber_all);
}
+/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing
+ * into this array is valid. The opts field is now at offset 33.
+ */
SEC("socket")
__description("bpf_map_ptr: read non-existent field rejected")
__failure
-__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4")
__failure_unpriv
__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
__flag(BPF_F_ANY_ALIGNMENT)
@@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void)
asm volatile (" \
r6 = 0; \
r1 = %[map_array_48b] ll; \
- r6 = *(u32*)(r1 + 1); \
+ r6 = *(u32*)(r1 + 33); \
r0 = 1; \
exit; \
" :
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
index e81097c96fe2..6d1edaef9213 100644
--- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -9,6 +9,8 @@
SEC("raw_tp")
__description("may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -27,6 +29,8 @@ __naked void may_goto_simple(void)
SEC("raw_tp")
__description("batch 2 of may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -47,6 +51,8 @@ __naked void may_goto_batch_0(void)
SEC("raw_tp")
__description("may_goto batch with offsets 2/1/0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -71,14 +77,20 @@ __naked void may_goto_batch_1(void)
SEC("raw_tp")
__description("may_goto batch with offsets 2/0")
__arch_x86_64
-__xlated("0: *(u64 *)(r10 -8) = 8388608")
-__xlated("1: r11 = *(u64 *)(r10 -8)")
-__xlated("2: if r11 == 0x0 goto pc+3")
-__xlated("3: r11 -= 1")
-__xlated("4: *(u64 *)(r10 -8) = r11")
-__xlated("5: r0 = 1")
-__xlated("6: r0 = 2")
-__xlated("7: exit")
+__arch_s390x
+__arch_arm64
+__xlated("0: *(u64 *)(r10 -16) = 65535")
+__xlated("1: *(u64 *)(r10 -8) = 0")
+__xlated("2: r11 = *(u64 *)(r10 -16)")
+__xlated("3: if r11 == 0x0 goto pc+6")
+__xlated("4: r11 -= 1")
+__xlated("5: if r11 != 0x0 goto pc+2")
+__xlated("6: r11 = -16")
+__xlated("7: call unknown")
+__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("9: r0 = 1")
+__xlated("10: r0 = 2")
+__xlated("11: exit")
__success
__naked void may_goto_batch_2(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 994bbc346d25..a4d8814eb5ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -245,7 +245,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_2(void)
{
asm volatile (" \
@@ -267,7 +273,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_3(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c
new file mode 100644
index 000000000000..7145fe3351d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mul.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Nandakumar Edamana */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+/* Intended to test the abstract multiplication technique(s) used by
+ * the verifier. Using assembly to avoid compiler optimizations.
+ */
+SEC("fentry/bpf_fentry_test1")
+void BPF_PROG(mul_precise, int x)
+{
+ /* First, force the verifier to be uncertain about the value:
+ * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1;
+ *
+ * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}.
+ * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision
+ * would cause the following to fail, because the required return value
+ * is 0:
+ * return (a * 0x3) & 0x4);
+ */
+ asm volatile ("\
+ call %[bpf_get_prandom_u32];\
+ r0 &= 0x2;\
+ r0 |= 0x1;\
+ r0 *= 0x3;\
+ r0 &= 0x4;\
+ if r0 != 0 goto l0_%=;\
+ r0 = 0;\
+ goto l1_%=;\
+l0_%=:\
+ r0 = 1;\
+l1_%=:\
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index ab9f9f2620ed..e2cbc5bda65e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
return NF_ACCEPT;
}
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
- void *buffer, uint32_t buffer__sz) __ksym;
-
SEC("netfilter")
__description("netfilter test prog with skb and state read access")
__success __failure_unpriv
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6b564d4c0986..1fe090cd6744 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2023 SUSE LLC */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
SEC("?raw_tp")
@@ -90,27 +91,74 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_load_acquire(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10")
+__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_store_release(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
#endif /* v4 instruction */
SEC("?raw_tp")
__success __log_level(2)
/*
* Without the bug fix there will be no history between "last_idx 3 first_idx 3"
- * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * and "parent state regs=" lines. "R0=6" parts are here to help anchor
* expected log messages to the one specific mark_chain_precision operation.
*
* This is quite fragile: if verifier checkpointing heuristic changes, this
* might need adjusting.
*/
-__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("2: (07) r0 += 1 ; R0=6")
__msg("3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
-__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
-__msg("3: R0_w=6")
+__msg("mark_precise: frame0: parent state regs= stack=: R0=P4")
+__msg("3: R0=6")
__naked int state_loop_first_last_equal(void)
{
asm volatile (
@@ -130,4 +178,127 @@ __naked int state_loop_first_last_equal(void)
);
}
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (84) w0 = -w0 ; R0=0xffffffff")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_2(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_3(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (87) r0 = -r0 ; R0=-1")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_4(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_5(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index b1fbdf119553..1ecd34ebde19 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -8,7 +8,7 @@
/* From include/linux/filter.h */
#define MAX_BPF_STACK 512
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
struct elem {
struct bpf_timer t;
@@ -27,9 +27,21 @@ __description("Private stack, single prog")
__success
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_single_prog(void)
{
asm volatile (" \
@@ -45,6 +57,9 @@ __description("No private stack")
__success
__arch_x86_64
__jited(" subq $0x8, %rsp")
+__arch_arm64
+__jited(" mov x25, sp")
+__jited(" sub sp, sp, #0x10")
__naked void no_private_stack_nested(void)
{
asm volatile (" \
@@ -74,13 +89,26 @@ __success
__arch_x86_64
/* private stack fp for the main prog */
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq 0x{{.*}}")
__jited(" popq %r9")
__jited(" xorl %eax, %eax")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl {{.*}}")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_nested_1(void)
{
asm volatile (" \
@@ -122,7 +150,7 @@ __jited(" pushq %rbp")
__jited(" movq %rsp, %rbp")
__jited(" endbr64")
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
@@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited("func #1")
+__jited("...")
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x7, #0x0")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_callback(void)
{
asm volatile (" \
@@ -154,6 +200,28 @@ __arch_x86_64
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x29, x30, [sp, #-0x10]!")
+__jited(" mov x29, sp")
+__jited(" stp xzr, x26, [sp, #-0x10]!")
+__jited(" mov x26, sp")
+__jited(" stp x19, x20, [sp, #-0x10]!")
+__jited(" stp x21, x22, [sp, #-0x10]!")
+__jited(" stp x23, x24, [sp, #-0x10]!")
+__jited(" stp x25, x26, [sp, #-0x10]!")
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" mov x0, #0x0")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_main_prog(void)
{
asm volatile (" \
@@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_sub_prog(void)
{
asm volatile (" \
@@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested")
__success __retval(0)
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
+__arch_arm64
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
int private_stack_async_callback_1(void)
{
struct bpf_timer *arr_timer;
@@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting")
__success __retval(0)
__arch_x86_64
__jited(" subq $0x100, %rsp")
+__arch_arm64
+__jited(" sub sp, sp, #0x100")
int private_stack_async_callback_2(void)
{
struct bpf_timer *arr_timer;
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
index 683a882b3e6d..910365201f68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index));
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
/* BTF FUNC records are not generated for kfuncs referenced
* from inline assembly. These records are necessary for
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 7c5e5e6d10eb..c0ce690ddb68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -349,11 +349,11 @@ __naked void precision_two_ids(void)
SEC("socket")
__success __log_level(2)
__flag(BPF_F_TEST_STATE_FREQ)
-/* check thar r0 and r6 have different IDs after 'if',
+/* check that r0 and r6 have different IDs after 'if',
* collect_linked_regs() can't tie more than 6 registers for a single insn.
*/
__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1")
-__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2")
+__msg("9: (bf) r6 = r6 ; R6=scalar(id=2")
/* check that r{0-5} are marked precise after 'if' */
__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0")
__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:")
@@ -779,12 +779,12 @@ __success
__retval(0)
/* Check that verifier believes r1/r0 are zero at exit */
__log_level(2)
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
__msg("from 3 to 4")
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
/* Verify that statements to randomize upper half of r1 had not been
* generated.
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index 0d5e56dffabb..a2132c72d3b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1,14 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
-
struct {
__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
__uint(max_entries, 1);
@@ -1073,16 +1069,65 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
}
__noinline
+long xdp_pull_data2(struct xdp_md *x, __u32 len)
+{
+ return bpf_xdp_pull_data(x, len);
+}
+
+__noinline
+long xdp_pull_data1(struct xdp_md *x, __u32 len)
+{
+ return xdp_pull_data2(x, len);
+}
+
+/* global function calls bpf_xdp_pull_data(), which invalidates packet
+ * pointers established before global function call.
+ */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ xdp_pull_data1(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+/* XDP packet changing kfunc calls invalidate packet pointers */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ bpf_xdp_pull_data(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+__noinline
int tail_call(struct __sk_buff *sk)
{
bpf_tail_call_static(sk, &jmp_table, 0);
return 0;
}
-/* Tail calls invalidate packet pointers. */
+static __noinline
+int static_tail_call(struct __sk_buff *sk)
+{
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ return 0;
+}
+
+/* Tail calls in sub-programs invalidate packet pointers. */
SEC("tc")
__failure __msg("invalid mem access")
-int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+int invalidate_pkt_pointers_by_global_tail_call(struct __sk_buff *sk)
{
int *p = (void *)(long)sk->data;
@@ -1093,4 +1138,32 @@ int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
return TCX_PASS;
}
+/* Tail calls in static sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ static_tail_call(sk);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+/* Direct tail calls do not invalidate packet pointers. */
+SEC("tc")
+__success
+int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ *p = 42; /* this is NOT unsafe: tail calls don't return */
+ return TCX_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 1e5a511e8494..7a13dbd794b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -506,17 +506,17 @@ SEC("raw_tp")
__log_level(2)
__success
/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0")
/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
-__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0")
__msg("11: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
@@ -598,7 +598,7 @@ __log_level(2)
__success
/* fp-4 is STACK_ZERO */
__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
-__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????")
__msg("5: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
@@ -640,25 +640,25 @@ SEC("raw_tp")
__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
__success
/* make sure fp-8 is IMPRECISE fake register spill */
-__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1")
/* and fp-16 is spilled IMPRECISE const reg */
-__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
@@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
/* now both fp-8 and fp-16 are precise, very good */
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision(void)
{
asm volatile (
@@ -719,22 +719,22 @@ __success
/* make sure fp-8 is 32-bit FAKE subregister spill */
__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
@@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2
__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision_subreg(void)
{
asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 417c61cd4b19..24aabc6083fd 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -481,4 +481,56 @@ l1_%=: r0 = 42; \
: __clobber_all);
}
+SEC("socket")
+__description("PTR_TO_STACK stack size > 512")
+__failure __msg("invalid write to stack R1 off=-520 size=8")
+__naked void stack_check_size_gt_512(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -520; \
+ r0 = 42; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#ifdef __BPF_FEATURE_MAY_GOTO
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto with jit")
+__load_if_JITed()
+__success __retval(42)
+__naked void stack_check_size_512_with_may_goto_jit(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto without jit")
+__load_if_no_JITed()
+__failure __msg("stack size 520(extra 8) is too large")
+__naked void stack_check_size_512_with_may_goto(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
new file mode 100644
index 000000000000..72f1eb006074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("store-release, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x1234;"
+ ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12345678;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0x1234567890abcdef ll;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized dst_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with non-pointer dst_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void store_release_with_non_pointer_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned store-release")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void store_release_misaligned(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release to ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void store_release_to_ctx_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0);
+ ".8byte %[store_release_insn];"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("store-release to pkt pointer")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__naked void store_release_to_pkt_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("store-release to flow_keys pointer")
+__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed")
+__naked void store_release_to_flow_keys_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("store-release to sock pointer")
+__failure __msg("R2 cannot write into sock")
+__naked void store_release_to_sock_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, leak pointer to stack")
+__success __success_unpriv __retval(0)
+__naked void store_release_leak_pointer_to_stack(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("store-release, leak pointer to map")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("R6 leaks addr into map")
+__naked void store_release_leak_pointer_to_map(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r1 = %[map_hash_8b] ll;"
+ "r2 = 0;"
+ "*(u64 *)(r10 - 8) = r2;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6);
+"l0_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(map_hash_8b),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void store_release_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 9d415f7ce599..61886ed554de 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int fp_precise_subprog_result(void)
{
asm volatile (
@@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
* anyways, at which point we'll break precision chain
*/
__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int sneaky_fp_precise_subprog_result(void)
{
asm volatile (
@@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
@@ -793,4 +793,57 @@ __naked int stack_slot_aliases_precision(void)
);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+__naked __noinline __used
+static unsigned long identity_tail_call(void)
+{
+ /* the simplest identity function involving a tail call */
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "r0 = r6;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("13: (85) call bpf_tail_call#12")
+__msg("mark_precise: frame1: last_idx 13 first_idx 0 subseq_idx -1 ")
+__msg("returning from callee:")
+__msg("frame1: R0=scalar() R6=3 R10=fp0")
+__msg("to caller at 4:")
+__msg("R0=scalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: parent state regs=r0 stack=: R0=Pscalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("math between map_value pointer and register with unbounded min value is not allowed")
+__naked int subprog_result_tail_call(void)
+{
+ asm volatile (
+ "r2 = 3;"
+ "call identity_tail_call;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
new file mode 100644
index 000000000000..b4acce60fb9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__description("invalid map type for tail call")
+__failure __msg("expected prog array map for tail call")
+__failure_unpriv
+__naked void invalid_map_for_tail_call(void)
+{
+ asm volatile (" \
+ r2 = %[map_array] ll; \
+ r3 = 0; \
+ call %[bpf_tail_call]; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index a4a5e2071604..28b4f7035ceb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -572,8 +572,14 @@ l0_%=: exit; \
SEC("socket")
__description("alu32: mov u32 const")
-__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 == 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void alu32_mov_u32_const(void)
{
asm volatile (" \
@@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void)
SEC("socket")
__description("unpriv: cmp map pointer with zero")
-__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__success __success_unpriv
__retval(0)
__naked void cmp_map_pointer_with_zero(void)
{
asm volatile (" \
- r1 = 0; \
r1 = %[map_hash_8b] ll; \
if r1 == 0 goto l0_%=; \
l0_%=: r0 = 0; \
@@ -635,6 +640,22 @@ l0_%=: r0 = 0; \
}
SEC("socket")
+__description("unpriv: cmp map pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited")
+__retval(0)
+__naked void cmp_map_pointer_with_const(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0x0000beef goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("unpriv: write into frame pointer")
__failure __msg("frame pointer is read only")
__failure_unpriv
@@ -723,4 +744,210 @@ l0_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr")
+__success __success_unpriv __retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 != 0x1 goto pc+2")
+/* This nospec prevents the exploit because it forces the mispredicted (not
+ * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer.
+ * This causes the CPU to realize that `r6 = r9` should have never executed. It
+ * ensures that r6 always contains a readable stack slot ptr when the insn after
+ * the nospec executes.
+ */
+__xlated_unpriv("nospec")
+__xlated_unpriv("r9 = *(u8 *)(r6 +0)")
+#endif
+__naked void unpriv_spec_v1_type_confusion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* r0: pointer to a map array entry */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ /* r1, r2: prepared call args */ \
+ r6 = r10; \
+ r6 += -8; \
+ /* r6: pointer to readable stack slot */ \
+ r9 = 0xffffc900; \
+ r9 <<= 32; \
+ /* r9: scalar controlled by attacker */ \
+ r0 = *(u64 *)(r0 + 0); /* cache miss */ \
+ if r0 != 0x0 goto l0_%=; \
+ r6 = r9; \
+l0_%=: if r0 != 0x1 goto l1_%=; \
+ r9 = *(u8 *)(r6 + 0); \
+l1_%=: /* leak r9 */ \
+ r9 &= 1; \
+ r9 <<= 9; \
+ *(u64*)(r10 - 8) = r9; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* leak secret into is_cached(map[0|512]): */ \
+ r0 = *(u64 *)(r0 + 0); \
+l2_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V4
+__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */
+__xlated_unpriv("*(u64 *)(r10 -8) = r1")
+__xlated_unpriv("nospec")
+#endif
+__naked void unpriv_ldimm64_spectre_v4(void)
+{
+ asm volatile (" \
+ r1 = 0x2020200005642020 ll; \
+ *(u64 *)(r10 -8) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+/* starts with r0 == r8 == r9 == 0 */
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("if r9 == 0x0 goto pc+4")
+__xlated_unpriv("r2 = r0")
+/* Following nospec required to prevent following dangerous `*(u64 *)(NOT_FP -64)
+ * = r1` iff `if r9 == 0 goto pc+4` was mispredicted because of Spectre v1. The
+ * test therefore ensures the Spectre-v4--induced nospec does not prevent the
+ * Spectre-v1--induced speculative path from being fully analyzed.
+ */
+__xlated_unpriv("nospec") /* Spectre v1 */
+__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */
+__xlated_unpriv("nospec") /* Spectre v4 */
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+4")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */
+__xlated_unpriv("goto pc-1") /* second part of ldimm64 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+ r1 = 0x2020200005642020 ll; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
index a9ab37d3b9e2..2129e4353fd9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
#define MAX_ENTRIES 11
@@ -146,6 +147,24 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr illegal alu op, map_ptr = -map_ptr")
+__failure __msg("R0 invalid mem access 'scalar'")
+__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_ptr_illegal_alu_op(void)
+{
+ asm volatile (" \
+ r0 = %[map_hash_48b] ll; \
+ r0 = -r0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
SEC("flow_dissector")
__description("flow_keys illegal alu op with variable offset")
__failure __msg("R7 pointer arithmetic on flow_keys prohibited")
@@ -165,4 +184,32 @@ __naked void flow_keys_illegal_variable_offset_alu(void)
: __clobber_all);
}
+#define DEFINE_BAD_OFFSET_TEST(name, op, off, imm) \
+ SEC("socket") \
+ __failure __msg("BPF_ALU uses reserved fields") \
+ __naked void name(void) \
+ { \
+ asm volatile( \
+ "r0 = 1;" \
+ ".8byte %[insn];" \
+ "r0 = 0;" \
+ "exit;" \
+ : \
+ : __imm_insn(insn, BPF_RAW_INSN((op), 0, 0, (off), (imm))) \
+ : __clobber_all); \
+ }
+
+/*
+ * Offset fields of 0 and 1 are legal for BPF_{DIV,MOD} instructions.
+ * Offset fields of 0 are legal for the rest of ALU instructions.
+ * Test that error is reported for illegal offsets, assuming that tests
+ * for legal offsets exist.
+ */
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx, BPF_ALU64 | BPF_DIV | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk, BPF_ALU64 | BPF_MOD | BPF_K, -1, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx, BPF_ALU64 | BPF_ADD | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx2, BPF_ALU64 | BPF_DIV | BPF_X, 2, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk2, BPF_ALU64 | BPF_MOD | BPF_K, 2, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx2, BPF_ALU64 | BPF_ADD | BPF_X, 1, 0)
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index 5ba6e53571c8..af7938ce56cb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -231,6 +231,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_lt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -259,7 +263,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -271,6 +276,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_gt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -299,7 +308,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -398,7 +408,8 @@ l2_%=: r0 = 1; \
SEC("socket")
__description("map access: mixing value pointer and scalar, 1")
-__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_1(void)
{
@@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \
l3_%=: /* branch B */ \
r0 = 0x13371337; \
/* verifier follows fall-through */ \
+ /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\
if r2 != 0x100000 goto l4_%=; \
r0 = 0; \
exit; \
@@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \
SEC("socket")
__description("map access: mixing value pointer and scalar, 2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_2(void)
{
@@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \
* prevent dead code sanitization, rejected \
* via branch B however \
*/ \
+ /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\
r0 = *(u8*)(r0 + 0); \
r0 = 0; \
exit; \
@@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= unknown scalar, 2")
-__success __failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__success __success_unpriv
__retval(1)
+#ifdef SPEC_V1
+__xlated_unpriv("r1 &= 7")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */
+__xlated_unpriv("r0 -= r1")
+#endif
__naked void value_ptr_unknown_scalar_2_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index 1d36d01b746e..f345466bca68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void)
}
/* Similar to the previous test, but this time also perform a read from the
- * address written to with a variable offset. The read is allowed, showing that,
- * after a variable-offset write, a priviledged program can read the slots that
+ * address written to with a variable offet. The read is allowed, showing that,
+ * after a variable-offset write, a privileged program can read the slots that
* were in the range of that write (even if the verifier doesn't actually know if
* the slot being read was really written to or not.
*
@@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void)
SEC("socket")
__description("variable-offset stack write clobbers spilled regs")
__failure
-/* In the priviledged case, dereferencing a spilled-and-then-filled
+/* In the privileged case, dereferencing a spilled-and-then-filled
* register is rejected because the previous variable offset stack
* write might have overwritten the spilled pointer (i.e. we lose track
* of the spilled register when we analyze the write).
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
index a7c0a553aa50..55398c04290a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -69,7 +70,7 @@ __success
int BPF_PROG(path_d_path_from_file_argument, struct file *file)
{
int ret;
- struct path *path;
+ const struct path *path;
/* The f_path member is a path which is embedded directly within a
* file. Therefore, a pointer to such embedded members are still
@@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file)
return 0;
}
+SEC("lsm.s/inode_rename")
+__success
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ if (!inode)
+ return 0;
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
index d6d3f4fcb24c..4b392c6c8fc4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/limits.h>
@@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f)
return 0;
}
+SEC("lsm.s/inode_rename")
+__failure __msg("invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index 2f1ba08c293e..25be2cd9d42c 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -187,3 +187,20 @@ long test_call_lru_sleepable(void *ctx)
return test_elem_callback(&lru, &key, wq_callback);
}
+
+SEC("tc")
+long test_map_no_btf(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 4240211a1900..d06f6d40594a 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -142,3 +142,26 @@ long test_wrong_wq_pointer_offset(void *ctx)
return -22;
}
+
+SEC("tc")
+__log_level(2)
+__failure
+__msg(": (85) call bpf_wq_init#")
+__msg("R1 doesn't have constant offset. bpf_wq has to be at the constant offset")
+long test_bad_wq_off(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+ u64 unknown;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ unknown = bpf_get_prandom_u32();
+ wq = &val->w + unknown;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 31ca229bb3c0..09bb8a038d52 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -19,6 +19,13 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
@@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index 682dda8dabbc..50c8958f94e5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_ether.h>
+
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
@@ -28,4 +31,89 @@ int xdp_redirect_map_2(struct xdp_md *xdp)
return bpf_redirect_map(&tx_port, 2, 0);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+static int xdp_count(struct xdp_md *xdp, __u32 key)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u64 *count;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ if (bpf_htons(eth->h_proto) == ETH_P_IP) {
+ /* We only count IPv4 packets */
+ count = bpf_map_lookup_elem(&rxcnt, &key);
+ if (count)
+ *count += 1;
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_count_0(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 0);
+}
+
+SEC("xdp")
+int xdp_count_1(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 1);
+}
+
+SEC("xdp")
+int xdp_count_2(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 2);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __be64);
+} rx_mac SEC(".maps");
+
+static int store_mac(struct xdp_md *xdp, __u32 id)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u32 key = id;
+ __be64 mac = 0;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ __builtin_memcpy(&mac, eth->h_source, ETH_ALEN);
+ bpf_map_update_elem(&rx_mac, &key, &mac, 0);
+ bpf_printk("%s - %x", __func__, mac);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int store_mac_1(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 0);
+}
+
+SEC("xdp")
+int store_mac_2(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 1);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 97b26a30b59a..bc2945ed8a80 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,6 +34,14 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
+/* map to store redirect flags for each protocol*/
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u16);
+ __type(value, __u64);
+ __uint(max_entries, 16);
+} redirect_flags SEC(".maps");
+
SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
@@ -41,25 +49,34 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
void *data = (void *)(long)ctx->data;
int if_index = ctx->ingress_ifindex;
struct ethhdr *eth = data;
+ __u64 *flags_from_map;
__u16 h_proto;
__u64 nh_off;
+ __u64 flags;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- h_proto = eth->h_proto;
-
- /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
- if (h_proto == bpf_htons(ETH_P_IP))
- return bpf_redirect_map(&map_all, 0,
- BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
- /* Using IPv6 for none flag testing */
- else if (h_proto == bpf_htons(ETH_P_IPV6))
- return bpf_redirect_map(&map_all, if_index, 0);
- /* All others for BPF_F_BROADCAST testing */
- else
- return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+ h_proto = bpf_htons(eth->h_proto);
+
+ flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto);
+
+ /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */
+ if (h_proto == ETH_P_IP) {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+ /* Default flags for IPv6 : 0 */
+ if (h_proto == ETH_P_IPV6) {
+ flags = flags_from_map ? *flags_from_map : 0;
+ return bpf_redirect_map(&map_all, if_index, flags);
+ }
+ /* Default flags for others BPF_F_BROADCAST : 0 */
+ else {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
}
/* The following 2 progs are for 2nd devmap prog testing */
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
#include "xsk_xdp_common.h"
struct {
@@ -14,6 +16,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int adjust_value = 0;
int count = 0;
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, idx, XDP_DROP);
}
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 1453a53ed547..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -90,10 +90,6 @@ echo -e "... through kbuild\n"
if [ -f ".config" ] ; then
make_and_clean tools/bpf
- ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for
- ## runqslower, but the default (used for the "clean" target) is .output.
- ## Let's make sure we clean runqslower's directory properly.
- make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean
## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
## down from toplevel Makefile to bpftool's Makefile.
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
new file mode 100755
index 000000000000..515b1df0501e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_map.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME="bpftool_map"
+BPF_FILE="security_bpf_map.bpf.o"
+BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
+PROTECTED_MAP_NAME="prot_map"
+NOT_PROTECTED_MAP_NAME="not_prot_map"
+BPF_FS_TMP_PARENT="/tmp"
+BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
+# bpftool will mount bpf file system under BPF_DIR if it is not mounted
+# under BPF_FS_PARENT.
+BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
+SCRIPT_DIR=$(dirname $(realpath "$0"))
+BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
+BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
+BPFTOOL_PATH="bpftool"
+# Assume the script is located under tools/testing/selftests/bpf/
+KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
+
+_cleanup()
+{
+ set +eu
+
+ # If BPF_DIR is a mount point this will not remove the mount point itself.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
+
+ # Unmount if BPF filesystem was temporarily created.
+ if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
+ # A loop and recursive unmount are required as bpftool might
+ # create multiple mounts. For example, a bind mount of the directory
+ # to itself. The bind mount is created to change mount propagation
+ # flags on an actual mount point.
+ max_attempts=3
+ attempt=0
+ while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
+ umount -R "$BPF_DIR" 2>/dev/null
+ attempt=$((attempt+1))
+ done
+
+ # The directory still exists. Remove it now.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
+ fi
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+check_root_privileges() {
+ if [ $(id -u) -ne 0 ]; then
+ echo "Need root privileges"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify bpftool path.
+# Parameters:
+# $1: bpftool path
+verify_bpftool_path() {
+ local bpftool_path="$1"
+ if ! "$bpftool_path" version > /dev/null 2>&1; then
+ echo "Could not run test without bpftool"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify BTF support.
+# The test requires BTF support for fmod_ret programs.
+verify_btf_support() {
+ if [ ! -f /sys/kernel/btf/vmlinux ]; then
+ echo "Could not run test without BTF support"
+ exit $ksft_skip
+ fi
+}
+
+# Function to initialize map entries with keys [0..2] and values set to 0.
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+initialize_map_entries() {
+ local map_name="$1"
+ local bpftool_path="$2"
+
+ for key in 0 1 2; do
+ "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
+ done
+}
+
+# Test read access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+access_for_read() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+
+ # Test read access to the map.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Read access to $key in $map_name failed"
+ exit 1
+ fi
+
+ # Test read access to map's BTF data.
+ if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
+ echo " Read access to $map_name for BTF data failed"
+ exit 1
+ fi
+}
+
+# Test write access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_write() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test entry deletion for the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_deletion() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ # Test deletion by key for the map.
+ # Before deleting, check the key exists.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Key $key does not exist in $map_name"
+ exit 1
+ fi
+
+ # Delete by key.
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Deletion for $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Deletion for $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+
+ # After deleting, check the entry existence according to the expected status.
+ if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Key $key for $map_name was not deleted but should have been deleted"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "false" ]; then
+ echo "Key $key for $map_name was deleted but should have not been deleted"
+ exit 1
+ fi
+ fi
+
+ # Test creation of map's deleted entry, if deletion was successful.
+ # Otherwise, the entry exists.
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test map elements iterator.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: BPF_DIR
+# $5: bpf iterator object file path
+iterate_map_elem() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local bpf_dir="$4"
+ local bpf_file="$5"
+ local pin_path="$bpf_dir/map_iterator"
+
+ "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
+ if [ ! -f "$pin_path" ]; then
+ echo " Failed to pin iterator to $pin_path"
+ exit 1
+ fi
+
+ cat "$pin_path" 1>/dev/null
+ rm "$pin_path" 2>/dev/null
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key for rw
+# $5: key to delete
+# $6: Whether write should succeed (true/false)
+# $7: BPF_DIR
+# $8: bpf iterator object file path
+access_map() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key_for_rw="$4"
+ local key_to_del="$5"
+ local write_should_succeed="$6"
+ local bpf_dir="$7"
+ local bpf_iter_file_path="$8"
+
+ access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
+ access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
+ "$write_should_succeed"
+ access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
+ "$write_should_succeed"
+ iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
+ "$bpf_iter_file_path"
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+# $3: BPF_DIR
+# $4: Whether write should succeed (true/false)
+# $5: bpf iterator object file path
+test_map_access() {
+ local map_name="$1"
+ local bpftool_path="$2"
+ local bpf_dir="$3"
+ local pin_path="$bpf_dir/${map_name}_pinned"
+ local write_should_succeed="$4"
+ local bpf_iter_file_path="$5"
+
+ # Test access to the map by name.
+ access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+
+ # Pin the map to the BPF filesystem
+ "$bpftool_path" map pin name "$map_name" "$pin_path"
+ if [ ! -e "$pin_path" ]; then
+ echo " Failed to pin $map_name"
+ exit 1
+ fi
+
+ # Test access to the pinned map.
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+}
+
+# Function to test map creation and map-of-maps
+# Parameters:
+# $1: bpftool path
+# $2: BPF_DIR
+test_map_creation_and_map_of_maps() {
+ local bpftool_path="$1"
+ local bpf_dir="$2"
+ local outer_map_name="outer_map_tt"
+ local inner_map_name="inner_map_tt"
+
+ "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
+ value 4 entries 4 name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$inner_map_name" ]; then
+ echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
+ key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$outer_map_name" ]; then
+ echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ # Add entries to the outer map by name and by pinned path.
+ "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
+ value pinned "$bpf_dir/$inner_map_name"
+ "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
+ name "$inner_map_name"
+
+ # The outer map should be full by now.
+ # The following map update command is expected to fail.
+ if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
+ "$inner_map_name" 2>/dev/null; then
+ echo " Update for $outer_map_name succeeded but should have failed"
+ exit 1
+ fi
+}
+
+# Function to test map access with the btf list command
+# Parameters:
+# $1: bpftool path
+test_map_access_with_btf_list() {
+ local bpftool_path="$1"
+
+ # The btf list command iterates over maps for
+ # loaded BPF programs.
+ if ! "$bpftool_path" btf list 1>/dev/null; then
+ echo " Failed to access btf data"
+ exit 1
+ fi
+}
+
+set -eu
+
+trap cleanup_skip EXIT
+
+check_root_privileges
+
+verify_bpftool_path "$BPFTOOL_PATH"
+
+verify_btf_support
+
+trap cleanup EXIT
+
+# Load and attach the BPF programs to control maps access.
+"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
+
+initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+
+# Activate the map protection mechanism. Protection status is controlled
+# by a value stored in the prot_status_map at index 0.
+"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
+
+# Test protected map (write should fail).
+test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
+ "$BPF_ITER_FILE_PATH"
+
+# Test not protected map (write should succeed).
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
+ "$BPF_ITER_FILE_PATH"
+
+test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
+
+test_map_access_with_btf_list "$BPFTOOL_PATH"
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index fb4f4714eeb4..e65889ab4adf 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -72,9 +72,15 @@
#define BTF_TYPE_FLOAT_ENC(name, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_DECL_ATTR_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx)
+
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_ATTR_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type)
+
#define BTF_TYPE_TAG_ENC(value, type) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
index d4e50c4509c9..63c4d3f6a12f 100644
--- a/tools/testing/selftests/bpf/test_kmods/Makefile
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -8,7 +8,7 @@ Q = @
endif
MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
new file mode 100644
index 000000000000..7b4ae5e81d32
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/prandom.h>
+#include <linux/ktime.h>
+#include <asm/rqspinlock.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/slab.h>
+
+static struct perf_event_attr hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+ .sample_period = 100000,
+};
+
+static rqspinlock_t lock_a;
+static rqspinlock_t lock_b;
+static rqspinlock_t lock_c;
+
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+ 100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+enum rqsl_context {
+ RQSL_CTX_NORMAL = 0,
+ RQSL_CTX_NMI,
+ RQSL_CTX_MAX,
+};
+
+struct rqsl_cpu_hist {
+ atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+ atomic64_t success[RQSL_CTX_MAX];
+ atomic64_t failure[RQSL_CTX_MAX];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
+enum rqsl_mode {
+ RQSL_MODE_AA = 0,
+ RQSL_MODE_ABBA,
+ RQSL_MODE_ABBCCA,
+};
+
+static int test_mode = RQSL_MODE_AA;
+module_param(test_mode, int, 0644);
+MODULE_PARM_DESC(test_mode,
+ "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
+
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+ "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+ "rqspinlock critical section length for NMI context (10ms default)");
+
+static struct perf_event **rqsl_evts;
+static int rqsl_nevts;
+
+static struct task_struct **rqsl_threads;
+static int rqsl_nthreads;
+static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
+
+static int pause = 0;
+
+static const char *rqsl_mode_names[] = {
+ [RQSL_MODE_AA] = "AA",
+ [RQSL_MODE_ABBA] = "ABBA",
+ [RQSL_MODE_ABBCCA] = "ABBCCA",
+};
+
+struct rqsl_lock_pair {
+ rqspinlock_t *worker_lock;
+ rqspinlock_t *nmi_lock;
+};
+
+static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
+{
+ int mode = READ_ONCE(test_mode);
+
+ switch (mode) {
+ default:
+ case RQSL_MODE_AA:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_a };
+ case RQSL_MODE_ABBA:
+ if (cpu & 1)
+ return (struct rqsl_lock_pair){ &lock_b, &lock_a };
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case RQSL_MODE_ABBCCA:
+ switch (cpu % 3) {
+ case 0:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case 1:
+ return (struct rqsl_lock_pair){ &lock_b, &lock_c };
+ default:
+ return (struct rqsl_lock_pair){ &lock_c, &lock_a };
+ }
+ }
+}
+
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+ int i;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ if (delta_ms <= rqsl_hist_ms[i])
+ return i;
+ }
+
+ return RQSL_NR_HIST_BUCKETS - 1;
+}
+
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
+{
+ struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+ u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+ u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+ atomic64_t *buckets = hist->hist[ctx];
+
+ atomic64_inc(&buckets[bucket]);
+ if (!ret)
+ atomic64_inc(&hist->success[ctx]);
+ else
+ atomic64_inc(&hist->failure[ctx]);
+}
+
+static int rqspinlock_worker_fn(void *arg)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (cpu) {
+ atomic_inc(&rqsl_ready_cpus);
+
+ while (!kthread_should_stop()) {
+ struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu);
+ rqspinlock_t *worker_lock = locks.worker_lock;
+
+ if (READ_ONCE(pause)) {
+ msleep(1000);
+ continue;
+ }
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(worker_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NORMAL, ret);
+ mdelay(normal_delay);
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(worker_lock, flags);
+ cpu_relax();
+ }
+ return 0;
+ }
+
+ while (!kthread_should_stop()) {
+ int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
+ int ready = atomic_read(&rqsl_ready_cpus);
+
+ if (ready == expected && !READ_ONCE(pause)) {
+ for (int i = 0; i < rqsl_nevts; i++)
+ perf_event_enable(rqsl_evts[i]);
+ pr_err("Waiting 5 secs to pause the test\n");
+ msleep(1000 * 5);
+ WRITE_ONCE(pause, 1);
+ pr_err("Paused the test\n");
+ } else {
+ msleep(1000);
+ cpu_relax();
+ }
+ }
+ return 0;
+}
+
+static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct rqsl_lock_pair locks;
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (!cpu || READ_ONCE(pause))
+ return;
+
+ locks = rqsl_get_lock_pair(cpu);
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NMI, ret);
+
+ mdelay(nmi_delay);
+
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
+}
+
+static void free_rqsl_threads(void)
+{
+ int i;
+
+ if (rqsl_threads) {
+ for_each_online_cpu(i) {
+ if (rqsl_threads[i])
+ kthread_stop(rqsl_threads[i]);
+ }
+ kfree(rqsl_threads);
+ }
+}
+
+static void free_rqsl_evts(void)
+{
+ int i;
+
+ if (rqsl_evts) {
+ for (i = 0; i < rqsl_nevts; i++) {
+ if (rqsl_evts[i])
+ perf_event_release_kernel(rqsl_evts[i]);
+ }
+ kfree(rqsl_evts);
+ }
+}
+
+static int bpf_test_rqspinlock_init(void)
+{
+ int i, ret;
+ int ncpus = num_online_cpus();
+
+ if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) {
+ pr_err("Invalid mode %d\n", test_mode);
+ return -EINVAL;
+ }
+
+ pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
+
+ if (ncpus < test_mode + 2)
+ return -ENOTSUPP;
+
+ raw_res_spin_lock_init(&lock_a);
+ raw_res_spin_lock_init(&lock_b);
+ raw_res_spin_lock_init(&lock_c);
+
+ rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
+ if (!rqsl_evts)
+ return -ENOMEM;
+ rqsl_nevts = ncpus - 1;
+
+ for (i = 1; i < ncpus; i++) {
+ struct perf_event *e;
+
+ e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
+ if (IS_ERR(e)) {
+ ret = PTR_ERR(e);
+ goto err_perf_events;
+ }
+ rqsl_evts[i - 1] = e;
+ }
+
+ rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
+ if (!rqsl_threads) {
+ ret = -ENOMEM;
+ goto err_perf_events;
+ }
+ rqsl_nthreads = ncpus;
+
+ for_each_online_cpu(i) {
+ struct task_struct *t;
+
+ t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto err_threads_create;
+ }
+ kthread_bind(t, i);
+ rqsl_threads[i] = t;
+ wake_up_process(t);
+ }
+ return 0;
+
+err_threads_create:
+ free_rqsl_threads();
+err_perf_events:
+ free_rqsl_evts();
+ return ret;
+}
+
+module_init(bpf_test_rqspinlock_init);
+
+static void rqsl_print_histograms(void)
+{
+ int cpu, i;
+
+ pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+ for_each_online_cpu(cpu) {
+ struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+ u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+ u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+ u64 total_counts[RQSL_NR_HIST_BUCKETS];
+ u64 norm_success, nmi_success, success_total;
+ u64 norm_failure, nmi_failure, failure_total;
+ u64 norm_total = 0, nmi_total = 0, total = 0;
+ bool has_slow = false;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+ nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
+ total_counts[i] = norm_counts[i] + nmi_counts[i];
+ norm_total += norm_counts[i];
+ nmi_total += nmi_counts[i];
+ total += total_counts[i];
+ if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+ total_counts[i])
+ has_slow = true;
+ }
+
+ norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+ nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+ norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+ nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+ success_total = norm_success + nmi_success;
+ failure_total = norm_failure + nmi_failure;
+
+ if (!total)
+ continue;
+
+ if (!has_slow) {
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure,
+ RQSL_SLOW_THRESHOLD_MS);
+ continue;
+ }
+
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu)\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure);
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ unsigned int start_ms;
+
+ if (!total_counts[i])
+ continue;
+
+ start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+ if (i == RQSL_NR_HIST_BUCKETS - 1) {
+ pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ } else {
+ pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, rqsl_hist_ms[i],
+ total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ }
+ }
+ }
+}
+
+static void bpf_test_rqspinlock_exit(void)
+{
+ WRITE_ONCE(pause, 1);
+ free_rqsl_threads();
+ free_rqsl_evts();
+ rqsl_print_histograms();
+}
+
+module_exit(bpf_test_rqspinlock_exit);
+
+MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
+MODULE_DESCRIPTION("BPF rqspinlock stress test module");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index cc9dde507aba..1669a7eeda26 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 {
long d;
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
__bpf_hook_start();
noinline int
@@ -129,11 +141,29 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
}
noinline int
+bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c)
+{
+ bpf_testmod_test_struct_arg_result = a.arg.a + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b)
+{
+ bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
}
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -214,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
{
}
+__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void)
+{
+ return NULL;
+}
+
+__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
+{
+ return NULL;
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -377,11 +417,35 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
}
+noinline void bpf_testmod_stacktrace_test(void)
+{
+ /* used for stacktrace test as attach function */
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_3(void)
+{
+ bpf_testmod_stacktrace_test();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_2(void)
+{
+ bpf_testmod_stacktrace_test_3();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_1(void)
+{
+ bpf_testmod_stacktrace_test_2();
+ asm volatile ("");
+}
+
int bpf_testmod_fentry_ok;
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_read_ctx ctx = {
@@ -394,6 +458,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
+ union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} };
+ union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} };
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -411,9 +477,12 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
21, 22, struct_arg5, 27);
+ (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5);
+ (void)bpf_testmod_test_union_arg_2(6, union_arg2);
+
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
- (void)trace_bpf_testmod_test_raw_tp_null(NULL);
+ (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
bpf_testmod_test_struct_ops3();
@@ -431,14 +500,14 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
- trace_bpf_testmod_test_nullable_bare(NULL);
+ trace_bpf_testmod_test_nullable_bare_tp(NULL);
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {
.val = 1024,
};
- trace_bpf_testmod_test_writable_bare(&writable);
+ trace_bpf_testmod_test_writable_bare_tp(&writable);
if (writable.early_ret)
return snprintf(buf, len, "%d\n", writable.val);
}
@@ -452,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
21, 22, 23, 24, 25, 26) != 231)
goto out;
+ bpf_testmod_stacktrace_test_1();
+
bpf_testmod_fentry_ok = 1;
out:
return -EIO; /* always fail */
@@ -461,7 +532,7 @@ ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
noinline ssize_t
bpf_testmod_test_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_write_ctx ctx = {
@@ -470,7 +541,7 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj,
.len = len,
};
- trace_bpf_testmod_test_write_bare(current, &ctx);
+ trace_bpf_testmod_test_write_bare_tp(current, &ctx);
return -EIO; /* always fail */
}
@@ -497,14 +568,20 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
#ifdef __x86_64__
static int
+uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data)
+{
+ regs->cx = 0x87654321feebdaed;
+ return 0;
+}
+
+static int
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct pt_regs *regs, __u64 *data)
{
regs->ax = 0x12345678deadbeef;
- regs->cx = 0x87654321feebdaed;
regs->r11 = (u64) -1;
- return true;
+ return 0;
}
struct testmod_uprobe {
@@ -516,6 +593,7 @@ struct testmod_uprobe {
static DEFINE_MUTEX(testmod_uprobe_mutex);
static struct testmod_uprobe uprobe = {
+ .consumer.handler = uprobe_handler,
.consumer.ret_handler = uprobe_ret_handler,
};
@@ -563,7 +641,7 @@ static void testmod_unregister_uprobe(void)
static ssize_t
bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
unsigned long offset = 0;
@@ -619,6 +697,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
@@ -846,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
goto out;
}
- err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+ err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr,
args->addrlen, 0);
out:
mutex_unlock(&sock_lock);
@@ -869,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
goto out;
}
- err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+ err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen);
out:
mutex_unlock(&sock_lock);
@@ -1053,6 +1133,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
return args->a;
}
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -1093,6 +1175,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1130,6 +1213,7 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
};
static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
@@ -1176,10 +1260,25 @@ static int bpf_testmod_ops__test_maybe_null(int dummy,
return 0;
}
+static int bpf_testmod_ops__test_refcounted(int dummy,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
+static struct task_struct *
+bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
+ struct cgroup *cgrp)
+{
+ return NULL;
+}
+
static struct bpf_testmod_ops __bpf_testmod_ops = {
.test_1 = bpf_testmod_test_1,
.test_2 = bpf_testmod_test_2,
.test_maybe_null = bpf_testmod_ops__test_maybe_null,
+ .test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
};
struct bpf_struct_ops bpf_bpf_testmod_ops = {
@@ -1293,6 +1392,85 @@ static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
return 0;
}
+static int bpf_cgroup_from_id_id;
+static int bpf_cgroup_release_id;
+
+static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r8 = r1; // r8 will be "u64 *ctx".
+ * r1 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+5;
+ * r6 = r8[0]; // r6 will be "struct st_ops *args".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ * goto pc+2;
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r1 = r8;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r1 = 0;
+ * r6 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+6;
+ * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * goto pc+2
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+#define KFUNC_PRO_EPI_PREFIX "test_kfunc_"
static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -1302,6 +1480,9 @@ static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog);
+
/* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
* r7 = r6->a;
* r7 += 1000;
@@ -1325,6 +1506,9 @@ static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off);
+
/* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
* r1 = r1[0]; // r1 will be "struct st_ops *args"
* r6 = r1->a;
@@ -1395,6 +1579,13 @@ static void st_ops_unreg(void *kdata, struct bpf_link *link)
static int st_ops_init(struct btf *btf)
{
+ struct btf *kfunc_btf;
+
+ bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf);
+ bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf);
+ if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0)
+ return -EINVAL;
+
return 0;
}
@@ -1416,6 +1607,114 @@ static struct bpf_struct_ops testmod_st_ops = {
.owner = THIS_MODULE,
};
+struct hlist_head multi_st_ops_list;
+static DEFINE_SPINLOCK(multi_st_ops_lock);
+
+static int multi_st_ops_init(struct btf *btf)
+{
+ spin_lock_init(&multi_st_ops_lock);
+ INIT_HLIST_HEAD(&multi_st_ops_list);
+
+ return 0;
+}
+
+static int multi_st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+
+ hlist_for_each_entry(st_ops, &multi_st_ops_list, node) {
+ if (st_ops->id == id)
+ return st_ops;
+ }
+
+ return NULL;
+}
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ int ret = -1;
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return ret;
+}
+
+static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops =
+ (struct bpf_testmod_multi_st_ops *)kdata;
+ unsigned long flags;
+ int err = 0;
+ u32 id;
+
+ if (!st_ops->test_1)
+ return -EINVAL;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ if (multi_st_ops_find_nolock(id)) {
+ pr_err("multi_st_ops(id:%d) has already been registered\n", id);
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ st_ops->id = id;
+ hlist_add_head(&st_ops->node, &multi_st_ops_list);
+unlock:
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return err;
+}
+
+static void multi_st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ u32 id;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ hlist_del(&st_ops->node);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+}
+
+static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = {
+ .test_1 = bpf_testmod_multi_st_ops__test_1,
+};
+
+struct bpf_struct_ops testmod_multi_st_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = multi_st_ops_init,
+ .init_member = multi_st_ops_init_member,
+ .reg = multi_st_ops_reg,
+ .unreg = multi_st_ops_unreg,
+ .cfi_stubs = &multi_st_ops_cfi_stubs,
+ .name = "bpf_testmod_multi_st_ops",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -1438,6 +1737,7 @@ static int bpf_testmod_init(void)
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
+ ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops);
ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
ARRAY_SIZE(bpf_testmod_dtors),
THIS_MODULE);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index 356803d1c10e..f6e492f9d042 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
struct task_struct;
+struct cgroup;
struct bpf_testmod_test_read_ctx {
char *buf;
@@ -36,6 +37,11 @@ struct bpf_testmod_ops {
/* Used to test nullable arguments. */
int (*test_maybe_null)(int dummy, struct task_struct *task);
int (*unsupported_ops)(void);
+ /* Used to test ref_acquired arguments. */
+ int (*test_refcounted)(int dummy, struct task_struct *task);
+ /* Used to test returning referenced kptr. */
+ struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
+ struct cgroup *cgrp);
/* The following fields are used to test shadow copies. */
char onebyte;
@@ -110,4 +116,10 @@ struct bpf_testmod_st_ops {
struct module *owner;
};
+struct bpf_testmod_multi_st_ops {
+ int (*test_1)(struct st_ops_args *args);
+ struct hlist_node node;
+ int id;
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b58817938deb..4df6fa6a92cb 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
+struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
+int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index 4694422aa76c..88e4aeab21b7 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
/* Let's try detach it before it was ever attached */
ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
- if (ret != -1 || errno != ENOENT) {
+ if (ret != -ENOENT) {
printf("bpf_prog_detach2 not attached should fail: %m\n");
return 1;
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 53b06647cf57..338c035c3688 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
-#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -20,10 +19,12 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
@@ -37,9 +38,15 @@
#define TEST_TAG_JITED_PFX "comment:test_jited="
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
+#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
+#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
+#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
+#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
+#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
+#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
/* Warning: duplicated in bpf_misc.h */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -55,16 +62,9 @@ enum mode {
UNPRIV = 2
};
-struct expect_msg {
- const char *substr; /* substring match */
- regex_t regex;
- bool is_regex;
- bool on_next_line;
-};
-
-struct expected_msgs {
- struct expect_msg *patterns;
- size_t cnt;
+enum load_mode {
+ JITED = 1 << 0,
+ NO_JITED = 1 << 1,
};
struct test_subspec {
@@ -73,6 +73,8 @@ struct test_subspec {
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
struct expected_msgs jited;
+ struct expected_msgs stderr;
+ struct expected_msgs stdout;
int retval;
bool execute;
__u64 caps;
@@ -87,6 +89,8 @@ struct test_spec {
int prog_flags;
int mode_mask;
int arch_mask;
+ int load_mask;
+ int linear_sz;
bool auxiliary;
bool valid;
};
@@ -132,6 +136,10 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->unpriv.expect_xlated);
free_msgs(&spec->priv.jited);
free_msgs(&spec->unpriv.jited);
+ free_msgs(&spec->unpriv.stderr);
+ free_msgs(&spec->priv.stderr);
+ free_msgs(&spec->unpriv.stdout);
+ free_msgs(&spec->priv.stdout);
free(spec->priv.name);
free(spec->unpriv.name);
@@ -199,7 +207,8 @@ static int compile_regex(const char *pattern, regex_t *regex)
return 0;
}
-static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs)
+static int __push_msg(const char *pattern, bool on_next_line, bool negative,
+ struct expected_msgs *msgs)
{
struct expect_msg *msg;
void *tmp;
@@ -215,6 +224,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms
msg = &msgs->patterns[msgs->cnt];
msg->on_next_line = on_next_line;
msg->substr = pattern;
+ msg->negative = negative;
msg->is_regex = false;
if (strstr(pattern, "{{")) {
err = compile_regex(pattern, &msg->regex);
@@ -233,16 +243,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to)
for (i = 0; i < from->cnt; i++) {
msg = &from->patterns[i];
- err = __push_msg(msg->substr, msg->on_next_line, to);
+ err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to);
if (err)
return err;
}
return 0;
}
-static int push_msg(const char *substr, struct expected_msgs *msgs)
+static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs)
{
- return __push_msg(substr, false, msgs);
+ return __push_msg(substr, false, negative, msgs);
}
static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs)
@@ -253,7 +263,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp
*on_next_line = false;
return 0;
}
- err = __push_msg(regex_str, *on_next_line, msgs);
+ err = __push_msg(regex_str, *on_next_line, false, msgs);
if (err)
return err;
*on_next_line = true;
@@ -311,20 +321,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name)
static int parse_retval(const char *str, int *val, const char *name)
{
- struct {
- char *name;
- int val;
- } named_values[] = {
- { "INT_MIN" , INT_MIN },
- { "POINTER_VALUE", POINTER_VALUE },
- { "TEST_DATA_LEN", TEST_DATA_LEN },
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
- if (strcmp(str, named_values[i].name) != 0)
- continue;
- *val = named_values[i].val;
+ /*
+ * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a
+ * single int and cannot be parsed with strtol, so we handle it
+ * separately here. In addition, it expands to different expressions in
+ * different compilers so we use a prefixed _INT_MIN instead.
+ */
+ if (strcmp(str, "_INT_MIN") == 0) {
+ *val = INT_MIN;
return 0;
}
@@ -373,6 +377,7 @@ enum arch {
ARCH_X86_64 = 0x2,
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
+ ARCH_S390X = 0x10,
};
static int get_current_arch(void)
@@ -383,6 +388,8 @@ static int get_current_arch(void)
return ARCH_ARM64;
#elif defined(__riscv) && __riscv_xlen == 64
return ARCH_RISCV64;
+#elif defined(__s390x__)
+ return ARCH_S390X;
#endif
return ARCH_UNKNOWN;
}
@@ -403,9 +410,14 @@ static int parse_test_spec(struct test_loader *tester,
bool xlated_on_next_line = true;
bool unpriv_jit_on_next_line;
bool jit_on_next_line;
+ bool stderr_on_next_line = true;
+ bool unpriv_stderr_on_next_line = true;
+ bool stdout_on_next_line = true;
+ bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
+ u32 load_mask = 0;
struct btf *btf;
enum arch arch;
@@ -463,12 +475,22 @@ static int parse_test_spec(struct test_loader *tester,
spec->auxiliary = true;
spec->mode_mask |= UNPRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
- err = push_msg(msg, &spec->priv.expect_msgs);
+ err = push_msg(msg, false, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ err = push_msg(msg, true, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
- err = push_msg(msg, &spec->unpriv.expect_msgs);
+ err = push_msg(msg, false, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, true, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -563,8 +585,10 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_ARM64;
} else if (strcmp(val, "RISCV64") == 0) {
arch = ARCH_RISCV64;
+ } else if (strcmp(val, "s390x") == 0) {
+ arch = ARCH_S390X;
} else {
- PRINT_FAIL("bad arch spec: '%s'", val);
+ PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
goto cleanup;
}
@@ -580,10 +604,57 @@ static int parse_test_spec(struct test_loader *tester,
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
+ val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ if (strcmp(val, "jited") == 0) {
+ load_mask = JITED;
+ } else if (strcmp(val, "no_jited") == 0) {
+ load_mask = NO_JITED;
+ } else {
+ PRINT_FAIL("bad load spec: '%s'", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ err = push_disasm_msg(msg, &stderr_on_next_line,
+ &spec->priv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
+ &spec->unpriv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ err = push_disasm_msg(msg, &stdout_on_next_line,
+ &spec->priv.stdout);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
+ &spec->unpriv.stdout);
+ if (err)
+ goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+ switch (bpf_program__type(prog)) {
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
+ err = parse_int(val, &spec->linear_sz, "test linear size");
+ if (err)
+ goto cleanup;
+ break;
+ default:
+ PRINT_FAIL("__linear_size for unsupported program type");
+ err = -EINVAL;
+ goto cleanup;
+ }
}
}
spec->arch_mask = arch_mask ?: -1;
+ spec->load_mask = load_mask ?: (JITED | NO_JITED);
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
@@ -632,6 +703,10 @@ static int parse_test_spec(struct test_loader *tester,
clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
if (spec->unpriv.jited.cnt == 0)
clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
+ if (spec->unpriv.stderr.cnt == 0)
+ clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
+ if (spec->unpriv.stdout.cnt == 0)
+ clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
}
spec->valid = true;
@@ -693,44 +768,155 @@ static void emit_jited(const char *jited, bool force)
fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
}
-static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
- void (*emit_fn)(const char *buf, bool force))
+static void emit_stderr(const char *stderr, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr);
+}
+
+static void emit_stdout(const char *bpf_stdout, bool force)
{
- const char *log = log_buf, *prev_match;
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
+}
+
+static const char *match_msg(struct expect_msg *msg, const char **log)
+{
+ const char *match = NULL;
regmatch_t reg_match[1];
- int prev_match_line;
- int match_line;
- int i, j, err;
+ int err;
+
+ if (!msg->is_regex) {
+ match = strstr(*log, msg->substr);
+ if (match)
+ *log = match + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex, *log, 1, reg_match, 0);
+ if (err == 0) {
+ match = *log + reg_match[0].rm_so;
+ *log += reg_match[0].rm_eo;
+ }
+ }
+ return match;
+}
+
+static int count_lines(const char *start, const char *end)
+{
+ const char *tmp;
+ int n = 0;
+
+ for (tmp = start; tmp < end; ++tmp)
+ if (*tmp == '\n')
+ n++;
+ return n;
+}
+
+struct match {
+ const char *start;
+ const char *end;
+ int line;
+};
+
+/*
+ * Positive messages are matched sequentially, each next message
+ * is looked for starting from the end of a previous matched one.
+ */
+static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *prev_match;
+ int i, line;
- prev_match_line = -1;
- match_line = 0;
prev_match = log;
+ line = 0;
for (i = 0; i < msgs->cnt; i++) {
struct expect_msg *msg = &msgs->patterns[i];
- const char *match = NULL, *pat_status;
- bool wrong_line = false;
-
- if (!msg->is_regex) {
- match = strstr(log, msg->substr);
- if (match)
- log = match + strlen(msg->substr);
- } else {
- err = regexec(&msg->regex, log, 1, reg_match, 0);
- if (err == 0) {
- match = log + reg_match[0].rm_so;
- log += reg_match[0].rm_eo;
+ const char *match = NULL;
+
+ if (msg->negative)
+ continue;
+
+ match = match_msg(msg, &log);
+ if (match) {
+ line += count_lines(prev_match, match);
+ matches[i].start = match;
+ matches[i].end = log;
+ matches[i].line = line;
+ prev_match = match;
+ }
+ }
+}
+
+/*
+ * Each negative messages N located between positive messages P1 and P2
+ * is matched in the span P1.end .. P2.start. Consequently, negative messages
+ * are unordered within the span.
+ */
+static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *start = log, *end, *next, *match;
+ const char *log_end = log + strlen(log);
+ int i, j, next_positive;
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+
+ /* positive message bumps span start */
+ if (!msg->negative) {
+ start = matches[i].end ?: start;
+ continue;
+ }
+
+ /* count stride of negative patterns and adjust span end */
+ end = log_end;
+ for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) {
+ if (!msgs->patterns[next_positive].negative) {
+ end = matches[next_positive].start;
+ break;
}
}
- if (match) {
- for (; prev_match < match; ++prev_match)
- if (*prev_match == '\n')
- ++match_line;
- wrong_line = msg->on_next_line && prev_match_line >= 0 &&
- prev_match_line + 1 != match_line;
+ /* try matching negative messages within identified span */
+ for (j = i; j < next_positive; j++) {
+ next = start;
+ match = match_msg(msg, &next);
+ if (match && next <= end) {
+ matches[j].start = match;
+ matches[j].end = next;
+ }
}
- if (!match || wrong_line) {
+ /* -1 to account for i++ */
+ i = next_positive - 1;
+ }
+}
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force))
+{
+ struct match matches[msgs->cnt];
+ struct match *prev_match = NULL;
+ int i, j;
+
+ memset(matches, 0, sizeof(*matches) * msgs->cnt);
+ match_positive_msgs(log_buf, msgs, matches);
+ match_negative_msgs(log_buf, msgs, matches);
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ struct match *match = &matches[i];
+ const char *pat_status;
+ bool unexpected;
+ bool wrong_line;
+ bool no_match;
+
+ no_match = !msg->negative && !match->start;
+ wrong_line = !msg->negative &&
+ msg->on_next_line &&
+ prev_match && prev_match->line + 1 != match->line;
+ unexpected = msg->negative && match->start;
+ if (no_match || wrong_line || unexpected) {
PRINT_FAIL("expect_msg\n");
if (env.verbosity == VERBOSE_NONE)
emit_fn(log_buf, true /*force*/);
@@ -740,8 +926,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
pat_status = "MATCHED ";
else if (wrong_line)
pat_status = "WRONG LINE";
- else
+ else if (no_match)
pat_status = "EXPECTED ";
+ else
+ pat_status = "UNEXPECTED";
msg = &msgs->patterns[j];
fprintf(stderr, "%s %s: '%s'\n",
pat_status,
@@ -751,12 +939,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
if (wrong_line) {
fprintf(stderr,
"expecting match at line %d, actual match is at line %d\n",
- prev_match_line + 1, match_line);
+ prev_match->line + 1, match->line);
}
break;
}
- prev_match_line = match_line;
+ if (!msg->negative)
+ prev_match = match;
}
}
@@ -773,7 +962,7 @@ static int drop_capabilities(struct cap_state *caps)
err = cap_disable_effective(caps_to_drop, &caps->old_caps);
if (err) {
- PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err));
return err;
}
@@ -790,7 +979,7 @@ static int restore_capabilities(struct cap_state *caps)
err = cap_enable_effective(caps->old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err));
caps->initialized = false;
return err;
}
@@ -835,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
-static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
+ struct __sk_buff ctx = {};
int err, saved_errno;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = tmp_in,
@@ -848,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
.repeat = 1,
);
+ if (linear_sz) {
+ ctx.data_end = linear_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ }
+
if (empty_opts) {
memset(&topts, 0, sizeof(struct bpf_test_run_opts));
topts.sz = sizeof(struct bpf_test_run_opts);
@@ -915,6 +1111,19 @@ out:
return err;
}
+/* Read the bpf stream corresponding to the stream_id */
+static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
+{
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret;
+
+ ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ text[ret] = '\0';
+
+ return ret;
+}
+
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
@@ -928,6 +1137,7 @@ void run_subtest(struct test_loader *tester,
bool unpriv)
{
struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ int current_runtime = is_jit_enabled() ? JITED : NO_JITED;
struct bpf_program *tprog = NULL, *tprog_iter;
struct bpf_link *link, *links[32] = {};
struct test_spec *spec_iter;
@@ -946,6 +1156,11 @@ void run_subtest(struct test_loader *tester,
return;
}
+ if ((current_runtime & spec->load_mask) == 0) {
+ test__skip();
+ return;
+ }
+
if (unpriv) {
if (!can_execute_unpriv(tester, spec)) {
test__skip();
@@ -959,7 +1174,7 @@ void run_subtest(struct test_loader *tester,
if (subspec->caps) {
err = cap_enable_effective(subspec->caps, NULL);
if (err) {
- PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err));
goto subtest_cleanup;
}
}
@@ -1016,6 +1231,14 @@ void run_subtest(struct test_loader *tester,
emit_verifier_log(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
if (subspec->expect_xlated.cnt) {
err = get_xlated_program_text(bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
@@ -1041,12 +1264,6 @@ void run_subtest(struct test_loader *tester,
}
if (should_do_test_run(spec, subspec)) {
- /* For some reason test_verifier executes programs
- * with all capabilities restored. Do the same here.
- */
- if (restore_capabilities(&caps))
- goto tobj_cleanup;
-
/* Do bpf_map__attach_struct_ops() for each struct_ops map.
* This should trigger bpf_struct_ops->reg callback on kernel side.
*/
@@ -1061,7 +1278,7 @@ void run_subtest(struct test_loader *tester,
link = bpf_map__attach_struct_ops(map);
if (!link) {
PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n",
- bpf_map__name(map), err);
+ bpf_map__name(map), -errno);
goto tobj_cleanup;
}
links[links_cnt++] = link;
@@ -1075,12 +1292,38 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval,
- bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
- if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ err = do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
+ spec->linear_sz);
+ if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
+
+ if (subspec->stderr.cnt) {
+ err = get_stream(2, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stderr(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
+ }
+
+ if (subspec->stdout.cnt) {
+ err = get_stream(1, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stdout(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
+ }
+
/* redo bpf_map__attach_struct_ops for each test */
while (links_cnt > 0)
bpf_link__destroy(links[--links_cnt]);
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index fda7589c5023..0921939532c6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Derive target_free from map_size, same as bpf_common_lru_populate */
+static unsigned int __tgt_size(unsigned int map_size)
+{
+ return (map_size / nr_cpus) / 2;
+}
+
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that unreferenced are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain 1 to batch_size plus these latest, i.e.,
+ * => previous 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
@@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map is 2*tgt_free
- * It is to test the active/inactive list rotation
- * Insert 1 to 2*tgt_free (+2*tgt_free keys)
- * Lookup key 1 to tgt_free*3/2
- * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
- * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+/* Test the active/inactive list rotation
+ *
+ * Fill the whole map, deplete the free list.
+ * Reference all except the last lru->target_free elements.
+ * Insert lru->target_free new elements. This triggers one shrink.
+ * Verify that the non-referenced elements are replaced.
*/
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
assert(sched_next_online(0, &next_cpu) != -1);
- batch_size = tgt_free / 2;
- assert(batch_size * 2 == tgt_free);
+ batch_size = __tgt_size(tgt_free);
map_size = tgt_free * 2;
lru_map_fd = create_map(map_type, map_flags, map_size);
@@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
- end_key = 1 + (2 * tgt_free);
+ /* Fill the map */
+ end_key = 1 + map_size;
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup key 1 to tgt_free*3/2 */
- end_key = tgt_free + batch_size;
+ /* Reference all but the last batch_size */
+ end_key = 1 + map_size - batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
BPF_NOEXIST));
}
- /* Add 1+2*tgt_free to tgt_free*5/2
- * (+tgt_free/2 keys)
- */
+ /* Insert new batch_size: replaces the non-referenced elements */
key = 2 * tgt_free + 1;
end_key = key + batch_size;
for (; key < end_key; key++) {
@@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
deleted file mode 100755
index 1e565f47aca9..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Setup/topology:
-#
-# NS1 NS2 NS3
-# veth1 <---> veth2 veth3 <---> veth4 (the top route)
-# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
-#
-# each vethN gets IPv[4|6]_N address
-#
-# IPv*_SRC = IPv*_1
-# IPv*_DST = IPv*_4
-#
-# all tests test pings from IPv*_SRC to IPv*_DST
-#
-# by default, routes are configured to allow packets to go
-# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
-#
-# a GRE device is installed in NS3 with IPv*_GRE, and
-# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
-# (the bottom route)
-#
-# Tests:
-#
-# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
-# from IP*_SRC to IP*_DST can work is via IPv*_GRE
-#
-# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-#
-# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-
-BPF_FILE="test_lwt_ip_encap.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-
-readonly IPv4_1="172.16.1.100"
-readonly IPv4_2="172.16.2.100"
-readonly IPv4_3="172.16.3.100"
-readonly IPv4_4="172.16.4.100"
-readonly IPv4_5="172.16.5.100"
-readonly IPv4_6="172.16.6.100"
-readonly IPv4_7="172.16.7.100"
-readonly IPv4_8="172.16.8.100"
-readonly IPv4_GRE="172.16.16.100"
-
-readonly IPv4_SRC=$IPv4_1
-readonly IPv4_DST=$IPv4_4
-
-readonly IPv6_1="fb01::1"
-readonly IPv6_2="fb02::1"
-readonly IPv6_3="fb03::1"
-readonly IPv6_4="fb04::1"
-readonly IPv6_5="fb05::1"
-readonly IPv6_6="fb06::1"
-readonly IPv6_7="fb07::1"
-readonly IPv6_8="fb08::1"
-readonly IPv6_GRE="fb10::1"
-
-readonly IPv6_SRC=$IPv6_1
-readonly IPv6_DST=$IPv6_4
-
-TEST_STATUS=0
-TESTS_SUCCEEDED=0
-TESTS_FAILED=0
-
-TMPFILE=""
-
-process_test_results()
-{
- if [[ "${TEST_STATUS}" -eq 0 ]] ; then
- echo "PASS"
- TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
- else
- echo "FAIL"
- TESTS_FAILED=$((TESTS_FAILED+1))
- fi
-}
-
-print_test_summary_and_exit()
-{
- echo "passed tests: ${TESTS_SUCCEEDED}"
- echo "failed tests: ${TESTS_FAILED}"
- if [ "${TESTS_FAILED}" -eq "0" ] ; then
- exit 0
- else
- exit 1
- fi
-}
-
-setup()
-{
- set -e # exit on error
- TEST_STATUS=0
-
- # create devices and namespaces
- ip netns add "${NS1}"
- ip netns add "${NS2}"
- ip netns add "${NS3}"
-
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
- # disable IPv6 DAD because it sometimes takes too long and fails tests
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
-
- ip link add veth1 type veth peer name veth2
- ip link add veth3 type veth peer name veth4
- ip link add veth5 type veth peer name veth6
- ip link add veth7 type veth peer name veth8
-
- ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip link set veth1 netns ${NS1}
- ip link set veth2 netns ${NS2}
- ip link set veth3 netns ${NS2}
- ip link set veth4 netns ${NS3}
- ip link set veth5 netns ${NS1}
- ip link set veth6 netns ${NS2}
- ip link set veth7 netns ${NS2}
- ip link set veth8 netns ${NS3}
-
- if [ ! -z "${VRF}" ] ; then
- ip -netns ${NS1} link add red type vrf table 1001
- ip -netns ${NS1} link set red up
- ip -netns ${NS1} route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} link set veth1 vrf red
- ip -netns ${NS1} link set veth5 vrf red
-
- ip -netns ${NS2} link add red type vrf table 1001
- ip -netns ${NS2} link set red up
- ip -netns ${NS2} route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} link set veth2 vrf red
- ip -netns ${NS2} link set veth3 vrf red
- ip -netns ${NS2} link set veth6 vrf red
- ip -netns ${NS2} link set veth7 vrf red
- fi
-
- # configure addesses: the top route (1-2-3-4)
- ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
- ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
- ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
- ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
- ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
- ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
- ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
- ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
-
- # configure addresses: the bottom route (5-6-7-8)
- ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
- ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
- ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
- ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
- ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
- ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
- ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
- ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
-
- ip -netns ${NS1} link set dev veth1 up
- ip -netns ${NS2} link set dev veth2 up
- ip -netns ${NS2} link set dev veth3 up
- ip -netns ${NS3} link set dev veth4 up
- ip -netns ${NS1} link set dev veth5 up
- ip -netns ${NS2} link set dev veth6 up
- ip -netns ${NS2} link set dev veth7 up
- ip -netns ${NS3} link set dev veth8 up
-
- # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
- # the bottom route to specific bottom addresses
-
- # NS1
- # top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
- # bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
-
- # NS2
- # top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
- # bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
-
- # NS3
- # top route
- ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
- ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
- ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
- ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
- # bottom route
- ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
- ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
- ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
- ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
-
- # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
- ip -netns ${NS3} link set gre_dev up
- ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
-
-
- # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
- ip -netns ${NS3} link set gre6_dev up
- ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
-
- TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
-
- sleep 1 # reduce flakiness
- set +e
-}
-
-cleanup()
-{
- if [ -f ${TMPFILE} ] ; then
- rm ${TMPFILE}
- fi
-
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
-}
-
-trap cleanup EXIT
-
-remove_routes_to_gredev()
-{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
-}
-
-add_unreachable_routes_to_gredev()
-{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
-}
-
-test_ping()
-{
- local readonly PROTO=$1
- local readonly EXPECTED=$2
- local RET=0
-
- if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
- RET=$?
- elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
- RET=$?
- else
- echo " test_ping: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
-
- if [ "0" != "${RET}" ]; then
- RET=1
- fi
-
- if [ "${EXPECTED}" != "${RET}" ] ; then
- echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
- TEST_STATUS=1
- fi
-}
-
-test_gso()
-{
- local readonly PROTO=$1
- local readonly PKT_SZ=5000
- local IP_DST=""
- : > ${TMPFILE} # trim the capture file
-
- # check that nc is present
- command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available: skipping TSO tests"; return; }
-
- # listen on port 9000, capture TCP into $TMPFILE
- if [ "${PROTO}" == "IPv4" ] ; then
- IP_DST=${IPv4_DST}
- ip netns exec ${NS3} bash -c \
- "nc -4 -l -p 9000 > ${TMPFILE} &"
- elif [ "${PROTO}" == "IPv6" ] ; then
- IP_DST=${IPv6_DST}
- ip netns exec ${NS3} bash -c \
- "nc -6 -l -p 9000 > ${TMPFILE} &"
- RET=$?
- else
- echo " test_gso: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
- sleep 1 # let nc start listening
-
- # send a packet larger than MTU
- ip netns exec ${NS1} bash -c \
- "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
- sleep 2 # let the packet get delivered
-
- # verify we received all expected bytes
- SZ=$(stat -c %s ${TMPFILE})
- if [ "$SZ" != "$PKT_SZ" ] ; then
- echo " test_gso failed: ${PROTO}"
- TEST_STATUS=1
- fi
-}
-
-test_egress()
-{
- local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test ${VRF}"
- setup
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- else
- echo " unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # skip GSO tests with VRF: VRF routing needs properly assigned
- # source IP/device, which is easy to do with ping and hard with dd/nc.
- if [ -z "${VRF}" ] ; then
- test_gso IPv4
- test_gso IPv6
- fi
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-test_ingress()
-{
- local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test ${VRF}"
- setup
-
- # need to wait a bit for IPv6 to autoconf, otherwise
- # ping6 sometimes fails with "unable to bind to address"
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- else
- echo "FAIL: unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-VRF=""
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-VRF="vrf red"
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-print_test_summary_and_exit
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
deleted file mode 100755
index 0efea2292d6a..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-# Connects 6 network namespaces through veths.
-# Each NS may have different IPv6 global scope addresses :
-# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
-# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
-# fc42::1 fd00::4
-#
-# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
-# IPv6 header with a Segment Routing Header, with segments :
-# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
-#
-# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
-# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
-# - fd00::2 : remove the TLV, change the flags, add a tag
-# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
-#
-# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
-# Each End.BPF action will validate the operations applied on the SRH by the
-# previous BPF program in the chain, otherwise the packet is dropped.
-#
-# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
-# datagram can be read on NS6 when binding to fb00::6.
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-BPF_FILE="test_lwt_seg6local.bpf.o"
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-readonly NS4="ns4-$(mktemp -u XXXXXX)"
-readonly NS5="ns5-$(mktemp -u XXXXXX)"
-readonly NS6="ns6-$(mktemp -u XXXXXX)"
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_lwt_seg6local [PASS]";
- else
- echo "selftests: test_lwt_seg6local [FAILED]";
- fi
-
- set +e
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
- ip netns del ${NS4} 2> /dev/null
- ip netns del ${NS5} 2> /dev/null
- ip netns del ${NS6} 2> /dev/null
- rm -f $TMP_FILE
-}
-
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-ip netns add ${NS3}
-ip netns add ${NS4}
-ip netns add ${NS5}
-ip netns add ${NS6}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-ip link add veth3 type veth peer name veth4
-ip link add veth5 type veth peer name veth6
-ip link add veth7 type veth peer name veth8
-ip link add veth9 type veth peer name veth10
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-ip link set veth3 netns ${NS2}
-ip link set veth4 netns ${NS3}
-ip link set veth5 netns ${NS3}
-ip link set veth6 netns ${NS4}
-ip link set veth7 netns ${NS4}
-ip link set veth8 netns ${NS5}
-ip link set veth9 netns ${NS5}
-ip link set veth10 netns ${NS6}
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ${NS2} ip link set dev veth3 up
-ip netns exec ${NS3} ip link set dev veth4 up
-ip netns exec ${NS3} ip link set dev veth5 up
-ip netns exec ${NS4} ip link set dev veth6 up
-ip netns exec ${NS4} ip link set dev veth7 up
-ip netns exec ${NS5} ip link set dev veth8 up
-ip netns exec ${NS5} ip link set dev veth9 up
-ip netns exec ${NS6} ip link set dev veth10 up
-ip netns exec ${NS6} ip link set dev lo up
-
-# All link scope addresses and routes required between veths
-ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
-ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2
-ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4
-
-ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6
-ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
-ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8
-
-ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
-ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
-sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -TERM $!
-
-if [[ $(< $TMP_FILE) != "foobar" ]]; then
- exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 8b40e9496af1..ccc5acd55ff9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data)
static void test_queuemap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data)
static void test_stackmap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -1396,9 +1396,11 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static bool retry_for_again_or_busy(int err)
+static bool can_retry(int err)
{
- return (err == EAGAIN || err == EBUSY);
+ return (err == EAGAIN || err == EBUSY ||
+ ((err == ENOMEM || err == E2BIG) &&
+ map_opts.map_flags == BPF_F_NO_PREALLOC));
}
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
@@ -1451,12 +1453,12 @@ static void test_update_delete(unsigned int fn, void *data)
if (do_update) {
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c9e745d49493..02a85dda30e6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -14,12 +14,14 @@
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
+#include <linux/keyctl.h>
#include <sys/un.h>
#include <bpf/btf.h>
#include <time.h>
#include "json_writer.h"
#include "network_helpers.h"
+#include "verification_cert.h"
/* backtrace() and backtrace_symbols_fd() are glibc specific,
* use header file when glibc is available and provide stub
@@ -88,7 +90,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
#endif
}
-static void stdio_restore_cleanup(void)
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void stdio_restore(void)
{
#ifdef __GLIBC__
if (verbose() && env.worker_id == -1) {
@@ -98,34 +102,33 @@ static void stdio_restore_cleanup(void)
fflush(stdout);
+ pthread_mutex_lock(&stdout_lock);
+
if (env.subtest_state) {
- fclose(env.subtest_state->stdout_saved);
+ if (env.subtest_state->stdout_saved)
+ fclose(env.subtest_state->stdout_saved);
env.subtest_state->stdout_saved = NULL;
stdout = env.test_state->stdout_saved;
stderr = env.test_state->stdout_saved;
} else {
- fclose(env.test_state->stdout_saved);
+ if (env.test_state->stdout_saved)
+ fclose(env.test_state->stdout_saved);
env.test_state->stdout_saved = NULL;
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
}
+
+ pthread_mutex_unlock(&stdout_lock);
#endif
}
-static void stdio_restore(void)
+static int traffic_monitor_print_fn(const char *format, va_list args)
{
-#ifdef __GLIBC__
- if (verbose() && env.worker_id == -1) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- if (stdout == env.stdout_saved)
- return;
-
- stdio_restore_cleanup();
+ pthread_mutex_lock(&stdout_lock);
+ vfprintf(stdout, format, args);
+ pthread_mutex_unlock(&stdout_lock);
- stdout = env.stdout_saved;
- stderr = env.stderr_saved;
-#endif
+ return 0;
}
/* Adapted from perf/util/string.c */
@@ -474,8 +477,6 @@ static void dump_test_log(const struct prog_test_def *test,
print_test_result(test, test_state);
}
-static void stdio_restore(void);
-
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
@@ -490,13 +491,11 @@ static void reset_affinity(void)
err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -514,7 +513,6 @@ static void save_netns(void)
static void restore_netns(void)
{
if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
- stdio_restore();
perror("setns(CLONE_NEWNS)");
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -541,7 +539,8 @@ void test__end_subtest(void)
test_result(subtest_state->error_cnt,
subtest_state->skipped));
- stdio_restore_cleanup();
+ stdio_restore();
+
env.subtest_state = NULL;
}
@@ -1270,8 +1269,10 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.stdout_saved)
- stdio_restore();
+ fflush(stdout);
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+
if (env.test) {
env.test_state->error_cnt++;
dump_test_log(env.test, env.test_state, true, false, NULL);
@@ -1365,10 +1366,19 @@ static int recv_message(int sock, struct msg *msg)
return ret;
}
+static bool ns_is_needed(const char *test_name)
+{
+ if (strlen(test_name) < 3)
+ return false;
+
+ return !strncmp(test_name, "ns_", 3);
+}
+
static void run_one_test(int test_num)
{
struct prog_test_def *test = &prog_test_defs[test_num];
struct test_state *state = &test_states[test_num];
+ struct netns_obj *ns = NULL;
env.test = test;
env.test_state = state;
@@ -1376,10 +1386,13 @@ static void run_one_test(int test_num)
stdio_hijack(&state->log_buf, &state->log_cnt);
watchdog_start();
+ if (ns_is_needed(test->test_name))
+ ns = netns_new(test->test_name, true);
if (test->run_test)
test->run_test();
else if (test->run_serial_test)
test->run_serial_test();
+ netns_free(ns);
watchdog_stop();
/* ensure last sub-test is finalized properly */
@@ -1388,6 +1401,8 @@ static void run_one_test(int test_num)
state->tested = true;
+ stdio_restore();
+
if (verbose() && env.worker_id == -1)
print_test_result(test, state);
@@ -1396,7 +1411,6 @@ static void run_one_test(int test_num)
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
- stdio_restore();
free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
@@ -1916,6 +1930,13 @@ static void free_test_states(void)
}
}
+static __u32 register_session_key(const char *key_data, size_t key_data_size)
+{
+ return syscall(__NR_add_key, "asymmetric", "libbpf_session_key",
+ (const void *)key_data, key_data_size,
+ KEY_SPEC_SESSION_KEYRING);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -1931,6 +1952,9 @@ int main(int argc, char **argv)
sigaction(SIGSEGV, &sigact, NULL);
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
env.secs_till_notify = 10;
env.secs_till_kill = 120;
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -1946,6 +1970,12 @@ int main(int argc, char **argv)
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ err = register_session_key((const char *)test_progs_verification_cert,
+ test_progs_verification_cert_len);
+ if (err < 0)
+ return err;
+
+ traffic_monitor_set_print(traffic_monitor_print_fn);
srand(time(NULL));
@@ -1957,9 +1987,6 @@ int main(int argc, char **argv)
return -1;
}
- env.stdout_saved = stdout;
- env.stderr_saved = stderr;
-
env.has_testmod = true;
if (!env.list_test_names) {
/* ensure previous instance of the module is unloaded */
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 404d0d4915d5..eebfc18cdcd2 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -7,6 +7,7 @@
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <regex.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -427,6 +428,14 @@ void hexdump(const char *prefix, const void *buf, size_t len);
goto goto_label; \
})
+#define SYS_FAIL(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_NEQ(0, system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
#define SYS_NOFAIL(fmt, ...) \
@@ -452,6 +461,34 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *) (unsigned long) ptr;
}
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
@@ -510,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester);
test_loader_fini(&tester); \
})
+struct expect_msg {
+ const char *substr; /* substring match */
+ regex_t regex;
+ bool is_regex;
+ bool on_next_line;
+ bool negative;
+};
+
+struct expected_msgs {
+ struct expect_msg *patterns;
+ size_t cnt;
+};
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force));
+
#endif /* __TEST_PROGS_H */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index fd2da2234cc9..76568db7a664 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1372,7 +1372,7 @@ run:
} else
fprintf(stderr, "unknown test\n");
out:
- /* Detatch and zero all the maps */
+ /* Detach and zero all the maps */
bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
for (i = 0; i < ARRAY_SIZE(links); i++) {
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 5546b05a0486..f1300047c1e0 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -116,7 +116,7 @@ static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
static const struct sockaddr_alg alg = {
.salg_family = AF_ALG,
.salg_type = "hash",
- .salg_name = "sha1",
+ .salg_name = "sha256",
};
int fd_base, fd_alg, ret;
ssize_t size;
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
deleted file mode 100755
index 76f0bd17061f..000000000000
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test installs a TC bpf program that throttles a TCP flow
-# with dst port = 9000 down to 5MBps. Then it measures actual
-# throughput of the flow.
-
-BPF_FILE="test_tc_edt.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that nc, dd, and timeout are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP_SRC="172.16.1.100"
-readonly IP_DST="172.16.2.100"
-
-cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_DST}
-}
-
-trap cleanup EXIT
-
-set -e # exit on error
-
-ip netns add "${NS_SRC}"
-ip netns add "${NS_DST}"
-ip link add veth_src type veth peer name veth_dst
-ip link set veth_src netns ${NS_SRC}
-ip link set veth_dst netns ${NS_DST}
-
-ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
-ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
-
-ip -netns ${NS_SRC} link set dev veth_src up
-ip -netns ${NS_DST} link set dev veth_dst up
-
-ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
-ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
-
-# set up TC on TX
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
-ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
- bpf da obj ${BPF_FILE} sec cls_test
-
-
-# start the listener
-ip netns exec ${NS_DST} bash -c \
- "nc -4 -l -p 9000 >/dev/null &"
-declare -i NC_PID=$!
-sleep 1
-
-declare -ir TIMEOUT=20
-declare -ir EXPECTED_BPS=5000000
-
-# run the load, capture RX bytes on DST
-declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-set +e
-ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
- bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
-set -e
-
-declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
-
-echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
- awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
- $1, ($2-$3)*100.0/$3}'
-
-# Pass the test if the actual bps is within 1% of the expected bps.
-# The difference is usually about 0.1% on a 20-sec test, and ==> zero
-# the longer the test runs.
-declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
- awk 'function abs(x){return ((x < 0.0) ? -x : x)}
- {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
- else { print "0"} }' )
-if [ "${RES}" == "0" ] ; then
- echo "PASS"
-else
- echo "FAIL"
- exit 1
-fi
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
deleted file mode 100755
index cb55a908bb0d..000000000000
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ /dev/null
@@ -1,320 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# In-place tunneling
-
-BPF_FILE="test_tc_tunnel.bpf.o"
-# must match the port that the bpf program filters on
-readonly port=8000
-
-readonly ns_prefix="ns-$$-"
-readonly ns1="${ns_prefix}1"
-readonly ns2="${ns_prefix}2"
-
-readonly ns1_v4=192.168.1.1
-readonly ns2_v4=192.168.1.2
-readonly ns1_v6=fd::1
-readonly ns2_v6=fd::2
-
-# Must match port used by bpf program
-readonly udpport=5555
-# MPLSoverUDP
-readonly mplsudpport=6635
-readonly mplsproto=137
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-
-setup() {
- ip netns add "${ns1}"
- ip netns add "${ns2}"
-
- ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
- peer name veth2 mtu 1500 netns "${ns2}"
-
- ip netns exec "${ns1}" ethtool -K veth1 tso off
-
- ip -netns "${ns1}" link set veth1 up
- ip -netns "${ns2}" link set veth2 up
-
- ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
- ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
- ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
- ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
-
- # clamp route to reserve room for tunnel headers
- ip -netns "${ns1}" -4 route flush table main
- ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
-
- sleep 1
-
- dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
- ip netns del "${ns2}"
- ip netns del "${ns1}"
-
- if [[ -f "${outfile}" ]]; then
- rm "${outfile}"
- fi
- if [[ -f "${infile}" ]]; then
- rm "${infile}"
- fi
-
- if [[ -n $server_pid ]]; then
- kill $server_pid 2> /dev/null
- fi
-}
-
-server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
- server_pid=$!
-}
-
-client_connect() {
- ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
- echo $?
-}
-
-verify_data() {
- wait "${server_pid}"
- server_pid=
- # sha1sum returns two fields [sha1] [filepath]
- # convert to bash array and access first elem
- insum=($(sha1sum ${infile}))
- outsum=($(sha1sum ${outfile}))
- if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
- echo "data mismatch"
- exit 1
- fi
-}
-
-wait_for_port() {
- for i in $(seq 20); do
- if ip netns exec "${ns2}" ss ${2:--4}OHntl | grep -q "$1"; then
- return 0
- fi
- sleep 0.1
- done
- return 1
-}
-
-set -e
-
-# no arguments: automated test, run all
-if [[ "$#" -eq "0" ]]; then
- echo "ipip"
- $0 ipv4 ipip none 100
-
- echo "ipip6"
- $0 ipv4 ipip6 none 100
-
- echo "ip6ip6"
- $0 ipv6 ip6tnl none 100
-
- echo "sit"
- $0 ipv6 sit none 100
-
- echo "ip4 vxlan"
- $0 ipv4 vxlan eth 2000
-
- echo "ip6 vxlan"
- $0 ipv6 ip6vxlan eth 2000
-
- for mac in none mpls eth ; do
- echo "ip gre $mac"
- $0 ipv4 gre $mac 100
-
- echo "ip6 gre $mac"
- $0 ipv6 ip6gre $mac 100
-
- echo "ip gre $mac gso"
- $0 ipv4 gre $mac 2000
-
- echo "ip6 gre $mac gso"
- $0 ipv6 ip6gre $mac 2000
-
- echo "ip udp $mac"
- $0 ipv4 udp $mac 100
-
- echo "ip6 udp $mac"
- $0 ipv6 ip6udp $mac 100
-
- echo "ip udp $mac gso"
- $0 ipv4 udp $mac 2000
-
- echo "ip6 udp $mac gso"
- $0 ipv6 ip6udp $mac 2000
- done
-
- echo "OK. All tests passed"
- exit 0
-fi
-
-if [[ "$#" -ne "4" ]]; then
- echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
- exit 1
-fi
-
-case "$1" in
-"ipv4")
- readonly addr1="${ns1_v4}"
- readonly addr2="${ns2_v4}"
- readonly ipproto=4
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou
- readonly foutype=ipip
- readonly fouproto=4
- readonly fouproto_mpls=${mplsproto}
- readonly gretaptype=gretap
- ;;
-"ipv6")
- readonly addr1="${ns1_v6}"
- readonly addr2="${ns2_v6}"
- readonly ipproto=6
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou6
- readonly foutype=ip6tnl
- readonly fouproto="41 -6"
- readonly fouproto_mpls="${mplsproto} -6"
- readonly gretaptype=ip6gretap
- ;;
-*)
- echo "unknown arg: $1"
- exit 1
- ;;
-esac
-
-readonly tuntype=$2
-readonly mac=$3
-readonly datalen=$4
-
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
-
-trap cleanup EXIT
-
-setup
-
-# basic communication works
-echo "test basic connectivity"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-client_connect
-verify_data
-
-# clientside, insert bpf program to encap all TCP to port ${port}
-# client can no longer connect
-ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
-ip netns exec "${ns1}" tc filter add dev veth1 egress \
- bpf direct-action object-file ${BPF_FILE} \
- section "encap_${tuntype}_${mac}"
-echo "test bpf encap without decap (expect failure)"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-! client_connect
-
-if [[ "$tuntype" =~ "udp" ]]; then
- # Set up fou tunnel.
- ttype="${foutype}"
- targs="encap fou encap-sport auto encap-dport $udpport"
- # fou may be a module; allow this to fail.
- modprobe "${foumod}" ||true
- if [[ "$mac" == "mpls" ]]; then
- dport=${mplsudpport}
- dproto=${fouproto_mpls}
- tmode="mode any ttl 255"
- else
- dport=${udpport}
- dproto=${fouproto}
- fi
- ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
- targs="encap fou encap-sport auto encap-dport $dport"
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
- ttype=$gretaptype
-elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
- ttype="vxlan"
- targs="id 1 dstport 8472 udp6zerocsumrx"
-elif [[ "$tuntype" == "ipip6" ]]; then
- ttype="ip6tnl"
- targs=""
-else
- ttype=$tuntype
- targs=""
-fi
-
-# tunnel address family differs from inner for SIT
-if [[ "${tuntype}" == "sit" ]]; then
- link_addr1="${ns1_v4}"
- link_addr2="${ns2_v4}"
-elif [[ "${tuntype}" == "ipip6" ]]; then
- link_addr1="${ns1_v6}"
- link_addr2="${ns2_v6}"
-else
- link_addr1="${addr1}"
- link_addr2="${addr2}"
-fi
-
-# serverside, insert decap module
-# server is still running
-# client can connect again
-ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
- ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
-
-expect_tun_fail=0
-
-if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
- # No support for MPLS IPv6 fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
- # No support for TEB fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
- # Share ethernet address between tunnel/veth2 so L2 decap works.
- ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
- awk '/ether/ { print $2 }')
- ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
-elif [[ "$mac" == "mpls" ]]; then
- modprobe mpls_iptunnel ||true
- modprobe mpls_gso ||true
- ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
- ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
- ip netns exec "${ns2}" ip link set lo up
- ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
-fi
-
-# Because packets are decapped by the tunnel they arrive on testtun0 from
-# the IP stack perspective. Ensure reverse path filtering is disabled
-# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
-# expected veth2 (veth2 is where 192.168.1.2 is configured).
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-# rp needs to be disabled for both all and testtun0 as the rp value is
-# selected as the max of the "all" and device-specific values.
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
-ip netns exec "${ns2}" ip link set dev testtun0 up
-if [[ "$expect_tun_fail" == 1 ]]; then
- # This tunnel mode is not supported, so we expect failure.
- echo "test bpf encap with tunnel device decap (expect failure)"
- ! client_connect
-else
- echo "test bpf encap with tunnel device decap"
- client_connect
- verify_data
- server_listen
- wait_for_port ${port} ${netcat_opt}
-fi
-
-# serverside, use BPF for decap
-ip netns exec "${ns2}" ip link del dev testtun0
-ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-ip netns exec "${ns2}" tc filter add dev veth2 ingress \
- bpf direct-action object-file ${BPF_FILE} section decap
-echo "test bpf encap with bpf decap"
-client_connect
-verify_data
-
-echo OK
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 595194453ff8..35b4893ccdf8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -15,20 +15,18 @@
#include <bpf/libbpf.h>
#include <sys/ioctl.h>
#include <linux/rtnetlink.h>
-#include <signal.h>
#include <linux/perf_event.h>
-#include <linux/err.h>
-#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
-#include "trace_helpers.h"
#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
pthread_t tid;
+static bool exit_thread;
+
int rx_callbacks;
static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
@@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb)
{
int err;
- while (1) {
+ while (!exit_thread) {
err = perf_buffer__poll(pb, 100);
if (err < 0 && err != -EINTR) {
printf("failed perf_buffer__poll: %d\n", err);
@@ -78,15 +76,10 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
char test_script[80];
- cpu_set_t cpuset;
__u32 key = 0;
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
-
cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
@@ -151,6 +144,13 @@ int main(int argc, char **argv)
sleep(10);
+ exit_thread = true;
+ int ret = pthread_join(tid, NULL);
+ if (ret) {
+ printf("FAILED: pthread_join\n");
+ goto err;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats Expected %d calls, got %d\n",
g.ncalls, rx_callbacks);
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
deleted file mode 100755
index d9661b9988ba..000000000000
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ /dev/null
@@ -1,645 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# End-to-end eBPF tunnel test suite
-# The script tests BPF network tunnel implementation.
-#
-# Topology:
-# ---------
-# root namespace | at_ns0 namespace
-# |
-# ----------- | -----------
-# | tnl dev | | | tnl dev | (overlay network)
-# ----------- | -----------
-# metadata-mode | native-mode
-# with bpf |
-# |
-# ---------- | ----------
-# | veth1 | --------- | veth0 | (underlay network)
-# ---------- peer ----------
-#
-#
-# Device Configuration
-# --------------------
-# Root namespace with metadata-mode tunnel + BPF
-# Device names and addresses:
-# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
-#
-# Namespace at_ns0 with native tunnel
-# Device names and addresses:
-# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
-#
-#
-# End-to-end ping packet flow
-# ---------------------------
-# Most of the tests start by namespace creation, device configuration,
-# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
-# from root namespace, the following operations happen:
-# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
-# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-# with remote_ip=172.16.1.100 and others.
-# 3) Outer tunnel header is prepended and route the packet to veth1's egress
-# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
-# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
-# 6) Forward the packet to the overlay tnl dev
-
-BPF_FILE="test_tunnel_kern.bpf.o"
-BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
-PING_ARG="-c 3 -w 10 -q"
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-config_device()
-{
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-add_gre_tunnel()
-{
- tun_key=
- if [ -n "$1" ]; then
- tun_key="key $1"
- fi
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq $tun_key \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE $tun_key external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6gretap_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-add_erspan_tunnel()
-{
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6erspan_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_geneve_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6geneve_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 \
- remote ::22 # geneve has no local option
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_ipip_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6tnl_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV 1::22/96
- ip link set dev $DEV up
-}
-
-test_gre()
-{
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel 2
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_gre_no_tunnel_key()
-{
- TYPE=gre
- DEV_NS=gre00
- DEV=gre11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gre()
-{
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- ret=0
-
- check $TYPE
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gretap()
-{
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- ret=0
-
- check $TYPE
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_erspan()
-{
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- ret=0
-
- check $TYPE
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6erspan()
-{
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 $PING_ARG ::11
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_geneve()
-{
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- ret=0
-
- check $TYPE
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6geneve()
-{
- TYPE=geneve
- DEV_NS=ip6geneve00
- DEV=ip6geneve11
- ret=0
-
- check $TYPE
- config_device
- add_ip6geneve_tunnel
- attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_ipip()
-{
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- ret=0
-
- check $TYPE
- config_device
- add_ipip_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ipip6()
-{
- TYPE=ip6tnl
- DEV_NS=ipip6tnl00
- DEV=ipip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6ip6()
-{
- TYPE=ip6tnl
- DEV_NS=ip6ip6tnl00
- DEV=ip6ip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip6 over ip6
- ping6 $PING_ARG 1::11
- check_err $?
- ip netns exec at_ns0 ping6 $PING_ARG 1::22
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-attach_bpf()
-{
- DEV=$1
- SET=$2
- GET=$3
- mkdir -p ${BPF_PIN_TUNNEL_DIR}
- bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
- tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
-}
-
-cleanup()
-{
- rm -rf ${BPF_PIN_TUNNEL_DIR}
-
- ip netns delete at_ns0 2> /dev/null
- ip link del veth1 2> /dev/null
- ip link del ipip11 2> /dev/null
- ip link del ipip6tnl11 2> /dev/null
- ip link del ip6ip6tnl11 2> /dev/null
- ip link del gretap11 2> /dev/null
- ip link del gre11 2> /dev/null
- ip link del ip6gre11 2> /dev/null
- ip link del ip6gretap11 2> /dev/null
- ip link del geneve11 2> /dev/null
- ip link del ip6geneve11 2> /dev/null
- ip link del erspan11 2> /dev/null
- ip link del ip6erspan11 2> /dev/null
-}
-
-cleanup_exit()
-{
- echo "CATCH SIGKILL or SIGINT, cleanup and exit"
- cleanup
- exit 0
-}
-
-check()
-{
- ip link help 2>&1 | grep -q "\s$1\s"
- if [ $? -ne 0 ];then
- echo "SKIP $1: iproute2 not support"
- cleanup
- return 1
- fi
-}
-
-enable_debug()
-{
- echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
-}
-
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
-
-bpf_tunnel_test()
-{
- local errors=0
-
- echo "Testing GRE tunnel..."
- test_gre
- errors=$(( $errors + $? ))
-
- echo "Testing GRE tunnel (without tunnel keys)..."
- test_gre_no_tunnel_key
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRE tunnel..."
- test_ip6gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRETAP tunnel..."
- test_ip6gretap
- errors=$(( $errors + $? ))
-
- echo "Testing ERSPAN tunnel..."
- test_erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing IP6ERSPAN tunnel..."
- test_ip6erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing GENEVE tunnel..."
- test_geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GENEVE tunnel..."
- test_ip6geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP tunnel..."
- test_ipip
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP6 tunnel..."
- test_ipip6
- errors=$(( $errors + $? ))
-
- echo "Testing IP6IP6 tunnel..."
- test_ip6ip6
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup 0 3 6
-trap cleanup_exit 2 9
-
-cleanup
-bpf_tunnel_test
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 447b68509d76..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static char bpf_vlog[UINT_MAX >> 8];
+static char bpf_vlog[UINT_MAX >> 5];
static int load_btf_spec(__u32 *types, int types_len,
const char *strings, int strings_len)
@@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->errstr_unpriv : test->errstr;
opts.expected_attach_type = test->expected_attach_type;
- if (verbose)
- opts.log_level = verif_log_level | 4; /* force stats */
- else if (expected_ret == VERBOSE_ACCEPT)
+ if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
deleted file mode 100755
index 4c3c3fdd2d73..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test topology:
-# - - - - - - - - - - - - - - - - - - -
-# | veth1 veth2 veth3 | ns0
-# - -| - - - - - - | - - - - - - | - -
-# --------- --------- ---------
-# | veth0 | | veth0 | | veth0 |
-# --------- --------- ---------
-# ns1 ns2 ns3
-#
-# Test modules:
-# XDP modes: generic, native, native + egress_prog
-#
-# Test cases:
-# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
-# the redirects.
-# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
-# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
-# interface should not receive the redirects.
-# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
-# IPv6: Testing none flag, all the pkts should be redirected back
-# ping test: ns1 -> ns2 (block), echo requests will be redirect back
-# egress_prog:
-# all src mac should be egress interface's mac
-
-# netns numbers
-NUM=3
-IFACES=""
-DRV_MODE="xdpgeneric xdpdrv xdpegress"
-PASS=0
-FAIL=0
-LOG_DIR=$(mktemp -d)
-declare -a NS
-NS[0]="ns0-$(mktemp -u XXXXXX)"
-NS[1]="ns1-$(mktemp -u XXXXXX)"
-NS[2]="ns2-$(mktemp -u XXXXXX)"
-NS[3]="ns3-$(mktemp -u XXXXXX)"
-
-test_pass()
-{
- echo "Pass: $@"
- PASS=$((PASS + 1))
-}
-
-test_fail()
-{
- echo "fail: $@"
- FAIL=$((FAIL + 1))
-}
-
-clean_up()
-{
- for i in $(seq 0 $NUM); do
- ip netns del ${NS[$i]} 2> /dev/null
- done
-}
-
-# Kselftest framework requirement - SKIP code is 4.
-check_env()
-{
- ip link set dev lo xdpgeneric off &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 4
- fi
-
- which tcpdump &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without tcpdump"
- exit 4
- fi
-}
-
-setup_ns()
-{
- local mode=$1
- IFACES=""
-
- if [ "$mode" = "xdpegress" ]; then
- mode="xdpdrv"
- fi
-
- ip netns add ${NS[0]}
- for i in $(seq $NUM); do
- ip netns add ${NS[$i]}
- ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
- ip -n ${NS[$i]} link set veth0 up
- ip -n ${NS[0]} link set veth$i up
-
- ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
- ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
- # Add a neigh entry for IPv4 ping test
- ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ${NS[$i]} link set veth0 $mode obj \
- xdp_dummy.bpf.o sec xdp &> /dev/null || \
- { test_fail "Unable to load dummy xdp" && exit 1; }
- IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
- done
-}
-
-do_egress_tests()
-{
- local mode=$1
-
- # mac test
- ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
- sleep 0.5
- ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # mac check
- grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
- test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
- grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
- test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
-}
-
-do_ping_tests()
-{
- local mode=$1
-
- # ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
-
- ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
- sleep 0.5
- # ARP test
- ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
- # IPv4 test
- ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
- # IPv6 test
- ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # All netns should receive the redirect arp requests
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-1" || \
- test_fail "$mode arp(F_BROADCAST) ns1-1"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-2" || \
- test_fail "$mode arp(F_BROADCAST) ns1-2"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-3" || \
- test_fail "$mode arp(F_BROADCAST) ns1-3"
-
- # ns1 should not receive the redirect echo request, others should
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
-
- # ns1 should receive the echo request, ns2 should not
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv6 (no flags) ns1-1" || \
- test_fail "$mode IPv6 (no flags) ns1-1"
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
- test_pass "$mode IPv6 (no flags) ns1-2" || \
- test_fail "$mode IPv6 (no flags) ns1-2"
-}
-
-do_tests()
-{
- local mode=$1
- local drv_p
-
- case ${mode} in
- xdpdrv) drv_p="-N";;
- xdpegress) drv_p="-X";;
- xdpgeneric) drv_p="-S";;
- esac
-
- ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
- xdp_pid=$!
- sleep 1
- if ! ps -p $xdp_pid > /dev/null; then
- test_fail "$mode xdp_redirect_multi start failed"
- return 1
- fi
-
- if [ "$mode" = "xdpegress" ]; then
- do_egress_tests $mode
- else
- do_ping_tests $mode
- fi
-
- kill $xdp_pid
-}
-
-check_env
-
-trap clean_up EXIT
-
-for mode in ${DRV_MODE}; do
- setup_ns $mode
- do_tests $mode
- clean_up
-done
-rm -rf ${LOG_DIR}
-
-echo "Summary: PASS $PASS, FAIL $FAIL"
-[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
deleted file mode 100755
index fbcaa9f0120b..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Author: Jesper Dangaard Brouer <hawk@kernel.org>
-
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-# Allow wrapper scripts to name test
-if [ -z "$TESTNAME" ]; then
- TESTNAME=xdp_vlan
-fi
-
-# Default XDP mode
-XDP_MODE=xdpgeneric
-
-usage() {
- echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
- echo ""
- echo "Usage: $0 [-vfh]"
- echo " -v | --verbose : Verbose"
- echo " --flush : Flush before starting (e.g. after --interactive)"
- echo " --interactive : Keep netns setup running after test-run"
- echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)"
- echo ""
-}
-
-valid_xdp_mode()
-{
- local mode=$1
-
- case "$mode" in
- xdpgeneric | xdpdrv | xdp)
- return 0
- ;;
- *)
- return 1
- esac
-}
-
-cleanup()
-{
- local status=$?
-
- if [ "$status" = "0" ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- if [ -n "$INTERACTIVE" ]; then
- echo "Namespace setup still active explore with:"
- echo " ip netns exec ${NS1} bash"
- echo " ip netns exec ${NS2} bash"
- exit $status
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-# Using external program "getopt" to get --long-options
-OPTIONS=$(getopt -o hvfi: \
- --long verbose,flush,help,interactive,debug,mode: -- "$@")
-if (( $? != 0 )); then
- usage
- echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
- exit 2
-fi
-eval set -- "$OPTIONS"
-
-## --- Parse command line arguments / parameters ---
-while true; do
- case "$1" in
- -v | --verbose)
- export VERBOSE=yes
- shift
- ;;
- -i | --interactive | --debug )
- INTERACTIVE=yes
- shift
- ;;
- -f | --flush )
- cleanup
- shift
- ;;
- --mode )
- shift
- XDP_MODE=$1
- shift
- ;;
- -- )
- shift
- break
- ;;
- -h | --help )
- usage;
- echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit $KSFT_SKIP
- ;;
- * )
- shift
- break
- ;;
- esac
-done
-
-if [ "$EUID" -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] need root privileges"
- exit 1
-fi
-
-valid_xdp_mode $XDP_MODE
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)"
- exit 1
-fi
-
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit $KSFT_SKIP
-fi
-
-# Interactive mode likely require us to cleanup netns
-if [ -n "$INTERACTIVE" ]; then
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-fi
-
-# Exit on failure
-set -e
-
-# Some shell-tools dependencies
-which ip > /dev/null
-which tc > /dev/null
-which ethtool > /dev/null
-
-# Make rest of shell verbose, showing comments as doc/info
-if [ -n "$VERBOSE" ]; then
- set -v
-fi
-
-# Create two namespaces
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-# Run cleanup if failing or on kill
-trap cleanup 0 2 3 6 9
-
-# Create veth pair
-ip link add veth1 type veth peer name veth2
-
-# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-# NOTICE: XDP require VLAN header inside packet payload
-# - Thus, disable VLAN offloading driver features
-# - For veth REMEMBER TX side VLAN-offload
-#
-# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ${NS1} ethtool -K veth1 rxvlan off
-ip netns exec ${NS2} ethtool -K veth2 rxvlan off
-#
-# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ${NS2} ethtool -K veth2 txvlan off
-ip netns exec ${NS1} ethtool -K veth1 txvlan off
-
-export IPADDR1=100.64.41.1
-export IPADDR2=100.64.41.2
-
-# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ${NS1} ip link set veth1 up
-
-# In ns2/veth2 create VLAN device
-export VLAN=4011
-export DEVNS2=veth2
-ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ${NS2} ip link set $DEVNS2 up
-ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
-
-# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ${NS1} ip link set lo up
-ip netns exec ${NS2} ip link set lo up
-
-# At this point, the hosts cannot reach each-other,
-# because ns2 are using VLAN tags on the packets.
-
-ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
-
-
-# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
-# ----------------------------------------------------------------------
-# In ns1: ingress use XDP to remove VLAN tags
-export DEVNS1=veth1
-export BPF_FILE=test_xdp_vlan.bpf.o
-
-# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
-export XDP_PROG=xdp_vlan_change
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# In ns1: egress use TC to add back VLAN tag 4011
-# (del cmd)
-# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
-#
-ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
-ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push
-
-# Now the namespaces can reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-
-# Second test: Replace xdp prog, that fully remove vlan header
-#
-# Catch kernel bug for generic-XDP, that does didn't allow us to
-# remove a VLAN header, because skb->protocol still contain VLAN
-# ETH_P_8021Q indication, and this cause overwriting of our changes.
-#
-export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
deleted file mode 100755
index c515326d6d59..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test generic-XDP
-export TESTNAME=xdp_vlan_mode_generic
-./test_xdp_vlan.sh --mode=xdpgeneric
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
deleted file mode 100755
index 5cf7ce1f16c1..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test native-XDP
-export TESTNAME=xdp_vlan_mode_native
-./test_xdp_vlan.sh --mode=xdpdrv
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 65aafe0003db..62db060298a4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -241,4 +241,6 @@ done
if [ $failures -eq 0 ]; then
echo "All tests successful!"
+else
+ exit 1
fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 5e9f16683be5..16eb37e5bad6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose)
return 0;
}
-int load_module(const char *path, bool verbose)
+static int __load_module(const char *path, const char *param_values, bool verbose)
{
int fd;
@@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose)
fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno);
return -ENOENT;
}
- if (finit_module(fd, "", 0)) {
+ if (finit_module(fd, param_values, 0)) {
fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno);
close(fd);
return -EINVAL;
@@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose)
return 0;
}
+int load_module_params(const char *path, const char *param_values, bool verbose)
+{
+ return __load_module(path, param_values, verbose);
+}
+
+int load_module(const char *path, bool verbose)
+{
+ return __load_module(path, "", verbose);
+}
+
int unload_bpf_testmod(bool verbose)
{
return unload_module("bpf_testmod", verbose);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 46d7f7089f63..eb20d3772218 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -39,6 +39,7 @@ int kern_sync_rcu(void);
int finit_module(int fd, const char *param_values, int flags);
int delete_module(const char *name, int flags);
int load_module(const char *path, bool verbose);
+int load_module_params(const char *path, const char *param_values, bool verbose);
int unload_module(const char *name, bool verbose);
static inline __u64 get_time_ns(void)
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 81943c6254e6..eeaab7013ca2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -17,7 +17,9 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
+#include "bpf_util.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
@@ -519,3 +521,235 @@ void read_trace_pipe(void)
{
read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static const char * const trace_blacklist[] = {
+ "migrate_disable",
+ "migrate_enable",
+ "rcu_read_unlock_strict",
+ "preempt_count_add",
+ "preempt_count_sub",
+ "__rcu_read_lock",
+ "__rcu_read_unlock",
+ "bpf_get_numa_node_id",
+};
+
+static bool skip_entry(char *name)
+{
+ int i;
+
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
+ if (!strcmp(name, trace_blacklist[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt * sizeof(long));
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 2ce873c9f9aa..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
+
#endif
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 220f6a963813..f997d7ec8fd0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -1,15 +1,76 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <sys/utsname.h>
#include <unistd.h>
#include <fcntl.h>
+#include <zlib.h>
#include "unpriv_helpers.h"
-static bool get_mitigations_off(void)
+static gzFile open_config(void)
+{
+ struct utsname uts;
+ char buf[PATH_MAX];
+ gzFile config;
+
+ if (uname(&uts)) {
+ perror("uname");
+ goto config_gz;
+ }
+
+ snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release);
+ config = gzopen(buf, "rb");
+ if (config)
+ return config;
+ fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno));
+
+config_gz:
+ config = gzopen("/proc/config.gz", "rb");
+ if (!config)
+ perror("gzopen /proc/config.gz");
+ return config;
+}
+
+static int config_contains(const char *pat)
+{
+ const char *msg;
+ char buf[1024];
+ gzFile config;
+ int n, err;
+
+ config = open_config();
+ if (!config)
+ return -1;
+
+ for (;;) {
+ if (!gzgets(config, buf, sizeof(buf))) {
+ msg = gzerror(config, &err);
+ if (err == Z_ERRNO)
+ perror("gzgets /proc/config.gz");
+ else if (err != Z_OK)
+ fprintf(stderr, "gzgets /proc/config.gz: %s", msg);
+ gzclose(config);
+ return -1;
+ }
+ n = strlen(buf);
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = 0;
+ if (strcmp(buf, pat) == 0) {
+ gzclose(config);
+ return 1;
+ }
+ }
+ gzclose(config);
+ return 0;
+}
+
+static bool cmdline_contains(const char *pat)
{
char cmdline[4096], *c;
int fd, ret = false;
@@ -27,7 +88,7 @@ static bool get_mitigations_off(void)
cmdline[sizeof(cmdline) - 1] = '\0';
for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
- if (strncmp(c, "mitigations=off", strlen(c)))
+ if (strncmp(c, pat, strlen(c)))
continue;
ret = true;
break;
@@ -37,8 +98,21 @@ out:
return ret;
}
+static int get_mitigations_off(void)
+{
+ int enabled_in_config;
+
+ if (cmdline_contains("mitigations=off"))
+ return 1;
+ enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y");
+ if (enabled_in_config < 0)
+ return -1;
+ return !enabled_in_config;
+}
+
bool get_unpriv_disabled(void)
{
+ int mitigations_off;
bool disabled;
char buf[2];
FILE *fd;
@@ -52,5 +126,19 @@ bool get_unpriv_disabled(void)
disabled = true;
}
- return disabled ? true : get_mitigations_off();
+ if (disabled)
+ return true;
+
+ /*
+ * Some unpriv tests rely on spectre mitigations being on.
+ * If mitigations are off or status can't be determined
+ * assume that unpriv tests are disabled.
+ */
+ mitigations_off = get_mitigations_off();
+ if (mitigations_off < 0) {
+ fprintf(stderr,
+ "Can't determine if mitigations are enabled, disabling unpriv tests.");
+ return true;
+ }
+ return mitigations_off;
}
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
new file mode 100644
index 000000000000..549d1f774810
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * This single-header library defines a collection of variadic macros for
+ * defining and triggering USDTs (User Statically-Defined Tracepoints):
+ *
+ * - For USDTs without associated semaphore:
+ * USDT(group, name, args...)
+ *
+ * - For USDTs with implicit (transparent to the user) semaphore:
+ * USDT_WITH_SEMA(group, name, args...)
+ * USDT_IS_ACTIVE(group, name)
+ *
+ * - For USDTs with explicit (user-defined and provided) semaphore:
+ * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...)
+ * USDT_SEMA_IS_ACTIVE(sema)
+ *
+ * all of which emit a NOP instruction into the instruction stream, and so
+ * have *zero* overhead for the surrounding code. USDTs are identified by
+ * a combination of `group` and `name` identifiers, which is used by external
+ * tracing tooling (tracers) for identifying exact USDTs of interest.
+ *
+ * USDTs can have an associated (2-byte) activity counter (USDT semaphore),
+ * automatically maintained by Linux kernel whenever any correctly written
+ * BPF-based tracer is attached to the USDT. This USDT semaphore can be used
+ * to check whether there is a need to do any extra data collection and
+ * processing for a given USDT (if necessary), and otherwise avoid extra work
+ * for a common case of USDT not being traced ("active").
+ *
+ * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or
+ * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on
+ * working with USDTs with implicitly or explicitly associated
+ * USDT semaphores, respectively.
+ *
+ * There is also some additional data recorded into an auxiliary note
+ * section. The data in the note section describes the operands, in terms of
+ * size and location, used by tracing tooling to know where to find USDT
+ * arguments. Each location is encoded as an assembler operand string.
+ * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert
+ * breakpoints on top of the nop, and decode the location operand-strings,
+ * like an assembler, to find the values being passed.
+ *
+ * The operand strings are selected by the compiler for each operand.
+ * They are constrained by inline-assembler codes.The default is:
+ *
+ * #define USDT_ARG_CONSTRAINT nor
+ *
+ * This is a good default if the operands tend to be integral and
+ * moderate in number (smaller than number of registers). In other
+ * cases, the compiler may report "'asm' requires impossible reload" or
+ * similar. In this case, consider simplifying the macro call (fewer
+ * and simpler operands), reduce optimization, or override the default
+ * constraints string via:
+ *
+ * #define USDT_ARG_CONSTRAINT g
+ * #include <usdt.h>
+ *
+ * For some historical description of USDT v3 format (the one used by this
+ * library and generally recognized and assumed by BPF-based tracing tools)
+ * see [0]. The more formal specification can be found at [1]. Additional
+ * argument constraints information can be found at [2].
+ *
+ * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for
+ * this USDT library implementation. Current implementation differs *a lot* in
+ * terms of exposed user API and general usability, which was the main goal
+ * and focus of the reimplementation work. Nevertheless, underlying recorded
+ * USDT definitions are fully binary compatible and any USDT-based tooling
+ * should work equally well with USDTs defined by either SystemTap's or this
+ * library's USDT implementation.
+ *
+ * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html
+ * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h
+ */
+#ifndef __USDT_H
+#define __USDT_H
+
+/*
+ * Changelog:
+ *
+ * 0.1.0
+ * -----
+ * - Initial release
+ */
+#define USDT_MAJOR_VERSION 0
+#define USDT_MINOR_VERSION 1
+#define USDT_PATCH_VERSION 0
+
+/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L)
+#define __usdt_va_opt 1
+#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__
+#else
+#define __usdt_va_args(...) , ##__VA_ARGS__
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. No USDT semaphore is
+ * associated with this USDT.
+ *
+ * Such "semaphoreless" USDTs are commonly used when there is no extra data
+ * collection or processing needed to collect and prepare USDT arguments and
+ * they are just available in the surrounding code. USDT() macro will just
+ * record their locations in CPU registers or in memory for tracing tooling to
+ * be able to access them, if necessary.
+ */
+#ifdef __usdt_va_opt
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__)
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. USDT also get an
+ * implicitly-defined associated USDT semaphore, which will be "activated" by
+ * tracing tooling and can be used to check whether USDT is being actively
+ * observed.
+ *
+ * USDTs with semaphore are commonly used when there is a need to perform
+ * additional data collection and processing to prepare USDT arguments, which
+ * otherwise might not be necessary for the rest of application logic. In such
+ * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT
+ * is not traced (which is presumed to be a common situation), the associated
+ * USDT semaphore is "inactive", and so there is no need to waste resources to
+ * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether
+ * USDT is "active".
+ *
+ * N.B. There is an inherent (albeit short) gap between checking whether USDT
+ * is active and triggering corresponding USDT, in which external tracer can
+ * be attached to an USDT and activate USDT semaphore after the activity check.
+ * If such a race occurs, tracers might miss one USDT execution. Tracers are
+ * expected to accommodate such possibility and this is expected to not be
+ * a problem for applications and tracers.
+ *
+ * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained
+ * within a single executable or shared library and is not shared outside
+ * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name
+ * identifier across executable and shared library, it will work and won't
+ * conflict, per se, but will define independent USDT semaphores, one for each
+ * shared library/executable in which USDT_WITH_SEMA(group, name) is used.
+ * That is, if you attach to this USDT in one shared library (or executable),
+ * then only USDT semaphore within that shared library (or executable) will be
+ * updated by the kernel, while other libraries (or executable) will not see
+ * activated USDT semaphore. In short, it's best to use unique USDT group:name
+ * identifiers across different shared libraries (and, equivalently, between
+ * executable and shared library). This is advanced consideration and is
+ * rarely (if ever) seen in practice, but just to avoid surprises this is
+ * called out here. (Static libraries become a part of final executable, once
+ * linked by linker, so the above considerations don't apply to them.)
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name) \
+ __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name), \
+ ##__VA_ARGS__)
+#endif
+
+struct usdt_sema { volatile unsigned short active; };
+
+/*
+ * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into the given USDT. It is assumed that USDT is triggered with
+ * USDT_WITH_SEMA() macro which will implicitly define associated USDT
+ * semaphore. (If one needs more control over USDT semaphore, see
+ * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.)
+ *
+ * N.B. Such checks are necessarily racy and speculative. Between checking
+ * whether USDT is active and triggering the USDT itself, tracer can be
+ * detached with no notification. This race should be extremely rare and worst
+ * case should result in one-time wasted extra data collection and processing.
+ */
+#define USDT_IS_ACTIVE(group, name) ({ \
+ extern struct usdt_sema __usdt_sema_name(group, name) \
+ __usdt_asm_name(__usdt_sema_name(group, name)); \
+ __usdt_sema_implicit(__usdt_sema_name(group, name)); \
+ __usdt_sema_name(group, name).active > 0; \
+})
+
+/*
+ * APIs for working with user-defined explicit USDT semaphores.
+ *
+ * This is a less commonly used advanced API for use cases in which user needs
+ * an explicit control over (potentially shared across multiple USDTs) USDT
+ * semaphore instance. This can be used when there is a group of logically
+ * related USDTs that all need extra data collection and processing whenever
+ * any of a family of related USDTs are "activated" (i.e., traced). In such
+ * a case, all such related USDTs will be associated with the same shared USDT
+ * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be
+ * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra
+ * USDT semaphore identifier as an extra parameter.
+ */
+
+/**
+ * Underlying C global variable name for user-defined USDT semaphore with
+ * `sema` identifier. Could be useful for debugging, but normally shouldn't be
+ * used explicitly.
+ */
+#define USDT_SEMA(sema) __usdt_sema_##sema
+
+/*
+ * Define storage for user-defined USDT semaphore `sema`.
+ *
+ * Should be used only once in non-header source file to let compiler allocate
+ * space for the semaphore variable. Just like with any other global variable.
+ *
+ * This macro can be used anywhere where global variable declaration is
+ * allowed. Just like with global variable definitions, there should be only
+ * one definition of user-defined USDT semaphore with given `sema` identifier,
+ * otherwise compiler or linker will complain about duplicate variable
+ * definition.
+ *
+ * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace
+ * and inside namespaces (including nested namespaces). Just make sure that
+ * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is
+ * referenced, or any of its parent namespaces, so the C++ language-level
+ * identifier is visible to the code that needs to reference the semaphore.
+ * At the lowest layer, USDT semaphores have global naming and visibility
+ * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked
+ * against from C or C++ code, if necessary). To keep it simple, putting
+ * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest
+ * no-brainer solution. All these aspects are irrelevant for plain C, because
+ * C doesn't have namespaces and everything is always in the global namespace.
+ *
+ * N.B. Due to USDT metadata being recorded in non-allocatable ELF note
+ * section, it has limitations when it comes to relocations, which, in
+ * practice, means that it's not possible to correctly share USDT semaphores
+ * between main executable and shared libraries, or even between multiple
+ * shared libraries. USDT semaphore has to be contained to individual shared
+ * library or executable to avoid unpleasant surprises with half-working USDT
+ * semaphores. We enforce this by marking semaphore ELF symbols as having
+ * a hidden visibility. This is quite an advanced use case and consideration
+ * and for most users this should have no consequences whatsoever.
+ */
+#define USDT_DEFINE_SEMA(sema) \
+ struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \
+ __usdt_asm_name(USDT_SEMA(sema)) \
+ __attribute__((visibility("hidden"))) = { 0 }
+
+/*
+ * Declare extern reference to user-defined USDT semaphore `sema`.
+ *
+ * Refers to a variable defined in another compilation unit by
+ * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across
+ * multiple compilation units (i.e., .c and .cpp files).
+ *
+ * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities.
+ */
+#define USDT_DECLARE_SEMA(sema) \
+ extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema))
+
+/*
+ * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into USDT(s) associated with USDT semaphore `sema`.
+ *
+ * N.B. Such checks are necessarily racy. Between checking the state of USDT
+ * semaphore and triggering associated USDT(s), the active tracer might attach
+ * or detach. This race should be extremely rare and worst case should result
+ * in one-time missed USDT event or wasted extra data collection and
+ * processing. USDT-using tracers should be written with this in mind and is
+ * not a concern of the application defining USDTs with associated semaphore.
+ */
+#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0)
+
+/*
+ * Invoke USDT specified by `group` and `name` identifiers and associate
+ * explicitly user-defined semaphore `sema` with it. Pass through `args` as
+ * USDT arguments. `args` are optional and zero arguments are acceptable.
+ *
+ * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be
+ * checked whether active with USDT_SEMA_IS_ACTIVE().
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__)
+#else
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__)
+#endif
+
+/*
+ * Adjustable implementation aspects
+ */
+#ifndef USDT_ARG_CONSTRAINT
+#if defined __powerpc__
+#define USDT_ARG_CONSTRAINT nZr
+#elif defined __arm__
+#define USDT_ARG_CONSTRAINT g
+#elif defined __loongarch__
+#define USDT_ARG_CONSTRAINT nmr
+#else
+#define USDT_ARG_CONSTRAINT nor
+#endif
+#endif /* USDT_ARG_CONSTRAINT */
+
+#ifndef USDT_NOP
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define USDT_NOP nop 0
+#else
+#define USDT_NOP nop
+#endif
+#endif /* USDT_NOP */
+
+/*
+ * Implementation details
+ */
+/* USDT name for implicitly-defined USDT semaphore, derived from group:name */
+#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name
+/* ELF section into which USDT semaphores are put */
+#define __usdt_sema_sec __attribute__((section(".probes")))
+
+#define __usdt_concat(a, b) a ## b
+#define __usdt_apply(fn, n) __usdt_concat(fn, n)
+
+#ifndef __usdt_nth
+#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N
+#endif
+
+#ifndef __usdt_narg
+#ifdef __usdt_va_opt
+#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#else
+#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+#endif /* __usdt_narg */
+
+#define __usdt_hash #
+#define __usdt_str_(x) #x
+#define __usdt_str(x) __usdt_str_(x)
+
+#ifndef __usdt_asm_name
+#define __usdt_asm_name(name) __asm__(__usdt_str(name))
+#endif
+
+#define __usdt_asm0() "\n"
+#define __usdt_asm1(x) __usdt_str(x) "\n"
+#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__)
+#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__)
+#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__)
+#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__)
+#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__)
+#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__)
+#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__)
+#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__)
+#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__)
+#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__)
+#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__)
+#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#ifdef __LP64__
+#define __usdt_asm_addr .8byte
+#else
+#define __usdt_asm_addr .4byte
+#endif
+
+#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x)
+#define __usdt_asm_strz(x) __usdt_asm_strz_(x)
+#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x)
+#define __usdt_asm_str(x) __usdt_asm_str_(x)
+
+/* "semaphoreless" USDT case */
+#ifndef __usdt_sema_none
+#define __usdt_sema_none(sema)
+#endif
+
+/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */
+#ifndef __usdt_sema_implicit
+#define __usdt_sema_implicit(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm1(.ifndef sema) \
+ __usdt_asm3( .pushsection .probes, "aw", "progbits") \
+ __usdt_asm1( .weak sema) \
+ __usdt_asm1( .hidden sema) \
+ __usdt_asm1( .align 2) \
+ __usdt_asm1(sema:) \
+ __usdt_asm1( .zero 2) \
+ __usdt_asm2( .type sema, @object) \
+ __usdt_asm2( .size sema, 2) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ );
+#endif
+
+/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */
+#ifndef __usdt_sema_explicit
+#define __usdt_sema_explicit(sema) \
+ __asm__ __volatile__ ("" :: "m" (sema));
+#endif
+
+/* main USDT definition (nop and .note.stapsdt metadata) */
+#define __usdt_probe(group, name, sema_def, sema, ...) do { \
+ sema_def(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm( 990: USDT_NOP) \
+ __usdt_asm3( .pushsection .note.stapsdt, "", "note") \
+ __usdt_asm1( .balign 4) \
+ __usdt_asm3( .4byte 992f-991f,994f-993f,3) \
+ __usdt_asm1(991: .asciz "stapsdt") \
+ __usdt_asm1(992: .balign 4) \
+ __usdt_asm1(993: __usdt_asm_addr 990b) \
+ __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \
+ __usdt_asm1( __usdt_asm_addr sema) \
+ __usdt_asm_strz(group) \
+ __usdt_asm_strz(name) \
+ __usdt_asm_args(__VA_ARGS__) \
+ __usdt_asm1( .ascii "\0") \
+ __usdt_asm1(994: .balign 4) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.ifndef _.stapsdt.base) \
+ __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\
+ __usdt_asm1( .weak _.stapsdt.base) \
+ __usdt_asm1( .hidden _.stapsdt.base) \
+ __usdt_asm1(_.stapsdt.base:) \
+ __usdt_asm1( .space 1) \
+ __usdt_asm2( .size _.stapsdt.base, 1) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ :: __usdt_asm_ops(__VA_ARGS__) \
+ ); \
+} while (0)
+
+/*
+ * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ * operand note format.
+ *
+ * The named register may be a longer or shorter (!) alias for the
+ * storage where the value in question is found. For example, on
+ * i386, 64-bit value may be put in register pairs, and a register
+ * name stored would identify just one of them. Previously, gcc was
+ * asked to emit the %w[id] (16-bit alias of some registers holding
+ * operands), even when a wider 32-bit value was used.
+ *
+ * Bottom line: the byte-width given before the @ sign governs. If
+ * there is a mismatch between that width and that of the named
+ * register, then a sys/sdt.h note consumer may need to employ
+ * architecture-specific heuristics to figure out where the compiler
+ * has actually put the complete value.
+ */
+#if defined(__powerpc__) || defined(__powerpc64__)
+#define __usdt_argref(id) %I[id]%[id]
+#elif defined(__i386__)
+#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+#define __usdt_argref(id) %[id]
+#endif
+
+#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \
+ __usdt_asm1(.ascii "@") \
+ __usdt_asm_str(__usdt_argref(__usdt_aval##n))
+
+#define __usdt_asm_args0 /* no arguments */
+#define __usdt_asm_args1 __usdt_asm_arg(1)
+#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2)
+#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3)
+#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4)
+#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5)
+#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6)
+#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7)
+#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8)
+#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9)
+#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10)
+#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11)
+#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12)
+#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__))
+
+#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5)
+#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x))
+
+/*
+ * We can't use __builtin_choose_expr() in C++, so fall back to table-based
+ * signedness determination for known types, utilizing templates magic.
+ */
+#ifdef __cplusplus
+
+#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed)
+
+#include <cstddef>
+
+template<typename T> struct __usdt_t { static const bool is_signed = false; };
+template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {};
+template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {};
+
+#define __usdt_def_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; }
+#define __usdt_maybe_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; }
+
+__usdt_def_signed(signed char);
+__usdt_def_signed(short);
+__usdt_def_signed(int);
+__usdt_def_signed(long);
+__usdt_def_signed(long long);
+__usdt_maybe_signed(char);
+__usdt_maybe_signed(wchar_t);
+
+#else /* !__cplusplus */
+
+#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4)
+#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U))
+#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1)
+
+#endif /* __cplusplus */
+
+#define __usdt_asm_op(n, x) \
+ [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \
+ [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x)
+
+#define __usdt_asm_ops0() [__usdt_dummy] "g" (0)
+#define __usdt_asm_ops1(x) __usdt_asm_op(1, x)
+#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x)
+#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x)
+#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x)
+#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x)
+#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x)
+#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x)
+#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x)
+#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x)
+#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x)
+#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x)
+#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x)
+#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#endif /* __USDT_H */
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index b616575c3b00..ce13002c7a19 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -93,7 +93,7 @@
.expected_attach_type = BPF_SK_LOOKUP,
.result = VERBOSE_ACCEPT,
.runs = -1,
- .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\
2: (c5) if r0 s< 0x0 goto pc+2\
- R0_w=-44",
+ R0=-44",
},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 18596ae0b0c1..c8d640802cce 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1375,7 +1375,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
@@ -1439,7 +1439,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1493,7 +1493,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -2380,7 +2380,7 @@
*/
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
- /* r9 = *r9 ; verifier get's to this point via two paths:
+ /* r9 = *r9 ; verifier gets to this point via two paths:
* ; (I) one including r9 = r8, verified first;
* ; (II) one excluding r9 = r8, verified next.
* ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
@@ -2409,3 +2409,27 @@
.errstr_unpriv = "",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
+{
+ "calls: several args with ref_obj_id",
+ .insns = {
+ /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer.
+ * With a smaller size, the verifier would reject the call to
+ * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the
+ * ref_obj_id error.
+ */
+ BPF_MOV64_IMM(BPF_REG_2, 20),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* if r0 == 0 goto <exit> */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .result = REJECT,
+ .errstr = "more than one arg with ref_obj_id",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index ee454327e5c6..77207b498c6f 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -2,14 +2,13 @@
"dead code: start",
.insns = {
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 7),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 43776f6f92f4..91d83e9cb148 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -84,11 +84,10 @@
BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -149,11 +148,10 @@
BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -214,11 +212,10 @@
BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -283,11 +280,10 @@
BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -354,11 +350,10 @@
BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -425,11 +420,10 @@
BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -496,11 +490,10 @@
BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -567,11 +560,10 @@
BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -638,11 +630,10 @@
BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -709,11 +700,10 @@
BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -780,11 +770,10 @@
BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 11fc68da735e..e901eefd774a 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -78,12 +78,11 @@
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -136,13 +135,12 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -154,16 +152,16 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3),
BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
index f2cac42298ba..09179fb551f0 100755
--- a/tools/testing/selftests/bpf/verify_sig_setup.sh
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -32,7 +32,7 @@ usage()
exit 1
}
-setup()
+genkey()
{
local tmp_dir="$1"
@@ -45,9 +45,14 @@ setup()
openssl x509 -in ${tmp_dir}/signing_key.pem -out \
${tmp_dir}/signing_key.der -outform der
+}
- key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+setup()
+{
+ local tmp_dir="$1"
+ genkey "${tmp_dir}"
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
keyring_id=$(keyctl newring ebpf_testing_keyring @s)
keyctl link $key_id $keyring_id
}
@@ -105,6 +110,8 @@ main()
if [[ "${action}" == "setup" ]]; then
setup "${tmp_dir}"
+ elif [[ "${action}" == "genkey" ]]; then
+ genkey "${tmp_dir}"
elif [[ "${action}" == "cleanup" ]]; then
cleanup "${tmp_dir}"
elif [[ "${action}" == "fsverity-create-sign" ]]; then
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 06af5029885b..e962f133250c 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
#include <argp.h>
#include <libgen.h>
+#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <sched.h>
@@ -22,6 +23,7 @@
#include <float.h>
#include <math.h>
#include <limits.h>
+#include <assert.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -48,6 +50,7 @@ enum stat_id {
STACK,
PROG_TYPE,
ATTACH_TYPE,
+ MEMORY_PEAK,
FILE_NAME,
PROG_NAME,
@@ -154,6 +157,36 @@ struct filter {
bool abs;
};
+struct rvalue {
+ enum { INTEGRAL, ENUMERATOR } type;
+ union {
+ long long ivalue;
+ char *svalue;
+ };
+};
+
+struct field_access {
+ enum { FIELD_NAME, ARRAY_INDEX } type;
+ union {
+ char *name;
+ struct rvalue index;
+ };
+};
+
+struct var_preset {
+ struct field_access *atoms;
+ int atom_count;
+ char *full_name;
+ struct rvalue value;
+ bool applied;
+};
+
+enum dump_mode {
+ DUMP_NONE = 0,
+ DUMP_XLATED = 1,
+ DUMP_JITED = 2,
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -195,6 +228,12 @@ static struct env {
int progs_processed;
int progs_skipped;
int top_src_lines;
+ struct var_preset *presets;
+ int npresets;
+ char orig_cgroup[PATH_MAX];
+ char stat_cgroup[PATH_MAX];
+ int memory_peak_fd;
+ __u32 dump_mode;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -206,6 +245,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
return vfprintf(stderr, format, args);
}
+#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+__attribute__((format(printf, 3, 4)))
+static int log_errno_aux(const char *file, int line, const char *fmt, ...)
+{
+ int err = -errno;
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", file, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, " failed with error '%s'.\n", strerror(errno));
+ va_end(ap);
+ return err;
+}
+
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif
@@ -223,6 +278,7 @@ const char argp_program_doc[] =
enum {
OPT_LOG_FIXED = 1000,
OPT_LOG_SIZE = 1001,
+ OPT_DUMP = 1002,
};
static const struct argp_option opts[] = {
@@ -246,16 +302,21 @@ static const struct argp_option opts[] = {
{ "test-reg-invariants", 'r', NULL, 0,
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
+ { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
+ { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" },
{},
};
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr);
+static int append_var_preset_file(const char *filename);
+static int append_file(const char *path);
+static int append_file_from_file(const char *path);
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- void *tmp;
int err;
switch (key) {
@@ -327,6 +388,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
fprintf(stderr, "invalid top N specifier: %s\n", arg);
argp_usage(state);
}
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -353,15 +415,36 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case 'G': {
+ if (arg[0] == '@')
+ err = append_var_preset_file(arg + 1);
+ else
+ err = append_var_preset(&env.presets, &env.npresets, arg);
+ if (err) {
+ fprintf(stderr, "Failed to parse global variable presets: %s\n", arg);
+ return err;
+ }
+ break;
+ }
case ARGP_KEY_ARG:
- tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
- if (!tmp)
- return -ENOMEM;
- env.filenames = tmp;
- env.filenames[env.filename_cnt] = strdup(arg);
- if (!env.filenames[env.filename_cnt])
- return -ENOMEM;
- env.filename_cnt++;
+ if (arg[0] == '@')
+ err = append_file_from_file(arg + 1);
+ else
+ err = append_file(arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect BPF object files: %d\n", err);
+ return err;
+ }
+ break;
+ case OPT_DUMP:
+ if (!arg || strcasecmp(arg, "xlated") == 0) {
+ env.dump_mode |= DUMP_XLATED;
+ } else if (strcasecmp(arg, "jited") == 0) {
+ env.dump_mode |= DUMP_JITED;
+ } else {
+ fprintf(stderr, "Unrecognized dump mode '%s'\n", arg);
+ return -EINVAL;
+ }
break;
default:
return ARGP_ERR_UNKNOWN;
@@ -632,7 +715,7 @@ static int append_filter_file(const char *path)
f = fopen(path, "r");
if (!f) {
err = -errno;
- fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
+ fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err));
return err;
}
@@ -662,14 +745,57 @@ static const struct stat_specs default_output_spec = {
},
};
+static int append_file(const char *path)
+{
+ void *tmp;
+
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(path);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ return 0;
+}
+
+static int append_file_from_file(const char *path)
+{
+ char buf[1024];
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open object files list in '%s': %s\n",
+ path, strerror(errno));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ err = append_file(buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
static const struct stat_specs default_csv_output_spec = {
- .spec_cnt = 14,
+ .spec_cnt = 15,
.ids = {
FILE_NAME, PROG_NAME, VERDICT, DURATION,
TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
- STACK,
+ STACK, MEMORY_PEAK,
},
};
@@ -710,6 +836,7 @@ static struct stat_def {
[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
[PROG_TYPE] = { "Program type", {"prog_type"}, },
[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
+ [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, },
};
static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -783,6 +910,18 @@ static bool is_desc_sym(char c)
return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
}
+static char *rtrim(char *str)
+{
+ int i;
+
+ for (i = strlen(str) - 1; i > 0; --i) {
+ if (!isspace(str[i]))
+ break;
+ str[i] = '\0';
+ }
+ return str;
+}
+
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
int id;
@@ -1111,6 +1250,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
break;
case BPF_MAP_TYPE_STRUCT_OPS:
mask_unrelated_struct_ops_progs(obj, map, prog);
@@ -1163,13 +1303,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
bpf_program__set_expected_attach_type(prog, attach_type);
if (!env.quiet) {
- printf("Using guessed program type '%s' for %s/%s...\n",
+ fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n",
libbpf_bpf_prog_type_str(prog_type),
filename, prog_name);
}
} else {
if (!env.quiet) {
- printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
ctx_name, filename, prog_name);
}
}
@@ -1207,16 +1347,273 @@ static int max_verifier_log_size(void)
return log_size;
}
+static bool output_stat_enabled(int id)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++)
+ if (env.output_spec.ids[i] == id)
+ return true;
+ return false;
+}
+
+__attribute__((format(printf, 2, 3)))
+static int write_one_line(const char *file, const char *fmt, ...)
+{
+ int err, saved_errno;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "w");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ errno = 0;
+ err = vfprintf(f, fmt, ap);
+ saved_errno = errno;
+ va_end(ap);
+ fclose(f);
+ errno = saved_errno;
+ return err < 0 ? -1 : 0;
+}
+
+__attribute__((format(scanf, 3, 4)))
+static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...)
+{
+ int res = 0, saved_errno = 0;
+ char *line = NULL;
+ size_t line_len;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ while (getline(&line, &line_len, f) > 0) {
+ res = vsscanf(line, fmt, ap);
+ if (res == fields_expected)
+ goto out;
+ }
+ if (ferror(f)) {
+ saved_errno = errno;
+ res = -1;
+ }
+
+out:
+ va_end(ap);
+ free(line);
+ fclose(f);
+ errno = saved_errno;
+ return res;
+}
+
+static void destroy_stat_cgroup(void)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ close(env.memory_peak_fd);
+
+ if (env.orig_cgroup[0]) {
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0)
+ log_errno("moving self to original cgroup %s\n", env.orig_cgroup);
+ }
+
+ if (env.stat_cgroup[0]) {
+ err = rmdir(env.stat_cgroup);
+ if (err < 0)
+ log_errno("deletion of cgroup %s", env.stat_cgroup);
+ }
+
+ env.memory_peak_fd = -1;
+ env.orig_cgroup[0] = 0;
+ env.stat_cgroup[0] = 0;
+}
+
+/*
+ * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>,
+ * moves current process to this cgroup.
+ */
+static void create_stat_cgroup(void)
+{
+ char cgroup_fs_mount[4096];
+ char buf[4096];
+ int err;
+
+ env.memory_peak_fd = -1;
+
+ if (!output_stat_enabled(MEMORY_PEAK))
+ return;
+
+ err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s",
+ cgroup_fs_mount, buf);
+ if (err != 2) {
+ if (err < 0)
+ log_errno("reading /proc/self/mounts");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't find cgroupfs v2 mount point.\n");
+ goto err_out;
+ }
+
+ /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */
+ err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf);
+ if (err != 1) {
+ if (err < 0)
+ log_errno("reading /proc/self/cgroup");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't infer veristat process cgroup.");
+ goto err_out;
+ }
+
+ snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf);
+
+ snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid());
+ err = mkdir(buf, 0777);
+ if (err < 0) {
+ log_errno("creation of cgroup %s", buf);
+ goto err_out;
+ }
+ strcpy(env.stat_cgroup, buf);
+
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0) {
+ log_errno("entering cgroup %s", buf);
+ goto err_out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
+ env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
+ if (env.memory_peak_fd < 0) {
+ log_errno("opening %s", buf);
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ if (!env.quiet)
+ fprintf(stderr, "Memory usage metric unavailable.\n");
+ destroy_stat_cgroup();
+}
+
+/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */
+static long cgroup_memory_peak(void)
+{
+ long err, memory_peak;
+ char buf[32];
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
+ if (err <= 0) {
+ log_errno("pread(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+
+ buf[err] = 0;
+ errno = 0;
+ memory_peak = strtoll(buf, NULL, 10);
+ if (errno) {
+ log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf);
+ return -1;
+ }
+
+ return memory_peak;
+}
+
+static int reset_stat_cgroup(void)
+{
+ char buf[] = "r\n";
+ int err;
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0);
+ if (err <= 0) {
+ log_errno("pwrite(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+ return 0;
+}
+
+static int parse_rvalue(const char *val, struct rvalue *rvalue)
+{
+ long long value;
+ char *val_end;
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ rvalue->ivalue = value;
+ rvalue->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ rvalue->svalue = strdup(val);
+ if (!rvalue->svalue)
+ return -ENOMEM;
+ rvalue->type = ENUMERATOR;
+ }
+ return 0;
+}
+
+static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name)
+{
+ char command[64], buf[4096];
+ FILE *fp;
+ int status;
+
+ status = system("command -v bpftool > /dev/null 2>&1");
+ if (status != 0) {
+ fprintf(stderr, "bpftool is not available, can't print program dump\n");
+ return;
+ }
+ snprintf(command, sizeof(command), "bpftool prog dump %s id %u",
+ mode == DUMP_JITED ? "jited" : "xlated", prog_id);
+ fp = popen(command, "r");
+ if (!fp) {
+ fprintf(stderr, "bpftool failed with error: %d\n", errno);
+ return;
+ }
+
+ printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name);
+ while (fgets(buf, sizeof(buf), fp))
+ fputs(buf, stdout);
+ fprintf(stdout, "\n");
+
+ if (ferror(fp))
+ fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno);
+
+ pclose(fp);
+}
+
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
const char *prog_name = bpf_program__name(prog);
+ long mem_peak_a, mem_peak_b, mem_peak = -1;
char *buf;
int buf_sz, log_level;
struct verif_stats *stats;
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
- int err = 0;
+ int err = 0, cgroup_err;
void *tmp;
int fd;
@@ -1261,7 +1658,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_reg_invariants)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
- err = bpf_object__load(obj);
+ err = bpf_object__prepare(obj);
+ if (!err) {
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ err = bpf_object__load(obj);
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+ }
env.progs_processed++;
stats->file_name = strdup(base_filename);
@@ -1270,11 +1675,17 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
stats->stats[SIZE] = bpf_program__insn_cnt(prog);
stats->stats[PROG_TYPE] = bpf_program__type(prog);
stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
+ stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
memset(&info, 0, info_len);
fd = bpf_program__fd(prog);
- if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
+ if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
stats->stats[JITED_SIZE] = info.jited_prog_len;
+ if (env.dump_mode & DUMP_JITED)
+ dump(info.id, DUMP_JITED, base_filename, prog_name);
+ if (env.dump_mode & DUMP_XLATED)
+ dump(info.id, DUMP_XLATED, base_filename, prog_name);
+ }
parse_verif_log(buf, buf_sz, stats);
@@ -1290,7 +1701,483 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
free(buf);
return 0;
-};
+}
+
+static int append_preset_atom(struct var_preset *preset, char *value, bool is_index)
+{
+ struct field_access *tmp;
+ int i = preset->atom_count;
+ int err;
+
+ tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms));
+ if (!tmp)
+ return -ENOMEM;
+
+ preset->atoms = tmp;
+ preset->atom_count++;
+
+ if (is_index) {
+ preset->atoms[i].type = ARRAY_INDEX;
+ err = parse_rvalue(value, &preset->atoms[i].index);
+ if (err)
+ return err;
+ } else {
+ preset->atoms[i].type = FIELD_NAME;
+ preset->atoms[i].name = strdup(value);
+ if (!preset->atoms[i].name)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int parse_var_atoms(const char *full_var, struct var_preset *preset)
+{
+ char expr[256], var[256], *name, *saveptr;
+ int n, len, off, err;
+
+ snprintf(expr, sizeof(expr), "%s", full_var);
+ preset->atom_count = 0;
+ while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) {
+ len = strlen(name);
+ /* parse variable name */
+ if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) {
+ fprintf(stderr, "Can't parse %s\n", name);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, false);
+ if (err)
+ return err;
+
+ /* parse optional array indexes */
+ while (off < len) {
+ if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) {
+ fprintf(stderr, "Can't parse %s as index\n", name + off);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, true);
+ if (err)
+ return err;
+ off += n;
+ }
+ }
+ return 0;
+}
+
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
+{
+ void *tmp;
+ struct var_preset *cur;
+ char var[256], val[256];
+ int n, err;
+
+ tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
+ if (!tmp)
+ return -ENOMEM;
+ *presets = tmp;
+ cur = &(*presets)[*cnt];
+ memset(cur, 0, sizeof(*cur));
+ (*cnt)++;
+
+ if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ fprintf(stderr, "Failed to parse expression '%s'\n", expr);
+ return -EINVAL;
+ }
+ /* Remove trailing spaces from var, as scanf may add those */
+ rtrim(var);
+
+ err = parse_rvalue(val, &cur->value);
+ if (err)
+ return err;
+
+ cur->full_name = strdup(var);
+ if (!cur->full_name)
+ return -ENOMEM;
+
+ err = parse_var_atoms(var, cur);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int append_var_preset_file(const char *filename)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(filename, "rt");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err));
+ return -EINVAL;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+
+ err = append_var_preset(&env.presets, &env.npresets, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static bool is_signed_type(const struct btf_type *t)
+{
+ if (btf_is_int(t))
+ return btf_int_encoding(t) & BTF_INT_SIGNED;
+ if (btf_is_any_enum(t))
+ return btf_kflag(t);
+ return true;
+}
+
+static int enum_value_from_name(const struct btf *btf, const struct btf_type *t,
+ const char *evalue, long long *retval)
+{
+ if (btf_is_enum(t)) {
+ struct btf_enum *e = btf_enum(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = e->val;
+ return 0;
+ }
+ }
+ } else if (btf_is_enum64(t)) {
+ struct btf_enum64 *e = btf_enum64(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+ __u64 value = btf_enum64_value(e);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = value;
+ return 0;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+static bool is_preset_supported(const struct btf_type *t)
+{
+ return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static int find_enum_value(const struct btf *btf, const char *name, long long *value)
+{
+ const struct btf_type *t;
+ int cnt, i;
+ long long lvalue;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_any_enum(t))
+ continue;
+
+ if (enum_value_from_name(btf, t, name, &lvalue) == 0) {
+ *value = lvalue;
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
+{
+ int err = 0;
+
+ switch (rvalue->type) {
+ case INTEGRAL:
+ *result = rvalue->ivalue;
+ return 0;
+ case ENUMERATOR:
+ err = find_enum_value(btf, rvalue->svalue, result);
+ if (err) {
+ fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);
+ return err;
+ }
+ return 0;
+ default:
+ fprintf(stderr, "Unknown rvalue type\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom,
+ const char *array_name, struct btf_var_secinfo *sinfo)
+{
+ const struct btf_type *t;
+ struct btf_array *barr;
+ long long idx;
+ int err;
+
+ tid = btf__resolve_type(btf, tid);
+ t = btf__type_by_id(btf, tid);
+ if (!btf_is_array(t)) {
+ fprintf(stderr, "Array index is not expected for %s\n",
+ array_name);
+ return -EINVAL;
+ }
+ barr = btf_array(t);
+ err = resolve_rvalue(btf, &atom->index, &idx);
+ if (err)
+ return err;
+ if (idx < 0 || idx >= barr->nelems) {
+ fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n",
+ idx, barr->nelems, array_name);
+ return -EINVAL;
+ }
+ sinfo->size = btf__resolve_size(btf, barr->type);
+ sinfo->offset += sinfo->size * idx;
+ sinfo->type = btf__resolve_type(btf, barr->type);
+ return 0;
+}
+
+static int adjust_var_secinfo_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ struct btf_var_secinfo *sinfo)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type)) {
+ fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid, off;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ off = parent_offset + member->offset;
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ sinfo->offset += off / 8;
+ sinfo->type = tid;
+ sinfo->size = member_type->size;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
+ int err;
+
+ err = adjust_var_secinfo_member(btf, member_type, off,
+ member_name, sinfo);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ const char *prev_name;
+ int err, i;
+ int tid;
+
+ assert(preset->atom_count > 0);
+ assert(preset->atoms[0].type == FIELD_NAME);
+
+ tid = btf__resolve_type(btf, t->type);
+ base_type = btf__type_by_id(btf, tid);
+ prev_name = preset->atoms[0].name;
+
+ for (i = 1; i < preset->atom_count; ++i) {
+ struct field_access *atom = preset->atoms + i;
+
+ switch (atom->type) {
+ case ARRAY_INDEX:
+ err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo);
+ break;
+ case FIELD_NAME:
+ err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo);
+ if (err == -ESRCH)
+ fprintf(stderr, "Can't find '%s'\n", atom->name);
+ prev_name = atom->name;
+ break;
+ default:
+ fprintf(stderr, "Unknown field_access type\n");
+ return -EOPNOTSUPP;
+ }
+ if (err)
+ return err;
+ base_type = btf__type_by_id(btf, sinfo->type);
+ tid = sinfo->type;
+ }
+
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
+ struct bpf_map *map, struct btf_var_secinfo *sinfo,
+ struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ void *ptr;
+ long long value = preset->value.ivalue;
+ size_t size;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
+ if (!base_type) {
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
+ return -EINVAL;
+ }
+ if (!is_preset_supported(base_type)) {
+ fprintf(stderr, "Can't set %s. Only ints and enums are supported\n",
+ preset->full_name);
+ return -EINVAL;
+ }
+
+ if (preset->value.type == ENUMERATOR) {
+ if (btf_is_any_enum(base_type)) {
+ if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) {
+ fprintf(stderr,
+ "Failed to find integer value for enum element %s\n",
+ preset->value.svalue);
+ return -EINVAL;
+ }
+ } else {
+ fprintf(stderr, "Value %s is not supported for type %s\n",
+ preset->value.svalue,
+ btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+ }
+
+ /* Check if value fits into the target variable size */
+ if (sinfo->size < sizeof(value)) {
+ bool is_signed = is_signed_type(base_type);
+ __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0);
+ long long max_val = 1ll << unsigned_bits;
+
+ if (value >= max_val || value < -max_val) {
+ fprintf(stderr,
+ "Variable %s value %lld is out of range [%lld; %lld]\n",
+ btf__name_by_offset(btf, base_type->name_off), value,
+ is_signed ? -max_val : 0, max_val - 1);
+ return -EINVAL;
+ }
+ }
+
+ ptr = bpf_map__initial_value(map, &size);
+ if (!ptr || sinfo->offset + sinfo->size > size)
+ return -EINVAL;
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN) {
+ memcpy(ptr + sinfo->offset, &value, sinfo->size);
+ } else { /* __BYTE_ORDER == __BIG_ENDIAN */
+ __u8 src_offset = sizeof(value) - sinfo->size;
+
+ memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size);
+ }
+ return 0;
+}
+
+static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets)
+{
+ struct btf_var_secinfo *sinfo;
+ const char *sec_name;
+ const struct btf_type *t;
+ struct bpf_map *map;
+ struct btf *btf;
+ int i, j, k, n, cnt, err = 0;
+
+ if (npresets == 0)
+ return 0;
+
+ btf = bpf_object__btf(obj);
+ if (!btf)
+ return -EINVAL;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ sinfo = btf_var_secinfos(t);
+ sec_name = btf__name_by_offset(btf, t->name_off);
+ map = bpf_object__find_map_by_name(obj, sec_name);
+ if (!map)
+ continue;
+
+ n = btf_vlen(t);
+ for (j = 0; j < n; ++j, ++sinfo) {
+ const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
+ const char *var_name;
+
+ if (!btf_is_var(var_type))
+ continue;
+
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+
+ for (k = 0; k < npresets; ++k) {
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strcmp(var_name, presets[k].atoms[0].name) != 0)
+ continue;
+
+ if (presets[k].applied) {
+ fprintf(stderr, "Variable %s is set more than once",
+ var_name);
+ return -EINVAL;
+ }
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ presets[k].applied = true;
+ }
+ }
+ }
+ for (i = 0; i < npresets; ++i) {
+ if (!presets[i].applied) {
+ fprintf(stderr, "Global variable preset %s has not been applied\n",
+ presets[i].full_name);
+ err = -EINVAL;
+ }
+ presets[i].applied = false;
+ }
+ return err;
+}
static int process_obj(const char *filename)
{
@@ -1341,6 +2228,11 @@ static int process_obj(const char *filename)
if (prog_cnt == 1) {
prog = bpf_object__next_program(obj, NULL);
bpf_program__set_autoload(prog, true);
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
process_prog(filename, obj, prog);
goto cleanup;
}
@@ -1355,6 +2247,12 @@ static int process_obj(const char *filename)
goto cleanup;
}
+ err = set_global_vars(tobj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+
lprog = NULL;
bpf_object__for_each_program(tprog, tobj) {
const char *tprog_name = bpf_program__name(tprog);
@@ -1400,6 +2298,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
+ case MEMORY_PEAK:
case MARK_READ_MAX_LEN: {
long v1 = s1->stats[id];
long v2 = s2->stats[id];
@@ -1629,6 +2528,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
case STACK:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
*val = s ? s->stats[id] : 0;
break;
default:
@@ -1715,6 +2615,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
case MARK_READ_MAX_LEN:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
case STACK: {
long val;
int err, n;
@@ -2352,7 +3253,7 @@ static void output_prog_stats(void)
static int handle_verif_mode(void)
{
- int i, err;
+ int i, err = 0;
if (env.filename_cnt == 0) {
fprintf(stderr, "Please provide path to BPF object file!\n\n");
@@ -2360,11 +3261,12 @@ static int handle_verif_mode(void)
return -EINVAL;
}
+ create_stat_cgroup();
for (i = 0; i < env.filename_cnt; i++) {
err = process_obj(env.filenames[i]);
if (err) {
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- return err;
+ goto out;
}
}
@@ -2372,7 +3274,9 @@ static int handle_verif_mode(void)
output_prog_stats();
- return 0;
+out:
+ destroy_stat_cgroup();
+ return err;
}
static int handle_replay_mode(void)
@@ -2402,7 +3306,7 @@ static int handle_replay_mode(void)
int main(int argc, char **argv)
{
- int err = 0, i;
+ int err = 0, i, j;
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
return 1;
@@ -2460,5 +3364,21 @@ int main(int argc, char **argv)
free(env.deny_filters[i].prog_glob);
}
free(env.deny_filters);
+ for (i = 0; i < env.npresets; ++i) {
+ free(env.presets[i].full_name);
+ for (j = 0; j < env.presets[i].atom_count; ++j) {
+ switch (env.presets[i].atoms[j].type) {
+ case FIELD_NAME:
+ free(env.presets[i].atoms[j].name);
+ break;
+ case ARRAY_INDEX:
+ if (env.presets[i].atoms[j].index.type == ENUMERATOR)
+ free(env.presets[i].atoms[j].index.svalue);
+ break;
+ }
+ }
+ free(env.presets[i].atoms);
+ }
+ free(env.presets);
return -err;
}
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 79505d294c44..2f869daf8a06 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -43,6 +43,15 @@ riscv64)
BZIMAGE="arch/riscv/boot/Image"
ARCH="riscv"
;;
+ppc64el)
+ QEMU_BINARY=qemu-system-ppc64
+ QEMU_CONSOLE="hvc0"
+ # KVM could not be tested for powerpc, therefore not enabled for now.
+ HOST_FLAGS=(-machine pseries -cpu POWER9)
+ CROSS_FLAGS=(-machine pseries -cpu POWER9)
+ BZIMAGE="vmlinux"
+ ARCH="powerpc"
+ ;;
*)
echo "Unsupported architecture"
exit 1
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
deleted file mode 100755
index ffcd3953f94c..000000000000
--- a/tools/testing/selftests/bpf/with_addr.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# add private ipv4 and ipv6 addresses to loopback
-
-readonly V6_INNER='100::a/128'
-readonly V4_INNER='192.168.0.1/32'
-
-if getopts ":s" opt; then
- readonly SIT_DEV_NAME='sixtofourtest0'
- readonly V6_SIT='2::/64'
- readonly V4_SIT='172.17.0.1/32'
- shift
-fi
-
-fail() {
- echo "error: $*" 1>&2
- exit 1
-}
-
-setup() {
- ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
- ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
-
- if [[ -n "${V6_SIT}" ]]; then
- ip link add "${SIT_DEV_NAME}" type sit remote any local any \
- || fail 'failed to add sit'
- ip link set dev "${SIT_DEV_NAME}" up \
- || fail 'failed to bring sit device up'
- ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v6 SIT address'
- ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v4 SIT address'
- fi
-
- sleep 2 # avoid race causing bind to fail
-}
-
-cleanup() {
- if [[ -n "${V6_SIT}" ]]; then
- ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
- ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
- ip link del "${SIT_DEV_NAME}"
- fi
-
- ip -4 addr del "${V4_INNER}" dev lo
- ip -6 addr del "${V6_INNER}" dev lo
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
deleted file mode 100755
index e24949ed3a20..000000000000
--- a/tools/testing/selftests/bpf/with_tunnels.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# setup tunnels for flow dissection test
-
-readonly SUFFIX="test_$(mktemp -u XXXX)"
-CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
-
-setup() {
- ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
- ip link add "gre_${SUFFIX}" type gre ${CONFIG}
- ip link add "sit_${SUFFIX}" type sit ${CONFIG}
-
- echo "tunnels before test:"
- ip tunnel show
-
- ip link set "ipip_${SUFFIX}" up
- ip link set "gre_${SUFFIX}" up
- ip link set "sit_${SUFFIX}" up
-}
-
-
-cleanup() {
- ip tunnel del "ipip_${SUFFIX}"
- ip tunnel del "gre_${SUFFIX}"
- ip tunnel del "sit_${SUFFIX}"
-
- echo "tunnels after test:"
- ip tunnel show
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f7b15d6c6ed..3d8de0d4c96a 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -13,6 +13,7 @@
* - UDP 9091 packets trigger TX reply
* - TX HW timestamp is requested and reported back upon completion
* - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
*/
#include <test_progs.h>
@@ -37,6 +38,15 @@
#include <time.h>
#include <unistd.h>
#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/ethtool.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
#include "xdp_metadata.h"
@@ -64,6 +74,18 @@ int rxq;
bool skip_tx;
__u64 last_hw_rx_timestamp;
__u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp;
+int launch_time_queue;
+
+#define run_command(cmd, ...) \
+({ \
+ char command[1024]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ fprintf(stderr, "Running: %s\n", command); \
+ system(command); \
+})
void test__fail(void) { /* for network_helpers.c */ }
@@ -298,6 +320,12 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
if (meta->completion.tx_timestamp) {
__u64 ref_tstamp = gettime(clock_id);
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ print_tstamp_delta("HW Launch-time",
+ "HW TX-complete-time",
+ last_launch_time,
+ meta->completion.tx_timestamp);
+ }
print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
meta->completion.tx_timestamp, ref_tstamp);
print_tstamp_delta("XDP RX-time", "User TX-complete-time",
@@ -395,6 +423,17 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
xsk, ntohs(udph->check), ntohs(want_csum),
meta->request.csum_start, meta->request.csum_offset);
+ /* Set the value of launch time */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+ meta->request.launch_time = last_hw_rx_timestamp +
+ launch_time_delta_to_hw_rx_timestamp;
+ last_launch_time = meta->request.launch_time;
+ print_tstamp_delta("HW RX-time", "HW Launch-time",
+ last_hw_rx_timestamp,
+ meta->request.launch_time);
+ }
+
memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
tx_desc->options |= XDP_TX_METADATA;
tx_desc->len = len;
@@ -407,6 +446,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
const struct xdp_desc *rx_desc;
struct pollfd fds[rxq + 1];
__u64 comp_addr;
+ __u64 deadline;
__u64 addr;
__u32 idx = 0;
int ret;
@@ -477,9 +517,15 @@ peek:
if (ret)
printf("kick_tx ret=%d\n", ret);
- for (int j = 0; j < 500; j++) {
+ /* wait 1 second + cover launch time */
+ deadline = gettime(clock_id) +
+ NANOSEC_PER_SEC +
+ launch_time_delta_to_hw_rx_timestamp;
+ while (true) {
if (complete_tx(xsk, clock_id))
break;
+ if (gettime(clock_id) >= deadline)
+ break;
usleep(10);
}
}
@@ -608,6 +654,10 @@ static void print_usage(void)
" -h Display this help and exit\n\n"
" -m Enable multi-buffer XDP for larger MTU\n"
" -r Don't generate AF_XDP reply (rx metadata only)\n"
+ " -l Delta of launch time relative to HW RX-time in ns\n"
+ " default: 0 ns (launch time request is disabled)\n"
+ " -L Tx Queue to be enabled with launch time offload\n"
+ " default: 0 (Tx Queue 0)\n"
"Generate test packets on the other machine with:\n"
" echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
@@ -618,7 +668,7 @@ static void read_args(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "chmr")) != -1) {
+ while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) {
switch (opt) {
case 'c':
bind_flags &= ~XDP_USE_NEED_WAKEUP;
@@ -634,6 +684,12 @@ static void read_args(int argc, char *argv[])
case 'r':
skip_tx = true;
break;
+ case 'l':
+ launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+ break;
+ case 'L':
+ launch_time_queue = atoll(optarg);
+ break;
case '?':
if (isprint(optopt))
fprintf(stderr, "Unknown option: -%c\n", optopt);
@@ -657,23 +713,118 @@ static void read_args(int argc, char *argv[])
error(-1, errno, "Invalid interface name");
}
+void clean_existing_configurations(void)
+{
+ /* Check and delete root qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s root", ifname);
+
+ /* Check and delete ingress qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s ingress", ifname);
+
+ /* Check and delete ethtool filters if any exist */
+ if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) {
+ run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2",
+ ifname, ifname);
+ }
+}
+
+#define MAX_TC 16
+
int main(int argc, char *argv[])
{
clockid_t clock_id = CLOCK_TAI;
+ struct bpf_program *prog;
int server_fd = -1;
+ size_t map_len = 0;
+ size_t que_len = 0;
+ char *buf = NULL;
+ char *map = NULL;
+ char *que = NULL;
+ char *tmp = NULL;
+ int tc = 0;
int ret;
int i;
- struct bpf_program *prog;
-
read_args(argc, argv);
rxq = rxq_num(ifname);
-
printf("rxq: %d\n", rxq);
+ if (launch_time_queue >= rxq || launch_time_queue < 0)
+ error(1, 0, "Invalid launch_time_queue.");
+
+ clean_existing_configurations();
+ sleep(1);
+
+ /* Enable tx and rx hardware timestamping */
hwtstamp_enable(ifname);
+ /* Prepare priority to traffic class map for tc-mqprio */
+ for (i = 0; i < MAX_TC; i++) {
+ if (i < rxq)
+ tc = i;
+
+ if (asprintf(&buf, "%d ", tc) == -1) {
+ printf("Failed to malloc buf for tc map.\n");
+ goto free_mem;
+ }
+
+ map_len += strlen(buf);
+ tmp = realloc(map, map_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc map.\n");
+ goto free_mem;
+ }
+ map = tmp;
+ strcat(map, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Prepare traffic class to hardware queue map for tc-mqprio */
+ for (i = 0; i <= tc; i++) {
+ if (asprintf(&buf, "1@%d ", i) == -1) {
+ printf("Failed to malloc buf for tc queues.\n");
+ goto free_mem;
+ }
+
+ que_len += strlen(buf);
+ tmp = realloc(que, que_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc queues.\n");
+ goto free_mem;
+ }
+ que = tmp;
+ strcat(que, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Add mqprio qdisc */
+ run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0",
+ ifname, tc + 1, map, que);
+
+ /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ /* Enable launch time hardware offload on launch_time_queue */
+ run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000",
+ ifname, launch_time_queue + 1);
+ sleep(1);
+
+ /* Route incoming packet with VLAN priority 1 into launch_time_queue */
+ if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d",
+ ifname, launch_time_queue)) {
+ run_command("sudo tc qdisc add dev %s ingress", ifname);
+ run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d",
+ ifname, launch_time_queue);
+ }
+
+ /* Enable VLAN tag stripping offload */
+ run_command("sudo ethtool -K %s rxvlan on", ifname);
+ }
+
rx_xsk = malloc(sizeof(struct xsk) * rxq);
if (!rx_xsk)
error(1, ENOMEM, "malloc");
@@ -733,4 +884,11 @@ int main(int argc, char *argv[])
cleanup();
if (ret)
error(1, -ret, "verify_metadata");
+
+ clean_existing_configurations();
+
+free_mem:
+ free(buf);
+ free(map);
+ free(que);
}
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
deleted file mode 100644
index c1fc44c87c30..000000000000
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#define MAX_IFACE_NUM 32
-#define MAX_INDEX_NUM 1024
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int ifaces[MAX_IFACE_NUM] = {};
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
- }
-
- exit(0);
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
-
-err_out:
- close(fd);
- return ret;
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- int prog_fd, group_all, mac_map;
- struct bpf_program *ingress_prog, *egress_prog;
- int i, err, ret, opt, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
- unsigned char mac_addr[6];
- char ifname[IF_NAMESIZE];
- struct bpf_object *obj;
- unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'X':
- attach_egress_prog = true;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- goto err_out;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- goto err_out;
- }
-
- printf("Get interfaces:");
- for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
- ifaces[i] = if_nametoindex(argv[optind + i]);
- if (!ifaces[i])
- ifaces[i] = strtoul(argv[optind + i], NULL, 0);
- if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- goto err_out;
- }
- if (ifaces[i] > MAX_INDEX_NUM) {
- printf(" interface index too large\n");
- goto err_out;
- }
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- err = libbpf_get_error(obj);
- if (err)
- goto err_out;
- err = bpf_object__load(obj);
- if (err)
- goto err_out;
- prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
-
- if (attach_egress_prog)
- group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
- else
- group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
- mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
-
- if (group_all < 0 || mac_map < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
- /* Find ingress/egress prog for 2nd xdp prog */
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!ingress_prog || !egress_prog) {
- printf("finding ingress/egress_prog in obj file failed\n");
- goto err_out;
- }
- prog_fd = bpf_program__fd(ingress_prog);
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (prog_fd < 0 || egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* Init forward multicast groups and exclude group */
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- if (attach_egress_prog) {
- ret = get_mac_addr(ifindex, mac_addr);
- if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
- goto err_out;
- }
- ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map failed\n");
- goto err_out;
- }
- }
-
- /* Add all the interfaces to group all */
- devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto err_out;
- }
-
- /* bind prog_fd to each interface */
- ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
- }
- }
-
- /* sleep some time for testing */
- sleep(999);
-
- return 0;
-
-err_out:
- return 1;
-}
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 1503a1d2faa0..9ed8c796645d 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
}
if (!server) {
- /* Only supports IPv4; see hints initiailization above. */
+ /* Only supports IPv4; see hints initialization above. */
if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
fprintf(stderr, "Could not resolve %s\n", argv[optind]);
return 1;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 93c2cc413cfc..48729da142c2 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
/* Refresh the local tail pointer.
* cached_cons is r->size bigger than the real consumer pointer so
* that this addition can be avoided in the more frequently
- * executed code that computs free_entries in the beginning of
- * this function. Without this optimization it whould have been
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
#define XSK_XDP_COMMON_H_
#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
struct xdp_info {
__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..05b3cebc5ca9 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -74,42 +74,33 @@
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
-#include <errno.h>
#include <getopt.h>
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/mman.h>
#include <linux/netdev.h>
-#include <linux/bitmap.h>
#include <linux/ethtool.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
-#include <string.h>
#include <stddef.h>
#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/time.h>
#include <sys/types.h>
-#include <unistd.h>
+#include "prog_tests/test_xsk.h"
#include "xsk_xdp_progs.skel.h"
#include "xsk.h"
#include "xskxceiver.h"
#include <bpf/bpf.h>
#include <linux/filter.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "xsk_xdp_common.h"
#include <network_helpers.h>
-static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
static u32 opt_run_test = RUN_ALL_TESTS;
@@ -118,169 +109,12 @@ void test__fail(void) { /* for network_helpers.c */ }
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
- strerror(error));
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line,
+ error, strerror(error));
ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
-static char *mode_string(struct test_spec *test)
-{
- switch (test->mode) {
- case TEST_MODE_SKB:
- return "SKB";
- case TEST_MODE_DRV:
- return "DRV";
- case TEST_MODE_ZC:
- return "ZC";
- default:
- return "BOGUS";
- }
-}
-
-static void report_failure(struct test_spec *test)
-{
- if (test->fail)
- return;
-
- ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
- test->name);
- test->fail = true;
-}
-
-/* The payload is a word consisting of a packet sequence number in the upper
- * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
- * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
- */
-static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
-{
- u32 *ptr = (u32 *)dest, i;
-
- start /= sizeof(*ptr);
- size /= sizeof(*ptr);
- for (i = 0; i < size; i++)
- ptr[i] = htonl(pkt_nb << 16 | (i + start));
-}
-
-static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
-{
- memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
- memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
-}
-
-static bool is_umem_valid(struct ifobject *ifobj)
-{
- return !!ifobj->umem->umem;
-}
-
-static u32 mode_to_xdp_flags(enum test_mode mode)
-{
- return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-}
-
-static u64 umem_size(struct xsk_umem_info *umem)
-{
- return umem->num_frames * umem->frame_size;
-}
-
-static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
- u64 size)
-{
- struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
- .frame_size = umem->frame_size,
- .frame_headroom = umem->frame_headroom,
- .flags = XSK_UMEM__DEFAULT_FLAGS
- };
- int ret;
-
- if (umem->fill_size)
- cfg.fill_size = umem->fill_size;
-
- if (umem->comp_size)
- cfg.comp_size = umem->comp_size;
-
- if (umem->unaligned_mode)
- cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-
- ret = xsk_umem__create(&umem->umem, buffer, size,
- &umem->fq, &umem->cq, &cfg);
- if (ret)
- return ret;
-
- umem->buffer = buffer;
- if (ifobj->shared_umem && ifobj->rx_on) {
- umem->base_addr = umem_size(umem);
- umem->next_buffer = umem_size(umem);
- }
-
- return 0;
-}
-
-static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
-{
- u64 addr;
-
- addr = umem->next_buffer;
- umem->next_buffer += umem->frame_size;
- if (umem->next_buffer >= umem->base_addr + umem_size(umem))
- umem->next_buffer = umem->base_addr;
-
- return addr;
-}
-
-static void umem_reset_alloc(struct xsk_umem_info *umem)
-{
- umem->next_buffer = 0;
-}
-
-static void enable_busy_poll(struct xsk_socket_info *xsk)
-{
- int sock_opt;
-
- sock_opt = 1;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = 20;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = xsk->batch_size;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-}
-
-static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
- struct ifobject *ifobject, bool shared)
-{
- struct xsk_socket_config cfg = {};
- struct xsk_ring_cons *rxr;
- struct xsk_ring_prod *txr;
-
- xsk->umem = umem;
- cfg.rx_size = xsk->rxqsize;
- cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.bind_flags = ifobject->bind_flags;
- if (shared)
- cfg.bind_flags |= XDP_SHARED_UMEM;
- if (ifobject->mtu > MAX_ETH_PKT_SIZE)
- cfg.bind_flags |= XDP_USE_SG;
- if (umem->comp_size)
- cfg.tx_size = umem->comp_size;
- if (umem->fill_size)
- cfg.rx_size = umem->fill_size;
-
- txr = ifobject->tx_on ? &xsk->tx : NULL;
- rxr = ifobject->rx_on ? &xsk->rx : NULL;
- return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
-}
static bool ifobj_zc_avail(struct ifobject *ifobject)
{
@@ -312,7 +146,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
ifobject->rx_on = true;
xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+ ret = xsk_configure_socket(xsk, umem, ifobject, false);
if (!ret)
zc_avail = true;
@@ -325,25 +159,6 @@ out:
return zc_avail;
}
-#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
-static unsigned int get_max_skb_frags(void)
-{
- unsigned int max_skb_frags = 0;
- FILE *file;
-
- file = fopen(MAX_SKB_FRAGS_PATH, "r");
- if (!file) {
- ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
- return 0;
- }
-
- if (fscanf(file, "%u", &max_skb_frags) != 1)
- ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
-
- fclose(file);
- return max_skb_frags;
-}
-
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -444,2119 +259,36 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
}
}
-static int set_ring_size(struct ifobject *ifobj)
-{
- int ret;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
- if (!ret)
- break;
-
- /* Retry if it fails */
- if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
- return -errno;
-
- usleep(USLEEP_MAX);
- }
-
- return ret;
-}
-
-static int hw_ring_size_reset(struct ifobject *ifobj)
-{
- ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
- ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
- return set_ring_size(ifobj);
-}
-
-static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx)
-{
- u32 i, j;
-
- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
- ifobj->xsk = &ifobj->xsk_arr[0];
- ifobj->use_poll = false;
- ifobj->use_fill_ring = true;
- ifobj->release_rx = true;
- ifobj->validation_func = NULL;
- ifobj->use_metadata = false;
-
- if (i == 0) {
- ifobj->rx_on = false;
- ifobj->tx_on = true;
- } else {
- ifobj->rx_on = true;
- ifobj->tx_on = false;
- }
-
- memset(ifobj->umem, 0, sizeof(*ifobj->umem));
- ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
- ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-
- for (j = 0; j < MAX_SOCKETS; j++) {
- memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
- ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
- if (i == 0)
- ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
- else
- ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
-
- memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
- memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
- ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
- ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
- }
- }
-
- if (ifobj_tx->hw_ring_size_supp)
- hw_ring_size_reset(ifobj_tx);
-
- test->ifobj_tx = ifobj_tx;
- test->ifobj_rx = ifobj_rx;
- test->current_step = 0;
- test->total_steps = 1;
- test->nb_sockets = 1;
- test->fail = false;
- test->set_ring = false;
- test->mtu = MAX_ETH_PKT_SIZE;
- test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
- test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
-}
-
-static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx, enum test_mode mode,
- const struct test_spec *test_to_run)
-{
- struct pkt_stream *tx_pkt_stream;
- struct pkt_stream *rx_pkt_stream;
- u32 i;
-
- tx_pkt_stream = test->tx_pkt_stream_default;
- rx_pkt_stream = test->rx_pkt_stream_default;
- memset(test, 0, sizeof(*test));
- test->tx_pkt_stream_default = tx_pkt_stream;
- test->rx_pkt_stream_default = rx_pkt_stream;
-
- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
- ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
- if (mode == TEST_MODE_ZC)
- ifobj->bind_flags |= XDP_ZEROCOPY;
- else
- ifobj->bind_flags |= XDP_COPY;
- }
-
- strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
- test->test_func = test_to_run->test_func;
- test->mode = mode;
- __test_spec_init(test, ifobj_tx, ifobj_rx);
-}
-
-static void test_spec_reset(struct test_spec *test)
-{
- __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
-}
-
-static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
- struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
- struct bpf_map *xskmap_tx)
-{
- test->xdp_prog_rx = xdp_prog_rx;
- test->xdp_prog_tx = xdp_prog_tx;
- test->xskmap_rx = xskmap_rx;
- test->xskmap_tx = xskmap_tx;
-}
-
-static int test_spec_set_mtu(struct test_spec *test, int mtu)
-{
- int err;
-
- if (test->ifobj_rx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_rx->mtu = mtu;
- }
- if (test->ifobj_tx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_tx->mtu = mtu;
- }
-
- return 0;
-}
-
-static void pkt_stream_reset(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream) {
- pkt_stream->current_pkt_nb = 0;
- pkt_stream->nb_rx_pkts = 0;
- }
-}
-
-static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
- return NULL;
-
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
-}
-
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
-{
- while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
- (*pkts_sent)++;
- if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
- pkt_stream->current_pkt_nb++;
- }
- return NULL;
-}
-
-static void pkt_stream_delete(struct pkt_stream *pkt_stream)
-{
- free(pkt_stream->pkts);
- free(pkt_stream);
-}
-
-static void pkt_stream_restore_default(struct test_spec *test)
-{
- struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
-
- if (tx_pkt_stream != test->tx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
- test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
- }
-
- if (rx_pkt_stream != test->rx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
- test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
- }
-}
-
-static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = calloc(1, sizeof(*pkt_stream));
- if (!pkt_stream)
- return NULL;
-
- pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
- if (!pkt_stream->pkts) {
- free(pkt_stream);
- return NULL;
- }
-
- pkt_stream->nb_pkts = nb_pkts;
- return pkt_stream;
-}
-
-static bool pkt_continues(u32 options)
-{
- return options & XDP_PKT_CONTD;
-}
-
-static u32 ceil_u32(u32 a, u32 b)
-{
- return (a + b - 1) / b;
-}
-
-static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
-{
- u32 nb_frags = 1, next_frag;
-
- if (!pkt)
- return 1;
-
- if (!pkt_stream->verbatim) {
- if (!pkt->valid || !pkt->len)
- return 1;
- return ceil_u32(pkt->len, frame_size);
- }
-
- /* Search for the end of the packet in verbatim mode */
- if (!pkt_continues(pkt->options))
- return nb_frags;
-
- next_frag = pkt_stream->current_pkt_nb;
- pkt++;
- while (next_frag++ < pkt_stream->nb_pkts) {
- nb_frags++;
- if (!pkt_continues(pkt->options) || !pkt->valid)
- break;
- pkt++;
- }
- return nb_frags;
-}
-
-static bool set_pkt_valid(int offset, u32 len)
-{
- return len <= MAX_ETH_JUMBO_SIZE;
-}
-
-static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- pkt->offset = offset;
- pkt->len = len;
- pkt->valid = set_pkt_valid(offset, len);
-}
-
-static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- bool prev_pkt_valid = pkt->valid;
-
- pkt_set(pkt_stream, pkt, offset, len);
- pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
-}
-
-static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
-{
- return ceil_u32(len, umem->frame_size) * umem->frame_size;
-}
-
-static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = __pkt_stream_alloc(nb_pkts);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- pkt_stream->nb_pkts = nb_pkts;
- pkt_stream->max_pkt_len = pkt_len;
- for (i = 0; i < nb_pkts; i++) {
- struct pkt *pkt = &pkt_stream->pkts[i];
-
- pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
- pkt->pkt_nb = nb_start + i * nb_off;
- }
-
- return pkt_stream;
-}
-
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
-{
- return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
-}
-
-static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
-{
- return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
-}
-
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
- int offset)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
- for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
- pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
-
- ifobj->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
-{
- __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
- __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
-}
-
-static void pkt_stream_receive_half(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- u32 i;
-
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
- pkt_stream->pkts[0].len);
- pkt_stream = test->ifobj_rx->xsk->pkt_stream;
- for (i = 1; i < pkt_stream->nb_pkts; i += 2)
- pkt_stream->pkts[i].valid = false;
-
- pkt_stream->nb_valid_entries /= 2;
-}
-
-static void pkt_stream_even_odd_sequence(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- for (i = 0; i < test->nb_sockets; i++) {
- pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
-
- pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
- }
-}
-
-static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
-{
- if (!pkt->valid)
- return pkt->offset;
- return pkt->offset + umem_alloc_buffer(umem);
-}
-
-static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
-{
- pkt_stream->current_pkt_nb--;
-}
-
-static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
- u32 pkt_nb, u32 bytes_written)
-{
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- if (len < MIN_PKT_SIZE)
- return;
-
- if (!bytes_written) {
- gen_eth_hdr(xsk, data);
-
- len -= PKT_HDR_SIZE;
- data += PKT_HDR_SIZE;
- } else {
- bytes_written -= PKT_HDR_SIZE;
- }
-
- write_payload(data, pkt_nb, bytes_written, len);
-}
-
-static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
- u32 nb_frames, bool verbatim)
-{
- u32 i, len = 0, pkt_nb = 0, payload = 0;
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_alloc(nb_frames);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- for (i = 0; i < nb_frames; i++) {
- struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
- struct pkt *frame = &frames[i];
-
- pkt->offset = frame->offset;
- if (verbatim) {
- *pkt = *frame;
- pkt->pkt_nb = payload;
- if (!frame->valid || !pkt_continues(frame->options))
- payload++;
- } else {
- if (frame->valid)
- len += frame->len;
- if (frame->valid && pkt_continues(frame->options))
- continue;
-
- pkt->pkt_nb = pkt_nb;
- pkt->len = len;
- pkt->valid = frame->valid;
- pkt->options = 0;
-
- len = 0;
- }
-
- print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
- pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
-
- if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
- pkt_stream->max_pkt_len = pkt->len;
-
- if (pkt->valid)
- pkt_stream->nb_valid_entries++;
-
- pkt_nb++;
- }
-
- pkt_stream->nb_pkts = pkt_nb;
- pkt_stream->verbatim = verbatim;
- return pkt_stream;
-}
-
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_print_data(u32 *data, u32 cnt)
-{
- u32 i;
-
- for (i = 0; i < cnt; i++) {
- u32 seqnum, pkt_nb;
-
- seqnum = ntohl(*data) & 0xffff;
- pkt_nb = ntohl(*data) >> 16;
- ksft_print_msg("%u:%u ", pkt_nb, seqnum);
- data++;
- }
-}
-
-static void pkt_dump(void *pkt, u32 len, bool eth_header)
-{
- struct ethhdr *ethhdr = pkt;
- u32 i, *data;
-
- if (eth_header) {
- /*extract L2 frame */
- ksft_print_msg("DEBUG>> L2: dst mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_dest[i]);
-
- ksft_print_msg("\nDEBUG>> L2: src mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_source[i]);
-
- data = pkt + PKT_HDR_SIZE;
- } else {
- data = pkt;
- }
-
- /*extract L5 frame */
- ksft_print_msg("\nDEBUG>> L5: seqnum: ");
- pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
- ksft_print_msg("....");
- if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
- ksft_print_msg("\n.... ");
- pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
- PKT_DUMP_NB_TO_PRINT);
- }
- ksft_print_msg("\n---------------------------------------\n");
-}
-
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
-{
- u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
- u32 offset = addr % umem->frame_size, expected_offset;
- int pkt_offset = pkt->valid ? pkt->offset : 0;
-
- if (!umem->unaligned_mode)
- pkt_offset = 0;
-
- expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
-
- if (offset == expected_offset)
- return true;
-
- ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
- return false;
-}
-
-static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
-{
- void *data = xsk_umem__get_data(buffer, addr);
- struct xdp_info *meta = data - sizeof(struct xdp_info);
-
- if (meta->count != pkt->pkt_nb) {
- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
- __func__, pkt->pkt_nb,
- (unsigned long long)meta->count);
- return false;
- }
-
- return true;
-}
-
-static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
- u32 bytes_processed)
-{
- u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- addr -= umem->base_addr;
-
- if (addr >= umem->num_frames * umem->frame_size ||
- addr + len > umem->num_frames * umem->frame_size) {
- ksft_print_msg("Frag invalid addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
- if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
-
- pkt_data = data;
- if (!bytes_processed) {
- pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
- len -= PKT_HDR_SIZE;
- } else {
- bytes_processed -= PKT_HDR_SIZE;
- }
-
- expected_seqnum = bytes_processed / sizeof(*pkt_data);
- seqnum = ntohl(*pkt_data) & 0xffff;
- pkt_nb = ntohl(*pkt_data) >> 16;
-
- if (expected_pkt_nb != pkt_nb) {
- ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
- __func__, expected_pkt_nb, pkt_nb);
- goto error;
- }
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- words_to_end = len / sizeof(*pkt_data) - 1;
- pkt_data += words_to_end;
- seqnum = ntohl(*pkt_data) & 0xffff;
- expected_seqnum += words_to_end;
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- return true;
-
-error:
- pkt_dump(data, len, !bytes_processed);
- return false;
-}
-
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
-{
- if (pkt->len != len) {
- ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
- __func__, pkt->len, len);
- pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
- return false;
- }
-
- return true;
-}
-
-static int kick_tx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0)
- return TEST_PASS;
- if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
- usleep(100);
- return TEST_PASS;
- }
- return TEST_FAILURE;
-}
-
-static int kick_rx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
- if (ret < 0)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
-{
- unsigned int rcvd;
- u32 idx;
- int ret;
-
- if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
- if (rcvd) {
- if (rcvd > xsk->outstanding_tx) {
- u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
-
- ksft_print_msg("[%s] Too many packets completed\n", __func__);
- ksft_print_msg("Last completion address: %llx\n",
- (unsigned long long)addr);
- return TEST_FAILURE;
- }
-
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- }
-
- return TEST_PASS;
-}
-
-static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
-{
- u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
- u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct ifobject *ifobj = test->ifobj_rx;
- struct xsk_umem_info *umem = xsk->umem;
- struct pollfd fds = { };
- struct pkt *pkt;
- u64 first_addr = 0;
- int ret;
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLIN;
-
- ret = kick_rx(xsk);
- if (ret)
- return TEST_FAILURE;
-
- if (ifobj->use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
-
- if (!ret) {
- if (!is_umem_valid(test->ifobj_tx))
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
- return TEST_CONTINUE;
- }
-
- if (!(fds.revents & POLLIN))
- return TEST_CONTINUE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
- if (!rcvd)
- return TEST_CONTINUE;
-
- if (ifobj->use_fill_ring) {
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
- }
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- }
- }
-
- while (frags_processed < rcvd) {
- const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- u64 addr = desc->addr, orig;
-
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
-
- if (!nb_frags) {
- pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
- if (!pkt) {
- ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
- __func__, addr, desc->len);
- return TEST_FAILURE;
- }
- }
-
- print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
- addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
-
- if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
- !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
- !is_metadata_correct(pkt, umem->buffer, addr)))
- return TEST_FAILURE;
-
- if (!nb_frags++)
- first_addr = addr;
- frags_processed++;
- pkt_len += desc->len;
- if (ifobj->use_fill_ring)
- *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-
- if (pkt_continues(desc->options))
- continue;
-
- /* The complete packet has been received */
- if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
- !is_offset_correct(umem, pkt, addr))
- return TEST_FAILURE;
-
- pkt_stream->nb_rx_pkts++;
- nb_frags = 0;
- pkt_len = 0;
- }
-
- if (nb_frags) {
- /* In the middle of a packet. Start over from beginning of packet. */
- idx_rx -= nb_frags;
- xsk_ring_cons__cancel(&xsk->rx, nb_frags);
- if (ifobj->use_fill_ring) {
- idx_fq -= nb_frags;
- xsk_ring_prod__cancel(&umem->fq, nb_frags);
- }
- frags_processed -= nb_frags;
- }
-
- if (ifobj->use_fill_ring)
- xsk_ring_prod__submit(&umem->fq, frags_processed);
- if (ifobj->release_rx)
- xsk_ring_cons__release(&xsk->rx, frags_processed);
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight -= pkts_sent;
- pthread_mutex_unlock(&pacing_mutex);
- pkts_sent = 0;
-
-return TEST_CONTINUE;
-}
-
-bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
- unsigned long *bitmap)
-{
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
-
- if (!pkt_stream) {
- __set_bit(sock_num, bitmap);
- return false;
- }
-
- if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
- __set_bit(sock_num, bitmap);
- if (bitmap_full(bitmap, test->nb_sockets))
- return true;
- }
-
- return false;
-}
-
-static int receive_pkts(struct test_spec *test)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- struct xsk_socket_info *xsk;
- u32 sock_num = 0;
- int res, ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (1) {
- xsk = &test->ifobj_rx->xsk_arr[sock_num];
-
- if ((all_packets_received(test, xsk, sock_num, bitmap)))
- break;
-
- res = __receive_pkts(test, xsk);
- if (!(res == TEST_PASS || res == TEST_CONTINUE))
- return res;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
- return TEST_FAILURE;
- }
- sock_num = (sock_num + 1) % test->nb_sockets;
- }
-
- return TEST_PASS;
-}
-
-static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
-{
- u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct xsk_umem_info *umem = ifobject->umem;
- bool use_poll = ifobject->use_poll;
- struct pollfd fds = { };
- int ret;
-
- buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
- /* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
- buffer_len)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- return TEST_CONTINUE;
- }
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLOUT;
-
- while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (timeout) {
- if (ret < 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- if (ret == 0)
- return TEST_PASS;
- break;
- }
- if (ret <= 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- for (i = 0; i < xsk->batch_size; i++) {
- struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- u32 nb_frags_left, nb_frags, bytes_written = 0;
-
- if (!pkt)
- break;
-
- nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
- if (nb_frags > xsk->batch_size - i) {
- pkt_stream_cancel(pkt_stream);
- xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
- break;
- }
- nb_frags_left = nb_frags;
-
- while (nb_frags_left--) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-
- tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
- if (pkt_stream->verbatim) {
- tx_desc->len = pkt->len;
- tx_desc->options = pkt->options;
- } else if (nb_frags_left) {
- tx_desc->len = umem->frame_size;
- tx_desc->options = XDP_PKT_CONTD;
- } else {
- tx_desc->len = pkt->len - bytes_written;
- tx_desc->options = 0;
- }
- if (pkt->valid)
- pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
- bytes_written);
- bytes_written += tx_desc->len;
-
- print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
- tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
-
- if (nb_frags_left) {
- i++;
- if (pkt_stream->verbatim)
- pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- }
- }
-
- if (pkt && pkt->valid) {
- valid_pkts++;
- valid_frags += nb_frags;
- }
- }
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight += valid_pkts;
- pthread_mutex_unlock(&pacing_mutex);
-
- xsk_ring_prod__submit(&xsk->tx, i);
- xsk->outstanding_tx += valid_frags;
-
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret <= 0) {
- if (ret == 0 && timeout)
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
- return TEST_FAILURE;
- }
- }
-
- if (!timeout) {
- if (complete_pkts(xsk, i))
- return TEST_FAILURE;
-
- usleep(10);
- return TEST_PASS;
- }
-
- return TEST_CONTINUE;
-}
-
-static int wait_for_tx_completion(struct xsk_socket_info *xsk)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- int ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (xsk->outstanding_tx) {
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
- return TEST_FAILURE;
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- return TEST_PASS;
-}
-
-bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
-{
- return bitmap_full(bitmap, test->nb_sockets);
-}
-
-static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
-{
- bool timeout = !is_umem_valid(test->ifobj_rx);
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- u32 i, ret;
-
- while (!(all_packets_sent(test, bitmap))) {
- for (i = 0; i < test->nb_sockets; i++) {
- struct pkt_stream *pkt_stream;
-
- pkt_stream = ifobject->xsk_arr[i].pkt_stream;
- if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
- __set_bit(i, bitmap);
- continue;
- }
- ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
- if (ret == TEST_CONTINUE && !test->fail)
- continue;
-
- if ((ret || test->fail) && !timeout)
- return TEST_FAILURE;
-
- if (ret == TEST_PASS && timeout)
- return ret;
-
- ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
- if (ret)
- return TEST_FAILURE;
- }
- }
-
- return TEST_PASS;
-}
-
-static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
-{
- int fd = xsk_socket__fd(xsk), err;
- socklen_t optlen, expected_len;
-
- optlen = sizeof(*stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- expected_len = sizeof(struct xdp_statistics);
- if (optlen != expected_len) {
- ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
- __func__, expected_len, optlen);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static int validate_rx_dropped(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- /* The receiver calls getsockopt after receiving the last (valid)
- * packet which is not the final packet sent in this test (valid and
- * invalid packets are sent in alternating fashion with the final
- * packet being invalid). Since the last packet may or may not have
- * been dropped already, both outcomes must be allowed.
- */
- if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
- stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_rx_full(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_ring_full)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_fill_empty(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_fill_ring_empty_descs)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_tx_invalid_descs(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- struct xdp_statistics stats;
- socklen_t optlen;
- int err;
-
- optlen = sizeof(stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
- __func__,
- (unsigned long long)stats.tx_invalid_descs,
- ifobject->xsk->pkt_stream->nb_pkts);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
- struct xsk_umem_info *umem, bool tx)
-{
- int i, ret;
-
- for (i = 0; i < test->nb_sockets; i++) {
- bool shared = (ifobject->shared_umem && tx) ? true : !!i;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
- ifobject, shared);
- if (!ret)
- break;
-
- /* Retry if it fails as xsk_socket__create() is asynchronous */
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(-ret);
- usleep(USLEEP_MAX);
- }
- if (ifobject->busy_poll)
- enable_busy_poll(&ifobject->xsk_arr[i]);
- }
-}
-
-static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
-{
- xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
- ifobject->xsk = &ifobject->xsk_arr[0];
- ifobject->xskmap = test->ifobj_rx->xskmap;
- memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
- ifobject->umem->base_addr = 0;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
- bool fill_up)
-{
- u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
- u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
- int ret;
-
- if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
- buffers_to_fill = umem->num_frames;
- else
- buffers_to_fill = umem->fill_size;
-
- ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
- if (ret != buffers_to_fill)
- exit_with_error(ENOSPC);
-
- while (filled < buffers_to_fill) {
- struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
- u64 addr;
- u32 i;
-
- for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
- if (!pkt) {
- if (!fill_up)
- break;
- addr = filled * umem->frame_size + umem->base_addr;
- } else if (pkt->offset >= 0) {
- addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
- } else {
- addr = pkt->offset + umem_alloc_buffer(umem);
- }
-
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
- if (++filled >= buffers_to_fill)
- break;
- }
- }
- xsk_ring_prod__submit(&umem->fq, filled);
- xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
-
- pkt_stream_reset(pkt_stream);
- umem_reset_alloc(umem);
-}
-
-static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-{
- u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
- int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
- LIBBPF_OPTS(bpf_xdp_query_opts, opts);
- void *bufs;
- int ret;
- u32 i;
-
- if (ifobject->umem->unaligned_mode)
- mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
-
- if (ifobject->shared_umem)
- umem_sz *= 2;
-
- bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
- if (ret)
- exit_with_error(-ret);
-
- xsk_configure_socket(test, ifobject, ifobject->umem, false);
-
- ifobject->xsk = &ifobject->xsk_arr[0];
-
- if (!ifobject->rx_on)
- return;
-
- xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
-
- for (i = 0; i < test->nb_sockets; i++) {
- ifobject->xsk = &ifobject->xsk_arr[i];
- ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
- if (ret)
- exit_with_error(errno);
- }
-}
-
-static void *worker_testapp_validate_tx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_tx;
- int err;
-
- if (test->current_step == 1) {
- if (!ifobject->shared_umem)
- thread_common_ops(test, ifobject);
- else
- thread_common_ops_tx(test, ifobject);
- }
-
- err = send_pkts(test, ifobject);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
-
- pthread_exit(NULL);
-}
-
-static void *worker_testapp_validate_rx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_rx;
- int err;
-
- if (test->current_step == 1) {
- thread_common_ops(test, ifobject);
- } else {
- xsk_clear_xskmap(ifobject->xskmap);
- err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
- if (err) {
- ksft_print_msg("Error: Failed to update xskmap, error %s\n",
- strerror(-err));
- exit_with_error(-err);
- }
- }
-
- pthread_barrier_wait(&barr);
-
- err = receive_pkts(test);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
-
- pthread_exit(NULL);
-}
-
-static u64 ceil_u64(u64 a, u64 b)
-{
- return (a + b - 1) / b;
-}
-
-static void testapp_clean_xsk_umem(struct ifobject *ifobj)
-{
- u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
-
- if (ifobj->shared_umem)
- umem_sz *= 2;
-
- umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- xsk_umem__delete(ifobj->umem->umem);
- munmap(ifobj->umem->buffer, umem_sz);
-}
-
-static void handler(int signum)
-{
- pthread_exit(NULL);
-}
-
-static bool xdp_prog_changed_rx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_rx;
-
- return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
-}
-
-static bool xdp_prog_changed_tx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_tx;
-
- return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
-}
-
-static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
- struct bpf_map *xskmap, enum test_mode mode)
-{
- int err;
-
- xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
- err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
- if (err) {
- ksft_print_msg("Error attaching XDP program\n");
- exit_with_error(-err);
- }
-
- if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
- if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
- ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
- exit_with_error(EINVAL);
- }
-
- ifobj->xdp_prog = xdp_prog;
- ifobj->xskmap = xskmap;
- ifobj->mode = mode;
-}
-
-static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
- struct ifobject *ifobj_tx)
-{
- if (xdp_prog_changed_rx(test))
- xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
-
- if (!ifobj_tx || ifobj_tx->shared_umem)
- return;
-
- if (xdp_prog_changed_tx(test))
- xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
-}
-
-static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
- struct ifobject *ifobj2)
-{
- pthread_t t0, t1;
- int err;
-
- if (test->mtu > MAX_ETH_PKT_SIZE) {
- if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
- (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
- ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
- return TEST_SKIP;
- }
- if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
- (ifobj2 && !ifobj2->multi_buff_supp))) {
- ksft_test_result_skip("Multi buffer not supported.\n");
- return TEST_SKIP;
- }
- }
- err = test_spec_set_mtu(test, test->mtu);
- if (err) {
- ksft_print_msg("Error, could not set mtu.\n");
- exit_with_error(err);
- }
-
- if (ifobj2) {
- if (pthread_barrier_init(&barr, NULL, 2))
- exit_with_error(errno);
- pkt_stream_reset(ifobj2->xsk->pkt_stream);
- }
-
- test->current_step++;
- pkt_stream_reset(ifobj1->xsk->pkt_stream);
- pkts_in_flight = 0;
-
- signal(SIGUSR1, handler);
- /*Spawn RX thread */
- pthread_create(&t0, NULL, ifobj1->func_ptr, test);
-
- if (ifobj2) {
- pthread_barrier_wait(&barr);
- if (pthread_barrier_destroy(&barr))
- exit_with_error(errno);
-
- /*Spawn TX thread */
- pthread_create(&t1, NULL, ifobj2->func_ptr, test);
-
- pthread_join(t1, NULL);
- }
-
- if (!ifobj2)
- pthread_kill(t0, SIGUSR1);
- else
- pthread_join(t0, NULL);
-
- if (test->total_steps == test->current_step || test->fail) {
- u32 i;
-
- if (ifobj2)
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
-
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
-
- testapp_clean_xsk_umem(ifobj1);
- if (ifobj2 && !ifobj2->shared_umem)
- testapp_clean_xsk_umem(ifobj2);
- }
-
- return !!test->fail;
-}
-
-static int testapp_validate_traffic(struct test_spec *test)
-{
- struct ifobject *ifobj_rx = test->ifobj_rx;
- struct ifobject *ifobj_tx = test->ifobj_tx;
-
- if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
- (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
- ksft_test_result_skip("No huge pages present.\n");
- return TEST_SKIP;
- }
-
- if (test->set_ring) {
- if (ifobj_tx->hw_ring_size_supp) {
- if (set_ring_size(ifobj_tx)) {
- ksft_test_result_skip("Failed to change HW ring size.\n");
- return TEST_FAILURE;
- }
- } else {
- ksft_test_result_skip("Changing HW ring size not supported.\n");
- return TEST_SKIP;
- }
- }
-
- xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
- return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
-}
-
-static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
-{
- return __testapp_validate_traffic(test, ifobj, NULL);
-}
-
-static int testapp_teardown(struct test_spec *test)
-{
- int i;
-
- for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
- test_spec_reset(test);
- }
-
- return TEST_PASS;
-}
-
-static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
-{
- thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
- struct ifobject *tmp_ifobj = (*ifobj1);
-
- (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
- (*ifobj2)->func_ptr = tmp_func_ptr;
-
- *ifobj1 = *ifobj2;
- *ifobj2 = tmp_ifobj;
-}
-
-static int testapp_bidirectional(struct test_spec *test)
-{
- int res;
-
- test->ifobj_tx->rx_on = true;
- test->ifobj_rx->tx_on = true;
- test->total_steps = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- print_verbose("Switching Tx/Rx direction\n");
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
-
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- return res;
-}
-
-static int swap_xsk_resources(struct test_spec *test)
-{
- int ret;
-
- test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
- test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
- test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
- test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
-
- ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
- if (ret)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int testapp_xdp_prog_cleanup(struct test_spec *test)
-{
- test->total_steps = 2;
- test->nb_sockets = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- if (swap_xsk_resources(test))
- return TEST_FAILURE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_headroom(struct test_spec *test)
-{
- test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_dropped(struct test_spec *test)
-{
- if (test->mode == TEST_MODE_ZC) {
- ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
- return TEST_SKIP;
- }
-
- pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
- test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
- XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
- pkt_stream_receive_half(test);
- test->ifobj_rx->validation_func = validate_rx_dropped;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_tx_invalid_descs(struct test_spec *test)
-{
- pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
- test->ifobj_tx->validation_func = validate_tx_invalid_descs;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_full(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
- test->ifobj_rx->release_rx = false;
- test->ifobj_rx->validation_func = validate_rx_full;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_fill_empty(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->use_fill_ring = false;
- test->ifobj_rx->validation_func = validate_fill_empty;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* Let half of the packets straddle a 4K buffer boundary */
- pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_single_pkt(struct test_spec *test)
-{
- struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc_mb(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Valid packet for synch to start with */
- {0, MIN_PKT_SIZE, 0, true, 0},
- /* Zero frame len is not legal */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, 0, 0, false, 0},
- /* Invalid address in the second frame */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid len in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid options in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
- /* Transmit 2 frags, receive 3 */
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
- /* Middle frame crosses chunk boundary with small length */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true, 0}};
-
- if (umem->unaligned_mode) {
- /* Crossing a chunk boundary allowed */
- pkts[12].valid = true;
- pkts[13].valid = true;
- }
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Zero packet address allowed */
- {0, MIN_PKT_SIZE, 0, true},
- /* Allowed packet */
- {0, MIN_PKT_SIZE, 0, true},
- /* Straddling the start of umem */
- {-2, MIN_PKT_SIZE, 0, false},
- /* Packet too large */
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
- /* Up to end of umem allowed */
- {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
- /* After umem ends */
- {umem_size, MIN_PKT_SIZE, 0, false},
- /* Straddle the end of umem */
- {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 4K boundary */
- {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 2K boundary */
- {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true}};
-
- if (umem->unaligned_mode) {
- /* Crossing a page boundary allowed */
- pkts[7].valid = true;
- }
- if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
- /* Crossing a 2K frame size boundary not allowed */
- pkts[8].valid = false;
- }
-
- if (test->ifobj_tx->shared_umem) {
- pkts[4].offset += umem_size;
- pkts[5].offset += umem_size;
- pkts[6].offset += umem_size;
- }
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_drop(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_receive_half(test);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_metadata_copy(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
- struct bpf_map *data_map;
- int count = 0;
- int key = 0;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
- skel_tx->progs.xsk_xdp_populate_metadata,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
- test->ifobj_rx->use_metadata = true;
-
- data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map)) {
- ksft_print_msg("Error: could not find bss section of XDP program\n");
- return TEST_FAILURE;
- }
-
- if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
- ksft_print_msg("Error: could not update count element\n");
- return TEST_FAILURE;
- }
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_shared_umem(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test->total_steps = 1;
- test->nb_sockets = 2;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
- skel_tx->progs.xsk_xdp_shared_umem,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_even_odd_sequence(test);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_txq_tmout(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
- test->ifobj_tx->umem->frame_size = 2048;
- pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
- return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-}
-
-static int testapp_poll_rxq_tmout(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
-}
-
-static int testapp_too_many_frags(struct test_spec *test)
-{
- struct pkt *pkts;
- u32 max_frags, i;
- int ret;
-
- if (test->mode == TEST_MODE_ZC) {
- max_frags = test->ifobj_tx->xdp_zc_max_segs;
- } else {
- max_frags = get_max_skb_frags();
- if (!max_frags) {
- ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
- max_frags = 17;
- }
- max_frags += 1;
- }
-
- pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
- if (!pkts)
- return TEST_FAILURE;
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
-
- /* Valid packet for synch */
- pkts[0].len = MIN_PKT_SIZE;
- pkts[0].valid = true;
-
- /* One valid packet with the max amount of frags */
- for (i = 1; i < max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = true;
- }
- pkts[max_frags].options = 0;
-
- /* An invalid packet with the max amount of frags but signals packet
- * continues on the last frag
- */
- for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = false;
- }
-
- /* Valid packet for synch */
- pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
- pkts[2 * max_frags + 1].valid = true;
-
- pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- ret = testapp_validate_traffic(test);
-
- free(pkts);
- return ret;
-}
-
-static int xsk_load_xdp_programs(struct ifobject *ifobj)
-{
- ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
- if (libbpf_get_error(ifobj->xdp_progs))
- return libbpf_get_error(ifobj->xdp_progs);
-
- return 0;
-}
-
static void xsk_unload_xdp_programs(struct ifobject *ifobj)
{
xsk_xdp_progs__destroy(ifobj->xdp_progs);
}
-/* Simple test */
-static bool hugepages_present(void)
-{
- size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
- void *bufs;
-
- bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
- if (bufs == MAP_FAILED)
- return false;
-
- mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- munmap(bufs, mmap_sz);
- return true;
-}
-
-static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
-{
- LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
- int err;
-
- ifobj->func_ptr = func_ptr;
-
- err = xsk_load_xdp_programs(ifobj);
- if (err) {
- ksft_print_msg("Error loading XDP program\n");
- exit_with_error(err);
- }
-
- if (hugepages_present())
- ifobj->unaligned_supp = true;
-
- err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
- if (err) {
- ksft_print_msg("Error querying XDP capabilities\n");
- exit_with_error(-err);
- }
- if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
- ifobj->multi_buff_supp = true;
- if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
- if (query_opts.xdp_zc_max_segs > 1) {
- ifobj->multi_buff_zc_supp = true;
- ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
- } else {
- ifobj->xdp_zc_max_segs = 0;
- }
- }
-}
-
-static int testapp_send_receive(struct test_spec *test)
-{
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_rx(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_tx(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_aligned_inv_desc(struct test_spec *test)
-{
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
-{
- u64 page_size, umem_size;
-
- /* Odd frame size so the UMEM doesn't end near a page boundary. */
- test->ifobj_tx->umem->frame_size = 4001;
- test->ifobj_rx->umem->frame_size = 4001;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* This test exists to test descriptors that staddle the end of
- * the UMEM but not a page.
- */
- page_size = sysconf(_SC_PAGESIZE);
- umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
- assert(umem_size % page_size > MIN_PKT_SIZE);
- assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_mb(struct test_spec *test)
-{
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_xdp_metadata(struct test_spec *test)
-{
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_xdp_metadata_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_hw_sw_min_ring_size(struct test_spec *test)
-{
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
- test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
- test->ifobj_tx->xsk->batch_size = 1;
- test->ifobj_rx->xsk->batch_size = 1;
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch size to hw_ring_size - 1 */
- test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_hw_sw_max_ring_size(struct test_spec *test)
-{
- u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
- test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
- test->ifobj_rx->umem->num_frames = max_descs;
- test->ifobj_rx->umem->fill_size = max_descs;
- test->ifobj_rx->umem->comp_size = max_descs;
- test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
- * updating the Rx HW tail register.
- */
- test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
static void run_pkt_test(struct test_spec *test)
{
int ret;
ret = test->test_func(test);
- if (ret == TEST_PASS)
+ switch (ret) {
+ case TEST_PASS:
ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
test->name);
- pkt_stream_restore_default(test);
-}
-
-static struct ifobject *ifobject_create(void)
-{
- struct ifobject *ifobj;
-
- ifobj = calloc(1, sizeof(struct ifobject));
- if (!ifobj)
- return NULL;
-
- ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
- if (!ifobj->xsk_arr)
- goto out_xsk_arr;
-
- ifobj->umem = calloc(1, sizeof(*ifobj->umem));
- if (!ifobj->umem)
- goto out_umem;
-
- return ifobj;
-
-out_umem:
- free(ifobj->xsk_arr);
-out_xsk_arr:
- free(ifobj);
- return NULL;
-}
+ break;
+ case TEST_SKIP:
+ ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_FAILURE:
+ ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ default:
+ ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+ mode_string(test), busy_poll_string(test), test->name, ret);
+ }
-static void ifobject_delete(struct ifobject *ifobj)
-{
- free(ifobj->umem);
- free(ifobj->xsk_arr);
- free(ifobj);
+ pkt_stream_restore_default(test);
}
static bool is_xdp_supported(int ifindex)
@@ -2587,42 +319,6 @@ static bool is_xdp_supported(int ifindex)
return true;
}
-static const struct test_spec tests[] = {
- {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
- {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
- {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
- {.name = "POLL_RX", .test_func = testapp_poll_rx},
- {.name = "POLL_TX", .test_func = testapp_poll_tx},
- {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
- {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
- {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
- {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
- {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
- {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
- {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
- .test_func = testapp_unaligned_inv_desc_4001_frame},
- {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
- {.name = "TEARDOWN", .test_func = testapp_teardown},
- {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
- {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
- {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
- {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
- {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
- {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
- {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
- {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
- {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
- {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
- {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
- {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
- .test_func = testapp_send_receive_unaligned_mb},
- {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
- {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
- {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
- {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
- {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
- };
-
static void print_tests(void)
{
u32 i;
@@ -2630,10 +326,13 @@ static void print_tests(void)
printf("Tests:\n");
for (i = 0; i < ARRAY_SIZE(tests); i++)
printf("%u: %s\n", i, tests[i].name);
+ for (i = ARRAY_SIZE(tests); i < ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); i++)
+ printf("%u: %s\n", i, ci_skip_tests[i - ARRAY_SIZE(tests)].name);
}
int main(int argc, char **argv)
{
+ const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests);
struct pkt_stream *rx_pkt_stream_default;
struct pkt_stream *tx_pkt_stream_default;
struct ifobject *ifobj_tx, *ifobj_rx;
@@ -2661,7 +360,7 @@ int main(int argc, char **argv)
print_tests();
ksft_exit_xpass();
}
- if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+ if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= total_tests) {
ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
ksft_exit_xfail();
}
@@ -2686,10 +385,13 @@ int main(int argc, char **argv)
ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
}
- init_iface(ifobj_rx, worker_testapp_validate_rx);
- init_iface(ifobj_tx, worker_testapp_validate_tx);
+ if (init_iface(ifobj_rx, worker_testapp_validate_rx) ||
+ init_iface(ifobj_tx, worker_testapp_validate_tx)) {
+ ksft_print_msg("Error : can't initialize interfaces\n");
+ ksft_exit_xfail();
+ }
- test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
if (!tx_pkt_stream_default || !rx_pkt_stream_default)
@@ -2698,7 +400,7 @@ int main(int argc, char **argv)
test.rx_pkt_stream_default = rx_pkt_stream_default;
if (opt_run_test == RUN_ALL_TESTS)
- nb_tests = ARRAY_SIZE(tests);
+ nb_tests = total_tests;
else
nb_tests = 1;
if (opt_mode == TEST_MODE_ALL) {
@@ -2720,11 +422,15 @@ int main(int argc, char **argv)
if (opt_mode != TEST_MODE_ALL && i != opt_mode)
continue;
- for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ for (j = 0; j < total_tests; j++) {
if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
continue;
- test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ if (j < ARRAY_SIZE(tests))
+ test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ else
+ test_init(&test, ifobj_tx, ifobj_rx, i,
+ &ci_skip_tests[j - ARRAY_SIZE(tests)]);
run_pkt_test(&test);
usleep(USLEEP_MAX);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..3ca518df23ad 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -22,166 +22,13 @@
#define PF_XDP AF_XDP
#endif
-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define TEST_CONTINUE 1
-#define TEST_SKIP 2
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_TEST_NAME_SIZE 48
#define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
-#define MIN_PKT_SIZE 64
-#define MAX_ETH_PKT_SIZE 1518
#define MAX_ETH_JUMBO_SIZE 9000
-#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define DEFAULT_BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define THREAD_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
#define RX_FULL_RXQSIZE 32
#define UMEM_HEADROOM_TEST_SIZE 128
#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
-#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
-#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
-#define XSK_DESC__INVALID_OPTION (0xffff)
-#define HUGEPAGE_SIZE (2 * 1024 * 1024)
-#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX
#define NUM_MAC_ADDRESSES 4
-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
- TEST_MODE_SKB,
- TEST_MODE_DRV,
- TEST_MODE_ZC,
- TEST_MODE_ALL
-};
-
-struct xsk_umem_info {
- struct xsk_ring_prod fq;
- struct xsk_ring_cons cq;
- struct xsk_umem *umem;
- u64 next_buffer;
- u32 num_frames;
- u32 frame_headroom;
- void *buffer;
- u32 frame_size;
- u32 base_addr;
- u32 fill_size;
- u32 comp_size;
- bool unaligned_mode;
-};
-
-struct xsk_socket_info {
- struct xsk_ring_cons rx;
- struct xsk_ring_prod tx;
- struct xsk_umem_info *umem;
- struct xsk_socket *xsk;
- struct pkt_stream *pkt_stream;
- u32 outstanding_tx;
- u32 rxqsize;
- u32 batch_size;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
-};
-
-struct pkt {
- int offset;
- u32 len;
- u32 pkt_nb;
- bool valid;
- u16 options;
-};
-
-struct pkt_stream {
- u32 nb_pkts;
- u32 current_pkt_nb;
- struct pkt *pkts;
- u32 max_pkt_len;
- u32 nb_rx_pkts;
- u32 nb_valid_entries;
- bool verbatim;
-};
-
-struct set_hw_ring {
- u32 default_tx;
- u32 default_rx;
-};
-
-struct ifobject;
-struct test_spec;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-typedef int (*test_func_t)(struct test_spec *test);
-
-struct ifobject {
- char ifname[MAX_INTERFACE_NAME_CHARS];
- struct xsk_socket_info *xsk;
- struct xsk_socket_info *xsk_arr;
- struct xsk_umem_info *umem;
- thread_func_t func_ptr;
- validation_func_t validation_func;
- struct xsk_xdp_progs *xdp_progs;
- struct bpf_map *xskmap;
- struct bpf_program *xdp_prog;
- struct ethtool_ringparam ring;
- struct set_hw_ring set_ring;
- enum test_mode mode;
- int ifindex;
- int mtu;
- u32 bind_flags;
- u32 xdp_zc_max_segs;
- bool tx_on;
- bool rx_on;
- bool use_poll;
- bool busy_poll;
- bool use_fill_ring;
- bool release_rx;
- bool shared_umem;
- bool use_metadata;
- bool unaligned_supp;
- bool multi_buff_supp;
- bool multi_buff_zc_supp;
- bool hw_ring_size_supp;
-};
-
-struct test_spec {
- struct ifobject *ifobj_tx;
- struct ifobject *ifobj_rx;
- struct pkt_stream *tx_pkt_stream_default;
- struct pkt_stream *rx_pkt_stream_default;
- struct bpf_program *xdp_prog_rx;
- struct bpf_program *xdp_prog_tx;
- struct bpf_map *xskmap_rx;
- struct bpf_map *xskmap_tx;
- test_func_t test_func;
- int mtu;
- u16 total_steps;
- u16 current_step;
- u16 nb_sockets;
- bool fail;
- bool set_ring;
- enum test_mode mode;
- char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int pkts_in_flight;
-
-static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-
#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index d46962a24724..1159d81890c2 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -18,7 +18,7 @@
#include <errno.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define COUNT_ISN_BPS 4
#define COUNT_WPS 4
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index e7041816085a..5fc0f37f3fd4 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -26,7 +26,7 @@
#include <errno.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static volatile uint8_t var[96] __attribute__((__aligned__(32)));
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 8d275f03e977..ca2aaab9e4ca 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
void child(int cpu)
{
@@ -127,22 +127,42 @@ int run_test(int cpu)
return KSFT_PASS;
}
+/*
+ * Reads the suspend success count from sysfs.
+ * Returns the count on success or exits on failure.
+ */
+static int get_suspend_success_count_or_fail(void)
+{
+ FILE *fp;
+ int val;
+
+ fp = fopen("/sys/power/suspend_stats/success", "r");
+ if (!fp)
+ ksft_exit_fail_msg(
+ "Failed to open suspend_stats/success: %s\n",
+ strerror(errno));
+
+ if (fscanf(fp, "%d", &val) != 1) {
+ fclose(fp);
+ ksft_exit_fail_msg(
+ "Failed to read suspend success count\n");
+ }
+
+ fclose(fp);
+ return val;
+}
+
void suspend(void)
{
- int power_state_fd;
int timerfd;
int err;
+ int count_before;
+ int count_after;
struct itimerspec spec = {};
if (getuid() != 0)
ksft_exit_skip("Please run the test as root - Exiting.\n");
- power_state_fd = open("/sys/power/state", O_RDWR);
- if (power_state_fd < 0)
- ksft_exit_fail_msg(
- "open(\"/sys/power/state\") failed %s)\n",
- strerror(errno));
-
timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
if (timerfd < 0)
ksft_exit_fail_msg("timerfd_create() failed\n");
@@ -152,14 +172,15 @@ void suspend(void)
if (err < 0)
ksft_exit_fail_msg("timerfd_settime() failed\n");
+ count_before = get_suspend_success_count_or_fail();
+
system("(echo mem > /sys/power/state) 2> /dev/null");
- timerfd_gettime(timerfd, &spec);
- if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0)
+ count_after = get_suspend_success_count_or_fail();
+ if (count_after <= count_before)
ksft_exit_fail_msg("Failed to enter Suspend state\n");
close(timerfd);
- close(power_state_fd);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore
index d6c30b43a4bb..abbb13b6e96b 100644
--- a/tools/testing/selftests/cachestat/.gitignore
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
test_cachestat
+tmpshmcstat
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 632ab44737ec..542cd09cb443 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -16,7 +16,7 @@
#include <fcntl.h>
#include <errno.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define NR_TESTS 9
@@ -33,6 +33,11 @@ void print_cachestat(struct cachestat *cs)
cs->nr_evicted, cs->nr_recently_evicted);
}
+enum file_type {
+ FILE_MMAP,
+ FILE_SHMEM
+};
+
bool write_exactly(int fd, size_t filesize)
{
int random_fd = open("/dev/urandom", O_RDONLY);
@@ -201,38 +206,73 @@ out1:
out:
return ret;
}
+const char *file_type_str(enum file_type type)
+{
+ switch (type) {
+ case FILE_SHMEM:
+ return "shmem";
+ case FILE_MMAP:
+ return "mmap";
+ default:
+ return "unknown";
+ }
+}
-bool test_cachestat_shmem(void)
+
+bool run_cachestat_test(enum file_type type)
{
size_t PS = sysconf(_SC_PAGESIZE);
size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */
int syscall_ret;
size_t compute_len = PS * 512;
struct cachestat_range cs_range = { PS, compute_len };
- char *filename = "tmpshmcstat";
+ char *filename = "tmpshmcstat", *map;
struct cachestat cs;
bool ret = true;
+ int fd;
unsigned long num_pages = compute_len / PS;
- int fd = shm_open(filename, O_CREAT | O_RDWR, 0600);
+ if (type == FILE_SHMEM)
+ fd = shm_open(filename, O_CREAT | O_RDWR, 0600);
+ else
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0666);
if (fd < 0) {
- ksft_print_msg("Unable to create shmem file.\n");
+ ksft_print_msg("Unable to create %s file.\n",
+ file_type_str(type));
ret = false;
goto out;
}
if (ftruncate(fd, filesize)) {
- ksft_print_msg("Unable to truncate shmem file.\n");
+ ksft_print_msg("Unable to truncate %s file.\n",file_type_str(type));
ret = false;
goto close_fd;
}
+ switch (type) {
+ case FILE_SHMEM:
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to write to file.\n");
+ ret = false;
+ goto close_fd;
+ }
+ break;
+ case FILE_MMAP:
+ map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
- if (!write_exactly(fd, filesize)) {
- ksft_print_msg("Unable to write to shmem file.\n");
+ if (map == MAP_FAILED) {
+ ksft_print_msg("mmap failed.\n");
+ ret = false;
+ goto close_fd;
+ }
+ for (int i = 0; i < filesize; i++)
+ map[i] = 'A';
+ break;
+ default:
+ ksft_print_msg("Unsupported file type.\n");
ret = false;
goto close_fd;
}
-
syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
if (syscall_ret) {
@@ -308,12 +348,18 @@ int main(void)
break;
}
- if (test_cachestat_shmem())
+ if (run_cachestat_test(FILE_SHMEM))
ksft_test_result_pass("cachestat works with a shmem file\n");
else {
ksft_test_result_fail("cachestat fails with a shmem file\n");
ret = 1;
}
+ if (run_cachestat_test(FILE_MMAP))
+ ksft_test_result_pass("cachestat works with a mmap file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with a mmap file\n");
+ ret = 1;
+ }
return ret;
}
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 47bad7ddc5bc..46fc8d46b6e6 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -18,7 +18,7 @@
#include <sys/prctl.h>
#include <sys/stat.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int nerrs;
static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index 65f2a1c89239..cef1d9937b9f 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -7,7 +7,7 @@
#include <sys/prctl.h>
#include <sys/auxv.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
# define HAVE_GETAUXVAL
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6..e01584c2189a 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -21,14 +21,15 @@ TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
+include lib/libcgroup.mk
-$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_cpu: cgroup_util.c
-$(OUTPUT)/test_cpuset: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
-$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
-$(OUTPUT)/test_kill: cgroup_util.c
-$(OUTPUT)/test_kmem: cgroup_util.c
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_pids: cgroup_util.c
-$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_core: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpu: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpuset: $(LIBCGROUP_O)
+$(OUTPUT)/test_freezer: $(LIBCGROUP_O)
+$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O)
+$(OUTPUT)/test_kill: $(LIBCGROUP_O)
+$(OUTPUT)/test_kmem: $(LIBCGROUP_O)
+$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O)
+$(OUTPUT)/test_pids: $(LIBCGROUP_O)
+$(OUTPUT)/test_zswap: $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 1e2d46636a0c..44c52f620fda 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -17,10 +17,12 @@
#include <unistd.h>
#include "cgroup_util.h"
-#include "../clone3/clone3_selftests.h"
+#include "../../clone3/clone3_selftests.h"
+
+bool cg_test_v1_named;
/* Returns read len on success, or -errno on failure. */
-static ssize_t read_text(const char *path, char *buf, size_t max_len)
+ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
int fd;
@@ -39,7 +41,7 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
}
/* Returns written len on success, or -errno on failure. */
-static ssize_t write_text(const char *path, char *buf, ssize_t len)
+ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
@@ -217,7 +219,8 @@ int cg_write_numeric(const char *cgroup, const char *control, long value)
return cg_write(cgroup, control, buf);
}
-int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+static int cg_find_root(char *root, size_t len, const char *controller,
+ bool *nsdelegate)
{
char buf[10 * PAGE_SIZE];
char *fs, *mount, *type, *options;
@@ -236,18 +239,37 @@ int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
options = strtok(NULL, delim);
strtok(NULL, delim);
strtok(NULL, delim);
-
- if (strcmp(type, "cgroup2") == 0) {
- strncpy(root, mount, len);
- if (nsdelegate)
- *nsdelegate = !!strstr(options, "nsdelegate");
- return 0;
+ if (strcmp(type, "cgroup") == 0) {
+ if (!controller || !strstr(options, controller))
+ continue;
+ } else if (strcmp(type, "cgroup2") == 0) {
+ if (controller &&
+ cg_read_strstr(mount, "cgroup.controllers", controller))
+ continue;
+ } else {
+ continue;
}
+ strncpy(root, mount, len);
+
+ if (nsdelegate)
+ *nsdelegate = !!strstr(options, "nsdelegate");
+ return 0;
+
}
return -1;
}
+int cg_find_controller_root(char *root, size_t len, const char *controller)
+{
+ return cg_find_root(root, len, controller, NULL);
+}
+
+int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+{
+ return cg_find_root(root, len, NULL, nsdelegate);
+}
+
int cg_create(const char *cgroup)
{
return mkdir(cgroup, 0755);
@@ -341,7 +363,7 @@ int cg_enter_current(const char *cgroup)
int cg_enter_current_thread(const char *cgroup)
{
- return cg_write(cgroup, "cgroup.threads", "0");
+ return cg_write(cgroup, CG_THREADS_FILE, "0");
}
int cg_run(const char *cgroup,
@@ -488,94 +510,28 @@ int cg_run_nowait(const char *cgroup,
return pid;
}
-int get_temp_fd(void)
-{
- return open(".", O_TMPFILE | O_RDWR | O_EXCL);
-}
-
-int alloc_pagecache(int fd, size_t size)
-{
- char buf[PAGE_SIZE];
- struct stat st;
- int i;
-
- if (fstat(fd, &st))
- goto cleanup;
-
- size += st.st_size;
-
- if (ftruncate(fd, size))
- goto cleanup;
-
- for (i = 0; i < size; i += sizeof(buf))
- read(fd, buf, sizeof(buf));
-
- return 0;
-
-cleanup:
- return -1;
-}
-
-int alloc_anon(const char *cgroup, void *arg)
+int proc_mount_contains(const char *option)
{
- size_t size = (unsigned long)arg;
- char *buf, *ptr;
+ char buf[4 * PAGE_SIZE];
+ ssize_t read;
- buf = malloc(size);
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
+ read = read_text("/proc/mounts", buf, sizeof(buf));
+ if (read < 0)
+ return read;
- free(buf);
- return 0;
+ return strstr(buf, option) != NULL;
}
-int is_swap_enabled(void)
+int cgroup_feature(const char *feature)
{
char buf[PAGE_SIZE];
- const char delim[] = "\n";
- int cnt = 0;
- char *line;
-
- if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
- return -1;
-
- for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
- cnt++;
-
- return cnt > 1;
-}
-
-int set_oom_adj_score(int pid, int score)
-{
- char path[PATH_MAX];
- int fd, len;
-
- sprintf(path, "/proc/%d/oom_score_adj", pid);
-
- fd = open(path, O_WRONLY | O_APPEND);
- if (fd < 0)
- return fd;
-
- len = dprintf(fd, "%d", score);
- if (len < 0) {
- close(fd);
- return len;
- }
-
- close(fd);
- return 0;
-}
-
-int proc_mount_contains(const char *option)
-{
- char buf[4 * PAGE_SIZE];
ssize_t read;
- read = read_text("/proc/mounts", buf, sizeof(buf));
+ read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
if (read < 0)
return read;
- return strstr(buf, option) != NULL;
+ return strstr(buf, feature) != NULL;
}
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 19b131ee7707..7ab2824ed7b5 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -2,9 +2,9 @@
#include <stdbool.h>
#include <stdlib.h>
-#include "../kselftest.h"
-
+#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
+#endif
#define MB(x) (x << 20)
@@ -13,6 +13,10 @@
#define TEST_UID 65534 /* usually nobody, any !root is fine */
+#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks")
+#define CG_NAMED_NAME "selftest"
+#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s"))
+
/*
* Checks if two given values differ by less than err% of their sum.
*/
@@ -21,6 +25,30 @@ static inline int values_close(long a, long b, int err)
return labs(a - b) <= (a + b) / 100 * err;
}
+/*
+ * Checks if two given values differ by less than err% of their sum and assert
+ * with detailed debug info if not.
+ */
+static inline int values_close_report(long a, long b, int err)
+{
+ long diff = labs(a - b);
+ long limit = (a + b) / 100 * err;
+ double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0;
+ int close = diff <= limit;
+
+ if (!close)
+ fprintf(stderr,
+ "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | "
+ "tolerance=%d%% | actual_error=%.2f%%\n",
+ a, b, diff, limit, err, actual_err);
+
+ return close;
+}
+
+extern ssize_t read_text(const char *path, char *buf, size_t max_len);
+extern ssize_t write_text(const char *path, char *buf, ssize_t len);
+
+extern int cg_find_controller_root(char *root, size_t len, const char *controller);
extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
@@ -49,14 +77,10 @@ extern int cg_enter_current_thread(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
-extern int get_temp_fd(void);
-extern int alloc_pagecache(int fd, size_t size);
-extern int alloc_anon(const char *cgroup, void *arg);
-extern int is_swap_enabled(void);
-extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
int proc_mount_contains(const char *option);
+int cgroup_feature(const char *feature);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
@@ -66,3 +90,4 @@ extern int dirfd_open_opath(const char *dir);
extern int cg_prepare_for_wait(const char *cgroup);
extern int memcg_prepare_for_wait(const char *cgroup);
extern int cg_wait_for(int fd);
+extern bool cg_test_v1_named;
diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk
new file mode 100644
index 000000000000..7a73007204c3
--- /dev/null
+++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk
@@ -0,0 +1,19 @@
+CGROUP_DIR := $(selfdir)/cgroup
+
+LIBCGROUP_C := lib/cgroup_util.c
+
+LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C))
+
+LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq)
+
+CFLAGS += -I$(CGROUP_DIR)/lib/include
+
+EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h
+
+$(LIBCGROUP_O_DIRS):
+ mkdir -p $@
+
+$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index a5672a91d273..102262555a59 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -5,6 +5,8 @@
#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
@@ -15,10 +17,13 @@
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static bool nsdelegate;
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0
+#endif
static int touch_anon(char *buf, size_t size)
{
@@ -148,6 +153,9 @@ static int test_cgcore_populated(const char *root)
int cgroup_fd = -EBADF;
pid_t pid;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
@@ -277,6 +285,9 @@ static int test_cgcore_invalid_domain(const char *root)
int ret = KSFT_FAIL;
char *grandparent = NULL, *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
grandparent = cg_name(root, "cg_test_grandparent");
parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
@@ -339,6 +350,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -378,7 +392,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
- if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+ if (cg_test_v1_named ||
+ cg_read_strstr(root, "cgroup.controllers", "cpu") ||
cg_write(root, "cgroup.subtree_control", "+cpu")) {
ret = KSFT_SKIP;
goto cleanup;
@@ -430,6 +445,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -465,6 +483,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -506,6 +527,9 @@ static int test_cgcore_internal_process_constraint(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -573,7 +597,7 @@ static int test_cgcore_proc_migration(const char *root)
}
cg_enter_current(dst);
- if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
+ if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1)
goto cleanup;
ret = KSFT_PASS;
@@ -605,7 +629,7 @@ static void *migrating_thread_fn(void *arg)
char lines[3][PATH_MAX];
for (g = 1; g < 3; ++g)
- snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0]));
+ snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0]));
for (i = 0; i < n_iterations; ++i) {
cg_enter_current_thread(grps[(i % 2) + 1]);
@@ -642,10 +666,12 @@ static int test_cgcore_thread_migration(const char *root)
if (cg_create(grps[2]))
goto cleanup;
- if (cg_write(grps[1], "cgroup.type", "threaded"))
- goto cleanup;
- if (cg_write(grps[2], "cgroup.type", "threaded"))
- goto cleanup;
+ if (!cg_test_v1_named) {
+ if (cg_write(grps[1], "cgroup.type", "threaded"))
+ goto cleanup;
+ if (cg_write(grps[2], "cgroup.type", "threaded"))
+ goto cleanup;
+ }
if (cg_enter_current(grps[1]))
goto cleanup;
@@ -659,7 +685,7 @@ static int test_cgcore_thread_migration(const char *root)
if (retval)
goto cleanup;
- snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
+ snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0]));
if (proc_read_strstr(0, 1, "cgroup", line))
goto cleanup;
@@ -842,6 +868,38 @@ cleanup:
return ret;
}
+static int setup_named_v1_root(char *root, size_t len, const char *name)
+{
+ char options[PATH_MAX];
+ int r;
+
+ r = snprintf(root, len, "/mnt/cg_selftest");
+ if (r < 0)
+ return r;
+
+ r = snprintf(options, sizeof(options), "none,name=%s", name);
+ if (r < 0)
+ return r;
+
+ r = mkdir(root, 0755);
+ if (r < 0 && errno != EEXIST)
+ return r;
+
+ r = mount("none", root, "cgroup", 0, options);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void cleanup_named_v1_root(char *root)
+{
+ if (!cg_test_v1_named)
+ return;
+ umount(root);
+ rmdir(root);
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -865,15 +923,22 @@ struct corecg_test {
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
-
- if (cg_find_unified_root(root, sizeof(root), &nsdelegate))
- ksft_exit_skip("cgroup v2 isn't mounted\n");
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) {
+ if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME))
+ ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n");
+ cg_test_v1_named = true;
+ goto post_v2_setup;
+ }
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+post_v2_setup:
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
@@ -883,11 +948,11 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ cleanup_named_v1_root(root);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index a2b50af8e9ee..c83f05438d7c 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -2,6 +2,7 @@
#define _GNU_SOURCE
#include <linux/limits.h>
+#include <sys/param.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
@@ -10,7 +11,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
enum hog_clock_type {
@@ -218,7 +219,7 @@ static int test_cpucg_stats(const char *root)
if (user_usec <= 0)
goto cleanup;
- if (!values_close(usage_usec, expected_usage_usec, 1))
+ if (!values_close_report(usage_usec, expected_usage_usec, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -290,7 +291,7 @@ static int test_cpucg_nice(const char *root)
user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
- if (!values_close(nice_usec, expected_nice_usec, 1))
+ if (!values_close_report(nice_usec, expected_nice_usec, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -403,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children)
goto cleanup;
delta = children[i + 1].usage - children[i].usage;
- if (!values_close(delta, children[0].usage, 35))
+ if (!values_close_report(delta, children[0].usage, 35))
goto cleanup;
}
@@ -443,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children)
int ret = KSFT_FAIL, i;
for (i = 0; i < num_children - 1; i++) {
- if (!values_close(children[i + 1].usage, children[0].usage, 15))
+ if (!values_close_report(children[i + 1].usage, children[0].usage, 15))
goto cleanup;
}
@@ -572,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
nested_leaf_usage = leaf[1].usage + leaf[2].usage;
if (overprovisioned) {
- if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+ if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15))
goto cleanup;
- } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
+ } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15))
goto cleanup;
child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
if (child_usage <= 0)
goto cleanup;
- if (!values_close(child_usage, nested_leaf_usage, 1))
+ if (!values_close_report(child_usage, nested_leaf_usage, 1))
goto cleanup;
ret = KSFT_PASS;
@@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root)
static int test_cpucg_max(const char *root)
{
int ret = KSFT_FAIL;
- long usage_usec, user_usec;
- long usage_seconds = 1;
- long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
char *cpucg;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
cpucg = cg_name(root, "cpucg_test");
if (!cpucg)
@@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root)
if (cg_create(cpucg))
goto cleanup;
- if (cg_write(cpucg, "cpu.max", "1000"))
+ if (cg_write(cpucg, "cpu.max", quota_buf))
goto cleanup;
struct cpu_hog_func_param param = {
.nprocs = 1,
.ts = {
- .tv_sec = usage_seconds,
+ .tv_sec = duration_seconds,
.tv_nsec = 0,
},
.clock_type = CPU_HOG_CLOCK_WALL,
@@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root)
goto cleanup;
usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
- user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
- if (user_usec <= 0)
+ if (usage_usec <= 0)
goto cleanup;
- if (user_usec >= expected_usage_usec)
- goto cleanup;
+ /*
+ * The following calculation applies only since
+ * the cpu hog is set to run as per wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (values_close(usage_usec, expected_usage_usec, 95))
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
@@ -698,10 +710,16 @@ cleanup:
static int test_cpucg_max_nested(const char *root)
{
int ret = KSFT_FAIL;
- long usage_usec, user_usec;
- long usage_seconds = 1;
- long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
char *parent, *child;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
parent = cg_name(root, "cpucg_parent");
child = cg_name(parent, "cpucg_child");
@@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root)
if (cg_create(child))
goto cleanup;
- if (cg_write(parent, "cpu.max", "1000"))
+ if (cg_write(parent, "cpu.max", quota_buf))
goto cleanup;
struct cpu_hog_func_param param = {
.nprocs = 1,
.ts = {
- .tv_sec = usage_seconds,
+ .tv_sec = duration_seconds,
.tv_nsec = 0,
},
.clock_type = CPU_HOG_CLOCK_WALL,
@@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root)
goto cleanup;
usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
- user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
- if (user_usec <= 0)
+ if (usage_usec <= 0)
goto cleanup;
- if (user_usec >= expected_usage_usec)
- goto cleanup;
+ /*
+ * The following calculation applies only since
+ * the cpu hog is set to run as per wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (values_close(usage_usec, expected_usage_usec, 95))
+ if (!values_close_report(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
@@ -773,8 +796,10 @@ struct cpucg_test {
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -791,11 +816,10 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
index 4034d14ba69a..c5cf8b56ceb8 100644
--- a/tools/testing/selftests/cgroup/test_cpuset.c
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -3,7 +3,7 @@
#include <linux/limits.h>
#include <signal.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int idle_process_fn(const char *cgroup, void *arg)
@@ -247,8 +247,10 @@ struct cpuset_test {
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -265,11 +267,10 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 400a696a0d21..a17256d9f88a 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus
# If isolated CPUs have been reserved at boot time (as shown in
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These pre-isolated CPUs should stay in
-# an isolated state throughout the testing process for now.
+# the tests may fail incorrectly. Wait a bit and retry again just in case
+# these isolated CPUs are leftover from previous run and have just been
+# cleaned up earlier in this script.
+#
+# These pre-isolated CPUs should stay in an isolated state throughout the
+# testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+[[ -n "$BOOT_ISOLCPUS" ]] && {
+ sleep 0.5
+ BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+}
if [[ -n "$BOOT_ISOLCPUS" ]]
then
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
+
cleanup()
{
online_cpus
cd $CGROUP2
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- rmdir test > /dev/null 2>&1
+ rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1
+ rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \
+ rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1
[[ -n "$SCHED_DEBUG" ]] &&
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
}
@@ -173,14 +183,22 @@ test_add_proc()
#
# Cgroup test hierarchy
#
-# root -- A1 -- A2 -- A3
-# +- B1
+# root
+# |
+# +------+------+
+# | |
+# A1 B1
+# |
+# A2
+# |
+# A3
#
# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
# C<l> = add cpu-list to cpuset.cpus
# X<l> = add cpu-list to cpuset.cpus.exclusive
# S<p> = use prefix in subtree_control
# T = put a task into cgroup
+# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive
# O<c>=<v> = Write <v> to CPU online file of <c>
#
# ECPUs - effective CPUs of cpusets
@@ -207,130 +225,129 @@ TEST_MATRIX=(
" C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
" C0-1 . . C2-3:P1 . . . C2 0 "
" C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
- "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
- "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
- "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
- "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# CPU offlining cases:
- " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
#
# Remote partition and cpuset.cpus.exclusive tests
#
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2"
" C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
# Nested remote/local partition tests
- " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P0,B1:P1"
- " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
- A1:P0,A2:P2,A3:P1 2-4,2-3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \
- A1:P0,A2:P2,A3:P1 2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P0|B1:P1"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
+ A1:P0|A2:P2|A3:P1 2-4|2-3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
+ A1:P0|A2:P2|A3:P1 2"
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
- A1:P0,A2:P-2,A3:P-1"
+ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \
+ A1:P0|A2:P-2|A3:P-1 ."
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \
- A1:P0,A2:P2,A3:P-1 2-4"
+ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \
+ A1:P0|A2:P2|A3:P-1 2-4"
# Remote partition offline tests
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3,"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
# An invalidated remote partition cannot self-recover from hotplug
- " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
# cpus.exclusive.effective clearing test
- " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:"
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
# Invalid to valid remote partition transition test
- " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2"
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
" C0-3:S+ C1-3:X3:P2
- . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+ . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
# Invalid to valid local partition direct transition tests
- " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3"
- " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
- " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
- " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
- " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+ " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
# Local partition invalidation tests
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
# Local partition CPU change tests
- " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
- " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
# cpus_allowed/exclusive_cpus update tests
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
- A1:P0,A3:P-2"
+ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
- A1:P0,A3:P-2"
+ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
- A1:P0,A3:P2 3"
+ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
- A1:P0,A3:P-2"
+ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \
- A1:P0,A3:P-2"
+ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \
+ A1:P0|A3:P-2"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
@@ -339,68 +356,127 @@ TEST_MATRIX=(
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
- "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
- "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
- " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# Changing a partition root to member makes child partitions invalid
- "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
- "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1"
# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
# as they overlap.
- "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
# Deletion of CPUs distributed to child cgroup is allowed.
- "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
# To become a valid partition root, cpuset.cpus must overlap parent's
# cpuset.cpus.
- " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
# Enabling partition with child cpusets is allowed
- " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
- # A partition root with non-partition root parent is invalid, but it
+ # A partition root with non-partition root parent is invalid| but it
# can be made valid if its parent becomes a partition root too.
- " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
- " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
- " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
- " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
- " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1"
# cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
- " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5"
# Child partition root that try to take all CPUs from parent partition
# with tasks will remain invalid.
- " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
- " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1"
- " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
+ " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
+ " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
# Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
# affect cpuset.cpus.exclusive.effective.
- " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3"
+ " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3"
+
+ # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive
+ # but creating a local partition out of it is not allowed. Similarly and change
+ # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will
+ # invalidate it.
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
+ A1:P0|A2:P2:B1:P1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
# A task cannot be added to a partition with no cpu
- "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
- " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
# cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
- " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5"
+)
+
+#
+# Cpuset controller remote partition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root
+# |
+# rtest (cpuset.cpus.exclusive=1-7)
+# |
+# +------+------+
+# | |
+# p1 p2
+# +--+--+ +--+--+
+# | | | |
+# c11 c12 c21 c22
+#
+# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX.
+# Only CPUs 1-7 should be used.
+#
+REMOTE_TEST_MATRIX=(
+ # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22
+ # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS
+ # ------ ------ ------- ------- ------- ------- ----- ------ --------
+ " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \
+ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \
+ c11:P2|c12:P2|c21:P2|c22:P2 1-6"
+ " CX1-4:S+ . X1-2:P2 C3 . . \
+ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \
+ p1:P0|c11:P2|c12:P0 1-2"
+ " CX1-4:S+ . X1-2:P2 . . . \
+ X2-4 . . . . . p1:1,3-4|c11:2 \
+ p1:P0|c11:P2 2"
+ " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \
+ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X2 . . . p1:1|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ # p1 as member, will get its effective CPUs from its parent rtest
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \
+ p1:P0|c11:P2|c12:P1 1"
+ " CX1-4:S+ X5-6:P1:S+ . . . . \
+ . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
+ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
+ 1-2,4-6|1-2,5-6"
)
#
@@ -453,25 +529,26 @@ set_ctrl_state()
PFILE=$CGRP/cpuset.cpus.partition
CFILE=$CGRP/cpuset.cpus
XFILE=$CGRP/cpuset.cpus.exclusive
- S=$(expr substr $CMD 1 1)
- if [[ $S = S ]]
- then
- PREFIX=${CMD#?}
+ case $CMD in
+ S*) PREFIX=${CMD#?}
COMM="echo ${PREFIX}${CTRL} > $SFILE"
eval $COMM $REDIRECT
- elif [[ $S = X ]]
- then
+ ;;
+ X*)
CPUS=${CMD#?}
COMM="echo $CPUS > $XFILE"
eval $COMM $REDIRECT
- elif [[ $S = C ]]
- then
- CPUS=${CMD#?}
+ ;;
+ CX*)
+ CPUS=${CMD#??}
+ COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ ;;
+ C*) CPUS=${CMD#?}
COMM="echo $CPUS > $CFILE"
eval $COMM $REDIRECT
- elif [[ $S = P ]]
- then
- VAL=${CMD#?}
+ ;;
+ P*) VAL=${CMD#?}
case $VAL in
0) VAL=member
;;
@@ -486,15 +563,17 @@ set_ctrl_state()
esac
COMM="echo $VAL > $PFILE"
eval $COMM $REDIRECT
- elif [[ $S = O ]]
- then
- VAL=${CMD#?}
+ ;;
+ O*) VAL=${CMD#?}
write_cpu_online $VAL
- elif [[ $S = T ]]
- then
- COMM="echo 0 > $TFILE"
+ ;;
+ T*) COMM="echo 0 > $TFILE"
eval $COMM $REDIRECT
- fi
+ ;;
+ *) echo "Unknown command: $CMD"
+ exit 1
+ ;;
+ esac
RET=$?
[[ $RET -ne 0 ]] && {
[[ -n "$SHOWERR" ]] && {
@@ -532,21 +611,18 @@ online_cpus()
}
#
-# Return 1 if the list of effective cpus isn't the same as the initial list.
+# Remove all the test cgroup directories
#
reset_cgroup_states()
{
echo 0 > $CGROUP2/cgroup.procs
online_cpus
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- pause 0.02
- set_ctrl_state . R-
- pause 0.01
+ rmdir $RESET_LIST > /dev/null 2>&1
}
dump_states()
{
- for DIR in . A1 A1/A2 A1/A2/A3 B1
+ for DIR in $CGROUP_LIST
do
CPUS=$DIR/cpuset.cpus
ECPUS=$DIR/cpuset.cpus.effective
@@ -566,17 +642,33 @@ dump_states()
}
#
+# Set the actual cgroup directory into $CGRP_DIR
+# $1 - cgroup name
+#
+set_cgroup_dir()
+{
+ CGRP_DIR=$1
+ [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2
+ [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3
+ [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11
+ [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12
+ [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21
+ [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22
+}
+
+#
# Check effective cpus
-# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
+# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]*
#
check_effective_cpus()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CPUS=$2
+ EXPECTED_CPUS=$2
+ ACTUAL_CPUS=
if [[ $CGRP = X* ]]
then
CGRP=${CGRP#X}
@@ -584,41 +676,39 @@ check_effective_cpus()
else
FILE=cpuset.cpus.effective
fi
- [[ $CGRP = A2 ]] && CGRP=A1/A2
- [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
- [[ -e $CGRP/$FILE ]] || return 1
- [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
+ set_cgroup_dir $CGRP
+ [[ -e $CGRP_DIR/$FILE ]] || return 1
+ ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE)
+ [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1
done
}
#
# Check cgroup states
-# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
+# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]*
#
check_cgroup_states()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CGRP_DIR=$CGRP
- STATE=$2
+ EXPECTED_STATE=$2
FILE=
- EVAL=$(expr substr $STATE 2 2)
- [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2
- [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3
+ EVAL=$(expr substr $EXPECTED_STATE 2 2)
- case $STATE in
+ set_cgroup_dir $CGRP
+ case $EXPECTED_STATE in
P*) FILE=$CGRP_DIR/cpuset.cpus.partition
;;
- *) echo "Unknown state: $STATE!"
+ *) echo "Unknown state: $EXPECTED_STATE!"
exit 1
;;
esac
- VAL=$(cat $FILE)
+ ACTUAL_STATE=$(cat $FILE)
- case "$VAL" in
+ case "$ACTUAL_STATE" in
member) VAL=0
;;
root) VAL=1
@@ -642,7 +732,7 @@ check_cgroup_states()
[[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && {
DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective)
[[ -n "$DOMS" ]] &&
- echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE
+ echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE
}
done
return 0
@@ -665,22 +755,22 @@ check_cgroup_states()
#
check_isolcpus()
{
- EXPECT_VAL=$1
- ISOLCPUS=
+ EXPECTED_ISOLCPUS=$1
+ ISCPUS=${CGROUP2}/cpuset.cpus.isolated
+ ISOLCPUS=$(cat $ISCPUS)
LASTISOLCPU=
SCHED_DOMAINS=/sys/kernel/debug/sched/domains
- ISCPUS=${CGROUP2}/cpuset.cpus.isolated
- if [[ $EXPECT_VAL = . ]]
+ if [[ $EXPECTED_ISOLCPUS = . ]]
then
- EXPECT_VAL=
- EXPECT_VAL2=
- elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]]
+ EXPECTED_ISOLCPUS=
+ EXPECTED_SDOMAIN=
+ elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]]
then
- set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g")
- EXPECT_VAL=$1
- EXPECT_VAL2=$2
+ set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g")
+ EXPECTED_ISOLCPUS=$2
+ EXPECTED_SDOMAIN=$1
else
- EXPECT_VAL2=$EXPECT_VAL
+ EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS
fi
#
@@ -689,20 +779,21 @@ check_isolcpus()
# to make appending those CPUs easier.
#
[[ -n "$BOOT_ISOLCPUS" ]] && {
- EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
- EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+ EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS}
+ EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS}
}
#
# Check cpuset.cpus.isolated cpumask
#
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again
pause 0.05
ISOLCPUS=$(cat $ISCPUS)
}
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1
ISOLCPUS=
+ EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
#
# Use the sched domain in debugfs to check isolated CPUs, if available
@@ -736,7 +827,7 @@ check_isolcpus()
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
- [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+ [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]]
}
test_fail()
@@ -774,6 +865,63 @@ null_isolcpus_check()
}
#
+# Check state transition test result
+# $1 - Test number
+# $2 - Expected effective CPU values
+# $3 - Expected partition states
+# $4 - Expected isolated CPUs
+#
+check_test_results()
+{
+ _NR=$1
+ _ECPUS="$2"
+ _PSTATES="$3"
+ _ISOLCPUS="$4"
+
+ [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && {
+ check_effective_cpus $_ECPUS
+ [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \
+ "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS"
+ }
+
+ [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && {
+ check_cgroup_states $_PSTATES
+ [[ $? -ne 0 ]] && test_fail $_NR states \
+ "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE"
+ }
+
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$_ISOLCPUS" ]] && {
+ check_isolcpus $_ISOLCPUS
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS}
+ test_fail $_NR "isolated CPU" \
+ "Expect $_ISOLCPUS, get $ISOLCPUS instead"
+ }
+ }
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ RETRY=0
+ while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.02
+ ((RETRY++))
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ done
+ [[ $_NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $_NEWLIST after test $_NR!"
+ exit 1
+ }
+ null_isolcpus_check
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+}
+
+#
# Run cpuset state transition test
# $1 - test matrix name
#
@@ -785,6 +933,8 @@ run_state_test()
{
TEST=$1
CONTROLLER=cpuset
+ CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1"
+ RESET_LIST="A1/A2/A3 A1/A2 A1 B1"
I=0
eval CNT="\${#$TEST[@]}"
@@ -812,10 +962,11 @@ run_state_test()
STATES=${11}
ICPUS=${12}
- set_ctrl_state_noerr B1 $OLD_B1
set_ctrl_state_noerr A1 $OLD_A1
set_ctrl_state_noerr A1/A2 $OLD_A2
set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ set_ctrl_state_noerr B1 $OLD_B1
+
RETVAL=0
set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
@@ -824,47 +975,79 @@ run_state_test()
[[ $RETVAL -ne $RESULT ]] && test_fail $I result
- [[ -n "$ECPUS" && "$ECPUS" != . ]] && {
- check_effective_cpus $ECPUS
- [[ $? -ne 0 ]] && test_fail $I "effective CPU"
- }
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+}
- [[ -n "$STATES" && "$STATES" != . ]] && {
- check_cgroup_states $STATES
- [[ $? -ne 0 ]] && test_fail $I states
- }
+#
+# Run cpuset remote partition state transition test
+# $1 - test matrix name
+#
+run_remote_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ [[ -d rtest ]] || mkdir rtest
+ cd rtest
+ echo +cpuset > cgroup.subtree_control
+ echo "1-7" > cpuset.cpus
+ echo "1-7" > cpuset.cpus.exclusive
+ CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22"
+ RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2"
+ I=0
+ eval CNT="\${#$TEST[@]}"
- # Compare the expected isolated CPUs with the actual ones,
- # if available
- [[ -n "$ICPUS" ]] && {
- check_isolcpus $ICPUS
- [[ $? -ne 0 ]] && {
- [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
- test_fail $I "isolated CPU" \
- "Expect $ICPUS, get $ISOLCPUS instead"
- }
- }
- reset_cgroup_states
- #
- # Check to see if effective cpu list changes
- #
- NEWLIST=$(cat cpuset.cpus.effective)
- RETRY=0
- while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]]
- do
- # Wait a bit longer & recheck a few times
- pause 0.02
- ((RETRY++))
- NEWLIST=$(cat cpuset.cpus.effective)
- done
- [[ $NEWLIST != $CPULIST ]] && {
- echo "Effective cpus changed to $NEWLIST after test $I!"
- exit 1
+ reset_cgroup_states
+ console_msg "Running remote partition state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > $CONSOLE
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
}
- null_isolcpus_check
- [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+ eval set -- "\${$TEST[$I]}"
+ OLD_p1=$1
+ OLD_p2=$2
+ OLD_c11=$3
+ OLD_c12=$4
+ OLD_c21=$5
+ OLD_c22=$6
+ NEW_p1=$7
+ NEW_p2=$8
+ NEW_c11=$9
+ NEW_c12=${10}
+ NEW_c21=${11}
+ NEW_c22=${12}
+ ECPUS=${13}
+ STATES=${14}
+ ICPUS=${15}
+
+ set_ctrl_state_noerr p1 $OLD_p1
+ set_ctrl_state_noerr p2 $OLD_p2
+ set_ctrl_state_noerr p1/c11 $OLD_c11
+ set_ctrl_state_noerr p1/c12 $OLD_c12
+ set_ctrl_state_noerr p2/c21 $OLD_c21
+ set_ctrl_state_noerr p2/c22 $OLD_c22
+
+ RETVAL=0
+ set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?))
+ set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?))
+ set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?))
+ set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?))
+ set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?))
+ set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?))
+
+ [[ $RETVAL -ne 0 ]] && test_fail $I result
+
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
((I++))
done
+ cd ..
+ rmdir rtest
echo "All $I tests of $TEST PASSED."
}
@@ -932,6 +1115,7 @@ test_isolated()
echo $$ > $CGROUP2/cgroup.procs
[[ -d A1 ]] && rmdir A1
null_isolcpus_check
+ pause 0.05
}
#
@@ -997,10 +1181,13 @@ test_inotify()
else
echo "Inotify test PASSED"
fi
+ echo member > cpuset.cpus.partition
+ echo "" > cpuset.cpus
}
trap cleanup 0 2 3 6
run_state_test TEST_MATRIX
+run_remote_state_test REMOTE_TEST_MATRIX
test_isolated
test_inotify
echo "All tests PASSED."
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 8730645d363a..97fae92c8387 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
#define DEBUG
@@ -804,6 +804,662 @@ cleanup:
return ret;
}
+/*
+ * Get the current frozen_usec for the cgroup.
+ */
+static long cg_check_freezetime(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "cgroup.stat.local",
+ "frozen_usec ");
+}
+
+/*
+ * Test that the freeze time will behave as expected for an empty cgroup.
+ */
+static int test_cgfreezer_time_empty(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_empty");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create an empty cgroup and check that its freeze time
+ * is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 2) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 2).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Check the freeze time again to ensure that it has not
+ * changed.
+ */
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * A simple test for cgroup freezer time accounting. This test follows
+ * the same flow as test_cgfreezer_time_empty, but with a single process
+ * in the cgroup.
+ */
+static int test_cgfreezer_time_simple(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create a cgroup and check that its freeze time is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Populate the cgroup with one child and check that the
+ * freeze time is still 0.
+ */
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr > prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 3) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 3).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Sleep for 1000 us. Check that the freeze time is the
+ * same as at 4).
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that freezer time accounting works as expected, even while we're
+ * populating a cgroup with processes.
+ */
+static int test_cgfreezer_time_populate(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+ int i;
+
+ cgroup = cg_name(root, "cg_time_test_populate");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 1) Populate the cgroup with 100 processes. Check that
+ * the freeze time is 0.
+ */
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Wait for the group to become fully populated. Check
+ * that the freeze time is 0.
+ */
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Freeze the cgroup and then populate it with 100 more
+ * processes. Check that the freeze time continues to grow.
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Wait for the group to become fully populated. Check
+ * that the freeze time is larger than at 3).
+ */
+ if (cg_wait_for_proc_count(cgroup, 200))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 4).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 6) Kill the processes. Check that the freeze time is the
+ * same as it was at 5).
+ */
+ if (cg_killall(cgroup))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 7) Freeze and unfreeze the cgroup. Check that the freeze
+ * time is larger than it was at 6).
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that frozen time for a cgroup continues to work as expected,
+ * even as processes are migrated. Frozen cgroup A's freeze time should
+ * continue to increase and running cgroup B's should stay 0.
+ */
+static int test_cgfreezer_time_migrate(const char *root)
+{
+ long prev_A, curr_A, curr_B;
+ char *cgroup[2] = {0};
+ int ret = KSFT_FAIL;
+ int pid;
+
+ cgroup[0] = cg_name(root, "cg_time_test_migrate_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(root, "cg_time_test_migrate_B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup[0], 1))
+ goto cleanup;
+
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A) {
+ debug("Expect time (%ld) to be 0\n", curr_A);
+ goto cleanup;
+ }
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ /*
+ * Freeze cgroup A.
+ */
+ if (cg_freeze_wait(cgroup[0], true))
+ goto cleanup;
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be > 0\n", curr_A);
+ goto cleanup;
+ }
+
+ /*
+ * Migrate from A (frozen) to B (running).
+ */
+ if (cg_enter(cgroup[1], pid))
+ goto cleanup;
+
+ usleep(1000);
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr_A, prev_A);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup[0])
+ cg_destroy(cgroup[0]);
+ free(cgroup[0]);
+ if (cgroup[1])
+ cg_destroy(cgroup[1]);
+ free(cgroup[1]);
+ return ret;
+}
+
+/*
+ * The test creates a cgroup and freezes it. Then it creates a child cgroup.
+ * After that it checks that the child cgroup has a non-zero freeze time
+ * that is less than the parent's. Next, it freezes the child, unfreezes
+ * the parent, and sleeps. Finally, it checks that the child's freeze
+ * time has grown larger than the parent's.
+ */
+static int test_cgfreezer_time_parent(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_parent_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_parent_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_freeze_wait(parent, true))
+ goto cleanup;
+
+ usleep(1000);
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ /*
+ * Since the parent was frozen the entire time the child cgroup
+ * was being created, we expect the parent's freeze time to be
+ * larger than the child's.
+ *
+ * Ideally, we would be able to check both times simultaneously,
+ * but here we get the child's after we get the parent's.
+ */
+ ptime = cg_check_freezetime(parent);
+ ctime = cg_check_freezetime(child);
+ if (ptime <= ctime) {
+ debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(child, true))
+ goto cleanup;
+
+ if (cg_freeze_wait(parent, false))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ usleep(100000);
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates a parent cgroup and a child cgroup. Then, it freezes
+ * the child and checks that the child's freeze time is greater than the
+ * parent's, which should be zero.
+ */
+static int test_cgfreezer_time_child(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_child_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_child_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_freeze_wait(child, true))
+ goto cleanup;
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+ if (ptime != 0) {
+ debug("Expect ptime (%ld) to be 0\n", ptime);
+ goto cleanup;
+ }
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * |
+ * B
+ * |
+ * C
+ *
+ * Then it freezes the cgroups in the order C, B, A.
+ * Then it unfreezes the cgroups in the order A, B, C.
+ * Then it checks that C's freeze time is larger than B's and
+ * that B's is larger than A's.
+ */
+static int test_cgfreezer_time_nested(const char *root)
+{
+ char *cgroup[3] = {0};
+ int ret = KSFT_FAIL;
+ long time[3] = {0};
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_time_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ if (cg_create(cgroup[2]))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[0], true))
+ goto cleanup;
+
+ usleep(1000);
+
+ if (cg_freeze_nowait(cgroup[0], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], false))
+ goto cleanup;
+
+ time[2] = cg_check_freezetime(cgroup[2]);
+ time[1] = cg_check_freezetime(cgroup[1]);
+ time[0] = cg_check_freezetime(cgroup[0]);
+
+ if (time[2] <= time[1]) {
+ debug("Expect C's time (%ld) > B's time (%ld)", time[2], time[1]);
+ goto cleanup;
+ }
+
+ if (time[1] <= time[0]) {
+ debug("Expect B's time (%ld) > A's time (%ld)", time[1], time[0]);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 2; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
#define T(x) { x, #x }
struct cgfreezer_test {
int (*fn)(const char *root);
@@ -819,14 +1475,23 @@ struct cgfreezer_test {
T(test_cgfreezer_stopped),
T(test_cgfreezer_ptraced),
T(test_cgfreezer_vfork),
+ T(test_cgfreezer_time_empty),
+ T(test_cgfreezer_time_simple),
+ T(test_cgfreezer_time_populate),
+ T(test_cgfreezer_time_migrate),
+ T(test_cgfreezer_time_parent),
+ T(test_cgfreezer_time_child),
+ T(test_cgfreezer_time_nested),
};
#undef T
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -838,11 +1503,10 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
index 856f9508ea56..f451aa449be6 100644
--- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -7,7 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
#define ADDR ((void *)(0x0UL))
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
index 0e5bb6c7307a..c8c9d306925b 100644
--- a/tools/testing/selftests/cgroup/test_kill.c
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -9,7 +9,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "../pidfd/pidfd.h"
#include "cgroup_util.h"
@@ -274,8 +274,10 @@ struct cgkill_test {
int main(int argc, char *argv[])
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -287,11 +289,10 @@ int main(int argc, char *argv[])
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 96693d8772be..ca38525484e3 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -14,7 +14,7 @@
#include <sys/sysinfo.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
@@ -308,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root)
char *parent;
long dead;
int i;
+ int max_time = 20;
parent = cg_name(root, "kmem_dead_cgroups_test");
if (!parent)
@@ -322,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root)
if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
goto cleanup;
- for (i = 0; i < 5; i++) {
+ for (i = 0; i < max_time; i++) {
dead = cg_read_key_long(parent, "cgroup.stat",
"nr_dying_descendants ");
if (dead == 0) {
@@ -334,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root)
* let's wait a bit and repeat.
*/
sleep(1);
+ if (i > 5)
+ printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);
}
cleanup:
@@ -418,8 +421,10 @@ struct kmem_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -443,11 +448,10 @@ int main(int argc, char **argv)
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 16f5d74ae762..4e1647568c5b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -18,12 +18,90 @@
#include <errno.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static bool has_localevents;
static bool has_recursiveprot;
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -380,10 +458,11 @@ static bool reclaim_until(const char *memcg, long goal);
*
* Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 29M
- * A/B/D memory.current ~= 21M
- * A/B/E memory.current ~= 0
- * A/B/F memory.current = 0
+ * A/B/C memory.current ~= 29M [memory.events:low > 0]
+ * A/B/D memory.current ~= 21M [memory.events:low > 0]
+ * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
+ * undefined otherwise]
+ * A/B/F memory.current = 0 [memory.events:low == 0]
* (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
@@ -495,10 +574,10 @@ static int test_memcg_protection(const char *root, bool min)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(29), 10))
+ if (!values_close(c[0], MB(29), 15))
goto cleanup;
- if (!values_close(c[1], MB(21), 10))
+ if (!values_close(c[1], MB(21), 20))
goto cleanup;
if (c[3] != 0)
@@ -525,7 +604,14 @@ static int test_memcg_protection(const char *root, bool min)
goto cleanup;
}
+ /*
+ * Child 2 has memory.low=0, but some low protection may still be
+ * distributed down from its parent with memory.low=50M if cgroup2
+ * memory_recursiveprot mount option is enabled. Ignore the low
+ * event count in this case.
+ */
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int ignore_low_events_index = has_recursiveprot ? 2 : -1;
int no_low_events_index = 1;
long low, oom;
@@ -534,6 +620,8 @@ static int test_memcg_protection(const char *root, bool min)
if (oom)
goto cleanup;
+ if (i == ignore_low_events_index)
+ continue;
if (i <= no_low_events_index && low <= 0)
goto cleanup;
if (i > no_low_events_index && low)
@@ -1562,8 +1650,10 @@ struct memcg_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, proc_status, ret = EXIT_SUCCESS;
+ int i, proc_status;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1597,11 +1687,10 @@ int main(int argc, char **argv)
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c
index 9ecb83c6cc5c..9a387c815d2c 100644
--- a/tools/testing/selftests/cgroup/test_pids.c
+++ b/tools/testing/selftests/cgroup/test_pids.c
@@ -9,7 +9,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int run_success(const char *cgroup, void *arg)
@@ -77,6 +77,9 @@ static int test_pids_events(const char *root)
char *cg_parent = NULL, *cg_child = NULL;
int pid;
+ if (cgroup_feature("pids_localevents") <= 0)
+ return KSFT_SKIP;
+
cg_parent = cg_name(root, "pids_parent");
cg_child = cg_name(cg_parent, "pids_child");
if (!cg_parent || !cg_child)
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index 40de679248b8..64ebc3f3f203 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -10,7 +10,7 @@
#include <sys/wait.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "cgroup_util.h"
static int read_int(const char *path, size_t *value)
@@ -338,7 +338,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb)
return -1;
if (wb != !!zswpwb_after) {
- ksft_print_msg("zswpwb_after is %ld while wb is %s",
+ ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
zswpwb_after, wb ? "enabled" : "disabled");
return -1;
}
@@ -597,8 +597,10 @@ static bool zswap_configured(void)
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i;
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -625,11 +627,10 @@ int main(int argc, char **argv)
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
- ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index e61f07973ce5..289e0c7c1f09 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -18,7 +18,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
enum test_mode {
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 3c196fa86c99..e82281efa273 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -24,7 +24,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "clone3_selftests.h"
static void child_exit(int ret)
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index ce0426786828..de0c9d62015d 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -13,7 +13,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
static void nop_handler(int signo)
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 3d2663fe50ba..a0593e8950f0 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -11,12 +11,12 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#ifndef __NR_clone3
-#define __NR_clone3 -1
+#define __NR_clone3 435
#endif
struct __clone_args {
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index bfb0da2b4fdd..5c944aee6b41 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -20,7 +20,7 @@
#include <unistd.h>
#include <sched.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "clone3_selftests.h"
#define MAX_PID_NS_LEVEL 32
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
index 4a825b997666..36c11467a8f1 100644
--- a/tools/testing/selftests/connector/proc_filter.c
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -16,7 +16,7 @@
#include <signal.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
sizeof(struct proc_input))
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index e0d9851fe1c9..f14eca63f20c 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
#include <linux/close_range.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c
index 7fec9dfb1b0e..ffce75a6c228 100644
--- a/tools/testing/selftests/core/unshare_test.c
+++ b/tools/testing/selftests/core/unshare_test.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
#include <linux/close_range.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
TEST(unshare_EMFILE)
diff --git a/tools/testing/selftests/coredump/.gitignore b/tools/testing/selftests/coredump/.gitignore
new file mode 100644
index 000000000000..097f52db0be9
--- /dev/null
+++ b/tools/testing/selftests/coredump/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+stackdump_test
+coredump_socket_test
+coredump_socket_protocol_test
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index ed210037b29d..dece1a31d561 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,7 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS = $(KHDR_INCLUDES)
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
-TEST_GEN_PROGS := stackdump_test
+TEST_GEN_PROGS := stackdump_test \
+ coredump_socket_test \
+ coredump_socket_protocol_test
TEST_FILES := stackdump
include ../lib.mk
+
+$(OUTPUT)/stackdump_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_protocol_test: coredump_test_helpers.c
diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config
new file mode 100644
index 000000000000..a05ef112b4f9
--- /dev/null
+++ b/tools/testing/selftests/coredump/config
@@ -0,0 +1,3 @@
+CONFIG_COREDUMP=y
+CONFIG_NET=y
+CONFIG_UNIX=y
diff --git a/tools/testing/selftests/coredump/coredump_socket_protocol_test.c b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
new file mode 100644
index 000000000000..d19b6717c53e
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
@@ -0,0 +1,1568 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+#define NUM_CRASHING_COREDUMPS 5
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket_request_kernel)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_kernel: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_kernel: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_kernel: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_kernel: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_kernel: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_kernel: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_request_kernel: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_kernel: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_kernel: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_kernel: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_kernel: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_kernel: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket_request_kernel: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_kernel: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_request_userspace)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_userspace: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_userspace: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_userspace: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_userspace: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_userspace: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_userspace: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_userspace: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_userspace: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_userspace: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_userspace: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_userspace: unexpected data received (expected no coredump data)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_userspace: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_userspace: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_reject)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_reject: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_reject: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_reject: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_reject: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_reject: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_reject: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_reject: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_reject: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_reject: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_request_reject: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fprintf(stderr, "socket_request_reject: unexpected data received (expected no coredump data for REJECT)\n");
+ goto out;
+ }
+
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_request_reject: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_reject: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) {
+ fprintf(stderr, "socket_request_invalid_flag_combination: read_marker COREDUMP_MARK_CONFLICTING failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_flag_combination: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_unknown_flag)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_unknown_flag: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_unknown_flag: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_unknown_flag: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_unknown_flag: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) {
+ fprintf(stderr, "socket_request_unknown_flag: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) {
+ fprintf(stderr, "socket_request_unknown_flag: read_marker COREDUMP_MARK_UNSUPPORTED failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_unknown_flag: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_small: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_small: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_small: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 / 2)) {
+ fprintf(stderr, "socket_request_invalid_size_small: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_small: read_marker COREDUMP_MARK_MINSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_small: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_request_invalid_size_large: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_request_invalid_size_large: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_request_invalid_size_large: check_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT,
+ COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) {
+ fprintf(stderr, "socket_request_invalid_size_large: read_marker COREDUMP_MARK_MAXSIZE failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_request_invalid_size_large: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGSEGV
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGABRT
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ struct coredump_req req = {};
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: send_coredump_ack failed\n");
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read_marker COREDUMP_MARK_REQACK failed\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+ struct coredump_req req = {};
+
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "Failed to create and listen on unix socket\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "Failed to notify parent via ipc socket\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+ goto out;
+ }
+
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!send_coredump_ack(fd_coredump, &req,
+ COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+ goto out;
+ }
+ }
+
+ close(fd_core_file);
+ close(fd_peer_pidfd);
+ close(fd_coredump);
+ fd_peer_pidfd = -1;
+ fd_coredump = -1;
+ }
+
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ waitpid(pid[i], &status[i], 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+ int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+ pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+ fd_server = -1;
+ exit_code = EXIT_FAILURE;
+ n_conns = 0;
+ close(ipc_sockets[0]);
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+ close(ipc_sockets[1]);
+
+ while (n_conns < NUM_CRASHING_COREDUMPS) {
+ int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ struct coredump_req req = {};
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: accept4 failed: %m\n");
+ goto out;
+ }
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_peer_pidfd failed\n");
+ goto out;
+ }
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_pidfd_info failed\n");
+ goto out;
+ }
+ if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: missing PIDFD_INFO_COREDUMP or PIDFD_COREDUMPED\n");
+ goto out;
+ }
+ if (!read_coredump_req(fd_coredump, &req)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_coredump_req failed\n");
+ goto out;
+ }
+ if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+ COREDUMP_KERNEL | COREDUMP_USERSPACE |
+ COREDUMP_REJECT | COREDUMP_WAIT)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: check_coredump_req failed\n");
+ goto out;
+ }
+ if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: send_coredump_ack failed\n");
+ goto out;
+ }
+ if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_marker failed\n");
+ goto out;
+ }
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+ pid_t worker = fork();
+ if (worker == 0) {
+ close(fd_server);
+ process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+ }
+ worker_pids[n_conns] = worker;
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ n_conns++;
+ }
+ exit_code = EXIT_SUCCESS;
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+
+ // Reap all worker processes
+ for (int i = 0; i < n_conns; i++) {
+ int wstatus;
+ if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+ fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+ } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+ fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
+ exit_code = EXIT_FAILURE;
+ }
+ }
+
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ pid[i] = fork();
+ ASSERT_GE(pid[i], 0);
+ if (pid[i] == 0)
+ crashing_child();
+ pidfd[i] = sys_pidfd_open(pid[i], 0);
+ ASSERT_GE(pidfd[i], 0);
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+ ASSERT_TRUE(WIFSIGNALED(status[i]));
+ ASSERT_TRUE(WCOREDUMP(status[i]));
+ }
+
+ for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+ }
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_socket_test.c b/tools/testing/selftests/coredump/coredump_socket_test.c
new file mode 100644
index 000000000000..7e26d4a6a15d
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_test.c
@@ -0,0 +1,742 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+FIXTURE_SETUP(coredump)
+{
+ FILE *file;
+ int ret;
+
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret, status;
+
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket test: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket test: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket test: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket test: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket test: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket test: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket test: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket test: creat coredump file failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket test: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "socket test: write to core file failed (read=%zd, write=%zd): %m\n", bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket test: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_COREDUMP,
+ };
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_detect_userspace_client: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_detect_userspace_client: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_mask & PIDFD_COREDUMPED) {
+ fprintf(stderr, "socket_detect_userspace_client: PIDFD_COREDUMPED incorrectly set (should be userspace client)\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_detect_userspace_client: completed successfully\n");
+out:
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ int fd_socket;
+ ssize_t ret;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len =
+ offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd_socket < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): socket failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_detect_userspace_client (client): connect failed: %m\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_socket);
+ pause();
+ fprintf(stderr, "socket_detect_userspace_client (client): completed successfully\n");
+ _exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(close(pidfd), 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+
+ ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+ int pidfd, status;
+ pid_t pid;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_no_listener: socket failed: %m\n");
+ goto out;
+ }
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "socket_no_listener: bind failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_no_listener: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_no_listener: completed successfully\n");
+out:
+ if (fd_server >= 0)
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGSEGV) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+ info.coredump_signal, SIGSEGV);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigsegv: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump with SIGABRT
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+ int pidfd, ret, status;
+ pid_t pid, pid_coredump_server;
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+
+ ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+ int exit_code = EXIT_FAILURE;
+
+ close(ipc_sockets[0]);
+
+ fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+ if (fd_server < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+ goto out;
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+ goto out;
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+ goto out;
+ }
+
+ fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+ if (fd_peer_pidfd < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+ goto out;
+ }
+
+ if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+ goto out;
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+ goto out;
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+ goto out;
+ }
+
+ /* Verify coredump_signal is available and correct */
+ if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+ goto out;
+ }
+
+ if (info.coredump_signal != SIGABRT) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+ info.coredump_signal, SIGABRT);
+ goto out;
+ }
+
+ fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: read from coredump socket failed: %m\n");
+ goto out;
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ fprintf(stderr, "socket_coredump_signal_sigabrt: write to core file failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ if (fd_server >= 0)
+ close(fd_server);
+ _exit(exit_code);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ abort();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGABRT);
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+ ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+ wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+ ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+ ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+ ASSERT_FALSE(set_core_pattern("@@.."));
+
+ ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_test.h b/tools/testing/selftests/coredump/coredump_test.h
new file mode 100644
index 000000000000..ed47f01fa53c
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __COREDUMP_TEST_H
+#define __COREDUMP_TEST_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/coredump.h>
+
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+/* Coredump fixture */
+FIXTURE(coredump)
+{
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+/* Shared helper function declarations */
+void *do_nothing(void *arg);
+void crashing_child(void);
+int create_detached_tmpfs(void);
+int create_and_listen_unix_socket(const char *path);
+bool set_core_pattern(const char *pattern);
+int get_peer_pidfd(int fd);
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info);
+
+/* Inline helper that uses harness types */
+static inline void wait_and_check_coredump_server(pid_t pid_coredump_server,
+ struct __test_metadata *const _metadata,
+ FIXTURE_DATA(coredump) *self)
+{
+ int status;
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+/* Protocol helper function declarations */
+ssize_t recv_marker(int fd);
+bool read_marker(int fd, enum coredump_mark mark);
+bool read_coredump_req(int fd, struct coredump_req *req);
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack);
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask);
+int open_coredump_tmpfile(int fd_tmpfs_detached);
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file);
+
+#endif /* __COREDUMP_TEST_H */
diff --git a/tools/testing/selftests/coredump/coredump_test_helpers.c b/tools/testing/selftests/coredump/coredump_test_helpers.c
new file mode 100644
index 000000000000..a6f6d5f2ae07
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test_helpers.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+/* Forward declarations to avoid including harness header */
+struct __test_metadata;
+
+/* Match the fixture definition from coredump_test.h */
+struct _fixture_coredump_data {
+ char original_core_pattern[256];
+ pid_t pid_coredump_server;
+ int fd_tmpfs_detached;
+};
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+void *do_nothing(void *arg)
+{
+ (void)arg;
+ while (1)
+ pause();
+
+ return NULL;
+}
+
+void crashing_child(void)
+{
+ pthread_t thread;
+ int i;
+
+ for (i = 0; i < NUM_THREAD_SPAWN; ++i)
+ pthread_create(&thread, NULL, do_nothing, NULL);
+
+ /* crash on purpose */
+ i = *(int *)NULL;
+}
+
+int create_detached_tmpfs(void)
+{
+ int fd_context, fd_tmpfs;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ if (fd_context < 0)
+ return -1;
+
+ if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
+ return -1;
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ close(fd_context);
+ return fd_tmpfs;
+}
+
+int create_and_listen_unix_socket(const char *path)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ assert(strlen(path) < sizeof(addr.sun_path) - 1);
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+ size_t addr_len =
+ offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ goto out;
+
+ ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
+ if (ret < 0)
+ goto out;
+
+ ret = listen(fd, 128);
+ if (ret < 0)
+ goto out;
+
+ return fd;
+
+out:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+bool set_core_pattern(const char *pattern)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = write(fd, pattern, strlen(pattern));
+ close(fd);
+ if (ret < 0)
+ return false;
+
+ fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
+ return ret == strlen(pattern);
+}
+
+int get_peer_pidfd(int fd)
+{
+ int fd_peer_pidfd;
+ socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
+ &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "get_peer_pidfd: getsockopt(SO_PEERPIDFD) failed: %m\n");
+ return -1;
+ }
+ fprintf(stderr, "get_peer_pidfd: successfully retrieved pidfd %d\n", fd_peer_pidfd);
+ return fd_peer_pidfd;
+}
+
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+ int ret;
+ memset(info, 0, sizeof(*info));
+ info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info);
+ if (ret < 0) {
+ fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
+ return false;
+ }
+ fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
+ (unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
+ return true;
+}
+
+/* Protocol helper functions */
+
+ssize_t recv_marker(int fd)
+{
+ enum coredump_mark mark = COREDUMP_MARK_REQACK;
+ ssize_t ret;
+
+ ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
+ if (ret != sizeof(mark))
+ return -1;
+
+ switch (mark) {
+ case COREDUMP_MARK_REQACK:
+ fprintf(stderr, "Received marker: ReqAck\n");
+ return COREDUMP_MARK_REQACK;
+ case COREDUMP_MARK_MINSIZE:
+ fprintf(stderr, "Received marker: MinSize\n");
+ return COREDUMP_MARK_MINSIZE;
+ case COREDUMP_MARK_MAXSIZE:
+ fprintf(stderr, "Received marker: MaxSize\n");
+ return COREDUMP_MARK_MAXSIZE;
+ case COREDUMP_MARK_UNSUPPORTED:
+ fprintf(stderr, "Received marker: Unsupported\n");
+ return COREDUMP_MARK_UNSUPPORTED;
+ case COREDUMP_MARK_CONFLICTING:
+ fprintf(stderr, "Received marker: Conflicting\n");
+ return COREDUMP_MARK_CONFLICTING;
+ default:
+ fprintf(stderr, "Received unknown marker: %u\n", mark);
+ break;
+ }
+ return -1;
+}
+
+bool read_marker(int fd, enum coredump_mark mark)
+{
+ ssize_t ret;
+
+ ret = recv_marker(fd);
+ if (ret < 0)
+ return false;
+ return ret == mark;
+}
+
+bool read_coredump_req(int fd, struct coredump_req *req)
+{
+ ssize_t ret;
+ size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+ memset(req, 0, sizeof(*req));
+ field_size = sizeof(req->size);
+
+ /* Peek the size of the coredump request. */
+ ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+ if (ret != field_size) {
+ fprintf(stderr, "read_coredump_req: peek failed (got %zd, expected %zu): %m\n",
+ ret, field_size);
+ return false;
+ }
+ kernel_size = req->size;
+
+ if (kernel_size < COREDUMP_ACK_SIZE_VER0) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu < min %d\n",
+ kernel_size, COREDUMP_ACK_SIZE_VER0);
+ return false;
+ }
+ if (kernel_size >= PAGE_SIZE) {
+ fprintf(stderr, "read_coredump_req: kernel_size %zu >= PAGE_SIZE %d\n",
+ kernel_size, PAGE_SIZE);
+ return false;
+ }
+
+ /* Use the minimum of user and kernel size to read the full request. */
+ user_size = sizeof(struct coredump_req);
+ ack_size = user_size < kernel_size ? user_size : kernel_size;
+ ret = recv(fd, req, ack_size, MSG_WAITALL);
+ if (ret != ack_size)
+ return false;
+
+ fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+ req->size, (unsigned long long)req->mask);
+
+ if (user_size > kernel_size)
+ remaining_size = user_size - kernel_size;
+ else
+ remaining_size = kernel_size - user_size;
+
+ if (PAGE_SIZE <= remaining_size)
+ return false;
+
+ /*
+ * Discard any additional data if the kernel's request was larger than
+ * what we knew about or cared about.
+ */
+ if (remaining_size) {
+ char buffer[PAGE_SIZE];
+
+ ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
+ if (ret != remaining_size)
+ return false;
+ fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+ }
+
+ return true;
+}
+
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+ __u64 mask, size_t size_ack)
+{
+ ssize_t ret;
+ /*
+ * Wrap struct coredump_ack in a larger struct so we can
+ * simulate sending to much data to the kernel.
+ */
+ struct large_ack_for_size_testing {
+ struct coredump_ack ack;
+ char buffer[PAGE_SIZE];
+ } large_ack = {};
+
+ if (!size_ack)
+ size_ack = sizeof(struct coredump_ack) < req->size_ack ?
+ sizeof(struct coredump_ack) :
+ req->size_ack;
+ large_ack.ack.mask = mask;
+ large_ack.ack.size = size_ack;
+ ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
+ if (ret != size_ack)
+ return false;
+
+ fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+ size_ack, (unsigned long long)mask);
+ return true;
+}
+
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+ __u64 required_mask)
+{
+ if (req->size < min_size)
+ return false;
+ if ((req->mask & required_mask) != required_mask)
+ return false;
+ if (req->mask & ~required_mask)
+ return false;
+ return true;
+}
+
+int open_coredump_tmpfile(int fd_tmpfs_detached)
+{
+ return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+ int epfd = -1;
+ int exit_code = EXIT_FAILURE;
+ struct epoll_event ev;
+ int flags;
+
+ /* Set socket to non-blocking mode for edge-triggered epoll */
+ flags = fcntl(fd_coredump, F_GETFL, 0);
+ if (flags < 0) {
+ fprintf(stderr, "Worker: fcntl(F_GETFL) failed: %m\n");
+ goto out;
+ }
+ if (fcntl(fd_coredump, F_SETFL, flags | O_NONBLOCK) < 0) {
+ fprintf(stderr, "Worker: fcntl(F_SETFL, O_NONBLOCK) failed: %m\n");
+ goto out;
+ }
+
+ epfd = epoll_create1(0);
+ if (epfd < 0) {
+ fprintf(stderr, "Worker: epoll_create1() failed: %m\n");
+ goto out;
+ }
+
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd_coredump;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) {
+ fprintf(stderr, "Worker: epoll_ctl(EPOLL_CTL_ADD) failed: %m\n");
+ goto out;
+ }
+
+ for (;;) {
+ struct epoll_event events[1];
+ int n = epoll_wait(epfd, events, 1, -1);
+ if (n < 0) {
+ fprintf(stderr, "Worker: epoll_wait() failed: %m\n");
+ break;
+ }
+
+ if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+ fprintf(stderr, "Worker: read() failed: %m\n");
+ goto out;
+ }
+ if (bytes_read == 0)
+ goto done;
+ ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_write != bytes_read) {
+ if (bytes_write < 0 && errno == ENOSPC)
+ continue;
+ fprintf(stderr, "Worker: write() failed (read=%zd, write=%zd): %m\n",
+ bytes_read, bytes_write);
+ goto out;
+ }
+ }
+ }
+ }
+
+done:
+ exit_code = EXIT_SUCCESS;
+ fprintf(stderr, "Worker: completed successfully\n");
+out:
+ if (epfd >= 0)
+ close(epfd);
+ if (fd_core_file >= 0)
+ close(fd_core_file);
+ if (fd_peer_pidfd >= 0)
+ close(fd_peer_pidfd);
+ if (fd_coredump >= 0)
+ close(fd_coredump);
+ _exit(exit_code);
+}
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 137b2364a082..1ec88937a1c2 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,49 +1,44 @@
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <fcntl.h>
+#include <inttypes.h>
#include <libgen.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
+#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+#include "coredump_test.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
-#define NUM_THREAD_SPAWN 128
-
-static void *do_nothing(void *)
-{
- while (1)
- pause();
-}
-
-static void crashing_child(void)
-{
- pthread_t thread;
- int i;
-
- for (i = 0; i < NUM_THREAD_SPAWN; ++i)
- pthread_create(&thread, NULL, do_nothing, NULL);
-
- /* crash on purpose */
- i = *(int *)NULL;
-}
-FIXTURE(coredump)
-{
- char original_core_pattern[256];
-};
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
FIXTURE_SETUP(coredump)
{
- char buf[PATH_MAX];
FILE *file;
- char *dir;
int ret;
+ self->pid_coredump_server = -ESRCH;
+ self->fd_tmpfs_detached = -1;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -52,6 +47,8 @@ FIXTURE_SETUP(coredump)
ASSERT_LT(ret, sizeof(self->original_core_pattern));
self->original_core_pattern[ret] = '\0';
+ self->fd_tmpfs_detached = create_detached_tmpfs();
+ ASSERT_GE(self->fd_tmpfs_detached, 0);
ret = fclose(file);
ASSERT_EQ(0, ret);
@@ -61,10 +58,17 @@ FIXTURE_TEARDOWN(coredump)
{
const char *reason;
FILE *file;
- int ret;
+ int ret, status;
unlink(STACKDUMP_FILE);
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
file = fopen("/proc/sys/kernel/core_pattern", "w");
if (!file) {
reason = "Unable to open core_pattern";
@@ -83,20 +87,28 @@ FIXTURE_TEARDOWN(coredump)
goto fail;
}
+ if (self->fd_tmpfs_detached >= 0) {
+ ret = close(self->fd_tmpfs_detached);
+ if (ret < 0) {
+ reason = "Unable to close detached tmpfs";
+ goto fail;
+ }
+ self->fd_tmpfs_detached = -1;
+ }
+
return;
fail:
/* This should never happen */
fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
}
-TEST_F(coredump, stackdump)
+TEST_F_TIMEOUT(coredump, stackdump, 120)
{
- struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
- char buf[PATH_MAX];
- int ret, i;
+ char buf[PAGE_SIZE];
+ int ret, i, status;
FILE *file;
pid_t pid;
@@ -129,6 +141,10 @@ TEST_F(coredump, stackdump)
/*
* Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
*/
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
for (i = 0; i < 10; ++i) {
file = fopen(STACKDUMP_FILE, "r");
if (file)
@@ -138,10 +154,12 @@ TEST_F(coredump, stackdump)
ASSERT_NE(file, NULL);
/* Step 4: Make sure all stack pointer values are non-zero */
+ line = NULL;
for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
stack = strtoull(line, NULL, 10);
ASSERT_NE(stack, 0);
}
+ free(line);
ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index d5dc7e0dc726..6232a46ca6e1 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -67,7 +67,7 @@ hotpluggable_cpus()
done
}
-hotplaggable_offline_cpus()
+hotpluggable_offline_cpus()
{
hotpluggable_cpus 0
}
@@ -151,7 +151,7 @@ offline_cpu_expect_fail()
online_all_hot_pluggable_cpus()
{
- for cpu in `hotplaggable_offline_cpus`; do
+ for cpu in `hotpluggable_offline_cpus`; do
online_cpu_expect_success $cpu
done
}
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index e350c521b467..9927b654fb8f 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -52,7 +52,14 @@ read_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
printf "$file:"
- cat $1/$file
+ #file is readable ?
+ local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }')
+
+ if [ ! -z $rfile ]; then
+ cat $1/$file
+ else
+ printf "$file is not readable\n"
+ fi
else
printf "\n"
read_cpufreq_files_in_dir "$1/$file"
@@ -83,10 +90,10 @@ update_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
- # file is writable ?
- local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+ # file is readable and writable ?
+ local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }')
- if [ ! -z $wfile ]; then
+ if [ ! -z $rwfile ]; then
# scaling_setspeed is a special file and we
# should skip updating it
if [ $file != "scaling_setspeed" ]; then
@@ -244,9 +251,10 @@ do_suspend()
printf "Failed to suspend using RTC wake alarm\n"
return 1
fi
+ else
+ echo $filename > $SYSFS/power/state
fi
- echo $filename > $SYSFS/power/state
printf "Came out of $1\n"
printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index ecbf07afc6dd..2180c328a825 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -3,10 +3,13 @@
TEST_GEN_FILES += access_memory access_memory_even
-TEST_FILES = _chk_dependency.sh _damon_sysfs.py
+TEST_FILES = _damon_sysfs.py
+TEST_FILES += drgn_dump_damon_status.py
+TEST_FILES += _common.sh
# functionality tests
TEST_PROGS += sysfs.sh
+TEST_PROGS += sysfs.py
TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
TEST_PROGS += damos_tried_regions.py damon_nr_regions.py
@@ -15,6 +18,8 @@ TEST_PROGS += reclaim.sh lru_sort.sh
# regression tests (reproducers of previously found bugs)
TEST_PROGS += sysfs_update_removed_scheme_dir.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
+TEST_PROGS += sysfs_memcg_path_leak.sh
+TEST_PROGS += sysfs_no_op_commit_break.py
EXTRA_CLEAN = __pycache__
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
deleted file mode 100644
index dda3a87dc00a..000000000000
--- a/tools/testing/selftests/damon/_chk_dependency.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}')
-if [ "$DBGFS" = "" ]
-then
- echo "debugfs not mounted"
- exit $ksft_skip
-fi
-
-DBGFS+="/damon"
-
-if [ $EUID -ne 0 ];
-then
- echo "Run as root"
- exit $ksft_skip
-fi
-
-if [ ! -d "$DBGFS" ]
-then
- echo "$DBGFS not found"
- exit $ksft_skip
-fi
-
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- monitor_on_file="monitor_on_DEPRECATED"
-else
- monitor_on_file="monitor_on"
-fi
-
-for f in attrs target_ids "$monitor_on_file"
-do
- if [ ! -f "$DBGFS/$f" ]
- then
- echo "$f not found"
- exit 1
- fi
-done
-
-permission_error="Operation not permitted"
-for f in attrs target_ids "$monitor_on_file"
-do
- status=$( cat "$DBGFS/$f" 2>&1 )
- if [ "${status#*$permission_error}" != "$status" ]; then
- echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?"
- exit $ksft_skip
- fi
-done
diff --git a/tools/testing/selftests/damon/_common.sh b/tools/testing/selftests/damon/_common.sh
new file mode 100644
index 000000000000..0279698f733e
--- /dev/null
+++ b/tools/testing/selftests/damon/_common.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+check_dependencies()
+{
+ if [ $EUID -ne 0 ]
+ then
+ echo "Run as root"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index 6e136dc3df19..748778b563cd 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -15,6 +15,10 @@ if sysfs_root is None:
print('Seems sysfs not mounted?')
exit(ksft_skip)
+if not os.path.exists(sysfs_root):
+ print('Seems DAMON disabled?')
+ exit(ksft_skip)
+
def write_file(path, string):
"Returns error string if failed, or None otherwise"
string = '%s' % string
@@ -48,9 +52,9 @@ class DamosAccessPattern:
if self.size is None:
self.size = [0, 2**64 - 1]
if self.nr_accesses is None:
- self.nr_accesses = [0, 2**64 - 1]
+ self.nr_accesses = [0, 2**32 - 1]
if self.age is None:
- self.age = [0, 2**64 - 1]
+ self.age = [0, 2**32 - 1]
def sysfs_dir(self):
return os.path.join(self.scheme.sysfs_dir(), 'access_pattern')
@@ -89,14 +93,16 @@ class DamosQuotaGoal:
metric = None
target_value = None
current_value = None
+ nid = None
effective_bytes = None
quota = None # owner quota
idx = None
- def __init__(self, metric, target_value=10000, current_value=0):
+ def __init__(self, metric, target_value=10000, current_value=0, nid=0):
self.metric = metric
self.target_value = target_value
self.current_value = current_value
+ self.nid = nid
def sysfs_dir(self):
return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx)
@@ -114,6 +120,10 @@ class DamosQuotaGoal:
self.current_value)
if err is not None:
return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nid'), self.nid)
+ if err is not None:
+ return err
+
return None
class DamosQuota:
@@ -121,12 +131,20 @@ class DamosQuota:
ms = None # time quota
goals = None # quota goals
reset_interval_ms = None # quota reset interval
+ weight_sz_permil = None
+ weight_nr_accesses_permil = None
+ weight_age_permil = None
scheme = None # owner scheme
- def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0):
+ def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0,
+ weight_sz_permil=0, weight_nr_accesses_permil=0,
+ weight_age_permil=0):
self.sz = sz
self.ms = ms
self.reset_interval_ms = reset_interval_ms
+ self.weight_sz_permil = weight_sz_permil
+ self.weight_nr_accesses_permil = weight_nr_accesses_permil
+ self.weight_age_permil = weight_age_permil
self.goals = goals if goals is not None else []
for idx, goal in enumerate(self.goals):
goal.idx = idx
@@ -147,6 +165,20 @@ class DamosQuota:
if err is not None:
return err
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'sz_permil'), self.weight_sz_permil)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'nr_accesses_permil'),
+ self.weight_nr_accesses_permil)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(
+ self.sysfs_dir(), 'weights', 'age_permil'), self.weight_age_permil)
+ if err is not None:
+ return err
+
nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals')
content, err = read_file(nr_goals_file)
if err is not None:
@@ -161,6 +193,178 @@ class DamosQuota:
return err
return None
+class DamosWatermarks:
+ metric = None
+ interval = None
+ high = None
+ mid = None
+ low = None
+ scheme = None # owner scheme
+
+ def __init__(self, metric='none', interval=0, high=0, mid=0, low=0):
+ self.metric = metric
+ self.interval = interval
+ self.high = high
+ self.mid = mid
+ self.low = low
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'watermarks')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'metric'), self.metric)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'interval_us'),
+ self.interval)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'high'), self.high)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'mid'), self.mid)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'low'), self.low)
+ if err is not None:
+ return err
+
+class DamosFilter:
+ type_ = None
+ matching = None
+ allow = None
+ memcg_path = None
+ addr_start = None
+ addr_end = None
+ target_idx = None
+ min_ = None
+ max_ = None
+ idx = None
+ filters = None # owner filters
+
+ def __init__(self, type_='anon', matching=False, allow=False,
+ memcg_path='', addr_start=0, addr_end=0, target_idx=0, min_=0,
+ max_=0):
+ self.type_ = type_
+ self.matching = matching
+ self.allow = allow
+ self.memcg_path = memcg_path,
+ self.addr_start = addr_start
+ self.addr_end = addr_end
+ self.target_idx = target_idx
+ self.min_ = min_
+ self.max_ = max_
+
+ def sysfs_dir(self):
+ return os.path.join(self.filters.sysfs_dir(), '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'type'), self.type_)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'matching'),
+ self.matching)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'allow'), self.allow)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'memcg_path'),
+ self.memcg_path)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'addr_start'),
+ self.addr_start)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'addr_end'),
+ self.addr_end)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'damon_target_idx'),
+ self.target_idx)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'min'), self.min_)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'max'), self.max_)
+ if err is not None:
+ return err
+ return None
+
+class DamosFilters:
+ name = None
+ filters = None
+ scheme = None # owner scheme
+
+ def __init__(self, name, filters=[]):
+ self.name = name
+ self.filters = filters
+ for idx, filter_ in enumerate(self.filters):
+ filter_.idx = idx
+ filter_.filters = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), self.name)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_filters'),
+ len(self.filters))
+ if err is not None:
+ return err
+ for filter_ in self.filters:
+ err = filter_.stage()
+ if err is not None:
+ return err
+ return None
+
+class DamosDest:
+ id = None
+ weight = None
+ idx = None
+ dests = None # owner dests
+
+ def __init__(self, id=0, weight=0):
+ self.id = id
+ self.weight = weight
+
+ def sysfs_dir(self):
+ return os.path.join(self.dests.sysfs_dir(), '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'id'), self.id)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'weight'), self.weight)
+ if err is not None:
+ return err
+ return None
+
+class DamosDests:
+ dests = None
+ scheme = None # owner scheme
+
+ def __init__(self, dests=[]):
+ self.dests = dests
+ for idx, dest in enumerate(self.dests):
+ dest.idx = idx
+ dest.dests = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'dests')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_dests'),
+ len(self.dests))
+ if err is not None:
+ return err
+ for dest in self.dests:
+ err = dest.stage()
+ if err is not None:
+ return err
+ return None
+
class DamosStats:
nr_tried = None
sz_tried = None
@@ -186,8 +390,13 @@ class Damos:
action = None
access_pattern = None
quota = None
+ watermarks = None
+ core_filters = None
+ ops_filters = None
+ filters = None
apply_interval_us = None
- # todo: Support watermarks, stats
+ target_nid = None
+ dests = None
idx = None
context = None
tried_bytes = None
@@ -195,12 +404,30 @@ class Damos:
tried_regions = None
def __init__(self, action='stat', access_pattern=DamosAccessPattern(),
- quota=DamosQuota(), apply_interval_us=0):
+ quota=DamosQuota(), watermarks=DamosWatermarks(),
+ core_filters=[], ops_filters=[], filters=[], target_nid=0,
+ dests=DamosDests(), apply_interval_us=0):
self.action = action
self.access_pattern = access_pattern
self.access_pattern.scheme = self
self.quota = quota
self.quota.scheme = self
+ self.watermarks = watermarks
+ self.watermarks.scheme = self
+
+ self.core_filters = DamosFilters(name='core_filters',
+ filters=core_filters)
+ self.core_filters.scheme = self
+ self.ops_filters = DamosFilters(name='ops_filters',
+ filters=ops_filters)
+ self.ops_filters.scheme = self
+ self.filters = DamosFilters(name='filters', filters=filters)
+ self.filters.scheme = self
+
+ self.target_nid = target_nid
+ self.dests = dests
+ self.dests.scheme = self
+
self.apply_interval_us = apply_interval_us
def sysfs_dir(self):
@@ -223,26 +450,39 @@ class Damos:
if err is not None:
return err
- # disable watermarks
- err = write_file(
- os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none')
+ err = self.watermarks.stage()
if err is not None:
return err
- # disable filters
- err = write_file(
- os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0')
+ err = self.core_filters.stage()
+ if err is not None:
+ return err
+ err = self.ops_filters.stage()
+ if err is not None:
+ return err
+ err = self.filters.stage()
+ if err is not None:
+ return err
+
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_nid'), '%d' %
+ self.target_nid)
+ if err is not None:
+ return err
+
+ err = self.dests.stage()
if err is not None:
return err
class DamonTarget:
pid = None
+ obsolete = None
# todo: Support target regions if test is made
idx = None
context = None
- def __init__(self, pid):
+ def __init__(self, pid, obsolete=False):
self.pid = pid
+ self.obsolete = obsolete
def sysfs_dir(self):
return os.path.join(
@@ -253,21 +493,64 @@ class DamonTarget:
os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
if err is not None:
return err
- return write_file(
+ err = write_file(
os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+ if err is not None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'obsolete_target'),
+ 'Y' if self.obsolete else 'N')
+
+class IntervalsGoal:
+ access_bp = None
+ aggrs = None
+ min_sample_us = None
+ max_sample_us = None
+ attrs = None # owner DamonAttrs
+
+ def __init__(self, access_bp=0, aggrs=0, min_sample_us=0, max_sample_us=0):
+ self.access_bp = access_bp
+ self.aggrs = aggrs
+ self.min_sample_us = min_sample_us
+ self.max_sample_us = max_sample_us
+
+ def sysfs_dir(self):
+ return os.path.join(self.attrs.interval_sysfs_dir(), 'intervals_goal')
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'access_bp'), self.access_bp)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'aggrs'), self.aggrs)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'min_sample_us'),
+ self.min_sample_us)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'max_sample_us'),
+ self.max_sample_us)
+ if err is not None:
+ return err
+ return None
class DamonAttrs:
sample_us = None
aggr_us = None
+ intervals_goal = None
update_us = None
min_nr_regions = None
max_nr_regions = None
context = None
- def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000,
+ def __init__(self, sample_us=5000, aggr_us=100000,
+ intervals_goal=IntervalsGoal(), update_us=1000000,
min_nr_regions=10, max_nr_regions=1000):
self.sample_us = sample_us
self.aggr_us = aggr_us
+ self.intervals_goal = intervals_goal
+ self.intervals_goal.attrs = self
self.update_us = update_us
self.min_nr_regions = min_nr_regions
self.max_nr_regions = max_nr_regions
@@ -289,6 +572,9 @@ class DamonAttrs:
self.aggr_us)
if err is not None:
return err
+ err = self.intervals_goal.stage()
+ if err is not None:
+ return err
err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
self.update_us)
if err is not None:
@@ -404,6 +690,9 @@ class Kdamond:
if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
+ if err is not None:
+ return err
+ self.pid, err = read_file(os.path.join(self.sysfs_dir(), 'pid'))
return err
def stop(self):
@@ -420,11 +709,16 @@ class Kdamond:
tried_regions = []
tried_regions_dir = os.path.join(
scheme.sysfs_dir(), 'tried_regions')
+ region_indices = []
for filename in os.listdir(
os.path.join(scheme.sysfs_dir(), 'tried_regions')):
tried_region_dir = os.path.join(tried_regions_dir, filename)
if not os.path.isdir(tried_region_dir):
continue
+ region_indices.append(int(filename))
+ for region_idx in sorted(region_indices):
+ tried_region_dir = os.path.join(tried_regions_dir,
+ '%d' % region_idx)
region_values = []
for f in ['start', 'end', 'nr_accesses', 'age']:
content, err = read_file(
diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh
deleted file mode 100644
index 54d45791b0d9..000000000000
--- a/tools/testing/selftests/damon/_debugfs_common.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-test_write_result() {
- file=$1
- content=$2
- orig_content=$3
- expect_reason=$4
- expected=$5
-
- if [ "$expected" = "0" ]
- then
- echo "$content" > "$file"
- else
- echo "$content" > "$file" 2> /dev/null
- fi
- if [ $? -ne "$expected" ]
- then
- echo "writing $content to $file doesn't return $expected"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-test_write_succ() {
- test_write_result "$1" "$2" "$3" "$4" 0
-}
-
-test_write_fail() {
- test_write_result "$1" "$2" "$3" "$4" 1
-}
-
-test_content() {
- file=$1
- orig_content=$2
- expected=$3
- expect_reason=$4
-
- content=$(cat "$file")
- if [ "$content" != "$expected" ]
- then
- echo "reading $file expected $expected but $content"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-source ./_chk_dependency.sh
-
-damon_onoff="$DBGFS/monitor_on"
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- damon_onoff="$DBGFS/monitor_on_DEPRECATED"
-else
- damon_onoff="$DBGFS/monitor_on"
-fi
-
-if [ $(cat "$damon_onoff") = "on" ]
-then
- echo "monitoring is on"
- exit $ksft_skip
-fi
diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c
index a9f4e9aaf3a9..93f3a71bcfd4 100644
--- a/tools/testing/selftests/damon/access_memory_even.c
+++ b/tools/testing/selftests/damon/access_memory_even.c
@@ -9,7 +9,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <time.h>
int main(int argc, char *argv[])
{
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
index 2e8a74aff543..58f3291fed12 100755
--- a/tools/testing/selftests/damon/damon_nr_regions.py
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
real_nr_regions, min_nr_regions, max_nr_regions)
+ collected_nr_regions.sort()
if (collected_nr_regions[0] < min_nr_regions or
collected_nr_regions[-1] > max_nr_regions):
print('fail %s' % test_name)
@@ -109,6 +110,7 @@ def main():
attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
attrs.min_nr_regions = 3
attrs.max_nr_regions = 7
+ attrs.update_us = 100000
err = kdamonds.kdamonds[0].commit()
if err is not None:
proc.terminate()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
index 7d4c6bb2e3cd..57c4937aaed2 100755
--- a/tools/testing/selftests/damon/damos_quota.py
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -51,16 +51,19 @@ def main():
nr_quota_exceeds = scheme.stats.qt_exceeds
wss_collected.sort()
+ nr_expected_quota_exceeds = 0
for wss in wss_collected:
if wss > sz_quota:
print('quota is not kept: %s > %s' % (wss, sz_quota))
print('collected samples are as below')
print('\n'.join(['%d' % wss for wss in wss_collected]))
exit(1)
+ if wss == sz_quota:
+ nr_expected_quota_exceeds += 1
- if nr_quota_exceeds < len(wss_collected):
- print('quota is not always exceeded: %d > %d' %
- (len(wss_collected), nr_quota_exceeds))
+ if nr_quota_exceeds < nr_expected_quota_exceeds:
+ print('quota is exceeded less than expected: %d < %d' %
+ (nr_quota_exceeds, nr_expected_quota_exceeds))
exit(1)
if __name__ == '__main__':
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
index 18246f3b62f7..f76e0412b564 100755
--- a/tools/testing/selftests/damon/damos_quota_goal.py
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
if last_effective_bytes != 0 else -1.0))
if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
print('efective bytes not changed: %d' % goal.effective_bytes)
exit(1)
diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py
new file mode 100755
index 000000000000..5374d18d1fa8
--- /dev/null
+++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env drgn
+# SPDX-License-Identifier: GPL-2.0
+
+'''
+Read DAMON context data and dump as a json string.
+'''
+import drgn
+from drgn import FaultError, NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof
+from drgn.helpers.common import *
+from drgn.helpers.linux import *
+
+import json
+import sys
+
+if "prog" not in globals():
+ try:
+ prog = drgn.get_default_prog()
+ except drgn.NoDefaultProgramError:
+ prog = drgn.program_from_kernel()
+ drgn.set_default_prog(prog)
+
+def to_dict(object, attr_name_converter):
+ d = {}
+ for attr_name, converter in attr_name_converter:
+ d[attr_name] = converter(getattr(object, attr_name))
+ return d
+
+def ops_to_dict(ops):
+ return to_dict(ops, [
+ ['id', int],
+ ])
+
+def intervals_goal_to_dict(goal):
+ return to_dict(goal, [
+ ['access_bp', int],
+ ['aggrs', int],
+ ['min_sample_us', int],
+ ['max_sample_us', int],
+ ])
+
+def attrs_to_dict(attrs):
+ return to_dict(attrs, [
+ ['sample_interval', int],
+ ['aggr_interval', int],
+ ['ops_update_interval', int],
+ ['intervals_goal', intervals_goal_to_dict],
+ ['min_nr_regions', int],
+ ['max_nr_regions', int],
+ ])
+
+def addr_range_to_dict(addr_range):
+ return to_dict(addr_range, [
+ ['start', int],
+ ['end', int],
+ ])
+
+def region_to_dict(region):
+ return to_dict(region, [
+ ['ar', addr_range_to_dict],
+ ['sampling_addr', int],
+ ['nr_accesses', int],
+ ['nr_accesses_bp', int],
+ ['age', int],
+ ])
+
+def regions_to_list(regions):
+ return [region_to_dict(r)
+ for r in list_for_each_entry(
+ 'struct damon_region', regions.address_of_(), 'list')]
+
+def target_to_dict(target):
+ return to_dict(target, [
+ ['pid', int],
+ ['nr_regions', int],
+ ['regions_list', regions_to_list],
+ ['obsolete', bool],
+ ])
+
+def targets_to_list(targets):
+ return [target_to_dict(t)
+ for t in list_for_each_entry(
+ 'struct damon_target', targets.address_of_(), 'list')]
+
+def damos_access_pattern_to_dict(pattern):
+ return to_dict(pattern, [
+ ['min_sz_region', int],
+ ['max_sz_region', int],
+ ['min_nr_accesses', int],
+ ['max_nr_accesses', int],
+ ['min_age_region', int],
+ ['max_age_region', int],
+ ])
+
+def damos_quota_goal_to_dict(goal):
+ return to_dict(goal, [
+ ['metric', int],
+ ['target_value', int],
+ ['current_value', int],
+ ['last_psi_total', int],
+ ['nid', int],
+ ])
+
+def damos_quota_goals_to_list(goals):
+ return [damos_quota_goal_to_dict(g)
+ for g in list_for_each_entry(
+ 'struct damos_quota_goal', goals.address_of_(), 'list')]
+
+def damos_quota_to_dict(quota):
+ return to_dict(quota, [
+ ['reset_interval', int],
+ ['ms', int], ['sz', int],
+ ['goals', damos_quota_goals_to_list],
+ ['esz', int],
+ ['weight_sz', int],
+ ['weight_nr_accesses', int],
+ ['weight_age', int],
+ ])
+
+def damos_watermarks_to_dict(watermarks):
+ return to_dict(watermarks, [
+ ['metric', int],
+ ['interval', int],
+ ['high', int], ['mid', int], ['low', int],
+ ])
+
+def damos_migrate_dests_to_dict(dests):
+ nr_dests = int(dests.nr_dests)
+ node_id_arr = []
+ weight_arr = []
+ for i in range(nr_dests):
+ node_id_arr.append(int(dests.node_id_arr[i]))
+ weight_arr.append(int(dests.weight_arr[i]))
+ return {
+ 'node_id_arr': node_id_arr,
+ 'weight_arr': weight_arr,
+ 'nr_dests': nr_dests,
+ }
+
+def damos_filter_to_dict(damos_filter):
+ filter_type_keyword = {
+ 0: 'anon',
+ 1: 'active',
+ 2: 'memcg',
+ 3: 'young',
+ 4: 'hugepage_size',
+ 5: 'unmapped',
+ 6: 'addr',
+ 7: 'target'
+ }
+ dict_ = {
+ 'type': filter_type_keyword[int(damos_filter.type)],
+ 'matching': bool(damos_filter.matching),
+ 'allow': bool(damos_filter.allow),
+ }
+ type_ = dict_['type']
+ if type_ == 'memcg':
+ dict_['memcg_id'] = int(damos_filter.memcg_id)
+ elif type_ == 'addr':
+ dict_['addr_range'] = [int(damos_filter.addr_range.start),
+ int(damos_filter.addr_range.end)]
+ elif type_ == 'target':
+ dict_['target_idx'] = int(damos_filter.target_idx)
+ elif type_ == 'hugeapge_size':
+ dict_['sz_range'] = [int(damos_filter.sz_range.min),
+ int(damos_filter.sz_range.max)]
+ return dict_
+
+def scheme_to_dict(scheme):
+ dict_ = to_dict(scheme, [
+ ['pattern', damos_access_pattern_to_dict],
+ ['action', int],
+ ['apply_interval_us', int],
+ ['quota', damos_quota_to_dict],
+ ['wmarks', damos_watermarks_to_dict],
+ ['target_nid', int],
+ ['migrate_dests', damos_migrate_dests_to_dict],
+ ])
+ core_filters = []
+ for f in list_for_each_entry(
+ 'struct damos_filter', scheme.core_filters.address_of_(), 'list'):
+ core_filters.append(damos_filter_to_dict(f))
+ dict_['core_filters'] = core_filters
+ ops_filters = []
+ for f in list_for_each_entry(
+ 'struct damos_filter', scheme.ops_filters.address_of_(), 'list'):
+ ops_filters.append(damos_filter_to_dict(f))
+ dict_['ops_filters'] = ops_filters
+
+ return dict_
+
+def schemes_to_list(schemes):
+ return [scheme_to_dict(s)
+ for s in list_for_each_entry(
+ 'struct damos', schemes.address_of_(), 'list')]
+
+def damon_ctx_to_dict(ctx):
+ return to_dict(ctx, [
+ ['ops', ops_to_dict],
+ ['attrs', attrs_to_dict],
+ ['adaptive_targets', targets_to_list],
+ ['schemes', schemes_to_list],
+ ])
+
+def main():
+ if len(sys.argv) < 3:
+ print('Usage: %s <kdamond pid> <file>' % sys.argv[0])
+ exit(1)
+
+ pid = int(sys.argv[1])
+ file_to_store = sys.argv[2]
+
+ kthread_data = cast('struct kthread *',
+ find_task(prog, pid).worker_private).data
+ ctx = cast('struct damon_ctx *', kthread_data)
+ status = {'contexts': [damon_ctx_to_dict(ctx)]}
+ if file_to_store == 'stdout':
+ print(json.dumps(status, indent=4))
+ else:
+ with open(file_to_store, 'w') as f:
+ json.dump(status, f, indent=4)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
index 61b80197c896..1e4849db78a9 100755
--- a/tools/testing/selftests/damon/lru_sort.sh
+++ b/tools/testing/selftests/damon/lru_sort.sh
@@ -1,14 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+source _common.sh
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-if [ $EUID -ne 0 ]
-then
- echo "Run as root"
- exit $ksft_skip
-fi
+check_dependencies
damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled"
if [ ! -f "$damon_lru_sort_enabled" ]
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
index 78dbc2334cbe..e56ceb035129 100755
--- a/tools/testing/selftests/damon/reclaim.sh
+++ b/tools/testing/selftests/damon/reclaim.sh
@@ -1,14 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+source _common.sh
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-if [ $EUID -ne 0 ]
-then
- echo "Run as root"
- exit $ksft_skip
-fi
+check_dependencies
damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled"
if [ ! -f "$damon_reclaim_enabled" ]
diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py
new file mode 100755
index 000000000000..9cca71eb0325
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import subprocess
+
+import _damon_sysfs
+
+def dump_damon_status_dict(pid):
+ try:
+ subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL)
+ except:
+ return None, 'drgn not found'
+ file_dir = os.path.dirname(os.path.abspath(__file__))
+ dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py')
+ rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'],
+ stderr=subprocess.DEVNULL)
+ if rc != 0:
+ return None, 'drgn fail'
+ try:
+ with open('damon_dump_output', 'r') as f:
+ return json.load(f), None
+ except Exception as e:
+ return None, 'json.load fail (%s)' % e
+
+def fail(expectation, status):
+ print('unexpected %s' % expectation)
+ print(json.dumps(status, indent=4))
+ exit(1)
+
+def assert_true(condition, expectation, status):
+ if condition is not True:
+ fail(expectation, status)
+
+def assert_watermarks_committed(watermarks, dump):
+ wmark_metric_val = {
+ 'none': 0,
+ 'free_mem_rate': 1,
+ }
+ assert_true(dump['metric'] == wmark_metric_val[watermarks.metric],
+ 'metric', dump)
+ assert_true(dump['interval'] == watermarks.interval, 'interval', dump)
+ assert_true(dump['high'] == watermarks.high, 'high', dump)
+ assert_true(dump['mid'] == watermarks.mid, 'mid', dump)
+ assert_true(dump['low'] == watermarks.low, 'low', dump)
+
+def assert_quota_goal_committed(qgoal, dump):
+ metric_val = {
+ 'user_input': 0,
+ 'some_mem_psi_us': 1,
+ 'node_mem_used_bp': 2,
+ 'node_mem_free_bp': 3,
+ }
+ assert_true(dump['metric'] == metric_val[qgoal.metric], 'metric', dump)
+ assert_true(dump['target_value'] == qgoal.target_value, 'target_value',
+ dump)
+ if qgoal.metric == 'user_input':
+ assert_true(dump['current_value'] == qgoal.current_value,
+ 'current_value', dump)
+ assert_true(dump['nid'] == qgoal.nid, 'nid', dump)
+
+def assert_quota_committed(quota, dump):
+ assert_true(dump['reset_interval'] == quota.reset_interval_ms,
+ 'reset_interval', dump)
+ assert_true(dump['ms'] == quota.ms, 'ms', dump)
+ assert_true(dump['sz'] == quota.sz, 'sz', dump)
+ for idx, qgoal in enumerate(quota.goals):
+ assert_quota_goal_committed(qgoal, dump['goals'][idx])
+ assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump)
+ assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil,
+ 'weight_nr_accesses', dump)
+ assert_true(
+ dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump)
+
+
+def assert_migrate_dests_committed(dests, dump):
+ assert_true(dump['nr_dests'] == len(dests.dests), 'nr_dests', dump)
+ for idx, dest in enumerate(dests.dests):
+ assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump)
+ assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump)
+
+def assert_filter_committed(filter_, dump):
+ assert_true(filter_.type_ == dump['type'], 'type', dump)
+ assert_true(filter_.matching == dump['matching'], 'matching', dump)
+ assert_true(filter_.allow == dump['allow'], 'allow', dump)
+ # TODO: check memcg_path and memcg_id if type is memcg
+ if filter_.type_ == 'addr':
+ assert_true([filter_.addr_start, filter_.addr_end] ==
+ dump['addr_range'], 'addr_range', dump)
+ elif filter_.type_ == 'target':
+ assert_true(filter_.target_idx == dump['target_idx'], 'target_idx',
+ dump)
+ elif filter_.type_ == 'hugepage_size':
+ assert_true([filter_.min_, filter_.max_] == dump['sz_range'],
+ 'sz_range', dump)
+
+def assert_access_pattern_committed(pattern, dump):
+ assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region',
+ dump)
+ assert_true(dump['max_sz_region'] == pattern.size[1], 'max_sz_region',
+ dump)
+ assert_true(dump['min_nr_accesses'] == pattern.nr_accesses[0],
+ 'min_nr_accesses', dump)
+ assert_true(dump['max_nr_accesses'] == pattern.nr_accesses[1],
+ 'max_nr_accesses', dump)
+ assert_true(dump['min_age_region'] == pattern.age[0], 'min_age_region',
+ dump)
+ assert_true(dump['max_age_region'] == pattern.age[1], 'miaxage_region',
+ dump)
+
+def assert_scheme_committed(scheme, dump):
+ assert_access_pattern_committed(scheme.access_pattern, dump['pattern'])
+ action_val = {
+ 'willneed': 0,
+ 'cold': 1,
+ 'pageout': 2,
+ 'hugepage': 3,
+ 'nohugeapge': 4,
+ 'lru_prio': 5,
+ 'lru_deprio': 6,
+ 'migrate_hot': 7,
+ 'migrate_cold': 8,
+ 'stat': 9,
+ }
+ assert_true(dump['action'] == action_val[scheme.action], 'action', dump)
+ assert_true(dump['apply_interval_us'] == scheme. apply_interval_us,
+ 'apply_interval_us', dump)
+ assert_true(dump['target_nid'] == scheme.target_nid, 'target_nid', dump)
+ assert_migrate_dests_committed(scheme.dests, dump['migrate_dests'])
+ assert_quota_committed(scheme.quota, dump['quota'])
+ assert_watermarks_committed(scheme.watermarks, dump['wmarks'])
+ # TODO: test filters directory
+ for idx, f in enumerate(scheme.core_filters.filters):
+ assert_filter_committed(f, dump['core_filters'][idx])
+ for idx, f in enumerate(scheme.ops_filters.filters):
+ assert_filter_committed(f, dump['ops_filters'][idx])
+
+def assert_schemes_committed(schemes, dump):
+ assert_true(len(schemes) == len(dump), 'len_schemes', dump)
+ for idx, scheme in enumerate(schemes):
+ assert_scheme_committed(scheme, dump[idx])
+
+def assert_monitoring_attrs_committed(attrs, dump):
+ assert_true(dump['sample_interval'] == attrs.sample_us, 'sample_interval',
+ dump)
+ assert_true(dump['aggr_interval'] == attrs.aggr_us, 'aggr_interval', dump)
+ assert_true(dump['intervals_goal']['access_bp'] ==
+ attrs.intervals_goal.access_bp, 'access_bp',
+ dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['aggrs'] == attrs.intervals_goal.aggrs,
+ 'aggrs', dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['min_sample_us'] ==
+ attrs.intervals_goal.min_sample_us, 'min_sample_us',
+ dump['intervals_goal'])
+ assert_true(dump['intervals_goal']['max_sample_us'] ==
+ attrs.intervals_goal.max_sample_us, 'max_sample_us',
+ dump['intervals_goal'])
+
+ assert_true(dump['ops_update_interval'] == attrs.update_us,
+ 'ops_update_interval', dump)
+ assert_true(dump['min_nr_regions'] == attrs.min_nr_regions,
+ 'min_nr_regions', dump)
+ assert_true(dump['max_nr_regions'] == attrs.max_nr_regions,
+ 'max_nr_regions', dump)
+
+def assert_monitoring_target_committed(target, dump):
+ # target.pid is the pid "number", while dump['pid'] is 'struct pid'
+ # pointer, and hence cannot be compared.
+ assert_true(dump['obsolete'] == target.obsolete, 'target obsolete', dump)
+
+def assert_monitoring_targets_committed(targets, dump):
+ assert_true(len(targets) == len(dump), 'len_targets', dump)
+ for idx, target in enumerate(targets):
+ assert_monitoring_target_committed(target, dump[idx])
+
+def assert_ctx_committed(ctx, dump):
+ ops_val = {
+ 'vaddr': 0,
+ 'fvaddr': 1,
+ 'paddr': 2,
+ }
+ assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump)
+ assert_monitoring_attrs_committed(ctx.monitoring_attrs, dump['attrs'])
+ assert_monitoring_targets_committed(ctx.targets, dump['adaptive_targets'])
+ assert_schemes_committed(ctx.schemes, dump['schemes'])
+
+def assert_ctxs_committed(kdamonds):
+ status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print(err)
+ kdamonds.stop()
+ exit(1)
+
+ ctxs = kdamonds.kdamonds[0].contexts
+ dump = status['contexts']
+ assert_true(len(ctxs) == len(dump), 'ctxs length', dump)
+ for idx, ctx in enumerate(ctxs):
+ assert_ctx_committed(ctx, dump[idx])
+
+def main():
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ targets=[_damon_sysfs.DamonTarget(pid=-1)],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ assert_ctxs_committed(kdamonds)
+
+ context = _damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ sample_us=100000, aggr_us=2000000,
+ intervals_goal=_damon_sysfs.IntervalsGoal(
+ access_bp=400, aggrs=3, min_sample_us=5000,
+ max_sample_us=10000000),
+ update_us=2000000),
+ schemes=[_damon_sysfs.Damos(
+ action='pageout',
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ size=[4096, 2**10],
+ nr_accesses=[3, 317],
+ age=[5,71]),
+ quota=_damon_sysfs.DamosQuota(
+ sz=100*1024*1024, ms=100,
+ goals=[_damon_sysfs.DamosQuotaGoal(
+ metric='node_mem_used_bp',
+ target_value=9950,
+ nid=1)],
+ reset_interval_ms=1500,
+ weight_sz_permil=20,
+ weight_nr_accesses_permil=200,
+ weight_age_permil=1000),
+ watermarks=_damon_sysfs.DamosWatermarks(
+ metric = 'free_mem_rate', interval = 500000, # 500 ms
+ high = 500, mid = 400, low = 50),
+ target_nid=1,
+ apply_interval_us=1000000,
+ dests=_damon_sysfs.DamosDests(
+ dests=[_damon_sysfs.DamosDest(id=1, weight=30),
+ _damon_sysfs.DamosDest(id=0, weight=70)]),
+ core_filters=[
+ _damon_sysfs.DamosFilter(type_='addr', matching=True,
+ allow=False, addr_start=42,
+ addr_end=4242),
+ ],
+ ops_filters=[
+ _damon_sysfs.DamosFilter(type_='anon', matching=True,
+ allow=True),
+ ],
+ )])
+ context.idx = 0
+ context.kdamond = kdamonds.kdamonds[0]
+ kdamonds.kdamonds[0].contexts = [context]
+ kdamonds.kdamonds[0].commit()
+
+ assert_ctxs_committed(kdamonds)
+
+ # test online commitment of minimum context.
+ context = _damon_sysfs.DamonCtx()
+ context.idx = 0
+ context.kdamond = kdamonds.kdamonds[0]
+ kdamonds.kdamonds[0].contexts = [context]
+ kdamonds.kdamonds[0].commit()
+
+ assert_ctxs_committed(kdamonds)
+
+ kdamonds.stop()
+
+ # test obsolete_target.
+ proc1 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc2 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ proc3 = subprocess.Popen(['sh'], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[
+ _damon_sysfs.DamonTarget(pid=proc1.pid),
+ _damon_sysfs.DamonTarget(pid=proc2.pid),
+ _damon_sysfs.DamonTarget(pid=proc3.pid),
+ ],
+ schemes=[_damon_sysfs.Damos()],
+ )])])
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+ kdamonds.kdamonds[0].contexts[0].targets[1].obsolete = True
+ kdamonds.kdamonds[0].commit()
+ del kdamonds.kdamonds[0].contexts[0].targets[1]
+ assert_ctxs_committed(kdamonds)
+ kdamonds.stop()
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index e9a976d296e2..83e3b7f63d81 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+source _common.sh
+
# Kselftest frmework requirement - SKIP code is 4.
ksft_skip=4
@@ -364,14 +366,5 @@ test_damon_sysfs()
test_kdamonds "$damon_sysfs/kdamonds"
}
-check_dependencies()
-{
- if [ $EUID -ne 0 ]
- then
- echo "Run as root"
- exit $ksft_skip
- fi
-}
-
check_dependencies
test_damon_sysfs "/sys/kernel/mm/damon/admin"
diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
new file mode 100755
index 000000000000..64c5d8c518a4
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $EUID -ne 0 ]
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+damon_sysfs="/sys/kernel/mm/damon/admin"
+if [ ! -d "$damon_sysfs" ]
+then
+ echo "damon sysfs not found"
+ exit $ksft_skip
+fi
+
+# ensure filter directory
+echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters"
+
+filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0"
+
+before_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
+
+# try to leak 3000 KiB
+for i in {1..102400};
+do
+ echo "012345678901234567890123456789" > "$filter_dir/memcg_path"
+done
+
+after_kb=$(grep Slab /proc/meminfo | awk '{print $2}')
+# expect up to 1500 KiB free from other tasks memory
+expected_after_kb_max=$((before_kb + 1500))
+
+if [ "$after_kb" -gt "$expected_after_kb_max" ]
+then
+ echo "maybe memcg_path are leaking: $before_kb -> $after_kb"
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
new file mode 100755
index 000000000000..2c65cffe6b54
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import subprocess
+import sys
+
+import _damon_sysfs
+
+def dump_damon_status_dict(pid):
+ try:
+ subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL)
+ except:
+ return None, 'drgn not found'
+ file_dir = os.path.dirname(os.path.abspath(__file__))
+ dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py')
+ rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'],
+ stderr=subprocess.DEVNULL)
+
+ if rc != 0:
+ return None, f'drgn fail: return code({rc})'
+ try:
+ with open('damon_dump_output', 'r') as f:
+ return json.load(f), None
+ except Exception as e:
+ return None, 'json.load fail (%s)' % e
+
+def main():
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ schemes=[_damon_sysfs.Damos(
+ ops_filters=[
+ _damon_sysfs.DamosFilter(
+ type_='anon',
+ matching=True,
+ allow=True,
+ )
+ ]
+ )],
+ )])]
+ )
+
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ before_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('before-commit status dump failed: %s' % err)
+ exit(1)
+
+ kdamonds.kdamonds[0].commit()
+
+ after_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('after-commit status dump failed: %s' % err)
+ exit(1)
+
+ if before_commit_status != after_commit_status:
+ print(f'before: {json.dumps(before_commit_status, indent=2)}')
+ print(f'after: {json.dumps(after_commit_status, indent=2)}')
+ exit(1)
+
+ kdamonds.stop()
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
index ade35576e748..35fc32beeaf7 100755
--- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
@@ -1,14 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+source _common.sh
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-if [ $EUID -ne 0 ]
-then
- echo "Run as root"
- exit $ksft_skip
-fi
+check_dependencies
damon_sysfs="/sys/kernel/mm/damon/admin"
if [ ! -d "$damon_sysfs" ]
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
deleted file mode 100644
index cd8c5ece1cba..000000000000
--- a/tools/testing/selftests/dma/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
-CFLAGS += -I../../../../include/
-
-TEST_GEN_PROGS := dma_map_benchmark
-
-include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
deleted file mode 100644
index 6102ee3c43cd..000000000000
--- a/tools/testing/selftests/dma/config
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
deleted file mode 100644
index b12f1f9babf8..000000000000
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020 HiSilicon Limited.
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <linux/types.h>
-#include <linux/map_benchmark.h>
-
-#define NSEC_PER_MSEC 1000000L
-
-static char *directions[] = {
- "BIDIRECTIONAL",
- "TO_DEVICE",
- "FROM_DEVICE",
-};
-
-int main(int argc, char **argv)
-{
- struct map_benchmark map;
- int fd, opt;
- /* default single thread, run 20 seconds on NUMA_NO_NODE */
- int threads = 1, seconds = 20, node = -1;
- /* default dma mask 32bit, bidirectional DMA */
- int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
- /* default granule 1 PAGESIZE */
- int granule = 1;
-
- int cmd = DMA_MAP_BENCHMARK;
-
- while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
- switch (opt) {
- case 't':
- threads = atoi(optarg);
- break;
- case 's':
- seconds = atoi(optarg);
- break;
- case 'n':
- node = atoi(optarg);
- break;
- case 'b':
- bits = atoi(optarg);
- break;
- case 'd':
- dir = atoi(optarg);
- break;
- case 'x':
- xdelay = atoi(optarg);
- break;
- case 'g':
- granule = atoi(optarg);
- break;
- default:
- return -1;
- }
- }
-
- if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
- fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
- DMA_MAP_MAX_THREADS);
- exit(1);
- }
-
- if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
- fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
- DMA_MAP_MAX_SECONDS);
- exit(1);
- }
-
- if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
- fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
- DMA_MAP_MAX_TRANS_DELAY);
- exit(1);
- }
-
- /* suppose the mininum DMA zone is 1MB in the world */
- if (bits < 20 || bits > 64) {
- fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
- exit(1);
- }
-
- if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
- dir != DMA_MAP_FROM_DEVICE) {
- fprintf(stderr, "invalid dma direction\n");
- exit(1);
- }
-
- if (granule < 1 || granule > 1024) {
- fprintf(stderr, "invalid granule size\n");
- exit(1);
- }
-
- fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
- if (fd == -1) {
- perror("open");
- exit(1);
- }
-
- memset(&map, 0, sizeof(map));
- map.seconds = seconds;
- map.threads = threads;
- map.node = node;
- map.dma_bits = bits;
- map.dma_dir = dir;
- map.dma_trans_ns = xdelay;
- map.granule = granule;
-
- if (ioctl(fd, cmd, &map)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
- threads, seconds, node, dir[directions], granule);
- printf("average map latency(us):%.1f standard deviation:%.1f\n",
- map.avg_map_100ns/10.0, map.map_stddev/10.0);
- printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
- map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
-
- return 0;
-}
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 5d0a809dc2df..fc9694fc4e89 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -15,7 +15,7 @@
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <drm/drm.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define DEVPATH "/dev/dma_heap"
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 6062723a172e..d78aec662586 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -16,7 +16,7 @@
#include <sys/mman.h>
#include <linux/memfd.h>
#include <linux/udmabuf.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#define TEST_PREFIX "drivers/dma-buf/udmabuf"
#define NUM_PAGES 4
@@ -138,7 +138,7 @@ int main(int argc, char *argv[])
void *addr1, *addr2;
ksft_print_header();
- ksft_set_plan(6);
+ ksft_set_plan(7);
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
@@ -250,6 +250,24 @@ int main(int argc, char *argv[])
close(buf);
close(memfd);
+
+ /* same test as above but we pin first before writing to memfd */
+ page_size = getpagesize() * 512; /* 2 MB */
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, true);
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX);
+
+ close(buf);
+ close(memfd);
close(devfd);
ksft_print_msg("%s: ok\n", TEST_PREFIX);
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
new file mode 100644
index 000000000000..3633c7a3ed65
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gro
+napi_id_helper
+psp_responder
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 137470bdee0c..f5c71d993750 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,18 +1,43 @@
# SPDX-License-Identifier: GPL-2.0
+CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
- ../../net/net_helper.sh \
../../net/lib.sh \
+TEST_GEN_FILES := \
+ gro \
+ napi_id_helper \
+# end of TEST_GEN_FILES
+
TEST_PROGS := \
+ gro.py \
+ hds.py \
+ napi_id.py \
+ napi_threaded.py \
netcons_basic.sh \
+ netcons_cmdline.sh \
+ netcons_fragmented_msg.sh \
netcons_overflow.sh \
+ netcons_sysdata.sh \
+ netcons_torture.sh \
+ netpoll_basic.py \
ping.py \
+ psp.py \
queues.py \
- stats.py \
+ ring_reconfig.py \
shaper.py \
- hds.py \
+ stats.py \
+ xdp.py \
# end of TEST_PROGS
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := psp_responder
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
include ../../lib.mk
+
+# YNL build
+YNL_GENS := psp
+
+include ../../net/ynl.mk
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
index 3b6a29e6564b..eb838ae94844 100644
--- a/tools/testing/selftests/drivers/net/README.rst
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -107,7 +107,7 @@ On the target machine, running the tests will use netdevsim by default::
1..1
# timeout set to 45
# selftests: drivers/net: ping.py
- # KTAP version 1
+ # TAP version 13
# 1..3
# ok 1 ping.test_v4
# ok 2 ping.test_v6
@@ -128,7 +128,7 @@ Create a config with remote info::
Run the test::
[/root] # ./ksft-net-drv/drivers/net/ping.py
- KTAP version 1
+ TAP version 13
1..3
ok 1 ping.test_v4
ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 2b10854e4b1e..6c5c60adb5e8 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -4,21 +4,29 @@
TEST_PROGS := \
bond-arp-interval-causes-panic.sh \
bond-break-lacpdu-tx.sh \
+ bond-eth-type-change.sh \
bond-lladdr-target.sh \
+ bond_ipsec_offload.sh \
+ bond_lacp_prio.sh \
+ bond_macvlan_ipvlan.sh \
+ bond_options.sh \
+ bond_passive_lacp.sh \
dev_addr_lists.sh \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
- bond_options.sh \
- bond-eth-type-change.sh \
- bond_macvlan_ipvlan.sh
+ netcons_over_bonding.sh \
+# end of TEST_PROGS
TEST_FILES := \
- lag_lib.sh \
bond_topo_2d1c.sh \
- bond_topo_3d1c.sh
+ bond_topo_3d1c.sh \
+ lag_lib.sh \
+# end of TEST_FILES
TEST_INCLUDES := \
+ ../../../net/lib.sh \
+ ../lib/sh/lib_netcons.sh \
../../../net/forwarding/lib.sh \
- ../../../net/lib.sh
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
new file mode 100755
index 000000000000..f09e100232c7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPsec over bonding offload test:
+#
+# +----------------+
+# | bond0 |
+# | | |
+# | eth0 eth1 |
+# +---+-------+----+
+#
+# We use netdevsim instead of physical interfaces
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 192.0.2.1/24 dst 192.0.2.2/24
+# offload dev bond0 dir out
+# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \
+# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \
+# spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+srcip=192.0.2.1
+dstip=192.0.2.2
+ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec
+ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec
+active_slave=""
+
+# shellcheck disable=SC2317
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave
+
+ # shellcheck disable=SC2154
+ new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+ jq -r ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ]
+}
+
+test_offload()
+{
+ # use ping to exercise the Tx path
+ ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null
+
+ active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+ jq -r ".[].linkinfo.info_data.active_slave")
+
+ if [ "$active_slave" = "$nic0" ]; then
+ sysfs=$ipsec0
+ elif [ "$active_slave" = "$nic1" ]; then
+ sysfs=$ipsec1
+ else
+ check_err 1 "bond_ipsec_offload invalid active_slave $active_slave"
+ fi
+
+ # The tx/rx order in sysfs may changed after failover
+ grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs"
+ check_err $? "incorrect tx count with link ${active_slave}"
+
+ log_test bond_ipsec_offload "active_slave ${active_slave}"
+}
+
+setup_env()
+{
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ defer umount /sys/kernel/debug/
+
+ fi
+
+ # setup netdevsim since dummy/veth dev doesn't have offload support
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ if ! modprobe -q netdevsim; then
+ echo "SKIP: can't load netdevsim for ipsec offload"
+ # shellcheck disable=SC2154
+ exit "$ksft_skip"
+ fi
+ defer modprobe -r netdevsim
+ fi
+
+ setup_ns ns
+ defer cleanup_ns "$ns"
+}
+
+setup_bond()
+{
+ ip -n "$ns" link add bond0 type bond mode active-backup miimon 100
+ ip -n "$ns" addr add "$srcip/24" dev bond0
+ ip -n "$ns" link set bond0 up
+
+ echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null
+ nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1)
+ nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1)
+ ip -n "$ns" link set "$nic0" master bond0
+ ip -n "$ns" link set "$nic1" master bond0
+
+ # we didn't create a peer, make sure we can Tx by adding a permanent
+ # neighbour this need to be added after enslave
+ ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55
+
+ # create offloaded SAs, both in and out
+ ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \
+ tmpl proto esp src "$srcip" dst "$dstip" spi 9 \
+ mode transport reqid 42
+
+ ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \
+ tmpl proto esp src "$dstip" dst "$srcip" spi 9 \
+ mode transport reqid 42
+
+ ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \
+ mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+ 0x3132333435363738393031323334353664636261 128 \
+ sel src "$srcip/24" dst "$dstip/24" \
+ offload dev bond0 dir out
+
+ ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \
+ mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+ 0x3132333435363738393031323334353664636261 128 \
+ sel src "$dstip/24" dst "$srcip/24" \
+ offload dev bond0 dir in
+
+ # does offload show up in ip output
+ lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir")
+ if [ "$lines" -ne 2 ] ; then
+ check_err 1 "bond_ipsec_offload SA offload missing from list output"
+ fi
+}
+
+trap defer_scopes_cleanup EXIT
+setup_env
+setup_bond
+
+# start Offload testing
+test_offload
+
+# do failover and re-test
+ip -n "$ns" link set "$active_slave" down
+slowwait 5 active_slave_changed "$active_slave"
+test_offload
+
+# make sure offload get removed from driver
+ip -n "$ns" x s flush
+ip -n "$ns" x p flush
+line0=$(grep -c "SA count=0" "$ipsec0")
+line1=$(grep -c "SA count=0" "$ipsec1")
+[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ]
+check_fail $? "bond_ipsec_offload SA not removed from driver"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
new file mode 100755
index 000000000000..a483d505c6a8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing if bond lacp per port priority works
+#
+# Switch (s_ns) Backup Switch (b_ns)
+# +-------------------------+ +-------------------------+
+# | bond0 | | bond0 |
+# | + | | + |
+# | eth0 | eth1 | | eth0 | eth1 |
+# | +---+---+ | | +---+---+ |
+# | | | | | | | |
+# +-------------------------+ +-------------------------+
+# | | | |
+# +-----------------------------------------------------+
+# | | | | | |
+# | +-------+---------+---------+-------+ |
+# | eth0 eth1 | eth2 eth3 |
+# | + |
+# | bond0 |
+# +-----------------------------------------------------+
+# Client (c_ns)
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+
+setup_links()
+{
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+ ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}"
+ ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}"
+
+ ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast ad_select actor_port_prio
+ ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast
+ ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+ lacp_rate fast
+
+ ip -n "${c_ns}" link set eth0 master bond0
+ ip -n "${c_ns}" link set eth1 master bond0
+ ip -n "${c_ns}" link set eth2 master bond0
+ ip -n "${c_ns}" link set eth3 master bond0
+ ip -n "${s_ns}" link set eth0 master bond0
+ ip -n "${s_ns}" link set eth1 master bond0
+ ip -n "${b_ns}" link set eth0 master bond0
+ ip -n "${b_ns}" link set eth1 master bond0
+
+ ip -n "${c_ns}" link set bond0 up
+ ip -n "${s_ns}" link set bond0 up
+ ip -n "${b_ns}" link set bond0 up
+}
+
+test_port_prio_setting()
+{
+ RET=0
+ ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000
+ prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \
+ ".[].linkinfo.info_slave_data.actor_port_prio")
+ [ "$prio" -ne 1000 ] && RET=1
+ ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10
+ prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \
+ ".[].linkinfo.info_slave_data.actor_port_prio")
+ [ "$prio" -ne 10 ] && RET=1
+}
+
+test_agg_reselect()
+{
+ local bond_agg_id slave_agg_id
+ local expect_slave="$1"
+ RET=0
+
+ # Trigger link state change to reselect the aggregator
+ ip -n "${c_ns}" link set eth1 down
+ sleep 0.5
+ ip -n "${c_ns}" link set eth1 up
+ sleep 0.5
+
+ bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.ad_info.aggregator")
+ slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \
+ ".[].linkinfo.info_slave_data.ad_aggregator_id")
+ # shellcheck disable=SC2034
+ [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \
+ RET=1
+}
+
+trap cleanup_all_ns EXIT
+setup_ns c_ns s_ns b_ns
+setup_links
+
+test_port_prio_setting
+log_test "bond 802.3ad" "actor_port_prio setting"
+
+test_agg_reselect eth0
+log_test "bond 802.3ad" "actor_port_prio select"
+
+# Change the actor port prio and re-test
+ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10
+ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000
+test_agg_reselect eth2
+log_test "bond 802.3ad" "actor_port_prio switch"
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
index c4711272fe45..559f300f965a 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -30,6 +30,7 @@ check_connection()
local message=${3}
RET=0
+ sleep 0.25
ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
check_err $? "ping failed"
log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index edc56e2cc606..187b478d0ddf 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -7,12 +7,14 @@ ALL_TESTS="
prio
arp_validate
num_grat_arp
+ fail_over_mac
+ vlan_over_bond
"
lib_dir=$(dirname "$0")
source ${lib_dir}/bond_topo_3d1c.sh
-c_maddr="33:33:00:00:00:10"
-g_maddr="33:33:00:00:02:54"
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
skip_prio()
{
@@ -352,8 +354,8 @@ garp_test()
exp_num=$(echo "${param}" | cut -f6 -d ' ')
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
- slowwait_for_counter $((exp_num + 5)) $exp_num \
- tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
+ slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+ "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
# check result
real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -376,6 +378,197 @@ num_grat_arp()
done
}
+check_all_mac_same()
+{
+ RET=0
+ # all slaves should have same mac address (with the first port's mac)
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+ local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+ [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_first()
+{
+ RET=0
+ # bond mac address should be same with the first added slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_active()
+{
+ RET=0
+ # bond mac address should be same with active slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "$active_slave_mac" ]; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_not_change()
+{
+ RET=0
+ # backup slave's mac address is not changed
+ if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+ | select(.linkinfo.info_slave_data.state=="BACKUP")
+ | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_inherit()
+{
+ local backup_mac
+ RET=0
+
+ # backup slaves should use mac[1] or mac[2]
+ local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+ jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+ for backup_mac in $backup_macs; do
+ if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+ RET=1
+ fi
+ done
+}
+
+check_first_slave_random_mac()
+{
+ RET=0
+ # remove the first added slave and added it back
+ ip -n "$s_ns" link set eth0 nomaster
+ ip -n "$s_ns" link set eth0 master bond0
+
+ # the first slave should use random mac address
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" = "${mac[0]}" ] && RET=1
+ log_test "bond fail_over_mac follow" "random first slave mac"
+
+ # remove the first slave, the permanent MAC address should be restored back
+ ip -n "$s_ns" link set eth0 nomaster
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+ slowwait 2 active_slave_changed $active_slave
+ ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+ # Bring down the first interface on the switch to force the bond to
+ # select another active interface instead of the first one that joined.
+ ip -n "$g_ns" link set s0 down
+
+ # fail_over_mac none
+ bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+ check_all_mac_same
+ log_test "fail_over_mac 0" "all slaves have same mac"
+ do_active_backup_failover
+ check_all_mac_same
+ log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+ # fail_over_mac active
+ bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "backup slave mac is not changed"
+ do_active_backup_failover
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+ # fail_over_mac follow
+ bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "backup slave mac inherit"
+ do_active_backup_failover
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+ check_first_slave_random_mac
+ log_test "fail_over_mac 2" "first slave mac random"
+}
+
+vlan_over_bond_arp()
+{
+ local mode="$1"
+ RET=0
+
+ bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond arp" "$mode"
+}
+
+vlan_over_bond_ns()
+{
+ local mode="$1"
+ RET=0
+
+ if skip_ns; then
+ log_test_skip "vlan_over_bond ns" "$mode"
+ return 0
+ fi
+
+ bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond ns" "$mode"
+}
+
+vlan_over_bond()
+{
+ # add vlan 3 for client
+ ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3
+ ip -n "${c_ns}" link set eth0.3 up
+ ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3
+ ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3
+
+ # Add tc rule to check the vlan pkts
+ tc -n "${c_ns}" qdisc add dev eth0.3 clsact
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \
+ handle 101 flower skip_hw arp_op request \
+ arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \
+ handle 102 flower skip_hw ip_proto icmpv6 \
+ type 135 src_ip 2001:db8::3:1 action pass
+
+ vlan_over_bond_arp "active-backup"
+ vlan_over_bond_ns "active-backup"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
new file mode 100755
index 000000000000..9c3b089813df
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test if a bond interface works with lacp_active=off.
+
+# shellcheck disable=SC2034
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+# shellcheck disable=SC2317
+check_port_state()
+{
+ local netns=$1
+ local port=$2
+ local state=$3
+
+ ip -n "${netns}" -d -j link show "$port" | \
+ jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null
+}
+
+check_pkt_count()
+{
+ RET=0
+ local ns="$1"
+ local iface="$2"
+
+ # wait 65s, one per 30s
+ slowwait_for_counter 65 2 tc_rule_handle_stats_get \
+ "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null
+}
+
+setup() {
+ setup_ns c_ns s_ns
+
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+ ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+
+ # Add tc filter to count the pkts
+ tc -n "${c_ns}" qdisc add dev eth0 clsact
+ tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
+ tc -n "${s_ns}" qdisc add dev eth1 clsact
+ tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass
+
+ ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
+ ip -n "${s_ns}" link set eth0 master bond0
+ ip -n "${s_ns}" link set eth1 master bond0
+
+ ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
+ ip -n "${c_ns}" link set eth0 master bond0
+ ip -n "${c_ns}" link set eth1 master bond0
+
+}
+
+trap cleanup_all_ns EXIT
+setup
+
+# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
+# after interface up
+ip -n "${c_ns}" link set bond0 up
+sleep 2
+
+# 1. The passive side shouldn't send LACPDU.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "802.3ad lacp_active off" "init port"
+
+ip -n "${s_ns}" link set bond0 up
+# 2. The passive side should not have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
+log_test "802.3ad lacp_active off" "port state active"
+
+# 3. The active side should have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
+log_test "802.3ad lacp_active on" "port state active"
+
+# 4. Make sure the connection is not expired.
+RET=0
+slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
+slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
+log_test "bond 802.3ad lacp_active off" "port connection"
+
+# After testing, disconnect one port on each side to check the state.
+ip -n "${s_ns}" link set eth0 nomaster
+ip -n "${s_ns}" link set eth0 up
+ip -n "${c_ns}" link set eth1 nomaster
+ip -n "${c_ns}" link set eth1 up
+# Due to Periodic Machine and Rx Machine state change, the bond will still
+# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure
+# negotiation finished
+sleep 5
+
+# 5. The active side should keep sending LACPDU.
+check_pkt_count "${s_ns}" "eth1" || RET=1
+log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
+
+# 6. The passive side shouldn't send LACPDU anymore.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index 195ef83cfbf1..167aa4a4a12a 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -39,6 +39,8 @@ g_ip4="192.0.2.254"
s_ip6="2001:db8::1"
c_ip6="2001:db8::10"
g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
gateway_create()
{
@@ -62,6 +64,7 @@ server_create()
for i in $(seq 0 1); do
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 3a1333d9a85b..23a2932301cc 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -26,6 +26,7 @@
# +-------------------------------------+
source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
setup_prepare()
{
@@ -36,6 +37,7 @@ setup_prepare()
# Add the extra device as we use 3 down links for bond0
local i=2
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
ip -n ${s_ns} link set eth${i} master bond0
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index dad4e5fda4db..991494376223 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -1,11 +1,21 @@
CONFIG_BONDING=y
CONFIG_BRIDGE=y
+CONFIG_CONFIGFS_FS=y
CONFIG_DUMMY=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
CONFIG_IPV6=y
-CONFIG_MACVLAN=y
CONFIG_IPVLAN=y
+CONFIG_MACVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
CONFIG_NET_SCH_INGRESS=y
CONFIG_NLMON=y
CONFIG_VETH=y
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
new file mode 100755
index 000000000000..477cc9379500
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
@@ -0,0 +1,361 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This selftest exercises trying to have multiple netpoll users at the same
+# time.
+#
+# This selftest has multiple smalls test inside, and the goal is to
+# get interfaces with bonding and netconsole in different orders in order
+# to catch any possible issue.
+#
+# The main test composes of four interfaces being created using netdevsim; two
+# of them are bonded to serve as the netconsole's transmit interface. The
+# remaining two interfaces are similarly bonded and assigned to a separate
+# network namespace, which acts as the receive interface, where socat monitors
+# for incoming messages.
+#
+# A netconsole message is then sent to ensure it is properly received across
+# this configuration.
+#
+# Later, run a few other tests, to make sure that bonding and netconsole
+# cannot coexist.
+#
+# The test's objective is to exercise netpoll usage when managed simultaneously
+# by multiple subsystems (netconsole and bonding).
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+modprobe bonding 2> /dev/null || true
+modprobe veth 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup_bond EXIT
+
+FORMAT="extended"
+IP_VERSION="ipv4"
+VETH0="veth"$(( RANDOM % 256))
+VETH1="veth"$((256 + RANDOM % 256))
+TXNS=""
+RXNS=""
+
+# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with
+# the bonding interfaces
+function setup_bonding_ifaces() {
+ local RAND=$(( RANDOM % 100 ))
+ BOND_TX_MAIN_IF="bond_tx_$RAND"
+ BOND_RX_MAIN_IF="bond_rx_$RAND"
+
+ # Setup TX
+ if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+ then
+ echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2
+ # only clean nsim ifaces and namespace. Nothing else has been
+ # initialized
+ cleanup_bond_nsim
+ trap - EXIT
+ exit "${ksft_skip}"
+ fi
+
+ # create_netdevsim() got the interface up, but it needs to be down
+ # before being enslaved.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # Setup RX
+ ip -n "${RXNS}" \
+ link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" down
+ ip -n "${RXNS}" \
+ link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+ ip -n "${RXNS}" \
+ link set "${BOND_RX_MAIN_IF}" up
+
+ export DSTIF="${BOND_RX_MAIN_IF}"
+ export SRCIF="${BOND_TX_MAIN_IF}"
+}
+
+# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface
+# and the other two will be bond to the RX interface (on the other namespace)
+function create_ifaces_bond() {
+ BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}")
+ BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}")
+ BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}")
+ BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}")
+}
+
+# netdevsim link BOND_TX to BOND_RX interfaces
+function link_ifaces_bond() {
+ local BOND_TX1_SLAVE_IFIDX
+ local BOND_TX2_SLAVE_IFIDX
+ local BOND_RX1_SLAVE_IFIDX
+ local BOND_RX2_SLAVE_IFIDX
+ local TXNS_FD
+ local RXNS_FD
+
+ BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex)
+ BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+ cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex)
+ BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex)
+ BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+ cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex)
+
+ exec {TXNS_FD}</var/run/netns/"${TXNS}"
+ exec {RXNS_FD}</var/run/netns/"${RXNS}"
+
+ # Linking TX ifaces to the RX ones (on the other namespace)
+ echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+ echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \
+ > "$NSIM_DEV_SYS_LINK"
+
+ exec {TXNS_FD}<&-
+ exec {RXNS_FD}<&-
+}
+
+function create_all_ifaces() {
+ # setup_ns function is coming from lib.sh
+ setup_ns TXNS RXNS
+ export NAMESPACE="${RXNS}"
+
+ # Create two interfaces for RX and two for TX
+ create_ifaces_bond
+ # Link netlink ifaces
+ link_ifaces_bond
+}
+
+# configure DSTIF and SRCIF IPs
+function configure_ifaces_ips() {
+ local IP_VERSION=${1:-"ipv4"}
+ select_ipv4_or_ipv6 "${IP_VERSION}"
+
+ ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip -n "${RXNS}" link set "${DSTIF}" up
+
+ ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip -n "${TXNS}" link set "${SRCIF}" up
+}
+
+function test_enable_netpoll_on_enslaved_iface() {
+ echo 0 > "${NETCONS_PATH}"/enabled
+
+ # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and
+ # linked to BOND_RX1_SLAVE_IF inside the namespace.
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # This should fail with the following message in dmesg:
+ # netpoll: netconsole: ethX is a slave device, aborting
+ set +e
+ enable_netcons_ns 2> /dev/null
+ set -e
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Bonding and netpoll cannot co-exists." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_delete_bond_and_reenable_target() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond
+
+ # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore
+ # netpoll can be plugged in there
+ echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+ # this should work, since the interface is not enslaved
+ enable_netcons_ns
+
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Unable to start netpoll on an unbond iface." >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Send a netconsole message to the netconsole target
+function test_send_netcons_msg_through_bond_iface() {
+ # Listen for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+}
+
+# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF.
+# Given BOND_TX_MAIN_IF was deleted, recreate it first
+function test_enslave_netcons_enabled_iface {
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # recreate the bonding iface. it got deleted by previous
+ # test (test_delete_bond_and_reenable_target)
+ ip -n "${TXNS}" \
+ link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+
+ # sub-interface need to be down before attaching to bonding
+ # This will also disable netconsole.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" down
+ ip -n "${TXNS}" \
+ link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ ip -n "${TXNS}" \
+ link set "${BOND_TX_MAIN_IF}" up
+
+ # netconsole got disabled while the interface was down
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+ then
+ echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Get netconsole enabled on a bonding interface and attach a second
+# sub-interface.
+function test_enslave_iface_to_bond {
+ # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now
+ echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name
+ enable_netcons_ns
+
+ # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is
+ # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF.
+ ip -n "${TXNS}" \
+ link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+ if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+ then
+ echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+function test_enslave_iff_disabled_netpoll_iface {
+ local ret
+
+ # Create two interfaces. veth interfaces it known to have
+ # IFF_DISABLE_NETPOLL set
+ if ! ip link add "${VETH0}" type veth peer name "${VETH1}"
+ then
+ echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2
+ exit "${ksft_skip}"
+ fi
+ set +e
+ # This will print RTNETLINK answers: Device or resource busy
+ ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null
+ ret=$?
+ set -e
+ if [[ $ret -eq 0 ]]
+ then
+ echo "test failed: veth interface could not be enslaved"
+ exit "${ksft_fail}"
+ fi
+}
+
+# Given that netconsole picks the current net namespace, we need to enable it
+# from inside the TXNS namespace
+function enable_netcons_ns() {
+ ip netns exec "${TXNS}" sh -c \
+ "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled"
+}
+
+####################
+# Tests start here #
+####################
+
+# Create regular interfaces using netdevsim and link them
+create_all_ifaces
+
+# Setup the bonding interfaces
+# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF
+# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF
+setup_bonding_ifaces
+
+# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF
+configure_ifaces_ips "${IP_VERSION}"
+
+_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}"
+enable_netcons_ns
+set_user_data
+
+# Test #1 : Create an bonding interface and attach netpoll into
+# the bonding interface. Netconsole/netpoll should work on
+# the bonding interface.
+test_send_netcons_msg_through_bond_iface
+echo "test #1: netpoll on bonding interface worked. Test passed" >&2
+
+# Test #2: Attach netpoll to an enslaved interface
+# Try to attach netpoll to an enslaved sub-interface (while still being part of
+# a bonding interface), which shouldn't be allowed
+test_enable_netpoll_on_enslaved_iface
+echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2
+
+# Test #3: Unplug the sub-interface from bond and enable netconsole
+# Detach the interface from a bonding interface and attach netpoll again
+test_delete_bond_and_reenable_target
+echo "test #3: Able to attach to an unbound interface. Test passed." >&2
+
+# Test #4: Enslave a sub-interface that had netconsole enabled
+# Try to enslave an interface that has netconsole/netpoll enabled.
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it
+test_enslave_netcons_enabled_iface
+echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2
+
+# Test #5: Enslave a sub-interface to a bonding interface
+# Enslave an interface to a bond interface that has netpoll attached
+# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of
+# it. Netconsole is currently disabled
+test_enslave_iface_to_bond
+echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2
+
+# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface
+# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is
+# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface
+# and it should fail, with netpoll: veth0 doesn't support polling
+test_enslave_iff_disabled_netpoll_iface
+echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2
+
+cleanup_bond
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index a2d8af60876d..77ccf83d87e0 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -1,6 +1,10 @@
-CONFIG_IPV6=y
-CONFIG_NETDEVSIM=m
CONFIG_CONFIGFS_FS=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_INET_PSP=y
+CONFIG_IPV6=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
index cd6817fe5be6..7994bd0e5c44 100644
--- a/tools/testing/selftests/drivers/net/dsa/Makefile
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_locked_port.sh \
+TEST_PROGS := \
+ bridge_locked_port.sh \
bridge_mdb.sh \
bridge_mld.sh \
bridge_vlan_aware.sh \
@@ -9,11 +10,13 @@ TEST_PROGS = bridge_locked_port.sh \
local_termination.sh \
no_forwarding.sh \
tc_actions.sh \
- test_bridge_fdb_stress.sh
+ test_bridge_fdb_stress.sh \
+# end of TEST_PROGS
TEST_FILES := \
+ forwarding.config \
run_net_forwarding_test.sh \
- forwarding.config
+# end of TEST_FILES
TEST_INCLUDES := \
../../../net/forwarding/bridge_locked_port.sh \
@@ -27,6 +30,7 @@ TEST_INCLUDES := \
../../../net/forwarding/no_forwarding.sh \
../../../net/forwarding/tc_actions.sh \
../../../net/forwarding/tc_common.sh \
- ../../../net/lib.sh
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index b2184847e388..e894037d2e3e 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -57,7 +57,8 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
+#include "../../net/lib/ksft.h"
#define DPORT 8000
#define SPORT 1500
@@ -93,6 +94,7 @@ static bool tx_socket = true;
static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
+static bool ipip;
static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
@@ -114,7 +116,9 @@ static void setup_sock_filter(int fd)
int ipproto_off, opt_ipproto_off;
int next_off;
- if (proto == PF_INET)
+ if (ipip)
+ next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol);
+ else if (proto == PF_INET)
next_off = offsetof(struct iphdr, protocol);
else
next_off = offsetof(struct ipv6hdr, nexthdr);
@@ -244,7 +248,7 @@ static void fill_datalinklayer(void *buf)
eth->h_proto = ethhdr_proto;
}
-static void fill_networklayer(void *buf, int payload_len)
+static void fill_networklayer(void *buf, int payload_len, int protocol)
{
struct ipv6hdr *ip6h = buf;
struct iphdr *iph = buf;
@@ -254,7 +258,7 @@ static void fill_networklayer(void *buf, int payload_len)
ip6h->version = 6;
ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
- ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->nexthdr = protocol;
ip6h->hop_limit = 8;
if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
error(1, errno, "inet_pton source ip6");
@@ -266,7 +270,7 @@ static void fill_networklayer(void *buf, int payload_len)
iph->version = 4;
iph->ihl = 5;
iph->ttl = 8;
- iph->protocol = IPPROTO_TCP;
+ iph->protocol = protocol;
iph->tot_len = htons(sizeof(struct tcphdr) +
payload_len + sizeof(struct iphdr));
iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
@@ -313,9 +317,19 @@ static void create_packet(void *buf, int seq_offset, int ack_offset,
{
memset(buf, 0, total_hdr_len);
memset(buf + total_hdr_len, 'a', payload_len);
+
fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
payload_len, fin);
- fill_networklayer(buf + ETH_HLEN, payload_len);
+
+ if (ipip) {
+ fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr),
+ IPPROTO_IPIP);
+ fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr),
+ payload_len, IPPROTO_TCP);
+ } else {
+ fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP);
+ }
+
fill_datalinklayer(buf);
}
@@ -416,6 +430,13 @@ static void recompute_packet(char *buf, char *no_ext, int extlen)
iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
iph->check = 0;
iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+
+ if (ipip) {
+ iph += 1;
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ }
} else {
ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
}
@@ -670,7 +691,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(9);
break;
- case 3: /* DF=0, Fixed - should not coalesce */
+ case 3: /* DF=0, Fixed - should coalesce */
iph1->frag_off &= ~htons(IP_DF);
iph1->id = htons(8);
@@ -734,11 +755,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1,
static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
}
@@ -777,7 +798,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr)
*/
memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
- fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
fill_datalinklayer(buf);
iph->frag_off = htons(0x6000); // DF = 1, MF = 1
@@ -969,6 +990,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
static void gro_sender(void)
{
+ const int fin_delay_us = 100 * 1000;
static char fin_pkt[MAX_HDR_LEN];
struct sockaddr_ll daddr = {};
int txfd = -1;
@@ -1012,15 +1034,22 @@ static void gro_sender(void)
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (strcmp(testname, "tcp") == 0) {
send_changed_checksum(txfd, &daddr);
+ /* Adding sleep before sending FIN so that it is not
+ * received prior to other packets.
+ */
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_changed_seq(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_changed_ts(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
send_diff_opt(txfd, &daddr);
+ usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (strcmp(testname, "ip") == 0) {
send_changed_ECN(txfd, &daddr);
@@ -1071,7 +1100,7 @@ static void gro_sender(void)
* and min ipv6hdr size. Like MAX_HDR_SIZE,
* MAX_PAYLOAD is defined with the larger header of the two.
*/
- int offset = proto == PF_INET ? 20 : 0;
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
send_large(txfd, &daddr, remainder);
@@ -1099,6 +1128,8 @@ static void gro_receiver(void)
set_timeout(rxfd);
bind_packetsocket(rxfd);
+ ksft_ready();
+
memset(correct_payload, 0, sizeof(correct_payload));
if (strcmp(testname, "data") == 0) {
@@ -1188,10 +1219,9 @@ static void gro_receiver(void)
correct_payload[0] = PAYLOAD_LEN * 2;
check_recv_pkts(rxfd, correct_payload, 1);
- printf("DF=0, Fixed - should not coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
+ printf("DF=0, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
correct_payload[0] = PAYLOAD_LEN * 2;
@@ -1222,7 +1252,7 @@ static void gro_receiver(void)
check_recv_pkts(rxfd, correct_payload, 2);
}
} else if (strcmp(testname, "large") == 0) {
- int offset = proto == PF_INET ? 20 : 0;
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
correct_payload[0] = (MAX_PAYLOAD + offset);
@@ -1251,6 +1281,7 @@ static void parse_args(int argc, char **argv)
{ "iface", required_argument, NULL, 'i' },
{ "ipv4", no_argument, NULL, '4' },
{ "ipv6", no_argument, NULL, '6' },
+ { "ipip", no_argument, NULL, 'e' },
{ "rx", no_argument, NULL, 'r' },
{ "saddr", required_argument, NULL, 's' },
{ "smac", required_argument, NULL, 'S' },
@@ -1260,7 +1291,7 @@ static void parse_args(int argc, char **argv)
};
int c;
- while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
+ while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) {
switch (c) {
case '4':
proto = PF_INET;
@@ -1270,6 +1301,11 @@ static void parse_args(int argc, char **argv)
proto = PF_INET6;
ethhdr_proto = htons(ETH_P_IPV6);
break;
+ case 'e':
+ ipip = true;
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
case 'd':
addr4_dst = addr6_dst = optarg;
break;
@@ -1305,7 +1341,10 @@ int main(int argc, char **argv)
{
parse_args(argc, argv);
- if (proto == PF_INET) {
+ if (ipip) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET) {
tcp_offset = ETH_HLEN + sizeof(struct iphdr);
total_hdr_len = tcp_offset + sizeof(struct tcphdr);
} else if (proto == PF_INET6) {
@@ -1318,11 +1357,13 @@ int main(int argc, char **argv)
read_MAC(src_mac, smac);
read_MAC(dst_mac, dmac);
- if (tx_socket)
+ if (tx_socket) {
gro_sender();
- else
+ } else {
+ /* Only the receiver exit status determines test success. */
gro_receiver();
+ fprintf(stderr, "Gro::%s test passed.\n", testname);
+ }
- fprintf(stderr, "Gro::%s test passed.\n", testname);
return 0;
}
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
new file mode 100755
index 000000000000..ba83713bf7b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+GRO (Generic Receive Offload) conformance tests.
+
+Validates that GRO coalescing works correctly by running the gro
+binary in different configurations and checking for correct packet
+coalescing behavior.
+
+Test cases:
+ - data: Data packets with same size/headers and correct seq numbers coalesce
+ - ack: Pure ACK packets do not coalesce
+ - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
+ - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
+ - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
+ - large: Packets larger than GRO_MAX_SIZE don't coalesce
+"""
+
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, KsftXfailEx
+from lib.py import cmd, defer, bkg, ip
+from lib.py import ksft_variants
+
+
+def _resolve_dmac(cfg, ipver):
+ """
+ Find the destination MAC address remote host should use to send packets
+ towards the local host. It may be a router / gateway address.
+ """
+
+ attr = "dmac" + ipver
+ # Cache the response across test cases
+ if hasattr(cfg, attr):
+ return getattr(cfg, attr)
+
+ route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
+ json=True, host=cfg.remote)[0]
+ gw = route.get("gateway")
+ # Local L2 segment, address directly
+ if not gw:
+ setattr(cfg, attr, cfg.dev['address'])
+ return getattr(cfg, attr)
+
+ # ping to make sure neighbor is resolved,
+ # bind to an interface, for v6 the GW is likely link local
+ cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
+
+ neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
+ json=True, host=cfg.remote)[0]
+ setattr(cfg, attr, neigh['lladdr'])
+ return getattr(cfg, attr)
+
+
+def _write_defer_restore(cfg, path, val, defer_undo=False):
+ with open(path, "r", encoding="utf-8") as fp:
+ orig_val = fp.read().strip()
+ if str(val) == orig_val:
+ return
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.write(val)
+ if defer_undo:
+ defer(_write_defer_restore, cfg, path, orig_val)
+
+
+def _set_mtu_restore(dev, mtu, host):
+ if dev['mtu'] < mtu:
+ ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
+ defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
+
+
+def _setup(cfg, test_name):
+ """ Setup hardware loopback mode for GRO testing. """
+
+ if not hasattr(cfg, "bin_remote"):
+ cfg.bin_local = cfg.test_dir / "gro"
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ # "large" test needs at least 4k MTU
+ if test_name == "large":
+ _set_mtu_restore(cfg.dev, 4096, None)
+ _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
+
+ flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+ irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+ _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+ _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+ try:
+ # Disable TSO for local tests
+ cfg.require_nsim() # will raise KsftXfailEx if not running on nsim
+
+ cmd(f"ethtool -K {cfg.ifname} gro on tso off")
+ cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+ except KsftXfailEx:
+ pass
+
+def _gro_variants():
+ """Generator that yields all combinations of protocol and test types."""
+
+ for protocol in ["ipv4", "ipv6", "ipip"]:
+ for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
+ yield protocol, test_name
+
+
+@ksft_variants(_gro_variants())
+def test(cfg, protocol, test_name):
+ """Run a single GRO test with retries."""
+
+ ipver = "6" if protocol[-1] == "6" else "4"
+ cfg.require_ipver(ipver)
+
+ _setup(cfg, test_name)
+
+ base_cmd_args = [
+ f"--{protocol}",
+ f"--dmac {_resolve_dmac(cfg, ipver)}",
+ f"--smac {cfg.remote_dev['address']}",
+ f"--daddr {cfg.addr_v[ipver]}",
+ f"--saddr {cfg.remote_addr_v[ipver]}",
+ f"--test {test_name}",
+ "--verbose"
+ ]
+ base_args = " ".join(base_cmd_args)
+
+ # Each test is run 6 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ max_retries = 6
+ for attempt in range(max_retries):
+ rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
+ tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"
+
+ fail_now = attempt >= max_retries - 1
+
+ with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
+ fail=fail_now) as rx_proc:
+ cmd(tx_cmd, host=cfg.remote)
+
+ if rx_proc.ret == 0:
+ return
+
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+ if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+ ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
+ return
+
+ ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 394971b25c0b..c4fe049e9baa 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -2,17 +2,55 @@
# SPDX-License-Identifier: GPL-2.0
import errno
+import os
+import random
+from typing import Union
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
-from lib.py import EthtoolFamily, NlError
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
-def get_hds(cfg, netnl) -> None:
+
+def _get_hds_mode(cfg, netnl) -> str:
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
raise KsftSkipEx('ring-get not supported by device')
if 'tcp-data-split' not in rings:
raise KsftSkipEx('tcp-data-split not supported by device')
+ return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ ip("link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, prog))
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+ """
+ Helper for performing a hopefully unimportant IOCTL SET.
+ IOCTL does not support HDS, so it should not affect the HDS config.
+ """
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+
+ if 'tx' not in rings:
+ raise KsftSkipEx('setting Tx ring size not supported')
+
+ try:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+ except CmdExitFailure as e:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+ defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+ _get_hds_mode(cfg, netnl)
+
def get_hds_thresh(cfg, netnl) -> None:
try:
@@ -22,7 +60,39 @@ def get_hds_thresh(cfg, netnl) -> None:
if 'hds-thresh' not in rings:
raise KsftSkipEx('hds-thresh not supported by device')
+
+def _hds_reset(cfg, netnl, rings) -> None:
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+
+ arg = {'header': {'dev-index': cfg.ifindex}}
+ if cur.get('tcp-data-split') != rings.get('tcp-data-split'):
+ # Try to reset to "unknown" first, we don't know if the setting
+ # was the default or user chose it. Default seems more likely.
+ arg['tcp-data-split'] = "unknown"
+ netnl.rings_set(arg)
+ cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if cur['tcp-data-split'] == rings['tcp-data-split']:
+ del arg['tcp-data-split']
+ else:
+ # Try the explicit setting
+ arg['tcp-data-split'] = rings['tcp-data-split']
+ if cur.get('hds-thresh') != rings.get('hds-thresh'):
+ arg['hds-thresh'] = rings['hds-thresh']
+ if len(arg) > 1:
+ netnl.rings_set(arg)
+
+
+def _defer_reset_hds(cfg, netnl) -> Union[dict, None]:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' in rings or 'tcp-data-split' in rings:
+ defer(_hds_reset, cfg, netnl, rings)
+ except NlError as e:
+ pass
+
+
def set_hds_enable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
except NlError as e:
@@ -40,6 +110,7 @@ def set_hds_enable(cfg, netnl) -> None:
ksft_eq('enabled', rings['tcp-data-split'])
def set_hds_disable(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
except NlError as e:
@@ -57,6 +128,7 @@ def set_hds_disable(cfg, netnl) -> None:
ksft_eq('disabled', rings['tcp-data-split'])
def set_hds_thresh_zero(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
except NlError as e:
@@ -73,7 +145,39 @@ def set_hds_thresh_zero(cfg, netnl) -> None:
ksft_eq(0, rings['hds-thresh'])
+def set_hds_thresh_random(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+
+ if rings['hds-thresh-max'] < 2:
+ raise KsftSkipEx('hds-thresh-max is too small')
+ elif rings['hds-thresh-max'] == 2:
+ hds_thresh = 1
+ else:
+ while True:
+ hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1)
+ if hds_thresh != rings['hds-thresh']:
+ break
+
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(hds_thresh, rings['hds-thresh'])
+
def set_hds_thresh_max(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
@@ -91,6 +195,7 @@ def set_hds_thresh_max(cfg, netnl) -> None:
ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
def set_hds_thresh_gt(cfg, netnl) -> None:
+ _defer_reset_hds(cfg, netnl)
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
@@ -104,15 +209,119 @@ def set_hds_thresh_gt(cfg, netnl) -> None:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ _defer_reset_hds(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+ mode1 = _get_hds_mode(cfg, netnl)
+ _ioctl_ringparam_modify(cfg, netnl)
+ mode2 = _get_hds_mode(cfg, netnl)
+
+ ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+ """
+ Like set_xdp(), but we perturb the settings via the legacy ioctl.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _ioctl_ringparam_modify(cfg, netnl)
+
+ _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=3) as cfg:
ksft_run([get_hds,
get_hds_thresh,
set_hds_disable,
set_hds_enable,
+ set_hds_thresh_random,
set_hds_thresh_zero,
set_hds_thresh_max,
- set_hds_thresh_gt],
+ set_hds_thresh_gt,
+ set_xdp,
+ enabled_set_xdp,
+ ioctl,
+ ioctl_set_xdp,
+ ioctl_enabled_set_xdp],
args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
index e9fe6ede681a..46540468a775 100644
--- a/tools/testing/selftests/drivers/net/hw/.gitignore
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -1 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iou-zcrx
ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 21ba64ce1e34..9c163ba6feee 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -1,8 +1,26 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+ echo -e '#include <liburing.h>\n' \
+ 'void *func = (void *)io_uring_register_ifq;\n' \
+ 'int main() {return 0;}' | \
+ $(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+ $(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
+
TEST_PROGS = \
csum.py \
devlink_port_split.py \
+ devlink_rate_tc_bw.py \
devmem.py \
ethtool.sh \
ethtool_extended_state.sh \
@@ -10,11 +28,18 @@ TEST_PROGS = \
ethtool_rmon.sh \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
+ iou-zcrx.py \
+ irq.py \
loopback.sh \
- nic_link_layer.py \
- nic_performance.py \
+ nic_timestamp.py \
pp_alloc_fail.py \
+ rss_api.py \
rss_ctx.py \
+ rss_flow_label.py \
+ rss_input_xfrm.py \
+ toeplitz.py \
+ tso.py \
+ xsk_reconfig.py \
#
TEST_FILES := \
@@ -24,17 +49,31 @@ TEST_FILES := \
TEST_INCLUDES := \
$(wildcard lib/py/*.py ../lib/py/*.py) \
../../../net/lib.sh \
- ../../../net/forwarding/lib.sh \
../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/lib.sh \
../../../net/forwarding/tc_common.sh \
#
# YNL files, must be before "include ..lib.mk"
-YNL_GEN_FILES := ncdevmem
+YNL_GEN_FILES := \
+ ncdevmem \
+ toeplitz \
+# end of YNL_GEN_FILES
TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
include ../../../lib.mk
# YNL build
-YNL_GENS := ethtool netdev
+YNL_GENS := \
+ ethtool \
+ netdev \
+# end of YNL_GENS
+
include ../../../net/ynl.mk
+
+include ../../../net/bpf.mk
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+$(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..2307aa001be1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,11 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_IO_URING=y
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_UDMABUF=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index cb40497faee4..3e3a89a34afe 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -9,41 +9,35 @@ from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen
-def test_receive(cfg, ipv4=False, extra_args=None):
+def test_receive(cfg, ipver="6", extra_args=None):
"""Test local nic checksum receive. Remote host sends crafted packets."""
if not cfg.have_rx_csum:
raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}"
- else:
- ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}"
+ ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
- tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
with bkg(rx_cmd, exit_wait=True):
wait_port_listen(34000, proto="udp")
cmd(tx_cmd, host=cfg.remote)
-def test_transmit(cfg, ipv4=False, extra_args=None):
+def test_transmit(cfg, ipver="6", extra_args=None):
"""Test local nic checksum transmit. Remote host verifies packets."""
if (not cfg.have_tx_csum_generic and
- not (cfg.have_tx_csum_ipv4 and ipv4) and
- not (cfg.have_tx_csum_ipv6 and not ipv4)):
+ not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+ not (cfg.have_tx_csum_ipv6 and ipver == "6")):
raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
- if ipv4:
- ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}"
- else:
- ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}"
+ ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
# Cannot randomize input when calculating zero checksum
if extra_args != "-U -Z":
extra_args += " -r 1"
- rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
@@ -51,26 +45,20 @@ def test_transmit(cfg, ipv4=False, extra_args=None):
cmd(tx_cmd)
-def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""):
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
"""Construct specific tests from the common template.
Most tests follow the same basic pattern, differing only in
Direction of the test and optional flags passed to csum."""
def f(cfg):
- if ipv4:
- cfg.require_v4()
- else:
- cfg.require_v6()
+ cfg.require_ipver(ipver)
if tx:
- test_transmit(cfg, ipv4, extra_args)
+ test_transmit(cfg, ipver, extra_args)
else:
- test_receive(cfg, ipv4, extra_args)
+ test_receive(cfg, ipver, extra_args)
- if ipv4:
- f.__name__ = "ipv4_" + name
- else:
- f.__name__ = "ipv6_" + name
+ f.__name__ = f"ipv{ipver}_" + name
return f
@@ -100,19 +88,19 @@ def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
check_nic_features(cfg)
- cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum")
+ cfg.bin_local = cfg.net_lib_dir / "csum"
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
cases = []
- for ipv4 in [True, False]:
- cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t"))
- cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E"))
+ for ipver in ["4", "6"]:
+ cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
- cases.append(test_builder("rx_udp", cfg, ipv4, False, ""))
- cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E"))
+ cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
- cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U"))
- cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z"))
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..4e4faa9275bb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 20/80 split between TC3 and TC4
+ - This test should fail if bandwidth matches the 20/80 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 20/80
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 20/80 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 20/80
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+
+class BandwidthValidator:
+ """
+ Validates total bandwidth and individual shares with tolerance
+ relative to the overall total.
+ """
+
+ def __init__(self, shares):
+ self.tolerance_percent = 12
+ self.expected_total = sum(shares.values())
+ self.bounds = {}
+
+ for name, exp in shares.items():
+ self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (self.expected_total * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (self.expected_total * self.tolerance_percent / 100)
+
+ def bound(self, values):
+ """
+ Return True if all given values fall within tolerance.
+ """
+ for name, value in values.items():
+ low, high = self.bounds[name]
+ if not low <= value <= high:
+ return False
+ return True
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_vlans(cfg):
+ """
+ Sets up VLAN interfaces on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.2", "198.51.100.10"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration and devlink rate setup.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_vlans(cfg)
+
+
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
+ """
+ Synchronizes with peers and runs an iperf3-based bandwidth measurement
+ between the given endpoints. Returns average Gbps.
+ """
+ runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+ try:
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+ return bw_gbps
+
+
+def run_bandwidth_test(cfg):
+ """
+ Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
+ """
+ def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+ results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.1", "198.51.100.2", 3),
+ ("198.51.100.9", "198.51.100.10", 4),
+ ]
+
+ results = {}
+ threads = []
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_measure_bandwidth_thread,
+ args=(local_ip, remote_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
+
+ return results
+
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound({"total": total}):
+ return
+
+ low, high = validator.bounds["total"]
+
+ if total < low:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{low:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{high:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total:.1f} G)"
+ )
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel bandwidth measurements for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test(cfg)
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
+
+ return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+ "tc4": bw_data['tc4_percentage']})
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 20/80 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+ raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+
+ cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+ cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 1223f0f5c10c..45c2d49d55b6 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import ksft_eq, KsftSkipEx
from lib.py import NetDrvEpEnv
@@ -10,8 +11,7 @@ from lib.py import ksft_disruptive
def require_devmem(cfg):
if not hasattr(cfg, "_devmem_probed"):
- port = rand_port()
- probe_command = f"./ncdevmem -f {cfg.ifname}"
+ probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
cfg._devmem_probed = True
@@ -21,22 +21,54 @@ def require_devmem(cfg):
@ksft_disruptive
def check_rx(cfg) -> None:
- cfg.require_v6()
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}"
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}"
+ listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
- with bkg(listen_cmd) as socat:
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True)
+ cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+ head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run([check_rx],
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run([check_rx, check_tx, check_tx_chunks],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
new file mode 100644
index 000000000000..62456df947bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <liburing.h>
+
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
+#define SEND_SIZE (512 * 4096)
+#define min(a, b) \
+ ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+#define min_t(t, a, b) \
+ ({ \
+ t _ta = (a); \
+ t _tb = (b); \
+ min(_ta, _tb); \
+ })
+
+#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1))
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static int cfg_payload_len;
+static const char *cfg_ifname;
+static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
+static struct sockaddr_in6 cfg_addr;
+
+static char *payload;
+static void *area_ptr;
+static void *ring_ptr;
+static size_t ring_size;
+static struct io_uring_zcrx_rq rq_ring;
+static unsigned long area_token;
+static int connfd;
+static bool stop;
+static size_t received;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static inline size_t get_refill_ring_size(unsigned int rq_entries)
+{
+ size_t size;
+
+ ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
+ /* add space for the header (head/tail/etc.) */
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
+}
+
+static void setup_zcrx(struct io_uring *ring)
+{
+ unsigned int ifindex;
+ unsigned int rq_entries = 4096;
+ int ret;
+
+ ifindex = if_nametoindex(cfg_ifname);
+ if (!ifindex)
+ error(1, 0, "bad interface name: %s", cfg_ifname);
+
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+
+ ring_size = get_refill_ring_size(rq_entries);
+ ring_ptr = mmap(NULL,
+ ring_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+
+ struct io_uring_region_desc region_reg = {
+ .size = ring_size,
+ .user_addr = (__u64)(unsigned long)ring_ptr,
+ .flags = IORING_MEM_REGION_TYPE_USER,
+ };
+
+ struct io_uring_zcrx_area_reg area_reg = {
+ .addr = (__u64)(unsigned long)area_ptr,
+ .len = AREA_SIZE,
+ .flags = 0,
+ };
+
+ struct io_uring_zcrx_ifq_reg reg = {
+ .if_idx = ifindex,
+ .if_rxq = cfg_queue_id,
+ .rq_entries = rq_entries,
+ .area_ptr = (__u64)(unsigned long)&area_reg,
+ .region_ptr = (__u64)(unsigned long)&region_reg,
+ };
+
+ ret = io_uring_register_ifq(ring, &reg);
+ if (ret)
+ error(1, 0, "io_uring_register_ifq(): %d", ret);
+
+ rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
+ rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
+ rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes);
+ rq_ring.rq_tail = 0;
+ rq_ring.ring_entries = reg.rq_entries;
+
+ area_token = area_reg.rq_area_token;
+}
+
+static void add_accept(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0);
+ sqe->user_data = 1;
+}
+
+static void add_recvzc(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ if (cqe->res < 0)
+ error(1, 0, "accept()");
+ if (connfd)
+ error(1, 0, "Unexpected second connection");
+
+ connfd = cqe->res;
+ if (cfg_oneshot)
+ add_recvzc_oneshot(ring, connfd, page_size);
+ else
+ add_recvzc(ring, connfd);
+}
+
+static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ unsigned rq_mask = rq_ring.ring_entries - 1;
+ struct io_uring_zcrx_cqe *rcqe;
+ struct io_uring_zcrx_rqe *rqe;
+ struct io_uring_sqe *sqe;
+ uint64_t mask;
+ char *data;
+ ssize_t n;
+ int i;
+
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
+ stop = true;
+ return;
+ }
+
+ if (cqe->res < 0)
+ error(1, 0, "recvzc(): %d", cqe->res);
+
+ if (cfg_oneshot) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+ add_recvzc_oneshot(ring, connfd, page_size);
+ cfg_oneshot_recvs--;
+ }
+ } else if (!(cqe->flags & IORING_CQE_F_MORE)) {
+ add_recvzc(ring, connfd);
+ }
+
+ rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
+
+ n = cqe->res;
+ mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1;
+ data = (char *)area_ptr + (rcqe->off & mask);
+
+ for (i = 0; i < n; i++) {
+ if (*(data + i) != payload[(received + i)])
+ error(1, 0, "payload mismatch at %d", i);
+ }
+ received += n;
+
+ rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+ rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+ rqe->len = cqe->res;
+ io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+}
+
+static void server_loop(struct io_uring *ring)
+{
+ struct io_uring_cqe *cqe;
+ unsigned int count = 0;
+ unsigned int head;
+ int i, ret;
+
+ io_uring_submit_and_wait(ring, 1);
+
+ io_uring_for_each_cqe(ring, head, cqe) {
+ if (cqe->user_data == 1)
+ process_accept(ring, cqe);
+ else if (cqe->user_data == 2)
+ process_recvzc(ring, cqe);
+ else
+ error(1, 0, "unknown cqe");
+ count++;
+ }
+ io_uring_cq_advance(ring, count);
+}
+
+static void run_server(void)
+{
+ unsigned int flags = 0;
+ struct io_uring ring;
+ int fd, enable, ret;
+ uint64_t tstop;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ enable = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+ if (ret < 0)
+ error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+ ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, 0, "bind()");
+
+ if (listen(fd, 1024) < 0)
+ error(1, 0, "listen()");
+
+ flags |= IORING_SETUP_COOP_TASKRUN;
+ flags |= IORING_SETUP_SINGLE_ISSUER;
+ flags |= IORING_SETUP_DEFER_TASKRUN;
+ flags |= IORING_SETUP_SUBMIT_ALL;
+ flags |= IORING_SETUP_CQE32;
+
+ io_uring_queue_init(512, &ring, flags);
+
+ setup_zcrx(&ring);
+
+ add_accept(&ring, fd);
+
+ tstop = gettimeofday_ms() + 5000;
+ while (!stop && gettimeofday_ms() < tstop)
+ server_loop(&ring);
+
+ if (!stop)
+ error(1, 0, "test failed\n");
+}
+
+static void run_client(void)
+{
+ ssize_t to_send = cfg_send_size;
+ ssize_t sent = 0;
+ ssize_t chunk, res;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)))
+ error(1, 0, "connect()");
+
+ while (to_send) {
+ void *src = &payload[sent];
+
+ chunk = min_t(ssize_t, cfg_payload_len, to_send);
+ res = send(fd, src, chunk, 0);
+ if (res < 0)
+ error(1, 0, "send(): %zd", sent);
+ sent += res;
+ to_send -= res;
+ }
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> "
+ "-l<payload_size> -i<ifname> -q<rxq_id>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = SEND_SIZE -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'q':
+ cfg_queue_id = strtoul(optarg, NULL, 0);
+ break;
+ case 'o': {
+ cfg_oneshot = true;
+ cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+ break;
+ }
+ case 'z':
+ cfg_send_size = strtoul(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Receiver cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "receiver address parse error: %s", addr);
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-l: payload exceeds max (%d)", max_payload_len);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+ int i;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
+ parse_opts(argc, argv);
+
+ for (i = 0; i < SEND_SIZE; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
new file mode 100755
index 000000000000..712c806508b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+from os import path
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+
+
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
+ return (output['rx'], output['hds-thresh'])
+
+
+def _get_combined_channels(cfg):
+ output = ethtool(f"-l {cfg.ifname}").stdout
+ values = re.findall(r'Combined:\s+(\d+)', output)
+ return int(values[1])
+
+
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def test_zcrx(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
new file mode 100755
index 000000000000..0699d6a8b4e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_ge, ksft_eq
+from lib.py import KsftSkipEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+
+def read_affinity(irq) -> str:
+ with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp:
+ return fp.read().lstrip("0,").strip()
+
+
+def write_affinity(irq, what) -> str:
+ if what != read_affinity(irq):
+ with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp:
+ fp.write(what)
+
+
+def check_irqs_reported(cfg) -> None:
+ """ Check that device reports IRQs for NAPI instances """
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ irqs = sum(['irq' in x for x in napis])
+
+ ksft_ge(irqs, 1)
+ ksft_eq(irqs, len(napis))
+
+
+def _check_reconfig(cfg, reconfig_cb) -> None:
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ for n in reversed(napis):
+ if 'irq' in n:
+ break
+ else:
+ raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}")
+
+ old = read_affinity(n['irq'])
+ # pick an affinity that's not the current one
+ new = "3" if old != "3" else "5"
+ write_affinity(n['irq'], new)
+ defer(write_affinity, n['irq'], old)
+
+ reconfig_cb(cfg)
+
+ ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig")
+
+
+def check_reconfig_queues(cfg) -> None:
+ def reconfig(cfg) -> None:
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} 1")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ _check_reconfig(cfg, reconfig)
+
+
+def check_reconfig_xdp(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip("link set dev %s down" % cfg.ifname)
+ ip("link set dev %s up" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ ksft_run([check_irqs_reported, check_reconfig_queues,
+ check_reconfig_xdp, check_down],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 399789a9676a..766bfc4ad842 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -1,5 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
import sys
from pathlib import Path
@@ -7,11 +15,38 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
- from net.lib.py import *
- from drivers.net.lib.py import *
- from .linkconfig import LinkConfig
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily, PSPFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup, ksft_variants, KsftNamedVariant
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+ from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none",
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
- ksft_pr("Failed importing `net` library from kernel sources")
- ksft_pr(str(e))
- ktap_result(True, comment="SKIP")
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
deleted file mode 100644
index 79fde603cbbc..000000000000
--- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-from lib.py import cmd, ethtool, ip
-from lib.py import ksft_pr, ksft_eq, KsftSkipEx
-from typing import Optional
-import re
-import time
-import json
-
-#The LinkConfig class is implemented to handle the link layer configurations.
-#Required minimum ethtool version is 6.10
-
-class LinkConfig:
- """Class for handling the link layer configurations"""
- def __init__(self, cfg: object) -> None:
- self.cfg = cfg
- self.partner_netif = self.get_partner_netif_name()
-
- """Get the initial link configuration of local interface"""
- self.common_link_modes = self.get_common_link_modes()
-
- def get_partner_netif_name(self) -> Optional[str]:
- partner_netif = None
- try:
- if not self.verify_link_up():
- return None
- """Get partner interface name"""
- partner_json_output = ip("addr show", json=True, host=self.cfg.remote)
- for interface in partner_json_output:
- for addr in interface.get('addr_info', []):
- if addr.get('local') == self.cfg.remote_addr:
- partner_netif = interface['ifname']
- ksft_pr(f"Partner Interface name: {partner_netif}")
- if partner_netif is None:
- ksft_pr("Unable to get the partner interface name")
- except Exception as e:
- print(f"Unexpected error occurred while getting partner interface name: {e}")
- self.partner_netif = partner_netif
- return partner_netif
-
- def verify_link_up(self) -> bool:
- """Verify whether the local interface link is up"""
- with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp:
- link_state = fp.read().strip()
-
- if link_state == "down":
- ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN")
- return False
- else:
- return True
-
- def reset_interface(self, local: bool = True, remote: bool = True) -> bool:
- ksft_pr("Resetting interfaces in local and remote")
- if remote:
- if self.verify_link_up():
- if self.partner_netif is not None:
- ifname = self.partner_netif
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting remote: {e}")
- else:
- ksft_pr("Partner interface not available")
- if local:
- ifname = self.cfg.ifname
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting local: {e}")
- time.sleep(10)
- if self.verify_link_up() and self.get_ethtool_field("link-detected"):
- ksft_pr("Local and remote interfaces reset to original state")
- return True
- else:
- ksft_pr("Error occurred after resetting interfaces. Link is DOWN.")
- return False
-
- def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool:
- """Set the speed and duplex state for the interface"""
- autoneg_state = "on" if autoneg is True else "off"
- process = None
- try:
- process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}")
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}")
- if process is None or process.ret != 0:
- return False
- else:
- ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}")
- return True
-
- def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool:
- if not self.verify_link_up():
- return False
- """Verifying the speed and duplex state for the interface"""
- with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp:
- actual_speed = fp.read().strip()
- with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp:
- actual_duplex = fp.read().strip()
-
- ksft_eq(actual_speed, expected_speed)
- ksft_eq(actual_duplex, expected_duplex)
- return True
-
- def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool:
- common_link_modes = self.common_link_modes
- speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes)
- speed = speeds[0]
- duplex = duplex_modes[0]
- if not speed or not duplex:
- ksft_pr("No speed or duplex modes found")
- return False
-
- speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else ""
- if remote:
- if not self.verify_link_up():
- return False
- """Set the autonegotiation state for the partner"""
- command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}"
- partner_autoneg_change = None
- """Set autonegotiation state for interface in remote pc"""
- try:
- partner_autoneg_change = ethtool(command, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}")
- if partner_autoneg_change is None or partner_autoneg_change.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.partner_netif}")
- else:
- """Set the autonegotiation state for the interface"""
- try:
- process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}")
- if process.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}")
- return False
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}")
- return True
-
- def check_autoneg_supported(self, remote: bool = False) -> bool:
- if not remote:
- local_autoneg = self.get_ethtool_field("supports-auto-negotiation")
- if local_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}")
- """Return autoneg status of the local interface"""
- return local_autoneg
- else:
- if not self.verify_link_up():
- raise KsftSkipEx("Link is DOWN")
- """Check remote auto-negotiation support status"""
- partner_autoneg = False
- if self.partner_netif is not None:
- partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True)
- if partner_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}")
- return partner_autoneg
-
- def get_common_link_modes(self) -> set[str]:
- common_link_modes = []
- """Populate common link modes"""
- link_modes = self.get_ethtool_field("supported-link-modes")
- partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes")
- if link_modes is None:
- raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}")
- if partner_link_modes is None:
- raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}")
- common_link_modes = set(link_modes) and set(partner_link_modes)
- return common_link_modes
-
- def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]:
- speed = []
- duplex = []
- """Check the link modes"""
- for data in link_modes:
- parts = data.split('/')
- speed_value = re.match(r'\d+', parts[0])
- if speed_value:
- speed.append(speed_value.group())
- else:
- ksft_pr(f"No speed value found for interface {self.ifname}")
- return None, None
- duplex.append(parts[1].lower())
- return speed, duplex
-
- def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]:
- process = None
- if not remote:
- """Get the ethtool field value for the local interface"""
- try:
- process = ethtool(self.cfg.ifname, json=True)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}")
- return None
- else:
- if not self.verify_link_up():
- return None
- """Get the ethtool field value for the remote interface"""
- self.cfg.require_cmd("ethtool", remote=True)
- if self.partner_netif is None:
- ksft_pr(f"Partner interface name is unavailable.")
- return None
- try:
- process = ethtool(self.partner_netif, json=True, host=self.cfg.remote)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}")
- return None
- json_data = process[0]
- """Check if the field exist in the json data"""
- if field not in json_data:
- raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}')
- return json_data[field]
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 19a6969643f4..3288ed04ce08 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -9,27 +9,37 @@
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
*
* On client:
- * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201
+ * echo -n "hello\nworld" | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*
- * Test data validation:
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
*
* On server:
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
*
* On client:
* yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
- * tr \\n \\0 | \
- * head -c 5G | \
+ * head -c 1G | \
* nc <server IP> 5201 -p 5201
*
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
*
- * Note this is compatible with regular netcat. i.e. the sender or receiver can
- * be replaced with regular netcat to test the RX or TX path in isolation.
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ * head -c 1M | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*/
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
#include <linux/uio.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -40,17 +50,19 @@
#include <fcntl.h>
#include <malloc.h>
#include <error.h>
+#include <poll.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
+#include <sys/time.h>
#include <linux/memfd.h>
#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
#include <linux/udmabuf.h>
-#include <libmnl/libmnl.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
@@ -71,6 +83,9 @@
#define MSG_SOCK_DEVMEM 0x2000000
#endif
+#define MAX_IOV 1024
+
+static size_t max_chunk;
static char *server_ip;
static char *client_ip;
static char *port;
@@ -80,6 +95,12 @@ static int num_queues = -1;
static char *ifname;
static unsigned int ifindex;
static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
+
+/* System state loaded by current_config_load() */
+#define MAX_FLOWS 8
+static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, };
struct memory_buffer {
int fd;
@@ -93,10 +114,27 @@ struct memory_buffer {
struct memory_provider {
struct memory_buffer *(*alloc)(size_t size);
void (*free)(struct memory_buffer *ctx);
+ void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+ void *src, int n);
void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
size_t off, int n);
};
+static void pr_err(const char *fmt, ...)
+{
+ va_list args;
+
+ fprintf(stderr, "%s: ", TEST_PREFIX);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ if (errno != 0)
+ fprintf(stderr, ": %s", strerror(errno));
+ fprintf(stderr, "\n");
+}
+
static struct memory_buffer *udmabuf_alloc(size_t size)
{
struct udmabuf_create create;
@@ -105,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
ctx = malloc(sizeof(*ctx));
if (!ctx)
- error(1, ENOMEM, "malloc failed");
+ return NULL;
ctx->size = size;
ctx->devfd = open("/dev/udmabuf", O_RDWR);
- if (ctx->devfd < 0)
- error(1, errno,
- "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
- TEST_PREFIX);
+ if (ctx->devfd < 0) {
+ pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]");
+ goto err_free_ctx;
+ }
ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
- if (ctx->memfd < 0)
- error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX);
+ if (ctx->memfd < 0) {
+ pr_err("[skip,no-memfd]");
+ goto err_close_dev;
+ }
ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
- if (ret < 0)
- error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ if (ret < 0) {
+ pr_err("[skip,fcntl-add-seals]");
+ goto err_close_memfd;
+ }
ret = ftruncate(ctx->memfd, size);
- if (ret == -1)
- error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ if (ret == -1) {
+ pr_err("[FAIL,memfd-truncate]");
+ goto err_close_memfd;
+ }
memset(&create, 0, sizeof(create));
@@ -133,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
create.offset = 0;
create.size = size;
ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
- if (ctx->fd < 0)
- error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
+ if (ctx->fd < 0) {
+ pr_err("[FAIL, create udmabuf]");
+ goto err_close_fd;
+ }
ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
ctx->fd, 0);
- if (ctx->buf_mem == MAP_FAILED)
- error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX);
+ if (ctx->buf_mem == MAP_FAILED) {
+ pr_err("[FAIL, map udmabuf]");
+ goto err_close_fd;
+ }
return ctx;
+
+err_close_fd:
+ close(ctx->fd);
+err_close_memfd:
+ close(ctx->memfd);
+err_close_dev:
+ close(ctx->devfd);
+err_free_ctx:
+ free(ctx);
+ return NULL;
}
static void udmabuf_free(struct memory_buffer *ctx)
@@ -153,6 +211,20 @@ static void udmabuf_free(struct memory_buffer *ctx)
free(ctx);
}
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+ void *src, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst->buf_mem + off, src, n);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
size_t off, int n)
{
@@ -170,6 +242,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
static struct memory_provider udmabuf_memory_provider = {
.alloc = udmabuf_alloc,
.free = udmabuf_free,
+ .memcpy_to_device = udmabuf_memcpy_to_device,
.memcpy_from_device = udmabuf_memcpy_from_device,
};
@@ -184,21 +257,25 @@ static void print_nonzero_bytes(void *ptr, size_t size)
putchar(p[i]);
}
-void validate_buffer(void *line, size_t size)
+int validate_buffer(void *line, size_t size)
{
static unsigned char seed = 1;
unsigned char *ptr = line;
- int errors = 0;
+ unsigned char expected;
+ static int errors;
size_t i;
for (i = 0; i < size; i++) {
- if (ptr[i] != seed) {
+ expected = seed ? seed : '\n';
+ if (ptr[i] != expected) {
fprintf(stderr,
"Failed validation: expected=%u, actual=%u, index=%lu\n",
- seed, ptr[i], i);
+ expected, ptr[i], i);
errors++;
- if (errors > 20)
- error(1, 0, "validation failed.");
+ if (errors > 20) {
+ pr_err("validation failed");
+ return -1;
+ }
}
seed++;
if (seed == do_validation)
@@ -206,6 +283,86 @@ void validate_buffer(void *line, size_t size)
}
fprintf(stdout, "Validated buffer\n");
+ return 0;
+}
+
+static int
+__run_command(char *out, size_t outlen, const char *cmd, va_list args)
+{
+ char command[256];
+ FILE *fp;
+
+ vsnprintf(command, sizeof(command), cmd, args);
+
+ fprintf(stderr, "Running: %s\n", command);
+ fp = popen(command, "r");
+ if (!fp)
+ return -1;
+ if (out) {
+ size_t len;
+
+ if (!fgets(out, outlen, fp))
+ return -1;
+
+ /* Remove trailing newline if present */
+ len = strlen(out);
+ if (len && out[len - 1] == '\n')
+ out[len - 1] = '\0';
+ }
+ return pclose(fp);
+}
+
+static int run_command(const char *cmd, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, cmd);
+ ret = __run_command(NULL, 0, cmd, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int ethtool_add_flow(const char *format, ...)
+{
+ char local_output[256], cmd[256];
+ const char *id_start;
+ int flow_idx, ret;
+ char *endptr;
+ long flow_id;
+ va_list args;
+
+ for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++)
+ if (ntuple_ids[flow_idx] == -1)
+ break;
+ if (flow_idx == MAX_FLOWS) {
+ fprintf(stderr, "Error: too many flows\n");
+ return -1;
+ }
+
+ snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format);
+
+ va_start(args, format);
+ ret = __run_command(local_output, sizeof(local_output), cmd, args);
+ va_end(args);
+
+ if (ret != 0)
+ return ret;
+
+ /* Extract the ID from the output */
+ id_start = strstr(local_output, "Added rule with ID ");
+ if (!id_start)
+ return -1;
+ id_start += strlen("Added rule with ID ");
+
+ flow_id = strtol(id_start, &endptr, 10);
+ if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX)
+ return -1;
+
+ fprintf(stderr, "Added flow rule with ID %ld\n", flow_id);
+ ntuple_ids[flow_idx] = flow_id;
+ return flow_id;
}
static int rxq_num(int ifindex)
@@ -235,29 +392,17 @@ static int rxq_num(int ifindex)
return num;
}
-#define run_command(cmd, ...) \
- ({ \
- char command[256]; \
- memset(command, 0, sizeof(command)); \
- snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
- fprintf(stderr, "Running: %s\n", command); \
- system(command); \
- })
-
-static int reset_flow_steering(void)
-{
- /* Depending on the NIC, toggling ntuple off and on might not
- * be allowed. Additionally, attempting to delete existing filters
- * will fail if no filters are present. Therefore, do not enforce
- * the exit status.
- */
-
- run_command("sudo ethtool -K %s ntuple off >&2", ifname);
- run_command("sudo ethtool -K %s ntuple on >&2", ifname);
- run_command(
- "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
- ifname, ifname);
- return 0;
+static void reset_flow_steering(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (ntuple_ids[i] == -1)
+ continue;
+ run_command("ethtool -N %s delete %d",
+ ifname, ntuple_ids[i]);
+ ntuple_ids[i] = -1;
+ }
}
static const char *tcp_data_split_str(int val)
@@ -274,7 +419,81 @@ static const char *tcp_data_split_str(int val)
}
}
-static int configure_headersplit(bool on)
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return NULL;
+ }
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ ynl_sock_destroy(ys);
+
+ return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ if (!config)
+ return;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ if (config->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ /* use explicit value if UKNOWN didn't give us the previous */
+ if (get_rsp->tcp_data_split != config->tcp_data_split) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ config->tcp_data_split);
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+ ys->err.msg);
+ }
+
+ ethtool_rings_get_rsp_free(get_rsp);
+ ethtool_rings_set_req_free(req);
+
+ ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
{
struct ethtool_rings_get_req *get_req;
struct ethtool_rings_get_rsp *get_rsp;
@@ -291,8 +510,15 @@ static int configure_headersplit(bool on)
req = ethtool_rings_set_req_alloc();
ethtool_rings_set_req_set_header_dev_index(req, ifindex);
- /* 0 - off, 1 - auto, 2 - on */
- ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0);
+ if (on) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+ if (old->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, 0);
+ } else {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ }
ret = ethtool_rings_set(ys, req);
if (ret < 0)
fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
@@ -316,12 +542,103 @@ static int configure_headersplit(bool on)
static int configure_rss(void)
{
- return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue);
+ return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+ run_command("ethtool -X %s default >&2", ifname, start_queue);
}
-static int configure_channels(unsigned int rx, unsigned int tx)
+static int check_changing_channels(unsigned int rx, unsigned int tx)
{
- return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+ struct ethtool_channels_get_req *gchan;
+ struct ethtool_channels_set_req *schan;
+ struct ethtool_channels_get_rsp *chan;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ gchan = ethtool_channels_get_req_alloc();
+ if (!gchan) {
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+ chan = ethtool_channels_get(ys, gchan);
+ ethtool_channels_get_req_free(gchan);
+ if (!chan) {
+ fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ schan = ethtool_channels_set_req_alloc();
+ if (!schan) {
+ ret = -1;
+ goto exit_free_chan;
+ }
+
+ ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+ if (chan->_present.combined_count) {
+ if (chan->_present.rx_count || chan->_present.tx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, 0);
+ ethtool_channels_set_req_set_tx_count(schan, 0);
+ }
+
+ if (rx == tx) {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ } else if (rx > tx) {
+ ethtool_channels_set_req_set_combined_count(schan, tx);
+ ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+ } else {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+ }
+
+ } else if (chan->_present.rx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx);
+ } else {
+ fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+ ret = -1;
+ goto exit_free_schan;
+ }
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret) {
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else {
+ /* We were expecting a failure, go back to previous settings */
+ ethtool_channels_set_req_set_combined_count(schan,
+ chan->combined_count);
+ ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+ ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL un-setting channels: %s\n",
+ ys->err.msg);
+ }
+
+exit_free_schan:
+ ethtool_channels_set_req_free(schan);
+exit_free_chan:
+ ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+ ynl_sock_destroy(ys);
+
+ return ret;
}
static int configure_flow_steering(struct sockaddr_in6 *server_sin)
@@ -329,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
const char *type = "tcp6";
const char *server_addr;
char buf[40];
+ int flow_id;
inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
server_addr = buf;
@@ -338,15 +656,25 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
server_addr = strrchr(server_addr, ':') + 1;
}
- return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
- ifname,
- type,
- client_ip ? "src-ip" : "",
- client_ip ?: "",
- server_addr,
- client_ip ? "src-port" : "",
- client_ip ? port : "",
- port, start_queue);
+ /* Try configure 5-tuple */
+ flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d",
+ type,
+ client_ip ? "src-ip" : "",
+ client_ip ?: "",
+ server_addr,
+ client_ip ? "src-port" : "",
+ client_ip ? port : "",
+ port, start_queue);
+ if (flow_id < 0) {
+ /* If that fails, try configure 3-tuple */
+ flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d",
+ type, server_addr, port, start_queue);
+ if (flow_id < 0)
+ /* If that fails, return error */
+ return -1;
+ }
+
+ return 0;
}
static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
@@ -359,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!*ys) {
+ netdev_queue_id_free(queues);
fprintf(stderr, "YNL: %s\n", yerr.msg);
return -1;
}
@@ -394,18 +723,67 @@ err_close:
return -1;
}
-static void enable_reuseaddr(int fd)
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct ynl_sock **ys)
+{
+ struct netdev_bind_tx_req *req = NULL;
+ struct netdev_bind_tx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_tx_req_alloc();
+ netdev_bind_tx_req_set_ifindex(req, ifindex);
+ netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+ rsp = netdev_bind_tx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_tx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+ tx_dmabuf_id = rsp->id;
+
+ netdev_bind_tx_req_free(req);
+ netdev_bind_tx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_tx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static int enable_reuseaddr(int fd)
{
int opt = 1;
int ret;
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
- if (ret)
- error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("SO_REUSEPORT failed");
+ return -1;
+ }
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
- if (ret)
- error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("SO_REUSEADDR failed");
+ return -1;
+ }
+
+ return 0;
}
static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
@@ -432,10 +810,24 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
return 0;
}
-int do_server(struct memory_buffer *mem)
+static struct netdev_queue_id *create_queues(void)
{
- char ctrl_data[sizeof(int) * 20000];
struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = netdev_queue_id_alloc(num_queues);
+ for (i = 0; i < num_queues; i++) {
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
+ }
+
+ return queues;
+}
+
+static int do_server(struct memory_buffer *mem)
+{
+ struct ethtool_rings_get_rsp *ring_config;
+ char ctrl_data[sizeof(int) * 20000];
size_t non_page_aligned_frags = 0;
struct sockaddr_in6 client_addr;
struct sockaddr_in6 server_sin;
@@ -446,64 +838,72 @@ int do_server(struct memory_buffer *mem)
char *tmp_mem = NULL;
struct ynl_sock *ys;
char iobuf[819200];
+ int ret, err = -1;
char buffer[256];
int socket_fd;
int client_fd;
- size_t i = 0;
- int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
- if (ret < 0)
- error(1, 0, "parse server address");
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
- if (reset_flow_steering())
- error(1, 0, "Failed to reset flow steering\n");
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ return -1;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to enable TCP header split\n");
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to enable TCP header split");
+ goto err_free_ring_config;
+ }
/* Configure RSS to divert all traffic from our devmem queues */
- if (configure_rss())
- error(1, 0, "Failed to configure rss\n");
+ if (configure_rss()) {
+ pr_err("Failed to configure rss");
+ goto err_reset_headersplit;
+ }
/* Flow steer our devmem flows to start_queue */
- if (configure_flow_steering(&server_sin))
- error(1, 0, "Failed to configure flow steering\n");
-
- sleep(1);
-
- queues = malloc(sizeof(*queues) * num_queues);
-
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ if (configure_flow_steering(&server_sin)) {
+ pr_err("Failed to configure flow steering");
+ goto err_reset_rss;
}
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
- error(1, 0, "Failed to bind\n");
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_flow_steering;
+ }
tmp_mem = malloc(mem->size);
if (!tmp_mem)
- error(1, ENOMEM, "malloc failed");
+ goto err_unbind;
socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (socket_fd < 0)
- error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+ if (socket_fd < 0) {
+ pr_err("Failed to create socket");
+ goto err_free_tmp;
+ }
- enable_reuseaddr(socket_fd);
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
fprintf(stderr, "binding to address %s:%d\n", server_ip,
ntohs(server_sin.sin6_port));
ret = bind(socket_fd, &server_sin, sizeof(server_sin));
- if (ret)
- error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
ret = listen(socket_fd, 1);
- if (ret)
- error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
+ if (ret) {
+ pr_err("Failed to listen");
+ goto err_close_socket;
+ }
client_addr_len = sizeof(client_addr);
@@ -512,6 +912,10 @@ int do_server(struct memory_buffer *mem)
fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
ntohs(server_sin.sin6_port));
client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+ if (client_fd < 0) {
+ pr_err("Failed to accept");
+ goto err_close_socket;
+ }
inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
sizeof(buffer));
@@ -539,14 +943,18 @@ int do_server(struct memory_buffer *mem)
continue;
if (ret < 0) {
perror("recvmsg");
+ if (errno == EFAULT) {
+ pr_err("received EFAULT, won't recover");
+ goto err_close_client;
+ }
continue;
}
if (ret == 0) {
- fprintf(stderr, "client exited\n");
+ errno = 0;
+ pr_err("client exited");
goto cleanup;
}
- i++;
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_SOCKET ||
(cm->cmsg_type != SCM_DEVMEM_DMABUF &&
@@ -581,9 +989,10 @@ int do_server(struct memory_buffer *mem)
dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
total_received, dmabuf_cmsg->dmabuf_id);
- if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
- error(1, 0,
- "received on wrong dmabuf_id: flow steering error\n");
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id) {
+ pr_err("received on wrong dmabuf_id: flow steering error");
+ goto err_close_client;
+ }
if (dmabuf_cmsg->frag_size % getpagesize())
non_page_aligned_frags++;
@@ -594,22 +1003,27 @@ int do_server(struct memory_buffer *mem)
dmabuf_cmsg->frag_offset,
dmabuf_cmsg->frag_size);
- if (do_validation)
- validate_buffer(tmp_mem,
- dmabuf_cmsg->frag_size);
- else
+ if (do_validation) {
+ if (validate_buffer(tmp_mem,
+ dmabuf_cmsg->frag_size))
+ goto err_close_client;
+ } else {
print_nonzero_bytes(tmp_mem,
dmabuf_cmsg->frag_size);
+ }
ret = setsockopt(client_fd, SOL_SOCKET,
SO_DEVMEM_DONTNEED, &token,
sizeof(token));
- if (ret != 1)
- error(1, 0,
- "SO_DEVMEM_DONTNEED not enough tokens");
+ if (ret != 1) {
+ pr_err("SO_DEVMEM_DONTNEED not enough tokens");
+ goto err_close_client;
+ }
+ }
+ if (!is_devmem) {
+ pr_err("flow steering error");
+ goto err_close_client;
}
- if (!is_devmem)
- error(1, 0, "flow steering error\n");
fprintf(stderr, "total_received=%lu\n", total_received);
}
@@ -620,79 +1034,370 @@ int do_server(struct memory_buffer *mem)
page_aligned_frags, non_page_aligned_frags);
cleanup:
+ err = 0;
- free(tmp_mem);
+err_close_client:
close(client_fd);
+err_close_socket:
close(socket_fd);
+err_free_tmp:
+ free(tmp_mem);
+err_unbind:
ynl_sock_destroy(ys);
-
- return 0;
+err_reset_flow_steering:
+ reset_flow_steering();
+err_reset_rss:
+ reset_rss();
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+ return err;
}
-void run_devmem_tests(void)
+int run_devmem_tests(void)
{
+ struct ethtool_rings_get_rsp *ring_config;
struct netdev_queue_id *queues;
struct memory_buffer *mem;
struct ynl_sock *ys;
- size_t i = 0;
+ int err = -1;
mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ goto err_free_mem;
+ }
/* Configure RSS to divert all traffic from our devmem queues */
- if (configure_rss())
- error(1, 0, "rss error\n");
+ if (configure_rss()) {
+ pr_err("rss error");
+ goto err_free_ring_config;
+ }
- queues = calloc(num_queues, sizeof(*queues));
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_rss;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to configure header split\n");
+ queues = netdev_queue_id_alloc(num_queues);
+ if (!queues) {
+ pr_err("Failed to allocate empty queues array");
+ goto err_reset_headersplit;
+ }
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
- error(1, 0, "Binding empty queues array should have failed\n");
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Binding empty queues array should have failed");
+ goto err_unbind;
+ }
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ if (configure_headersplit(ring_config, 0)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
}
- if (configure_headersplit(0))
- error(1, 0, "Failed to configure header split\n");
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
- error(1, 0, "Configure dmabuf with header split off should have failed\n");
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Configure dmabuf with header split off should have failed");
+ goto err_unbind;
+ }
- if (configure_headersplit(1))
- error(1, 0, "Failed to configure header split\n");
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
}
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
- error(1, 0, "Failed to bind\n");
+ if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_headersplit;
+ }
/* Deactivating a bound queue should not be legal */
- if (!configure_channels(num_queues, num_queues - 1))
- error(1, 0, "Deactivating a bound queue should be illegal.\n");
+ if (!check_changing_channels(num_queues, num_queues)) {
+ pr_err("Deactivating a bound queue should be illegal");
+ goto err_unbind;
+ }
- /* Closing the netlink socket does an implicit unbind */
- ynl_sock_destroy(ys);
+ err = 0;
+ goto err_unbind;
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_reset_rss:
+ reset_rss();
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+err_free_mem:
provider->free(mem);
+ return err;
+}
+
+static uint64_t gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, waittime_ms);
+ if (ret == -1) {
+ pr_err("poll");
+ return -1;
+ }
+
+ return ret && (pfd.revents & POLLERR);
+}
+
+static int wait_compl(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + waittime_ms;
+ char control[CMSG_SPACE(100)] = {};
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ __u32 hi, lo;
+ int ret;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (gettimeofday_ms() < tstop) {
+ ret = do_poll(fd);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ continue;
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ pr_err("recvmsg(MSG_ERRQUEUE)");
+ return -1;
+ }
+ if (msg.msg_flags & MSG_CTRUNC) {
+ pr_err("MSG_CTRUNC");
+ return -1;
+ }
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_level != SOL_IPV6)
+ continue;
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ continue;
+ if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type != IPV6_RECVERR)
+ continue;
+
+ serr = (void *)CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ pr_err("wrong origin %u", serr->ee_origin);
+ return -1;
+ }
+ if (serr->ee_errno != 0) {
+ pr_err("wrong errno %d", serr->ee_errno);
+ return -1;
+ }
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+
+ fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+ return 0;
+ }
+ }
+
+ pr_err("did not receive tx completion");
+ return -1;
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+ char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+ struct sockaddr_in6 server_sin;
+ struct sockaddr_in6 client_sin;
+ struct ynl_sock *ys = NULL;
+ struct iovec iov[MAX_IOV];
+ struct msghdr msg = {};
+ ssize_t line_size = 0;
+ struct cmsghdr *cmsg;
+ char *line = NULL;
+ int ret, err = -1;
+ size_t len = 0;
+ int socket_fd;
+ __u32 ddmabuf;
+ int opt = 1;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0) {
+ pr_err("parse client address");
+ return ret;
+ }
+ }
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0) {
+ pr_err("create socket");
+ return -1;
+ }
+
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+ strlen(ifname) + 1);
+ if (ret) {
+ pr_err("bindtodevice");
+ goto err_close_socket;
+ }
+
+ if (bind_tx_queue(ifindex, mem->fd, &ys)) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
+
+ if (client_ip) {
+ ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+ if (ret) {
+ pr_err("bind");
+ goto err_unbind;
+ }
+ }
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("set sock opt");
+ goto err_unbind;
+ }
+
+ fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+ ntohs(server_sin.sin6_port), ifname);
+
+ ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret) {
+ pr_err("connect");
+ goto err_unbind;
+ }
+
+ while (1) {
+ free(line);
+ line = NULL;
+ line_size = getline(&line, &len, stdin);
+
+ if (line_size < 0)
+ break;
+
+ if (max_chunk) {
+ msg.msg_iovlen =
+ (line_size + max_chunk - 1) / max_chunk;
+ if (msg.msg_iovlen > MAX_IOV) {
+ pr_err("can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+ goto err_free_line;
+ }
+
+ for (int i = 0; i < msg.msg_iovlen; i++) {
+ iov[i].iov_base = (void *)(i * max_chunk);
+ iov[i].iov_len = max_chunk;
+ }
+
+ iov[msg.msg_iovlen - 1].iov_len =
+ line_size - (msg.msg_iovlen - 1) * max_chunk;
+ } else {
+ iov[0].iov_base = 0;
+ iov[0].iov_len = line_size;
+ msg.msg_iovlen = 1;
+ }
+
+ msg.msg_iov = iov;
+ provider->memcpy_to_device(mem, 0, line, line_size);
+
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+ ddmabuf = tx_dmabuf_id;
+
+ *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+ ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+ if (ret < 0) {
+ pr_err("Failed sendmsg");
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+ if (ret != line_size) {
+ pr_err("Did not send all bytes %d vs %zd", ret, line_size);
+ goto err_free_line;
+ }
+
+ if (wait_compl(socket_fd))
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+ err = 0;
+
+err_free_line:
+ free(line);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_close_socket:
+ close(socket_fd);
+ return err;
}
int main(int argc, char *argv[])
{
struct memory_buffer *mem;
int is_server = 0, opt;
- int ret;
+ int ret, err = 1;
- while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
switch (opt) {
case 'l':
is_server = 1;
@@ -718,47 +1423,62 @@ int main(int argc, char *argv[])
case 'f':
ifname = optarg;
break;
+ case 'z':
+ max_chunk = atoi(optarg);
+ break;
case '?':
fprintf(stderr, "unknown option: %c\n", optopt);
break;
}
}
- if (!ifname)
- error(1, 0, "Missing -f argument\n");
+ if (!ifname) {
+ pr_err("Missing -f argument");
+ return 1;
+ }
ifindex = if_nametoindex(ifname);
+ fprintf(stderr, "using ifindex=%u\n", ifindex);
+
if (!server_ip && !client_ip) {
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
- if (num_queues < 0)
- error(1, 0, "couldn't detect number of queues\n");
- if (num_queues < 2)
- error(1, 0,
- "number of device queues is too low\n");
+ if (num_queues < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
/* make sure can bind to multiple queues */
start_queue = num_queues / 2;
num_queues /= 2;
}
- if (start_queue < 0 || num_queues < 0)
- error(1, 0, "Both -t and -q are required\n");
+ if (start_queue < 0 || num_queues < 0) {
+ pr_err("Both -t and -q are required");
+ return 1;
+ }
- run_devmem_tests();
- return 0;
+ return run_devmem_tests();
}
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
- if (num_queues < 2)
- error(1, 0, "number of device queues is too low\n");
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
num_queues = 1;
start_queue = rxq_num(ifindex) - num_queues;
- if (start_queue < 0)
- error(1, 0, "couldn't detect number of queues\n");
+ if (start_queue < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
}
@@ -766,21 +1486,39 @@ int main(int argc, char *argv[])
for (; optind < argc; optind++)
fprintf(stderr, "extra arguments: %s\n", argv[optind]);
- if (start_queue < 0)
- error(1, 0, "Missing -t argument\n");
+ if (start_queue < 0) {
+ pr_err("Missing -t argument");
+ return 1;
+ }
- if (num_queues < 0)
- error(1, 0, "Missing -q argument\n");
+ if (num_queues < 0) {
+ pr_err("Missing -q argument");
+ return 1;
+ }
- if (!server_ip)
- error(1, 0, "Missing -s argument\n");
+ if (!server_ip) {
+ pr_err("Missing -s argument");
+ return 1;
+ }
- if (!port)
- error(1, 0, "Missing -p argument\n");
+ if (!port) {
+ pr_err("Missing -p argument");
+ return 1;
+ }
mem = provider->alloc(getpagesize() * NUM_PAGES);
- ret = is_server ? do_server(mem) : 1;
- provider->free(mem);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return 1;
+ }
- return ret;
+ ret = is_server ? do_server(mem) : do_client(mem);
+ if (ret)
+ goto err_free_mem;
+
+ err = 0;
+
+err_free_mem:
+ provider->free(mem);
+ return err;
}
diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
deleted file mode 100644
index efd921180532..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic link layer tests for generic NIC drivers.
-#The test comprises of auto-negotiation, speed and duplex checks.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#Required minimum ethtool version is 6.10 (supports json)
-#Default values:
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-
-import time
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq
-from lib.py import KsftFailEx, KsftSkipEx
-from lib.py import NetDrvEpEnv
-from lib.py import LinkConfig
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if link_config.partner_netif is None:
- KsftSkipEx("Partner interface is not available")
- if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True):
- KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}")
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
-def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- """Verifying the autonegotiation state in partner"""
- partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True)
- if partner_autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}")
- partner_autoneg_state = "on" if partner_autoneg_output is True else "off"
-
- ksft_eq(partner_autoneg_state, expected_state)
-
- """Verifying the autonegotiation state of local"""
- autoneg_output = link_config.get_ethtool_field("auto-negotiation")
- if autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}")
- actual_state = "on" if autoneg_output is True else "off"
-
- ksft_eq(actual_state, expected_state)
-
- """Verifying the link establishment"""
- link_available = link_config.get_ethtool_field("link-detected")
- if link_available is None:
- KsftSkipEx(f"Link status not available for interface {cfg.ifname}")
- if link_available != True:
- raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation")
-
-def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- for state in ["off", "on"]:
- if not link_config.set_autonegotiation_state(state, remote=True):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}")
- if not link_config.set_autonegotiation_state(state):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}")
- time.sleep(time_delay)
- verify_autonegotiation(cfg, state, link_config)
-
-def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- common_link_modes = link_config.common_link_modes
- if not common_link_modes:
- KsftSkipEx("No common link modes exist")
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
-
- if speeds and duplex_modes and len(speeds) == len(duplex_modes):
- for idx in range(len(speeds)):
- speed = speeds[idx]
- duplex = duplex_modes[idx]
- if not link_config.set_speed_and_duplex(speed, duplex):
- raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(time_delay)
- if not link_config.verify_speed_and_duplex(speed, duplex):
- raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}")
- else:
- if not speeds or not duplex_modes:
- KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}")
- else:
- KsftSkipEx("Mismatch in the number of speeds and duplex modes")
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- args = parser.parse_args()
- time_delay = args.time_delay
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py
deleted file mode 100644
index 201403b76ea3..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_performance.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic performance test for generic NIC drivers.
-#The test comprises of throughput check for TCP and UDP streams.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote.
-#Required minimum ethtool version is 6.10
-#Change the below configuration based on your hw needs.
-# """Default values"""
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-#test_duration = 10 #performance test duration for the throughput check, in seconds.
-#send_throughput_threshold = 80 #percentage of send throughput required to pass the check
-#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check
-
-import time
-import json
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true
-from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic
-from lib.py import NetDrvEpEnv, bkg, wait_port_listen
-from lib.py import cmd
-from lib.py import LinkConfig
-
-class TestConfig:
- def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None:
- self.time_delay = time_delay
- self.test_duration = test_duration
- self.send_throughput_threshold = send_throughput_threshold
- self.receive_throughput_threshold = receive_throughput_threshold
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- common_link_modes = link_config.common_link_modes
- if common_link_modes is None:
- KsftSkipEx("No common link modes found")
- if link_config.partner_netif == None:
- KsftSkipEx("Partner interface is not available")
- if link_config.check_autoneg_supported():
- KsftSkipEx("Auto-negotiation not supported by local")
- if link_config.check_autoneg_supported(remote=True):
- KsftSkipEx("Auto-negotiation not supported by remote")
- cfg.require_cmd("iperf3", remote=True)
-
-def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None:
- common_link_modes = link_config.common_link_modes
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
- """Test duration in seconds"""
- duration = test_config.test_duration
-
- ksft_pr(f"{protocol} test")
- test_type = "-u" if protocol == "UDP" else ""
-
- send_throughput = []
- receive_throughput = []
- for idx in range(0, len(speeds)):
- if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False:
- raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(test_config.time_delay)
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
- send_command=f"{test_type} -b 0 -t {duration} --json"
- receive_command=f"{test_type} -b 0 -t {duration} --reverse --json"
-
- send_result = traffic.run_remote_test(cfg, command=send_command)
- if send_result.ret != 0:
- raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}")
-
- send_output = send_result.stdout
- send_data = json.loads(send_output)
-
- """Convert throughput to Mbps"""
- send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps")
-
- receive_result = traffic.run_remote_test(cfg, command=receive_command)
- if receive_result.ret != 0:
- raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}")
-
- receive_output = receive_result.stdout
- receive_data = json.loads(receive_output)
-
- """Convert throughput to Mbps"""
- receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps")
-
- """Check whether throughput is not below the threshold (default values set at start)"""
- for idx in range(0, len(speeds)):
- send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100)
- receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100)
- ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
- ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-
-def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'TCP', traffic)
-
-def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'UDP', traffic)
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.')
- parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.')
- parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.')
- args=parser.parse_args()
- test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt)
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- traffic = GenerateTraffic(cfg)
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, ))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
new file mode 100755
index 000000000000..c1e943d53f19
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to configuration of HW timestamping
+"""
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
+from lib.py import NetDrvEnv, EthtoolFamily, NlError
+
+
+def __get_hwtimestamp_support(cfg):
+ """ Retrieve supported configuration information """
+
+ try:
+ tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported") from e
+ raise
+
+ ctx = {}
+ tx = tsinfo.get('tx-types', {})
+ rx = tsinfo.get('rx-filters', {})
+
+ bits = tx.get('bits', {})
+ ctx['tx'] = bits.get('bit', [])
+ bits = rx.get('bits', {})
+ ctx['rx'] = bits.get('bit', [])
+ return ctx
+
+
+def __get_hwtimestamp_config(cfg):
+ """ Retrieve current TS configuration information """
+
+ try:
+ tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return tscfg
+
+
+def __set_hwtimestamp_config(cfg, ts):
+ """ Setup new TS configuration information """
+
+ ts['header'] = {'dev-name': cfg.ifname}
+ try:
+ res = cfg.ethnl.tsconfig_set(ts)
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return res
+
+
+def test_hwtstamp_tx(cfg):
+ """
+ Test TX timestamp configuration.
+ The driver should apply provided config and report back proper state.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ tx = ts['tx']
+ for t in tx:
+ tscfg = orig_tscfg
+ tscfg['tx-types']['bits']['bit'] = [t]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ ksft_eq(res['tx-types']['bits']['bit'], [t])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def test_hwtstamp_rx(cfg):
+ """
+ Test RX timestamp configuration.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ rx = ts['rx']
+ for r in rx:
+ tscfg = orig_tscfg
+ tscfg['rx-filters']['bits']['bit'] = [r]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ if r['index'] == 0 or r['index'] == 1:
+ ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ else:
+ # the driver can fallback to some value which has higher coverage for timestamping
+ ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index ad192fef3117..2a51b60df8a1 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,8 +1,13 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
import errno
import time
+import math
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
@@ -13,7 +18,8 @@ from lib.py import cmd, tool, GenerateTraffic
def _write_fail_config(config):
for key, value in config.items():
- with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
+ path = "/sys/kernel/debug/fail_function/"
+ with open(path + key, "w", encoding='ascii') as fp:
fp.write(str(value) + "\n")
@@ -22,8 +28,7 @@ def _enable_pp_allocation_fail():
raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("page_pool_alloc_netmems\n")
+ _write_fail_config({"inject": "page_pool_alloc_netmems"})
_write_fail_config({
"verbose": 0,
@@ -38,8 +43,7 @@ def _disable_pp_allocation_fail():
return
if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("\n")
+ _write_fail_config({"inject": ""})
_write_fail_config({
"probability": 0,
@@ -48,6 +52,10 @@ def _disable_pp_allocation_fail():
def test_pp_alloc(cfg, netdevnl):
+ """
+ Configure page pool allocation fail injection while traffic is running.
+ """
+
def get_stats():
return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
@@ -55,7 +63,7 @@ def test_pp_alloc(cfg, netdevnl):
stat1 = get_stats()
time.sleep(1)
stat2 = get_stats()
- if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
+ if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
@@ -82,11 +90,16 @@ def test_pp_alloc(cfg, netdevnl):
time.sleep(3)
s2 = get_stats()
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1:
+ seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+ if seen_fails < 1:
raise KsftSkipEx("Allocation failures not increasing")
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100:
- raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'],
- "packets:", s2['rx-packets'] - s1['rx-packets'])
+ pkts = s2['rx-packets'] - s1['rx-packets']
+ # Expecting one failure per 512 buffers, 3.1x safety margin
+ want_fails = math.floor(pkts / 512 / 3.1)
+ if seen_fails < want_fails:
+ raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+ "packets:", pkts)
+ ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
# Basic failures are fine, try to wobble some settings to catch extra failures
check_traffic_flowing()
@@ -105,7 +118,7 @@ def test_pp_alloc(cfg, netdevnl):
else:
ksft_pr("ethtool -G change retval: did not succeed", new_g)
else:
- ksft_pr("ethtool -G change retval: did not try")
+ ksft_pr("ethtool -G change retval: did not try")
time.sleep(0.1)
check_traffic_flowing()
@@ -119,6 +132,7 @@ def test_pp_alloc(cfg, netdevnl):
def main() -> None:
+ """ Ksft boiler plate main """
netdevnl = NetdevFamily()
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+ # Try to set multiple at once, the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+ # Create / deleve via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 319aaa004c40..ed7e405682f0 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -4,7 +4,8 @@
import datetime
import random
import re
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
@@ -58,6 +59,14 @@ def require_ntuple(cfg):
raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+def require_context_cnt(cfg, need_cnt):
+ # There's no good API to get the context count, so the tests
+ # which try to add a lot opportunisitically set the count they
+ # discovered. Careful with test ordering!
+ if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+ raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
# Get Rx packet counts for all queues, as a simple list of integers
# if @prev is specified the prev counts will be subtracted
def _get_rx_cnts(cfg, prev=None):
@@ -109,7 +118,7 @@ def test_rss_key_indir(cfg):
qcnt = len(_get_rx_cnts(cfg))
if qcnt < 3:
- KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+ raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
data = get_rss(cfg)
want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
@@ -169,8 +178,13 @@ def test_rss_key_indir(cfg):
cnts = _get_rx_cnts(cfg)
GenerateTraffic(cfg).wait_pkts_and_stop(20000)
cnts = _get_rx_cnts(cfg, prev=cnts)
- # First two queues get less traffic than all the rest
- ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
+ if qcnt > 4:
+ # First two queues get less traffic than all the rest
+ ksft_lt(sum(cnts[:2]), sum(cnts[2:]),
+ "traffic distributed: " + str(cnts))
+ else:
+ # When queue count is low make sure third queue got significant pkts
+ ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
def test_rss_queue_reconfigure(cfg, main_ctx=True):
@@ -326,19 +340,20 @@ def test_hitless_key_update(cfg):
data = get_rss(cfg)
key_len = len(data['rss-hash-key'])
- key = _rss_key_rand(key_len)
+ ethnl = EthtoolFamily()
+ key = random.randbytes(key_len)
tgen = GenerateTraffic(cfg)
try:
errors0, carrier0 = get_drop_err_sum(cfg)
t0 = datetime.datetime.now()
- ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key})
t1 = datetime.datetime.now()
errors1, carrier1 = get_drop_err_sum(cfg)
finally:
tgen.wait_pkts_and_stop(5000)
- ksft_lt((t1 - t0).total_seconds(), 0.2)
+ ksft_lt((t1 - t0).total_seconds(), 0.15)
ksft_eq(errors1 - errors1, 0)
ksft_eq(carrier1 - carrier0, 0)
@@ -383,7 +398,7 @@ def test_rss_context_dump(cfg):
# Sanity-check the results
for data in ctxs:
- ksft_ne(set(data['indir']), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero")
ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
# More specific checks
@@ -456,6 +471,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
raise
ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
ctx_cnt = i
+ if cfg.context_cnt is None:
+ cfg.context_cnt = ctx_cnt
break
_rss_key_check(cfg, context=ctx_id)
@@ -511,8 +528,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
"""
require_ntuple(cfg)
-
- requested_ctx_cnt = ctx_cnt
+ require_context_cnt(cfg, 4)
# Try to allocate more queues when necessary
qcnt = len(_get_rx_cnts(cfg))
@@ -577,9 +593,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
remove_ctx(-1)
check_traffic()
- if requested_ctx_cnt != ctx_cnt:
- raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
-
def test_rss_context_overlap(cfg, other_ctx=0):
"""
@@ -588,6 +601,8 @@ def test_rss_context_overlap(cfg, other_ctx=0):
"""
require_ntuple(cfg)
+ if other_ctx:
+ require_context_cnt(cfg, 2)
queue_cnt = len(_get_rx_cnts(cfg))
if queue_cnt < 4:
@@ -649,6 +664,29 @@ def test_rss_context_overlap2(cfg):
test_rss_context_overlap(cfg, True)
+def test_flow_add_context_missing(cfg):
+ """
+ Test that we are not allowed to add a rule pointing to an RSS context
+ which was never created.
+ """
+
+ require_ntuple(cfg)
+
+ # Find a context which doesn't exist
+ for ctx_id in range(1, 100):
+ try:
+ get_rss(cfg, context=ctx_id)
+ except CmdExitFailure:
+ break
+
+ with ksft_raises(CmdExitFailure) as cm:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+ if cm.exception:
+ ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
def test_delete_rss_context_busy(cfg):
"""
Test that deletion returns -EBUSY when an rss context is being used
@@ -715,8 +753,65 @@ def test_rss_ntuple_addition(cfg):
'noise' : (0,) })
+def test_rss_default_context_rule(cfg):
+ """
+ Allocate a port, direct this port to context 0, then create a new RSS
+ context and steer all TCP traffic to it (context 1). Verify that:
+ * Traffic to the specific port continues to use queues of the main
+ context (0/1).
+ * Traffic to any other TCP port is redirected to the new context
+ (queues 2/3).
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except Exception as exc:
+ raise KsftSkipEx("Not enough queues for the test") from exc
+
+ # Use queues 0 and 1 for the main context
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # Create a new RSS context that uses queues 2 and 3
+ ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # Generic low-priority rule: redirect all TCP traffic to the new context.
+ # Give it an explicit higher location number (lower priority).
+ flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+ ethtool(f"-N {cfg.ifname} {flow_generic}")
+ defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+ # Specific high-priority rule for a random port that should stay on context 0.
+ # Assign loc 0 so it is evaluated before the generic rule.
+ port_main = rand_port()
+ flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+ ethtool(f"-N {cfg.ifname} {flow_main}")
+ defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+ _ntuple_rule_check(cfg, 1, ctx_id)
+
+ # Verify that traffic matching the specific rule still goes to queues 0/1
+ _send_traffic_check(cfg, port_main, "context 0",
+ { 'target': (0, 1),
+ 'empty' : (2, 3) })
+
+ # And that traffic for any other port is steered to the new context
+ port_other = rand_port()
+ _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+ { 'target': (2, 3),
+ 'noise' : (0, 1) })
+
+
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.context_cnt = None
cfg.ethnl = EthtoolFamily()
cfg.netdevnl = NetdevFamily()
@@ -726,7 +821,9 @@ def main() -> None:
test_rss_context_dump, test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
- test_delete_rss_context_busy, test_rss_ntuple_addition],
+ test_flow_add_context_missing,
+ test_delete_rss_context_busy, test_rss_ntuple_addition,
+ test_rss_default_context_rule],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
new file mode 100755
index 000000000000..6fa95fe27c47
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for RSS hashing on IPv6 Flow Label.
+"""
+
+import glob
+import os
+import socket
+from lib.py import CmdExitFailure
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
+ ksft_not_in, ksft_raises, KsftSkipEx
+from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
+from lib.py import NetDrvEpEnv
+
+
+def _check_system(cfg):
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+
+ for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt",
+ f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]:
+ try:
+ with open(f, 'r') as fp:
+ setting = fp.read().strip()
+ # CPU mask will be zeros and commas
+ if setting.replace("0", "").replace(",", ""):
+ raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}")
+ except FileNotFoundError:
+ pass
+
+ # 1 is the default, if someone changed it we probably shouldn"t mess with it
+ af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout
+ if af.strip() != "1":
+ raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
+
+
+def _ethtool_get_cfg(cfg, fl_type):
+ descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ "IPv6 Flow Label": "l",
+ }
+
+ ret = ""
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ ret += converter[line]
+ return ret
+
+
+def _traffic(cfg, one_sock, one_cpu):
+ local_port = rand_port(socket.SOCK_DGRAM)
+ remote_port = rand_port(socket.SOCK_DGRAM)
+
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v["6"], 0))
+ if one_sock:
+ send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \
+ "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-"
+ else:
+ send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \
+ f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done"
+
+ cpus = set()
+ with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
+ for _ in range(20):
+ fd_read_timeout(sock.fileno(), 1)
+ cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+ cpus.add(cpu)
+
+ if one_cpu:
+ ksft_eq(len(cpus), 1,
+ f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
+ else:
+ ksft_ge(len(cpus), 2,
+ f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
+
+
+def test_rss_flow_label(cfg):
+ """
+ Test hashing on IPv6 flow label. Send traffic over a single socket
+ and over multiple sockets. Depend on the remote having auto-label
+ enabled so that it randomizes the label per socket.
+ """
+
+ cfg.require_ipver("6")
+ cfg.require_cmd("socat", remote=True)
+ _check_system(cfg)
+
+ # Enable flow label hashing for UDP6
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+
+ defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _traffic(cfg, one_sock=True, one_cpu=True)
+ _traffic(cfg, one_sock=False, one_cpu=False)
+
+ # Disable it, we should see no hashing (reset was already defer()ed)
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
+
+ _traffic(cfg, one_sock=False, one_cpu=True)
+
+
+def _check_v4_flow_types(cfg):
+ for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]:
+ try:
+ cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+ ksft_not_in("Flow Label", cur,
+ comment=f"{fl_type=} has Flow Label:" + cur)
+ except CmdExitFailure:
+ # Probably does not support this flow type
+ pass
+
+
+def test_rss_flow_label_6only(cfg):
+ """
+ Test interactions with IPv4 flow types. It should not be possible to set
+ IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
+ not appear in the IPv4 "current config".
+ """
+
+ with ksft_raises(CmdExitFailure) as cm:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl")
+ ksft_in("Invalid argument", cm.exception.cmd.stderr)
+
+ _check_v4_flow_types(cfg)
+
+ # Try to enable Flow Labels and check again, in case it leaks thru
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ changed = initial.replace("l", "") if "l" in initial else initial + "l"
+
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _check_v4_flow_types(cfg)
+ restore.exec()
+ _check_v4_flow_types(cfg)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_rss_flow_label,
+ test_rss_flow_label_6only],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
new file mode 100755
index 000000000000..72880e388478
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import multiprocessing
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+
+
+def traffic(cfg, local_port, remote_port, ipver):
+ af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
+ sock = socket.socket(af_inet, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v[ipver], remote_port))
+ tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
+ cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
+ fd_read_timeout(sock.fileno(), 5)
+ return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+
+
+def test_rss_input_xfrm(cfg, ipver):
+ """
+ Test symmetric input_xfrm.
+ If symmetric RSS hash is configured, send traffic twice, swapping the
+ src/dst UDP ports, and verify that the same queue is receiving the traffic
+ in both cases (IPs are constant).
+ """
+
+ if multiprocessing.cpu_count() < 2:
+ raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
+
+ # Check for symmetric xor/or-xor
+ if not input_xfrm:
+ raise KsftSkipEx("Symmetric RSS hash not requested")
+
+ cpus = set()
+ successful = 0
+ for _ in range(100):
+ try:
+ port1 = rand_port(socket.SOCK_DGRAM)
+ port2 = rand_port(socket.SOCK_DGRAM)
+ cpu1 = traffic(cfg, port1, port2, ipver)
+ cpu2 = traffic(cfg, port2, port1, ipver)
+ cpus.update([cpu1, cpu2])
+ ksft_eq(
+ cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")
+
+ successful += 1
+ if successful == 10:
+ break
+ except:
+ continue
+ else:
+ raise KsftFailEx("Failed to run traffic")
+
+ ksft_ge(len(cpus), 2,
+ comment=f"Received traffic on less than two cpus {cpus = }")
+
+
+def test_rss_input_xfrm_ipv4(cfg):
+ cfg.require_ipver("4")
+ test_rss_input_xfrm(cfg, "4")
+
+
+def test_rss_input_xfrm_ipv6(cfg):
+ cfg.require_ipver("6")
+ test_rss_input_xfrm(cfg, "6")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index 9ba03164d73a..d23b3b0c20a3 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -52,7 +52,11 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "kselftest.h"
+#include "../../../net/lib/ksft.h"
#define TOEPLITZ_KEY_MIN_LEN 40
#define TOEPLITZ_KEY_MAX_LEN 60
@@ -64,6 +68,7 @@
#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR (1 << 16)
#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
@@ -101,6 +106,8 @@ struct ring_state {
static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
static int rps_silo_to_cpu[RPS_MAX_CPUS];
static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
static struct ring_state rings[RSS_MAX_CPUS];
static inline uint32_t toeplitz(const unsigned char *four_tuple,
@@ -129,7 +136,12 @@ static inline uint32_t toeplitz(const unsigned char *four_tuple,
/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
static void verify_rss(uint32_t rx_hash, int cpu)
{
- int queue = rx_hash % cfg_num_queues;
+ int queue;
+
+ if (rss_indir_tbl_size)
+ queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+ else
+ queue = rx_hash % cfg_num_queues;
log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
if (rx_irq_cpus[queue] != cpu) {
@@ -482,6 +494,56 @@ static void parse_rps_bitmap(const char *arg)
rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
}
+static void read_rss_dev_info_ynl(void)
+{
+ struct ethtool_rss_get_req *req;
+ struct ethtool_rss_get_rsp *rsp;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ if (!ys)
+ error(1, errno, "ynl_sock_create failed");
+
+ req = ethtool_rss_get_req_alloc();
+ if (!req)
+ error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+ ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+ rsp = ethtool_rss_get(ys, req);
+ if (!rsp)
+ error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+ if (!rsp->_len.hkey)
+ error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+ if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+ rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+ error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+ rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+ TOEPLITZ_KEY_MAX_LEN);
+
+ memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+ if (rsp->_count.indir > RSS_MAX_INDIR)
+ error(1, 0, "RSS indirection table too large (%u > %u)",
+ rsp->_count.indir, RSS_MAX_INDIR);
+
+ /* If indir table not available we'll fallback to simple modulo math */
+ if (rsp->_count.indir) {
+ memcpy(rss_indir_tbl, rsp->indir,
+ rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+ rss_indir_tbl_size = rsp->_count.indir;
+
+ log_verbose("RSS indirection table size: %u\n",
+ rss_indir_tbl_size);
+ }
+
+ ethtool_rss_get_rsp_free(rsp);
+ ethtool_rss_get_req_free(req);
+ ynl_sock_destroy(ys);
+}
+
static void parse_opts(int argc, char **argv)
{
static struct option long_options[] = {
@@ -550,7 +612,7 @@ static void parse_opts(int argc, char **argv)
}
if (!have_toeplitz)
- error(1, 0, "Must supply rss key ('-k')");
+ read_rss_dev_info_ynl();
num_cpus = get_nprocs();
if (num_cpus > RSS_MAX_CPUS)
@@ -576,6 +638,10 @@ int main(int argc, char **argv)
fd_sink = setup_sink();
setup_rings();
+
+ /* Signal to test framework that we're ready to receive */
+ ksft_ready();
+
process_rings();
cleanup_rings();
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+ """Verify that RPS is not already configured."""
+
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if set(val) - {"0", ","}:
+ raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+ rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+ with open(rfs_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if val != "0":
+ raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+ with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+ encoding="utf-8") as fp:
+ data = fp.read().strip()
+ if "," in data or "-" in data:
+ raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+ return int(data)
+
+
+def _get_irq_cpus(cfg):
+ """
+ Read the list of IRQs for the device Rx queues.
+ """
+ queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+ # Remap into ID-based dicts
+ napis = {n["id"]: n for n in napis}
+ queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+ cpus = []
+ for rx in range(9999):
+ name = f"rx{rx}"
+ if name not in queues:
+ break
+ cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+ return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+ """
+ Get CPUs that are not used by Rx queues.
+ Returns a list of at least 'count' CPU numbers.
+ """
+
+ # Get CPUs used by Rx queues
+ rx_cpus = set(_get_irq_cpus(cfg))
+
+ # Get total number of CPUs
+ num_cpus = os.cpu_count()
+
+ # Find unused CPUs
+ unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+ if len(unused_cpus) < count:
+ raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+ return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+ """Configure RPS for all Rx queues."""
+
+ mask = 0
+ for cpu in rps_cpus:
+ mask |= (1 << cpu)
+ mask = hex(mask)[2:]
+
+ # Set RPS bitmap for all rx queues
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "w", encoding="utf-8") as fp:
+ fp.write(mask)
+
+ return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+ """Send 20 packets of requested type."""
+
+ # Determine protocol and IP version for socat
+ if proto_flag == "-u":
+ proto = "UDP"
+ else:
+ proto = "TCP"
+
+ baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+ # Run socat in a loop to send traffic periodically
+ # Use sh -c with a loop similar to toeplitz_client.sh
+ socat_cmd = f"""
+ for i in `seq 20`; do
+ echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+ sleep 0.001;
+ done
+ """
+
+ cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+ for grp in ["", "rss", "rps"]:
+ for l4 in ["tcp", "udp"]:
+ for l3 in ["4", "6"]:
+ name = f"{l4}_ipv{l3}"
+ if grp:
+ name = f"{grp}_{name}"
+ yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+ """Run a single toeplitz test."""
+
+ cfg.require_ipver(ipver)
+
+ # Check that rxhash is enabled
+ ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+ if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hfunc": ETH_RSS_HASH_TOP,
+ "input-xfrm": {}})
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+ "hfunc": rss.get('hfunc'),
+ "input-xfrm": rss.get('input-xfrm', {})
+ })
+
+ port = rand_port(socket.SOCK_DGRAM)
+
+ toeplitz_path = cfg.test_dir / "toeplitz"
+ rx_cmd = [
+ str(toeplitz_path),
+ "-" + ipver,
+ proto_flag,
+ "-d", str(port),
+ "-i", cfg.ifname,
+ "-T", "4000",
+ "-s",
+ "-v"
+ ]
+
+ if grp:
+ _check_rps_and_rfs_not_configured(cfg)
+ if grp == "rss":
+ irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+ rx_cmd += ["-C", irq_cpus]
+ ksft_pr(f"RSS using CPUs: {irq_cpus}")
+ elif grp == "rps":
+ # Get CPUs not used by Rx queues and configure them for RPS
+ rps_cpus = _get_unused_cpus(cfg, count=2)
+ rps_mask = _configure_rps(cfg, rps_cpus)
+ defer(_configure_rps, cfg, [])
+ rx_cmd += ["-r", rps_mask]
+ ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+ # Run rx in background, it will exit once it has seen enough packets
+ with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+ while rx_proc.proc.poll() is None:
+ _send_traffic(cfg, proto_flag, ipver, port)
+
+ # Check rx result
+ ksft_pr("Receiver output:")
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ if rx_proc.stderr:
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+ """Ksft boilerplate main."""
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..0998e68ebaf0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+ # Make sure we have order of magnitude more LSO packets than
+ # retransmits, in case TCP retransmitted all the LSO packets.
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+ sock.close()
+
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[1]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+
+ # Full feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.hw_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.hw_features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ restore_wanted_features(cfg)
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ if info[3]:
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 401e70f7f136..8b75faa9af6d 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -1,5 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Driver test environment.
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
import sys
from pathlib import Path
@@ -7,13 +15,41 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
- from net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily, PSPFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup, ksft_variants, KsftNamedVariant
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none"]
+
+ from .env import NetDrvEnv, NetDrvEpEnv
+ from .load import GenerateTraffic, Iperf3Runner
+ from .remote import Remote
+
+ __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
- ksft_pr("Failed importing `net` library from kernel sources")
- ksft_pr(str(e))
- ktap_result(True, comment="SKIP")
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
sys.exit(4)
-
-from .env import *
-from .load import *
-from .remote import Remote
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 987e452d3a45..8b644fd84ff2 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -4,55 +4,60 @@ import os
import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
-from lib.py import ksft_setup
+from lib.py import ksft_setup, wait_file
from lib.py import cmd, ethtool, ip, CmdExitFailure
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
-def _load_env_file(src_path):
- env = os.environ.copy()
-
- src_dir = Path(src_path).parent.resolve()
- if not (src_dir / "net.config").exists():
- return ksft_setup(env)
-
- with open((src_dir / "net.config").as_posix(), 'r') as fp:
- for line in fp.readlines():
- full_file = line
- # Strip comments
- pos = line.find("#")
- if pos >= 0:
- line = line[:pos]
- line = line.strip()
- if not line:
- continue
- pair = line.split('=', maxsplit=1)
- if len(pair) != 2:
- raise Exception("Can't parse configuration line:", full_file)
- env[pair[0]] = pair[1]
- return ksft_setup(env)
-
-
-class NetDrvEnv:
+class NetDrvEnvBase:
"""
- Class for a single NIC / host env, with no remote end
- """
- def __init__(self, src_path, **kwargs):
- self._ns = None
+ Base class for a NIC / host environments
- self.env = _load_env_file(src_path)
+ Attributes:
+ test_dir: Path to the source directory of the test
+ net_lib_dir: Path to the net/lib directory
+ """
+ def __init__(self, src_path):
+ self.src_path = Path(src_path)
+ self.test_dir = self.src_path.parent.resolve()
+ self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve()
+
+ self.env = self._load_env_file()
+
+ # Following attrs must be set be inheriting classes
+ self.dev = None
+
+ def _load_env_file(self):
+ env = os.environ.copy()
+
+ src_dir = Path(self.src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return ksft_setup(env)
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
+ return ksft_setup(env)
- if 'NETIF' in self.env:
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
- else:
- self._ns = NetdevSimDev(**kwargs)
- self.dev = self._ns.nsims[0].dev
- self.ifname = self.dev['ifname']
- self.ifindex = self.dev['ifindex']
+ def __del__(self):
+ pass
def __enter__(self):
ip(f"link set dev {self.dev['ifname']} up")
+ wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier",
+ lambda x: x.strip() == "1")
return self
@@ -62,13 +67,37 @@ class NetDrvEnv:
"""
self.__del__()
+
+class NetDrvEnv(NetDrvEnvBase):
+ """
+ Class for a single NIC / host env, with no remote end
+ """
+ def __init__(self, src_path, nsim_test=None, **kwargs):
+ super().__init__(src_path)
+
+ self._ns = None
+
+ if 'NETIF' in self.env:
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
+ else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
+ self._ns = NetdevSimDev(**kwargs)
+ self.dev = self._ns.nsims[0].dev
+ self.ifname = self.dev['ifname']
+ self.ifindex = self.dev['ifindex']
+
def __del__(self):
if self._ns:
self._ns.remove()
self._ns = None
-class NetDrvEpEnv:
+class NetDrvEpEnv(NetDrvEnvBase):
"""
Class for an environment with a local device and "remote endpoint"
which can be used to send traffic in.
@@ -82,8 +111,7 @@ class NetDrvEpEnv:
nsim_v6_pfx = "2001:db8::"
def __init__(self, src_path, nsim_test=None):
-
- self.env = _load_env_file(src_path)
+ super().__init__(src_path)
self._stats_settle_time = None
@@ -94,17 +122,20 @@ class NetDrvEpEnv:
self._ns = None
self._ns_peer = None
+ self.addr_v = { "4": None, "6": None }
+ self.remote_addr_v = { "4": None, "6": None }
+
if "NETIF" in self.env:
if nsim_test is True:
raise KsftXfailEx("Test only works on netdevsim")
self._check_env()
- self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
- self.v4 = self.env.get("LOCAL_V4")
- self.v6 = self.env.get("LOCAL_V6")
- self.remote_v4 = self.env.get("REMOTE_V4")
- self.remote_v6 = self.env.get("REMOTE_V6")
+ self.addr_v["4"] = self.env.get("LOCAL_V4")
+ self.addr_v["6"] = self.env.get("LOCAL_V6")
+ self.remote_addr_v["4"] = self.env.get("REMOTE_V4")
+ self.remote_addr_v["6"] = self.env.get("REMOTE_V6")
kind = self.env["REMOTE_TYPE"]
args = self.env["REMOTE_ARGS"]
else:
@@ -115,26 +146,31 @@ class NetDrvEpEnv:
self.dev = self._ns.nsims[0].dev
- self.v4 = self.nsim_v4_pfx + "1"
- self.v6 = self.nsim_v6_pfx + "1"
- self.remote_v4 = self.nsim_v4_pfx + "2"
- self.remote_v6 = self.nsim_v6_pfx + "2"
+ self.addr_v["4"] = self.nsim_v4_pfx + "1"
+ self.addr_v["6"] = self.nsim_v6_pfx + "1"
+ self.remote_addr_v["4"] = self.nsim_v4_pfx + "2"
+ self.remote_addr_v["6"] = self.nsim_v6_pfx + "2"
kind = "netns"
args = self._netns.name
self.remote = Remote(kind, args, src_path)
- self.addr = self.v6 if self.v6 else self.v4
- self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4
+ self.addr_ipver = "6" if self.addr_v["6"] else "4"
+ self.addr = self.addr_v[self.addr_ipver]
+ self.remote_addr = self.remote_addr_v[self.addr_ipver]
- self.addr_ipver = "6" if self.v6 else "4"
# Bracketed addresses, some commands need IPv6 to be inside []
- self.baddr = f"[{self.v6}]" if self.v6 else self.v4
- self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4
+ self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"]
+ self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"]
self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
+ # resolve remote interface name
+ self.remote_ifname = self.resolve_remote_ifc()
+ self.remote_dev = ip("-d link show dev " + self.remote_ifname,
+ host=self.remote, json=True)[0]
+
self._required_cmd = {}
def create_local(self):
@@ -181,14 +217,17 @@ class NetDrvEpEnv:
raise Exception("Invalid environment, missing configuration:", missing,
"Please see tools/testing/selftests/drivers/net/README.rst")
- def __enter__(self):
- return self
-
- def __exit__(self, ex_type, ex_value, ex_tb):
- """
- __exit__ gets called at the end of a "with" block.
- """
- self.__del__()
+ def resolve_remote_ifc(self):
+ v4 = v6 = None
+ if self.remote_addr_v["4"]:
+ v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote)
+ if self.remote_addr_v["6"]:
+ v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote)
+ if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]:
+ raise Exception("Can't resolve remote interface name, v4 and v6 don't match")
+ if (v4 and len(v4) > 1) or (v6 and len(v6) > 1):
+ raise Exception("Can't resolve remote interface name, multiple interfaces match")
+ return v6[0]["ifname"] if v6 else v4[0]["ifname"]
def __del__(self):
if self._ns:
@@ -204,13 +243,13 @@ class NetDrvEpEnv:
del self.remote
self.remote = None
- def require_v4(self):
- if not self.v4 or not self.remote_v4:
- raise KsftSkipEx("Test requires IPv4 connectivity")
+ def require_ipver(self, ipver):
+ if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
+ raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
- def require_v6(self):
- if not self.v6 or not self.remote_v6:
- raise KsftSkipEx("Test requires IPv6 connectivity")
+ def require_nsim(self):
+ if self._ns is None:
+ raise KsftXfailEx("Test only works on netdevsim")
def _require_cmd(self, comm, key, host=None):
cached = self._required_cmd.get(comm, {})
@@ -225,7 +264,7 @@ class NetDrvEpEnv:
if not self._require_cmd(comm, "local"):
raise KsftSkipEx("Test requires command: " + comm)
if remote:
- if not self._require_cmd(comm, "remote"):
+ if not self._require_cmd(comm, "remote", host=self.remote):
raise KsftSkipEx("Test requires (remote) command: " + comm)
def wait_hw_stats_settle(self):
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index da5af2c680fa..f181fa2d38fc 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -1,46 +1,96 @@
# SPDX-License-Identifier: GPL-2.0
+import re
import time
+import json
-from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
-class GenerateTraffic:
- def __init__(self, env, port=None):
- env.require_cmd("iperf3", remote=True)
+class Iperf3Runner:
+ """
+ Sets up and runs iperf3 traffic.
+ """
+ def __init__(self, env, port=None, server_ip=None, client_ip=None):
+ env.require_cmd("iperf3", local=True, remote=True)
self.env = env
+ self.port = rand_port() if port is None else port
+ self.server_ip = server_ip
+ self.client_ip = client_ip
+
+ def _build_server(self):
+ cmdline = f"iperf3 -s -1 -p {self.port}"
+ if self.server_ip:
+ cmdline += f" -B {self.server_ip}"
+ return cmdline
+
+ def _build_client(self, streams, duration, reverse):
+ host = self.env.addr if self.server_ip is None else self.server_ip
+ cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J"
+ if self.client_ip:
+ cmdline += f" -B {self.client_ip}"
+ if reverse:
+ cmdline += " --reverse"
+ return cmdline
- if port is None:
- port = rand_port()
- self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True)
- wait_port_listen(port)
+ def start_server(self):
+ """
+ Starts an iperf3 server with optional bind IP.
+ """
+ cmdline = self._build_server()
+ proc = cmd(cmdline, background=True)
+ wait_port_listen(self.port)
time.sleep(0.1)
- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
- background=True, host=env.remote)
+ return proc
+
+ def start_client(self, background=False, streams=1, duration=10, reverse=False):
+ """
+ Starts the iperf3 client with the configured options.
+ """
+ cmdline = self._build_client(streams, duration, reverse)
+ return cmd(cmdline, background=background, host=self.env.remote)
+
+ def measure_bandwidth(self, reverse=False):
+ """
+ Runs an iperf3 measurement and returns the average bandwidth (Gbps).
+ Discards the first and last few reporting intervals and uses only the
+ middle part of the run where throughput is typically stable.
+ """
+ self.start_server()
+ result = self.start_client(duration=10, reverse=reverse)
+
+ if result.ret != 0:
+ raise RuntimeError("iperf3 failed to run successfully")
+ try:
+ out = json.loads(result.stdout)
+ except json.JSONDecodeError as exc:
+ raise ValueError("Failed to parse iperf3 JSON output") from exc
+
+ intervals = out.get("intervals", [])
+ samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals]
+ if len(samples) < 10:
+ raise ValueError(f"iperf3 returned too few intervals: {len(samples)}")
+ # Discard potentially unstable first and last 3 seconds.
+ stable = samples[3:-3]
+
+ avg = sum(stable) / len(stable)
+
+ return avg
+
+
+class GenerateTraffic:
+ def __init__(self, env, port=None):
+ self.env = env
+ self.runner = Iperf3Runner(env, port)
+
+ self._iperf_server = self.runner.start_server()
+ self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400)
# Wait for traffic to ramp up
if not self._wait_pkts(pps=1000):
self.stop(verbose=True)
raise Exception("iperf3 traffic did not ramp up")
- def run_remote_test(self, env: object, port=None, command=None):
- if port is None:
- port = rand_port()
- try:
- server_cmd = f"iperf3 -s 1 -p {port} --one-off"
- with bkg(server_cmd, host=env.remote):
- #iperf3 opens TCP connection as default in server
- #-u to be specified in client command for UDP
- wait_port_listen(port, host=env.remote)
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running server command: {e}")
- try:
- client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}"
- proc = cmd(client_cmd)
- return proc
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running client command: {e}")
-
def _wait_pkts(self, pkt_cnt=None, pps=None):
"""
Wait until we've seen pkt_cnt or until traffic ramps up to pps.
@@ -74,3 +124,16 @@ class GenerateTraffic:
ksft_pr(">> Server:")
ksft_pr(self._iperf_server.stdout)
ksft_pr(self._iperf_server.stderr)
+ self._wait_client_stopped()
+
+ def _wait_client_stopped(self, sleep=0.005, timeout=5):
+ end = time.monotonic() + timeout
+
+ live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ")
+
+ while time.monotonic() < end:
+ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+ if not live_port_pattern.search(data):
+ return
+ time.sleep(sleep)
+ raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3acaba41ac7b..ae8abff4be40 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,13 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
SRCIF="" # to be populated later
-SRCIP=192.0.2.1
+SRCIP="" # to be populated later
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+DSTIP="" # to be populated later
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
PORT="6666"
MSG="netconsole selftest"
@@ -26,18 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
# NAMESPACE will be populated by setup_ns with a random value
NAMESPACE=""
-# IDs for netdevsim
+# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test
+# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the
+# same time.
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_BOND_TX_1=$((768 + RANDOM % 256))
+NSIM_BOND_TX_2=$((1024 + RANDOM % 256))
+NSIM_BOND_RX_1=$((1280 + RANDOM % 256))
+NSIM_BOND_RX_2=$((1536 + RANDOM % 256))
NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
# Used to create and delete namespaces
source "${LIBDIR}"/../../../../net/lib.sh
-source "${LIBDIR}"/../../../../net/net_helper.sh
# Create netdevsim interfaces
create_ifaces() {
-
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
udevadm settle 2> /dev/null || true
@@ -81,7 +90,23 @@ function configure_ip() {
ip link set "${SRCIF}" up
}
+function select_ipv4_or_ipv6()
+{
+ local VERSION=${1}
+
+ if [[ "$VERSION" == "ipv6" ]]
+ then
+ DSTIP="${DSTIP6}"
+ SRCIP="${SRCIP6}"
+ else
+ DSTIP="${DSTIP4}"
+ SRCIP="${SRCIP4}"
+ fi
+}
+
function set_network() {
+ local IP_VERSION=${1:-"ipv4"}
+
# setup_ns function is coming from lib.sh
setup_ns NAMESPACE
@@ -92,33 +117,77 @@ function set_network() {
# Link both interfaces back to back
link_ifaces
+ select_ipv4_or_ipv6 "${IP_VERSION}"
configure_ip
}
-function create_dynamic_target() {
+function _create_dynamic_target() {
+ local FORMAT="${1:?FORMAT parameter required}"
+ local NCPATH="${2:?NCPATH parameter required}"
+
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
# Create a dynamic target
- mkdir "${NETCONS_PATH}"
+ mkdir "${NCPATH}"
- echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
- echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
- echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
- echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+ echo "${DSTIP}" > "${NCPATH}"/remote_ip
+ echo "${SRCIP}" > "${NCPATH}"/local_ip
+ echo "${DSTMAC}" > "${NCPATH}"/remote_mac
+ echo "${SRCIF}" > "${NCPATH}"/dev_name
- echo 1 > "${NETCONS_PATH}"/enabled
+ if [ "${FORMAT}" == "basic" ]
+ then
+ # Basic target does not support release
+ echo 0 > "${NCPATH}"/release
+ echo 0 > "${NCPATH}"/extended
+ elif [ "${FORMAT}" == "extended" ]
+ then
+ echo 1 > "${NCPATH}"/extended
+ fi
}
-function cleanup() {
- local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+ local NCPATH=${2:-"$NETCONS_PATH"}
+ _create_dynamic_target "${FORMAT}" "${NCPATH}"
- # delete netconsole dynamic reconfiguration
+ echo 1 > "${NCPATH}"/enabled
+
+ # This will make sure that the kernel was able to
+ # load the netconsole driver configuration. The console message
+ # gets more organized/sequential as well.
+ sleep 1
+}
+
+# Generate the command line argument for netconsole following:
+# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+ local BINDMODE=${1:-"ifname"}
+ if [ "${BINDMODE}" == "ifname" ]
+ then
+ SRCDEV=${SRCIF}
+ else
+ SRCDEV=$(mac_get "${SRCIF}")
+ fi
+
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+ SRCPORT="1514"
+ TGTPORT="6666"
+
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\""
+}
+
+# Do not append the release to the header of the message
+function disable_release_append() {
echo 0 > "${NETCONS_PATH}"/enabled
- # Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function do_cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
# Delete netdevsim devices
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
@@ -131,6 +200,29 @@ function cleanup() {
echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
}
+function cleanup_netcons() {
+ # delete netconsole dynamic reconfiguration
+ # do not fail if the target is already disabled
+ if [[ ! -d "${NETCONS_PATH}" ]]
+ then
+ # in some cases this is called before netcons path is created
+ return
+ fi
+ if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+ then
+ echo 0 > "${NETCONS_PATH}"/enabled || true
+ fi
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+}
+
+function cleanup() {
+ cleanup_netcons
+ do_cleanup
+}
+
function set_user_data() {
if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
then
@@ -146,18 +238,24 @@ function set_user_data() {
function listen_port_and_save_to() {
local OUTPUT=${1}
+ local IPVERSION=${2:-"ipv4"}
+
+ if [ "${IPVERSION}" == "ipv4" ]
+ then
+ SOCAT_MODE="UDP-LISTEN"
+ else
+ SOCAT_MODE="UDP6-LISTEN"
+ fi
+
# Just wait for 2 seconds
timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
}
-function validate_result() {
+# Only validate that the message arrived properly
+function validate_msg() {
local TMPFILENAME="$1"
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
# Check if the file exists
if [ ! -f "$TMPFILENAME" ]; then
echo "FAIL: File was not generated." >&2
@@ -169,17 +267,32 @@ function validate_result() {
cat "${TMPFILENAME}" >&2
exit "${ksft_fail}"
fi
+}
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
+# Validate the message and userdata
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ validate_msg "${TMPFILENAME}"
+
+ # userdata is not supported on basic format target,
+ # thus, do not validate it.
+ if [ "${FORMAT}" != "basic" ];
+ then
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
fi
# Delete the file once it is validated, otherwise keep it
# for debugging purposes
rm "${TMPFILENAME}"
- exit "${ksft_pass}"
}
function check_for_dependencies() {
@@ -203,6 +316,11 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
+ if [ ! -f /proc/net/if_inet6 ]; then
+ echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
exit "${ksft_skip}"
@@ -218,8 +336,84 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
+ REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+ REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+ if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+ echo "SKIP: IPv4s already in use. Skipping it" >&2
exit "${ksft_skip}"
fi
+
+ if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+ echo "SKIP: IPv6s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+function check_for_taskset() {
+ if ! which taskset > /dev/null ; then
+ echo "SKIP: taskset(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# This is necessary if running multiple tests in a row
+function pkill_socat() {
+ PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
+ # socat runs under timeout(1), kill it if it is still alive
+ # do not fail if socat doesn't exist anymore
+ set +e
+ pkill -f "${PROCESS_NAME4}"
+ pkill -f "${PROCESS_NAME6}"
+ set -e
+}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+ if modinfo netconsole | grep filename: | grep -q builtin
+ then
+ echo "SKIP: netconsole should be compiled as a module" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+ local NAMESPACE=${1}
+ local PORT=${2}
+ IP_VERSION=${3}
+
+ if [ "${IP_VERSION}" == "ipv6" ]
+ then
+ PROTOCOL="udp6"
+ else
+ PROTOCOL="udp"
+ fi
+
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+ # even after the port is open, let's wait 1 second before writing
+ # otherwise the packet could be missed, and the test will fail. Happens
+ # more frequently on IPv6
+ sleep 1
+}
+
+# Clean up netdevsim ifaces created for bonding test
+function cleanup_bond_nsim() {
+ ip -n "${TXNS}" \
+ link delete "${BOND_TX_MAIN_IF}" type bond || true
+ ip -n "${RXNS}" \
+ link delete "${BOND_RX_MAIN_IF}" type bond || true
+
+ cleanup_netdevsim "$NSIM_BOND_TX_1"
+ cleanup_netdevsim "$NSIM_BOND_TX_2"
+ cleanup_netdevsim "$NSIM_BOND_RX_1"
+ cleanup_netdevsim "$NSIM_BOND_RX_2"
+}
+
+# cleanup tests that use bonding interfaces
+function cleanup_bond() {
+ cleanup_netcons
+ cleanup_bond_nsim
+ cleanup_all_ns
+ ip link delete "${VETH0}" || true
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 29a672c2270f..e212ad8ccef6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -44,8 +44,7 @@ source $lib_dir/devlink_lib.sh
h1_create()
{
- simple_if_init $h1 192.0.2.1/24
- defer simple_if_fini $h1 192.0.2.1/24
+ adf_simple_if_init $h1 192.0.2.1/24
mtu_set $h1 10000
defer mtu_restore $h1
@@ -56,8 +55,7 @@ h1_create()
h2_create()
{
- simple_if_init $h2 198.51.100.1/24
- defer simple_if_fini $h2 198.51.100.1/24
+ adf_simple_if_init $h2 198.51.100.1/24
mtu_set $h2 10000
defer mtu_restore $h2
@@ -106,8 +104,7 @@ setup_prepare()
# Reload to ensure devlink-trap settings are back to default.
defer devlink_reload
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index d5b6f2cc9a29..9ca340c5f3a6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -57,8 +57,7 @@ source qos_lib.sh
h1_create()
{
- simple_if_init $h1
- defer simple_if_fini $h1
+ adf_simple_if_init $h1
mtu_set $h1 10000
defer mtu_restore $h1
@@ -70,8 +69,7 @@ h1_create()
h2_create()
{
- simple_if_init $h2
- defer simple_if_fini $h2
+ adf_simple_if_init $h2
mtu_set $h2 10000
defer mtu_restore $h2
@@ -83,8 +81,7 @@ h2_create()
h3_create()
{
- simple_if_init $h3
- defer simple_if_fini $h3
+ adf_simple_if_init $h3
mtu_set $h3 10000
defer mtu_restore $h3
@@ -225,8 +222,7 @@ setup_prepare()
h3mac=$(mac_get $h3)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
index 2b5d2c2751d5..a4a25637fe2a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
@@ -68,8 +68,7 @@ mlxsw_only_on_spectrum 2+ || exit
h1_create()
{
- simple_if_init $h1
- defer simple_if_fini $h1
+ adf_simple_if_init $h1
vlan_create $h1 111 v$h1 192.0.2.33/28
defer vlan_destroy $h1 111
@@ -78,8 +77,7 @@ h1_create()
h2_create()
{
- simple_if_init $h2
- defer simple_if_fini $h2
+ adf_simple_if_init $h2
vlan_create $h2 111 v$h2 192.0.2.34/28
defer vlan_destroy $h2 111
@@ -178,8 +176,7 @@ setup_prepare()
h2mac=$(mac_get $h2)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index cd4a5c21360c..d8f8ae8533cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -72,8 +72,7 @@ source qos_lib.sh
h1_create()
{
- simple_if_init $h1 192.0.2.65/28
- defer simple_if_fini $h1 192.0.2.65/28
+ adf_simple_if_init $h1 192.0.2.65/28
mtu_set $h1 10000
defer mtu_restore $h1
@@ -81,8 +80,7 @@ h1_create()
h2_create()
{
- simple_if_init $h2
- defer simple_if_fini $h2
+ adf_simple_if_init $h2
mtu_set $h2 10000
defer mtu_restore $h2
@@ -94,8 +92,7 @@ h2_create()
h3_create()
{
- simple_if_init $h3 192.0.2.66/28
- defer simple_if_fini $h3 192.0.2.66/28
+ adf_simple_if_init $h3 192.0.2.66/28
mtu_set $h3 10000
defer mtu_restore $h3
@@ -196,8 +193,7 @@ setup_prepare()
h3mac=$(mac_get $h3)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 537d6baa77b7..47d2ffcf366e 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -100,8 +100,7 @@ host_create()
local dev=$1; shift
local host=$1; shift
- simple_if_init $dev
- defer simple_if_fini $dev
+ adf_simple_if_init $dev
mtu_set $dev 10000
defer mtu_restore $dev
@@ -250,8 +249,7 @@ setup_prepare()
h3_mac=$(mac_get $h3)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 899b6892603f..d7505b933aef 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource changed
# following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 482ebb744eba..7b98cdd0580d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
continue
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource
# changed following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..d05eddcad539
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+ port = rand_port()
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
+
+ with bkg(listen_cmd, ksft_wait=3) as server:
+ cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
+
+ ksft_eq(0, server.ret)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run([test_napi_id], args=(cfg,))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..7f49ca6c8637
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage address;
+ struct addrinfo *result;
+ struct addrinfo hints;
+ unsigned int napi_id;
+ socklen_t addr_len;
+ socklen_t optlen;
+ char buf[1024];
+ int opt = 1;
+ int family;
+ int server;
+ int client;
+ int ret;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+ return 1;
+ }
+
+ family = result->ai_family;
+ addr_len = result->ai_addrlen;
+
+ server = socket(family, SOCK_STREAM, IPPROTO_TCP);
+ if (server < 0) {
+ perror("socket creation failed");
+ freeaddrinfo(result);
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+ perror("setsockopt");
+ freeaddrinfo(result);
+ return 1;
+ }
+
+ memcpy(&address, result->ai_addr, result->ai_addrlen);
+ freeaddrinfo(result);
+
+ if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
+ perror("bind failed");
+ return 1;
+ }
+
+ if (listen(server, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+ ksft_ready();
+
+ client = accept(server, NULL, 0);
+ if (client < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ optlen = sizeof(napi_id);
+ ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+ &optlen);
+ if (ret != 0) {
+ perror("getsockopt");
+ return 1;
+ }
+
+ read(client, buf, 1024);
+
+ ksft_wait();
+
+ if (napi_id == 0) {
+ fprintf(stderr, "napi ID is 0\n");
+ return 1;
+ }
+
+ close(client);
+ close(server);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
new file mode 100755
index 000000000000..f4be72b2145a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test napi threaded states.
+"""
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge
+from lib.py import NetDrvEnv, NetdevFamily
+from lib.py import cmd, defer, ethtool
+
+
+def _assert_napi_threaded_enabled(nl, napi_id) -> None:
+ napi = nl.napi_get({'id': napi_id})
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
+def _assert_napi_threaded_disabled(nl, napi_id) -> None:
+ napi = nl.napi_get({'id': napi_id})
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+
+def _set_threaded_state(cfg, threaded) -> None:
+ with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp:
+ fp.write(str(threaded).encode('utf-8'))
+
+
+def _setup_deferred_cleanup(cfg) -> None:
+ combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0)
+ ksft_ge(combined, 2)
+ defer(ethtool, f"-L {cfg.ifname} combined {combined}")
+
+ threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout
+ defer(_set_threaded_state, cfg, threaded)
+
+ return combined
+
+
+def napi_init(cfg, nl) -> None:
+ """
+ Test that threaded state (in the persistent NAPI config) gets updated
+ even when NAPI with given ID is not allocated at the time.
+ """
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 0)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
+def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
+ """
+ Test that when napi threaded is enabled at device level and
+ then disabled at napi level for one napi, the threaded state
+ of all napis is preserved after a change in number of queues.
+ """
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ # set threaded
+ _set_threaded_state(cfg, 1)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ # disable threaded for napi1
+ nl.napi_set({'id': napi1_id, 'threaded': 'disabled'})
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_disabled(nl, napi1_id)
+
+
+def change_num_queues(cfg, nl) -> None:
+ """
+ Test that when napi threaded is enabled at device level,
+ the napi threaded state is preserved after a change in
+ number of queues.
+ """
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ ksft_ge(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ # set threaded
+ _set_threaded_state(cfg, 1)
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ # check napi threaded is set for both napis
+ _assert_napi_threaded_enabled(nl, napi0_id)
+ _assert_napi_threaded_enabled(nl, napi1_id)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, queue_count=2) as cfg:
+ ksft_run([napi_init,
+ change_num_queues,
+ enable_dev_threaded_disable_napi_threaded],
+ args=(cfg, NetdevFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index fe765da498e8..2022f3061738 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -28,25 +28,47 @@ OUTPUT_FILE="/tmp/${TARGET}"
# Check for basic system dependency and exit if not found
check_for_dependencies
-# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
-echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
-# Create one namespace and two interfaces
-set_network
-# Create a dynamic target for netconsole
-create_dynamic_target
-# Set userdata "key" with the "value" value
-set_user_data
-# Listed for netconsole port inside the namespace and destination interface
-listen_port_and_save_to "${OUTPUT_FILE}" &
-# Wait for socat to start and listen to the port.
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
-# Send the message
-echo "${MSG}: ${TARGET}" > /dev/kmsg
-# Wait until socat saves the file to disk
-busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
-
-# Make sure the message was received in the dst part
-# and exit
-validate_result "${OUTPUT_FILE}"
+
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+ for IP_VERSION in "ipv6" "ipv4"
+ do
+ echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Set current loglevel to KERN_INFO(6), and default to
+ # KERN_NOTICE(5)
+ echo "6 5" > /proc/sys/kernel/printk
+ # Create one namespace and two interfaces
+ set_network "${IP_VERSION}"
+ # Create a dynamic target for netconsole
+ create_dynamic_target "${FORMAT}"
+ # Only set userdata for extended format
+ if [ "$FORMAT" == "extended" ]
+ then
+ # Set userdata "key" with the "value" value
+ set_user_data
+ fi
+ # Listed for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+ cleanup
+ echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+ done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..d1d23dc67f99
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+# check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+ echo "Running with bind mode: ${BINDMODE}" >&2
+ # Create the command line for netconsole, with the configuration from
+ # the function above
+ CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+ # The content of kmsg will be save to the following file
+ OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+ # Load the module, with the cmdline set
+ modprobe netconsole "${CMDLINE}"
+
+ # Listed for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_msg "${OUTPUT_FILE}"
+
+ # kill socat in case it is still running
+ pkill_socat
+ # Unload the module
+ rmmod netconsole
+ echo "${BINDMODE} : Test passed" >&2
+done
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+# - Correct fragmentation of large messages
+# - Proper reassembly of fragments at the receiver
+# - Preservation of userdata across fragments
+# - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+ # header is everything before ;
+ local HEADER="${1}"
+ REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+ echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+ local MSGFILE="${1}"
+ # Extract the header, which is the very first thing that arrives in the
+ # first list.
+ HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+ HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+ # Remove the two headers from the received message
+ # This will return the message without any header, similarly to what
+ # was sent.
+ sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+# 13,468,514729715,-,ncfrag=0/1135;<message>
+# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+ # Discard the netconsole headers, and assemble the full message
+ RCVMSG=$(extract_msg "${1}")
+
+ # check for the main message
+ if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+ echo "Message body doesn't match." >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # check userdata
+ if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+ echo "message userdata doesn't match" >&2
+ echo "msg received=" "${RCVMSG}" >&2
+ exit "${ksft_fail}"
+ fi
+ # test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
index 29bad56448a2..06089643b771 100755
--- a/tools/testing/selftests/drivers/net/netcons_overflow.sh
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -15,7 +15,7 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
-MAX_USERDATA_ITEMS=16
+MAX_USERDATA_ITEMS=256
# Function to create userdata entries
function create_userdata_max_entries() {
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
new file mode 100755
index 000000000000..baf69031089e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -0,0 +1,272 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A test that makes sure that sysdata runtime CPU data is properly set
+# when a message is sent.
+#
+# There are 3 different tests, every time sent using a random CPU.
+# - Test #1
+# * Only enable cpu_nr sysdata feature.
+# - Test #2
+# * Keep cpu_nr sysdata feature enable and enable userdata.
+# - Test #3
+# * keep userdata enabled, and disable sysdata cpu_nr feature.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Enable the sysdata cpu_nr feature
+function set_cpu_nr() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]]
+ then
+ echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Enable the taskname to be appended to sysdata
+function set_taskname() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]]
+ then
+ echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+# Enable the release to be appended to sysdata
+function set_release() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]]
+ then
+ echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+ then
+ echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Disable the sysdata cpu_nr feature
+function unset_cpu_nr() {
+ echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Once called, taskname=<..> will not be appended anymore
+function unset_taskname() {
+ echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+function unset_release() {
+ echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+function unset_msgid() {
+ echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Test if MSG contains sysdata
+function validate_sysdata() {
+ # OUTPUT_FILE will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # userdatakey=userdatavalue
+ # cpu=X
+ # taskname=<taskname>
+ # msgid=<id>
+
+ # Echo is what this test uses to create the message. See runtest()
+ # function
+ SENDER="echo"
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Check if cpu=XX exists in the file and matches the one used
+ # in taskset(1)
+ if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+ pkill_socat
+}
+
+function validate_release() {
+ RELEASE=$(uname -r)
+
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=`
+# strings
+function validate_no_sysdata() {
+ if [ ! -f "$OUTPUT_FILE" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+ echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "cpu=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "taskname=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "release=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if grep -q "msgid=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ rm "${OUTPUT_FILE}"
+}
+
+# Start socat, send the message and wait for the file to show up in the file
+# system
+function runtest {
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# This test also depends on taskset(1). Check for it before starting the test
+check_for_taskset
+
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+
+#====================================================
+# TEST #1
+# Send message from a random CPU
+#====================================================
+# Random CPU in the system
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_1"
+MSG="Test #1 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+set_cpu_nr
+# Enable taskname to be appended to sysdata
+set_taskname
+set_release
+set_msgid
+runtest
+# Make sure the message was received in the dst part
+# and exit
+validate_release
+validate_sysdata
+
+#====================================================
+# TEST #2
+# This test now adds userdata together with sysdata
+# ===================================================
+# Get a new random CPU
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_2"
+MSG="Test #2 from CPU${CPU}"
+set_user_data
+runtest
+validate_release
+validate_sysdata
+
+# ===================================================
+# TEST #3
+# Unset all sysdata, fail if any userdata is set
+# ===================================================
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_3"
+MSG="Test #3 from CPU${CPU}"
+unset_cpu_nr
+unset_taskname
+unset_release
+unset_msgid
+runtest
+# At this time, cpu= shouldn't be present in the msg
+validate_no_sysdata
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh
new file mode 100755
index 000000000000..2ce9ee3719d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_torture.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Repeatedly send kernel messages, toggles netconsole targets on and off,
+# creates and deletes targets in parallel, and toggles the source interface to
+# simulate stress conditions.
+#
+# This test aims to verify the robustness of netconsole under dynamic
+# configurations and concurrent operations.
+#
+# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make
+# sure no issues is reported.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Number of times the main loop run
+ITERATIONS=${1:-150}
+
+# Only test extended format
+FORMAT="extended"
+# And ipv6 only
+IP_VERSION="ipv6"
+
+# Create, enable and delete some targets.
+create_and_delete_random_target() {
+ COUNT=2
+ RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_)
+
+ if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \
+ [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then
+ echo "Function didn't finish yet, skipping it." >&2
+ return
+ fi
+
+ # enable COUNT targets
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+
+ # Basic population so the target can come up
+ _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}"
+ done
+
+ echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg
+ # disable them all
+ for i in $(seq ${COUNT})
+ do
+ RND_TARGET="${RND_PREFIX}"${i}
+ RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+ if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]]
+ then
+ echo 0 > "${RND_TARGET_PATH}"/enabled
+ fi
+ rmdir "${RND_TARGET_PATH}"
+ done
+}
+
+# Disable and enable the target mid-air, while messages
+# are being transmitted.
+toggle_netcons_target() {
+ for i in $(seq 2)
+ do
+ if [ ! -d "${NETCONS_PATH}" ]
+ then
+ break
+ fi
+ echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ # Try to enable a bit harder, given it might fail to enable
+ # Write to `enabled` might fail depending on the lock, which is
+ # highly contentious here
+ for _ in $(seq 5)
+ do
+ echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+ done
+ done
+}
+
+toggle_iface(){
+ ip link set "${SRCIF}" down
+ ip link set "${SRCIF}" up
+}
+
+# Start here
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network "${IP_VERSION}"
+# Create a dynamic target for netconsole
+create_dynamic_target "${FORMAT}"
+
+for i in $(seq "$ITERATIONS")
+do
+ for _ in $(seq 10)
+ do
+ echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg
+ done
+ wait
+
+ if (( i % 30 == 0 )); then
+ toggle_netcons_target &
+ fi
+
+ if (( i % 50 == 0 )); then
+ # create some targets, enable them, send msg and disable
+ # all in a parallel thread
+ create_and_delete_random_target &
+ fi
+
+ if (( i % 70 == 0 )); then
+ toggle_iface &
+ fi
+done
+wait
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index 07b7c46d3311..1a228c5430f5 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = devlink.sh \
+TEST_PROGS := \
+ devlink.sh \
devlink_in_netns.sh \
devlink_trap.sh \
ethtool-coalesce.sh \
ethtool-features.sh \
ethtool-fec.sh \
ethtool-pause.sh \
- ethtool-ring.sh \
fib.sh \
fib_notifications.sh \
hw_stats_l3.sh \
@@ -17,5 +17,10 @@ TEST_PROGS = devlink.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+ ethtool-common.sh
+# end of TEST_FILES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index b5ea2526f23c..1b529ccaf050 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -3,7 +3,8 @@
lib_dir=$(dirname $0)/../../../net/forwarding
-ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+ALL_TESTS="fw_flash_test params_test \
+ params_default_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
@@ -40,6 +41,8 @@ fw_flash_test()
return
fi
+ echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
@@ -76,17 +79,28 @@ fw_flash_test()
param_get()
{
local name=$1
+ local attr=${2:-value}
+ local cmode=${3:-driverinit}
cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
- '.[][][].values[] | select(.cmode == "driverinit").value'
+ '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr"
}
param_set()
{
local name=$1
local value=$2
+ local cmode=${3:-driverinit}
- devlink dev param set $DL_HANDLE name $name cmode driverinit value $value
+ devlink dev param set $DL_HANDLE name $name cmode $cmode value $value
+}
+
+param_set_default()
+{
+ local name=$1
+ local cmode=${2:-driverinit}
+
+ devlink dev param set $DL_HANDLE name $name default cmode $cmode
}
check_value()
@@ -95,12 +109,18 @@ check_value()
local phase_name=$2
local expected_param_value=$3
local expected_debugfs_value=$4
+ local cmode=${5:-driverinit}
local value
+ local attr="value"
+
+ if [[ "$phase_name" == *"default"* ]]; then
+ attr="default"
+ fi
- value=$(param_get $name)
- check_err $? "Failed to get $name param value"
+ value=$(param_get $name $attr $cmode)
+ check_err $? "Failed to get $name param $attr"
[ "$value" == "$expected_param_value" ]
- check_err $? "Unexpected $phase_name $name param value"
+ check_err $? "Unexpected $phase_name $name param $attr"
value=$(<$DEBUGFS_DIR/$name)
check_err $? "Failed to get $name debugfs value"
[ "$value" == "$expected_debugfs_value" ]
@@ -133,6 +153,92 @@ params_test()
log_test "params test"
}
+value_to_debugfs()
+{
+ local value=$1
+
+ case "$value" in
+ true)
+ echo "Y"
+ ;;
+ false)
+ echo "N"
+ ;;
+ *)
+ echo "$value"
+ ;;
+ esac
+}
+
+test_default()
+{
+ local param_name=$1
+ local new_value=$2
+ local expected_default=$3
+ local cmode=${4:-driverinit}
+ local default_debugfs
+ local new_debugfs
+ local expected_debugfs
+
+ default_debugfs=$(value_to_debugfs $expected_default)
+ new_debugfs=$(value_to_debugfs $new_value)
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name initial-default $expected_default $expected_debugfs $cmode
+
+ param_set $param_name $new_value $cmode
+ check_err $? "Failed to set $param_name to $new_value"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs")
+ check_value $param_name post-set $new_value $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$new_debugfs
+ check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode
+
+ param_set_default $param_name $cmode
+ check_err $? "Failed to set $param_name to default"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs")
+ check_value $param_name post-set-default $expected_default $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode
+}
+
+params_default_test()
+{
+ RET=0
+
+ if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then
+ echo "SKIP: devlink cli missing default feature"
+ return
+ fi
+
+ # Remove side effects of previous tests. Use plain param_set, because
+ # param_set_default is a feature under test here.
+ param_set max_macs 32 driverinit
+ check_err $? "Failed to reset max_macs to default value"
+ param_set test1 true driverinit
+ check_err $? "Failed to reset test1 to default value"
+ param_set test2 1234 runtime
+ check_err $? "Failed to reset test2 to default value"
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device for clean state"
+
+ test_default max_macs 16 32 driverinit
+ test_default test1 false true driverinit
+ test_default test2 100 1234 runtime
+
+ log_test "params default test"
+}
+
check_region_size()
{
local name=$1
@@ -608,6 +714,46 @@ rate_attr_parent_check()
check_err $? "Unexpected parent attr value $api_value != $parent"
}
+rate_attr_tc_bw_check()
+{
+ local handle=$1
+ local tc_bw=$2
+ local debug_file=$3
+
+ local tc_bw_str=""
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ tc_bw_str="$tc_bw_str $tc:$value"
+ done
+ tc_bw_str=${tc_bw_str# }
+
+ rate_attr_set "$handle" tc-bw "$tc_bw_str"
+ check_err $? "Failed to set tc-bw values"
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ local debug_value
+ debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+ check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+ [ "$debug_value" == "$value" ]
+ check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+ done
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local expected_value=${bw##*:}
+ local api_value
+ api_value=$(rate_attr_get "$handle" tc_"$tc")
+ if [ "$api_value" = "null" ]; then
+ api_value=0
+ fi
+ [ "$api_value" == "$expected_value" ]
+ check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+ done
+}
+
rate_node_add()
{
local handle=$1
@@ -649,6 +795,13 @@ rate_test()
rate=$(($rate+100))
done
+ local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+ for r_obj in $leafs
+ do
+ rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+ "$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+ done
+
local node1_name='group1'
local node1="$DL_HANDLE/$node1_name"
rate_node_add "$node1"
@@ -666,6 +819,12 @@ rate_test()
rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+ rate_attr_tc_bw_check $node1 "$tc_bw" \
+ "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
rate_node_del "$node1"
check_err $? "Failed to delete node $node1"
local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
deleted file mode 100755
index c969559ffa7a..000000000000
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-
-source ethtool-common.sh
-
-function get_value {
- local query="${SETTINGS_MAP[$1]}"
-
- echo $(ethtool -g $NSIM_NETDEV | \
- tail -n +$CURR_SETT_LINE | \
- awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
-}
-
-function update_current_settings {
- for key in ${!SETTINGS_MAP[@]}; do
- CURRENT_SETTINGS[$key]=$(get_value $key)
- done
- echo ${CURRENT_SETTINGS[@]}
-}
-
-if ! ethtool -h | grep -q set-ring >/dev/null; then
- echo "SKIP: No --set-ring support in ethtool"
- exit 4
-fi
-
-NSIM_NETDEV=$(make_netdev)
-
-set -o pipefail
-
-declare -A SETTINGS_MAP=(
- ["rx"]="RX"
- ["rx-mini"]="RX Mini"
- ["rx-jumbo"]="RX Jumbo"
- ["tx"]="TX"
-)
-
-declare -A EXPECTED_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-declare -A CURRENT_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-MAX_VALUE=$((RANDOM % $((2**32-1))))
-RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
-
-for ring_max_entry in $RING_MAX_LIST; do
- echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
-done
-
-CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
-
-# populate the expected settings map
-for key in ${!SETTINGS_MAP[@]}; do
- EXPECTED_SETTINGS[$key]=$(get_value $key)
-done
-
-# test
-for key in ${!SETTINGS_MAP[@]}; do
- value=$((RANDOM % $MAX_VALUE))
-
- ethtool -G $NSIM_NETDEV "$key" "$value"
-
- EXPECTED_SETTINGS[$key]="$value"
- expected=${EXPECTED_SETTINGS[@]}
- current=$(update_current_settings)
-
- check $? "$current" "$expected"
- set +x
-done
-
-if [ $num_errors -eq 0 ]; then
- echo "PASSED all $((num_passes)) checks"
- exit 0
-else
- echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
- exit 1
-fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index e8e0dc088d6a..01d0c044a5fc 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -1053,6 +1053,6 @@ trap cleanup EXIT
setup_prepare
-tests_run
+xfail_on_slow tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index aed62d9e6c0a..7f32b5600925 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
-source ../../../net/net_helper.sh
+lib_dir=$(dirname $0)/../../../net
+source $lib_dir/lib.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 92c2f0376c08..4c859ecdad94 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -266,7 +266,6 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
@@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
-echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
-new_vxlan vxlan0 10000 $NSIM_NETDEV
-
-modprobe -r vxlan
-modprobe -r udp_tunnel
-
-msg="remove tunnels"
-exp0=( 0 0 0 0 )
-check_tables
-
-msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
+exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
-echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <leitao@debian.org>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+ bpftrace,
+ CmdExitFailure,
+ defer,
+ ethtool,
+ GenerateTraffic,
+ ksft_exit,
+ ksft_pr,
+ ksft_run,
+ KsftFailEx,
+ KsftSkipEx,
+ NetDrvEpEnv,
+ KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will setup
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace it will have only one
+# key: "hits", which tells the number of times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+ """
+ Read the ringsize using ethtool. This will be used to restore it after the test
+ """
+ try:
+ ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+ rxs = ethtool_result["rx"]
+ txs = ethtool_result["tx"]
+ except (KeyError, IndexError) as exception:
+ raise KsftSkipEx(
+ f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+ """Try to the number of RX and TX ringsize."""
+ rxs = ring_size[0]
+ txs = ring_size[1]
+
+ logging.debug("Setting ring size to %d/%d", rxs, txs)
+ try:
+ ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+ except CmdExitFailure:
+ # This might fail on real device, retry with a higher value,
+ # worst case, keep it as it is.
+ return False
+
+ return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+ """Read the number of RX, TX and combined queues using ethtool"""
+
+ try:
+ ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+ rxq = ethtool_result.get("rx", -1)
+ txq = ethtool_result.get("tx", -1)
+ combined = ethtool_result.get("combined", -1)
+
+ except IndexError as exception:
+ raise KsftSkipEx(
+ f"Failed to read queues numbers: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+ """Set the number of RX, TX and combined queues using ethtool"""
+ rxq, txq, combined = queues
+
+ cmdline = f"-L {interface_name}"
+
+ if rxq != -1:
+ cmdline += f" rx {rxq}"
+ if txq != -1:
+ cmdline += f" tx {txq}"
+ if combined != -1:
+ cmdline += f" combined {combined}"
+
+ logging.debug("calling: ethtool %s", cmdline)
+
+ try:
+ ethtool(cmdline)
+ except CmdExitFailure as exception:
+ raise KsftSkipEx(
+ f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+ ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+ """Generate a random target name starting with 'netcons'"""
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+ config_data: dict[str, str],
+ target_name: str,
+) -> None:
+ """Create a netconsole dynamic target against the interfaces"""
+ logging.debug("Using netconsole name: %s", target_name)
+ try:
+ os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+ logging.debug(
+ "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+ )
+ except OSError as exception:
+ if exception.errno != errno.EEXIST:
+ raise KsftFailEx(
+ f"Failed to create netconsole target directory: {exception}"
+ ) from exception
+
+ try:
+ for key, value in config_data.items():
+ path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+ logging.debug("Writing %s to %s", key, path)
+ with open(path, "w", encoding="utf-8") as file:
+ # Always convert to string to write to file
+ file.write(str(value))
+
+ # Read all configuration values for debugging purposes
+ for debug_key in config_data.keys():
+ with open(
+ f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+ "r",
+ encoding="utf-8",
+ ) as file:
+ content = file.read()
+ logging.debug(
+ "%s/%s/%s : %s",
+ NETCONSOLE_CONFIGFS_PATH,
+ target_name,
+ debug_key,
+ content.strip(),
+ )
+
+ except Exception as exception:
+ raise KsftFailEx(
+ f"Failed to configure netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_configure_target(
+ cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+ """Configure netconsole on the interface with the given target name"""
+ config_data = {
+ "extended": "1",
+ "dev_name": interface_name,
+ "local_port": NETCONS_LOCAL_PORT,
+ "remote_port": NETCONS_REMOTE_PORT,
+ "local_ip": cfg.addr,
+ "remote_ip": cfg.remote_addr,
+ "remote_mac": "00:00:00:00:00:00", # Not important for this test
+ "enabled": "1",
+ }
+
+ netcons_create_target(config_data, target_name)
+ logging.debug(
+ "Created netconsole target: %s on interface %s", target_name, interface_name
+ )
+
+
+def netcons_delete_target(name: str) -> None:
+ """Delete a netconsole dynamic target"""
+ target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+ try:
+ if os.path.exists(target_path):
+ os.rmdir(target_path)
+ except OSError as exception:
+ raise KsftFailEx(
+ f"Failed to delete netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_load_module() -> None:
+ """Try to load the netconsole module"""
+ os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+ """Call bpftrace to find how many times netpoll_poll_dev() is called.
+ Output is saved in the global variable `maps`"""
+
+ # This is going to update the global variable, that will be seen by the
+ # main function
+ global MAPS # pylint: disable=W0603
+
+ # This will be passed to bpftrace as in bpftrace -e "expr"
+ expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+ MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+ logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+ """Start a thread to call `call_bpf` in a parallel thread"""
+ global BPF_THREAD # pylint: disable=W0603
+
+ BPF_THREAD = threading.Thread(target=bpftrace_call)
+ BPF_THREAD.start()
+ if not BPF_THREAD.is_alive():
+ raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+ """Stop the bpftrace thread"""
+ if BPF_THREAD:
+ BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+ """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+ if not BPF_THREAD:
+ raise KsftFailEx("BPFtrace didn't start")
+
+ if BPF_THREAD.is_alive():
+ if join:
+ # Wait for bpftrace to finish
+ BPF_THREAD.join()
+ else:
+ # bpftrace is still running, so, we will not check the result yet
+ return False
+
+ logging.debug("MAPS coming from bpftrace = %s", MAPS)
+ if "hits" not in MAPS.keys():
+ raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+ logging.debug("Got a total of %d hits", MAPS["hits"])
+ return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ # Start bpftrace in parallel, so, it is watching
+ # netpoll_poll_dev() while we are sending netconsole messages
+ bpftrace_start()
+ defer(bpftrace_stop)
+
+ do_netpoll_flush(cfg, ifname, target_name)
+
+ if bpftrace_any_hit(join=True):
+ ksft_pr("netpoll_poll_dev() was called. Success")
+ return
+
+ raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ netcons_configure_target(cfg, ifname, target_name)
+ retry = 0
+
+ for i in range(int(ITERATIONS)):
+ if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+ # bpftrace is done, stop sending messages
+ break
+
+ msg = f"netcons test #{i}"
+ with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+ for j in range(MAX_WRITES):
+ try:
+ kmsg.write(f"{msg}-{j}\n")
+ except OSError as exception:
+ # in some cases, kmsg can be busy, so, we will retry
+ time.sleep(1)
+ retry += 1
+ if retry < 5:
+ logging.info("Failed to write to kmsg. Retrying")
+ # Just retry a few times
+ continue
+ raise KsftFailEx(
+ f"Failed to write to kmsg: {exception}"
+ ) from exception
+
+ netcons_delete_target(target_name)
+ netcons_configure_target(cfg, ifname, target_name)
+ # If we sleep here, we will have a better chance of triggering
+ # This number is based on a few tests I ran while developing this test
+ time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+ """Configure ring size and queue numbers"""
+
+ # Set defined queues to 1 to force congestion
+ prev_queues = ethtool_get_queues_cnt(ifname)
+ logging.debug("RX/TX/combined queues: %s", prev_queues)
+ # Only set the queues to 1 if they exists in the device. I.e, they are > 0
+ ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+ defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+ # Try to set the ring size to some low value.
+ # Do not fail if the hardware do not accepted desired values
+ prev_ring_size = ethtool_get_ringsize(ifname)
+ for size in [(1, 1), (128, 128), (256, 256)]:
+ if ethtool_set_ringsize(ifname, size):
+ # hardware accepted the desired ringsize
+ logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+ break
+ defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+ """
+ Test netpoll by sending traffic to the interface and then sending
+ netconsole messages to trigger a poll
+ """
+
+ ifname = cfg.ifname
+ configure_network(ifname)
+ target_name = netcons_generate_random_target_name()
+ traffic = None
+
+ try:
+ traffic = GenerateTraffic(cfg)
+ do_netpoll_flush_monitored(cfg, ifname, target_name)
+ finally:
+ if traffic:
+ traffic.stop()
+
+ # Revert RX/TX queues
+ netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+ """Check if the dependencies are met"""
+ if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+ raise KsftSkipEx(
+ f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301
+ )
+
+
+def main() -> None:
+ """Main function to run the test"""
+ netcons_load_module()
+ test_check_dependencies()
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(
+ [test_netpoll],
+ args=(cfg,),
+ )
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
index bed748dde4b0..8972f42dfe03 100755
--- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -266,18 +266,14 @@ run_test()
"${base_time}" \
"${CYCLE_TIME_NS}" \
"${SHIFT_TIME_NS}" \
+ "${GATE_DURATION_NS}" \
"${NUM_PKTS}" \
"${STREAM_VID}" \
"${STREAM_PRIO}" \
"" \
"${isochron_dat}"
- # Count all received packets by looking at the non-zero RX timestamps
- received=$(isochron report \
- --input-file "${isochron_dat}" \
- --printf-format "%u\n" --printf-args "R" | \
- grep -w -v '0' | wc -l)
-
+ received=$(isochron_report_num_received "${isochron_dat}")
if [ "${received}" = "${expected}" ]; then
RET=0
else
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..da3623c5e8a9 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -1,49 +1,239 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import os
+import random, string, time
from lib.py import ksft_run, ksft_exit
-from lib.py import ksft_eq
-from lib.py import NetDrvEpEnv
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
+no_sleep=False
-def test_v4(cfg) -> None:
- cfg.require_v4()
+def _test_v4(cfg) -> None:
+ if not cfg.addr_v["4"]:
+ return
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
- cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+ cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+def _test_v6(cfg) -> None:
+ if not cfg.addr_v["6"]:
+ return
-def test_v6(cfg) -> None:
- cfg.require_v6()
+ cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
+ cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"])
+ cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
- cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-
-
-def test_tcp(cfg) -> None:
- cfg.require_cmd("socat", remote=True)
+def _test_tcp(cfg) -> None:
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, exit_wait=True) as nc:
wait_port_listen(port)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
shell=True, host=cfg.remote)
- ksft_eq(nc.stdout.strip(), "ping")
+ ksft_eq(nc.stdout.strip(), test_string)
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
- ksft_eq(nc.stdout.strip(), "ping")
-
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+def _schedule_checksum_reset(cfg, netnl) -> None:
+ features = ethtool(f"-k {cfg.ifname}", json=True)
+ setting = ""
+ for side in ["tx", "rx"]:
+ f = features[0][side + "-checksumming"]
+ if not f["fixed"]:
+ setting += " " + side
+ setting += " " + ("on" if f["requested"] or f["active"] else "off")
+ defer(ethtool, f" -K {cfg.ifname} " + setting)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+ try:
+ ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+ except:
+ return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+ defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+ ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if xdp_info['xdp']['mode'] != 1:
+ """
+ If the interface doesn't support native-mode, it falls back to generic mode.
+ The mode value 1 is native and 2 is generic.
+ So it raises an exception if mode is not 1(native mode).
+ """
+ raise KsftSkipEx('device does not support native-XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ except Exception as e:
+ raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+ except Exception as e:
+ raise KsftSkipEx('device does not support offloaded XDP')
+ defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+ global no_sleep
+
+ if cfg.remote_ifname == "":
+ raise KsftFailEx('Can not get remote interface')
+ local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+ no_sleep=True
+ if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+ no_sleep=True
+
+def set_interface_init(cfg) -> None:
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default_v4(cfg, netnl) -> None:
+ cfg.require_ipver("4")
+
+ _schedule_checksum_reset(cfg, netnl)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_tcp(cfg)
+
+def test_default_v6(cfg, netnl) -> None:
+ cfg.require_ipver("6")
+
+ _schedule_checksum_reset(cfg, netnl)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_generic_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_generic_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_native_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
+ _set_xdp_native_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+ _set_xdp_offload_on(cfg)
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ get_interface_info(cfg)
+ set_interface_init(cfg)
+ ksft_run([test_default_v4,
+ test_default_v6,
+ test_xdp_generic_sb,
+ test_xdp_generic_mb,
+ test_xdp_native_sb,
+ test_xdp_native_mb,
+ test_xdp_offload],
+ args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
new file mode 100755
index 000000000000..06559ef49b9a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -0,0 +1,640 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Test suite for PSP capable drivers."""
+
+import errno
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import defer
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises
+from lib.py import ksft_not_none
+from lib.py import KsftSkipEx
+from lib.py import NetDrvEpEnv, PSPFamily, NlError
+from lib.py import bkg, rand_port, wait_port_listen
+
+
+def _get_outq(s):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one)
+ return struct.unpack("I", outq)[0]
+
+
+def _send_with_ack(cfg, msg):
+ cfg.comm_sock.send(msg)
+ response = cfg.comm_sock.recv(4)
+ if response != b'ack\0':
+ raise RuntimeError("Unexpected server response", response)
+
+
+def _remote_read_len(cfg):
+ cfg.comm_sock.send(b'read len\0')
+ return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+
+
+def _make_clr_conn(cfg, ipver=None):
+ _send_with_ack(cfg, b'conn clr\0')
+ remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+ s = socket.create_connection((remote_addr, cfg.comm_port), )
+ return s
+
+
+def _make_psp_conn(cfg, version=0, ipver=None):
+ _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version))
+ remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+ s = socket.create_connection((remote_addr, cfg.comm_port), )
+ return s
+
+
+def _close_conn(cfg, s):
+ _send_with_ack(cfg, b'data close\0')
+ s.close()
+
+
+def _close_psp_conn(cfg, s):
+ _close_conn(cfg, s)
+
+
+def _spi_xchg(s, rx):
+ s.send(struct.pack('I', rx['spi']) + rx['key'])
+ tx = s.recv(4 + len(rx['key']))
+ return {
+ 'spi': struct.unpack('I', tx[:4])[0],
+ 'key': tx[4:]
+ }
+
+
+def _send_careful(cfg, s, rounds):
+ data = b'0123456789' * 200
+ for i in range(rounds):
+ n = 0
+ for _ in range(10): # allow 10 retries
+ try:
+ n += s.send(data[n:], socket.MSG_DONTWAIT)
+ if n == len(data):
+ break
+ except BlockingIOError:
+ time.sleep(0.05)
+ else:
+ rlen = _remote_read_len(cfg)
+ outq = _get_outq(s)
+ report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}'
+ raise RuntimeError(report)
+
+ return len(data) * rounds
+
+
+def _check_data_rx(cfg, exp_len):
+ read_len = -1
+ for _ in range(30):
+ cfg.comm_sock.send(b'read len\0')
+ read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+ if read_len == exp_len:
+ break
+ time.sleep(0.01)
+ ksft_eq(read_len, exp_len)
+
+
+def _check_data_outq(s, exp_len, force_wait=False):
+ outq = 0
+ for _ in range(10):
+ outq = _get_outq(s)
+ if not force_wait and outq == exp_len:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, exp_len)
+
+
+def _get_stat(cfg, key):
+ return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key]
+
+#
+# Test case boiler plate
+#
+
+def _init_psp_dev(cfg):
+ if not hasattr(cfg, 'psp_dev_id'):
+ # Figure out which local device we are testing against
+ for dev in cfg.pspnl.dev_get({}, dump=True):
+ if dev['ifindex'] == cfg.ifindex:
+ cfg.psp_info = dev
+ cfg.psp_dev_id = cfg.psp_info['id']
+ break
+ else:
+ raise KsftSkipEx("No PSP devices found")
+
+ # Enable PSP if necessary
+ cap = cfg.psp_info['psp-versions-cap']
+ ena = cfg.psp_info['psp-versions-ena']
+ if cap != ena:
+ cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap})
+ defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id,
+ 'psp-versions-ena': ena })
+
+#
+# Test cases
+#
+
+def dev_list_devices(cfg):
+ """ Dump all devices """
+ _init_psp_dev(cfg)
+
+ devices = cfg.pspnl.dev_get({}, dump=True)
+
+ found = False
+ for dev in devices:
+ found |= dev['id'] == cfg.psp_dev_id
+ ksft_true(found)
+
+
+def dev_get_device(cfg):
+ """ Get the device we intend to use """
+ _init_psp_dev(cfg)
+
+ dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id})
+ ksft_eq(dev['id'], cfg.psp_dev_id)
+
+
+def dev_get_device_bad(cfg):
+ """ Test getting device which doesn't exist """
+ raised = False
+ try:
+ cfg.pspnl.dev_get({'id': 1234567})
+ except NlError as e:
+ ksft_eq(e.nl_msg.error, -errno.ENODEV)
+ raised = True
+ ksft_true(raised)
+
+
+def dev_rotate(cfg):
+ """ Test key rotation """
+ _init_psp_dev(cfg)
+
+ prev_rotations = _get_stat(cfg, 'key-rotations')
+
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+
+ cur_rotations = _get_stat(cfg, 'key-rotations')
+ ksft_eq(cur_rotations, prev_rotations + 2)
+
+
+def dev_rotate_spi(cfg):
+ """ Test key rotation and SPI check """
+ _init_psp_dev(cfg)
+
+ top_a = top_b = 0
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc_a = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ top_a = assoc_a['rx-key']['spi'] >> 31
+ s.close()
+ rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ ksft_eq(rot['id'], cfg.psp_dev_id)
+ assoc_b = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ top_b = assoc_b['rx-key']['spi'] >> 31
+ s.close()
+ ksft_ne(top_a, top_b)
+
+
+def assoc_basic(cfg):
+ """ Test creating associations """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ ksft_gt(assoc['rx-key']['spi'], 0)
+ ksft_eq(len(assoc['rx-key']['key']), 16)
+
+ assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ ksft_eq(len(assoc), 0)
+ s.close()
+
+
+def assoc_bad_dev(cfg):
+ """ Test creating associations with bad device ID """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
+
+
+def assoc_sk_only_conn(cfg):
+ """ Test creating associations based on socket """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ _close_conn(cfg, s)
+
+
+def assoc_sk_only_mismatch(cfg):
+ """ Test creating associations based on socket (dev mismatch) """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_mismatch_tx(cfg):
+ """ Test creating associations based on socket (dev mismatch) """
+ _init_psp_dev(cfg)
+
+ with _make_clr_conn(cfg) as s:
+ with ksft_raises(NlError) as cm:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": assoc['rx-key'],
+ "dev-id": cfg.psp_dev_id + 1234567,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_unconn(cfg):
+ """ Test creating associations based on socket (unconnected, should fail) """
+ _init_psp_dev(cfg)
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id")
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_version_mismatch(cfg):
+ """ Test creating associations where Rx and Tx PSP versions do not match """
+ _init_psp_dev(cfg)
+
+ versions = list(cfg.psp_info['psp-versions-cap'])
+ if len(versions) < 2:
+ raise KsftSkipEx("Not enough PSP versions supported by the device for the test")
+
+ # Translate versions to integers
+ versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions]
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ rx = cfg.pspnl.rx_assoc({"version": versions[0],
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+
+ for version in versions[1:]:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": rx['rx-key'],
+ "sock-fd": s.fileno()})
+ the_exception = cm.exception
+ ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_twice(cfg):
+ """ Test reusing Tx assoc for two sockets """
+ _init_psp_dev(cfg)
+
+ def rx_assoc_check(s):
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+ ksft_gt(assoc['rx-key']['spi'], 0)
+ ksft_eq(len(assoc['rx-key']['key']), 16)
+
+ return assoc
+
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ assoc = rx_assoc_check(s)
+ tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ ksft_eq(len(tx), 0)
+
+ # Use the same Tx assoc second time
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2:
+ rx_assoc_check(s2)
+ tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": assoc['rx-key'],
+ "sock-fd": s2.fileno()})
+ ksft_eq(len(tx), 0)
+
+ s.close()
+
+
+def _data_basic_send(cfg, version, ipver):
+ """ Test basic data send """
+ _init_psp_dev(cfg)
+
+ # Version 0 is required by spec, don't let it skip
+ if version:
+ name = cfg.pspnl.consts["version"].entries_by_val[version].name
+ if name not in cfg.psp_info['psp-versions-cap']:
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ with ksft_raises(NlError) as cm:
+ cfg.pspnl.rx_assoc({"version": version,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
+ raise KsftSkipEx("PSP version not supported", name)
+
+ s = _make_psp_conn(cfg, version, ipver)
+
+ rx_assoc = cfg.pspnl.rx_assoc({"version": version,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _close_psp_conn(cfg, s)
+
+
+def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'):
+ # Make sure we accept the ACK for the SPI before we seal with the bad assoc
+ _check_data_outq(s, 0)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": version,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 20)
+ _check_data_outq(s, data_len, force_wait=True)
+ _check_data_rx(cfg, 0)
+ _close_psp_conn(cfg, s)
+
+
+def data_send_bad_key(cfg):
+ """ Test send data with bad key """
+ _init_psp_dev(cfg)
+
+ s = _make_psp_conn(cfg)
+
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+ tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:]
+ __bad_xfer_do(cfg, s, tx)
+
+
+def data_send_disconnect(cfg):
+ """ Test socket close after sending data """
+ _init_psp_dev(cfg)
+
+ with _make_psp_conn(cfg) as s:
+ assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "sock-fd": s.fileno()})
+ tx = _spi_xchg(s, assoc['rx-key'])
+ cfg.pspnl.tx_assoc({"version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+
+ s.shutdown(socket.SHUT_RDWR)
+ s.close()
+
+
+def _data_mss_adjust(cfg, ipver):
+ _init_psp_dev(cfg)
+
+ # First figure out what the MSS would be without any adjustments
+ s = _make_clr_conn(cfg, ipver)
+ s.send(b"0123456789abcdef" * 1024)
+ _check_data_rx(cfg, 16 * 1024)
+ mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ _close_conn(cfg, s)
+
+ s = _make_psp_conn(cfg, 0, ipver)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, rxmss)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, txmss + 40)
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _check_data_outq(s, 0)
+
+ txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+ ksft_eq(mss, txmss + 40)
+ finally:
+ _close_psp_conn(cfg, s)
+
+
+def data_stale_key(cfg):
+ """ Test send on a double-rotated key """
+ _init_psp_dev(cfg)
+
+ prev_stale = _get_stat(cfg, 'stale-events')
+ s = _make_psp_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ rx = rx_assoc['rx-key']
+ tx = _spi_xchg(s, rx)
+
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": tx,
+ "sock-fd": s.fileno()})
+
+ data_len = _send_careful(cfg, s, 100)
+ _check_data_rx(cfg, data_len)
+ _check_data_outq(s, 0)
+
+ cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+
+ cur_stale = _get_stat(cfg, 'stale-events')
+ ksft_gt(cur_stale, prev_stale)
+
+ s.send(b'0123456789' * 200)
+ _check_data_outq(s, 2000, force_wait=True)
+ finally:
+ _close_psp_conn(cfg, s)
+
+
+def __nsim_psp_rereg(cfg):
+ # The PSP dev ID will change, remember what was there before
+ before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+ cfg._ns.nsims[0].dfs_write('psp_rereg', '1')
+
+ after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+ new_devs = list(after - before)
+ ksft_eq(len(new_devs), 1)
+ cfg.psp_dev_id = list(after - before)[0]
+
+
+def removal_device_rx(cfg):
+ """ Test removing a netdev / PSD with active Rx assoc """
+
+ # We could technically devlink reload real devices, too
+ # but that kills the control socket. So test this on
+ # netdevsim only for now
+ cfg.require_nsim()
+
+ s = _make_clr_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ ksft_not_none(rx_assoc)
+
+ __nsim_psp_rereg(cfg)
+ finally:
+ _close_conn(cfg, s)
+
+
+def removal_device_bi(cfg):
+ """ Test removing a netdev / PSD with active Rx/Tx assoc """
+
+ # We could technically devlink reload real devices, too
+ # but that kills the control socket. So test this on
+ # netdevsim only for now
+ cfg.require_nsim()
+
+ s = _make_clr_conn(cfg)
+ try:
+ rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+ "dev-id": cfg.psp_dev_id,
+ "sock-fd": s.fileno()})
+ cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+ "version": 0,
+ "tx-key": rx_assoc['rx-key'],
+ "sock-fd": s.fileno()})
+ __nsim_psp_rereg(cfg)
+ finally:
+ _close_conn(cfg, s)
+
+
+def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver):
+ """Build test cases for each combo of PSP version and IP version"""
+ def test_case(cfg):
+ cfg.require_ipver(ipver)
+ test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}"
+ test_func(cfg, psp_ver, ipver)
+ return test_case
+
+
+def ipver_test_builder(name, test_func, ipver):
+ """Build test cases for each IP version"""
+ def test_case(cfg):
+ cfg.require_ipver(ipver)
+ test_case.__name__ = f"{name}_ip{ipver}"
+ test_func(cfg, ipver)
+ return test_case
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.pspnl = PSPFamily()
+
+ # Set up responder and communication sock
+ responder = cfg.remote.deploy("psp_responder")
+
+ cfg.comm_port = rand_port()
+ srv = None
+ try:
+ with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote,
+ exit_wait=True) as srv:
+ wait_port_listen(cfg.comm_port, host=cfg.remote)
+
+ cfg.comm_sock = socket.create_connection((cfg.remote_addr,
+ cfg.comm_port),
+ timeout=1)
+
+ cases = [
+ psp_ip_ver_test_builder(
+ "data_basic_send", _data_basic_send, version, ipver
+ )
+ for version in range(0, 4)
+ for ipver in ("4", "6")
+ ]
+ cases += [
+ ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver)
+ for ipver in ("4", "6")
+ ]
+
+ ksft_run(cases=cases, globs=globals(),
+ case_pfx={"dev_", "data_", "assoc_", "removal_"},
+ args=(cfg, ))
+
+ cfg.comm_sock.send(b"exit\0")
+ cfg.comm_sock.close()
+ finally:
+ if srv and (srv.stdout or srv.stderr):
+ ksft_pr("")
+ ksft_pr(f"Responder logs ({srv.ret}):")
+ if srv and srv.stdout:
+ ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# "))
+ if srv and srv.stderr:
+ ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# "))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c
new file mode 100644
index 000000000000..f309e0d73cbf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp_responder.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <unistd.h>
+
+#include <ynl.h>
+
+#include "psp-user.h"
+
+#define dbg(msg...) \
+do { \
+ if (opts->verbose) \
+ fprintf(stderr, "DEBUG: " msg); \
+} while (0)
+
+static bool should_quit;
+
+struct opts {
+ int port;
+ int devid;
+ bool verbose;
+};
+
+enum accept_cfg {
+ ACCEPT_CFG_NONE = 0,
+ ACCEPT_CFG_CLEAR,
+ ACCEPT_CFG_PSP,
+};
+
+static struct {
+ unsigned char tx;
+ unsigned char rx;
+} psp_vers;
+
+static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock)
+{
+ struct psp_rx_assoc_rsp *rsp;
+ struct psp_rx_assoc_req *req;
+ struct psp_tx_assoc_rsp *tsp;
+ struct psp_tx_assoc_req *teq;
+ char info[300];
+ int key_len;
+ ssize_t sz;
+ __u32 spi;
+
+ dbg("create PSP connection\n");
+
+ // Rx assoc alloc
+ req = psp_rx_assoc_req_alloc();
+
+ psp_rx_assoc_req_set_sock_fd(req, data_sock);
+ psp_rx_assoc_req_set_version(req, psp_vers.rx);
+
+ rsp = psp_rx_assoc(ys, req);
+ psp_rx_assoc_req_free(req);
+
+ if (!rsp) {
+ perror("ERROR: failed to Rx assoc");
+ return -1;
+ }
+
+ // SPI exchange
+ key_len = rsp->rx_key._len.key;
+ memcpy(info, &rsp->rx_key.spi, sizeof(spi));
+ memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len);
+ sz = sizeof(spi) + key_len;
+
+ send(data_sock, info, sz, MSG_WAITALL);
+ psp_rx_assoc_rsp_free(rsp);
+
+ sz = recv(data_sock, info, sz, MSG_WAITALL);
+ if (sz < 0) {
+ perror("ERROR: failed to read PSP key from sock");
+ return -1;
+ }
+ memcpy(&spi, info, sizeof(spi));
+
+ // Setup Tx assoc
+ teq = psp_tx_assoc_req_alloc();
+
+ psp_tx_assoc_req_set_sock_fd(teq, data_sock);
+ psp_tx_assoc_req_set_version(teq, psp_vers.tx);
+ psp_tx_assoc_req_set_tx_key_spi(teq, spi);
+ psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len);
+
+ tsp = psp_tx_assoc(ys, teq);
+ psp_tx_assoc_req_free(teq);
+ if (!tsp) {
+ perror("ERROR: failed to Tx assoc");
+ return -1;
+ }
+ psp_tx_assoc_rsp_free(tsp);
+
+ return 0;
+}
+
+static void send_ack(int sock)
+{
+ send(sock, "ack", 4, MSG_WAITALL);
+}
+
+static void send_err(int sock)
+{
+ send(sock, "err", 4, MSG_WAITALL);
+}
+
+static void send_str(int sock, int value)
+{
+ char buf[128];
+ int ret;
+
+ ret = snprintf(buf, sizeof(buf), "%d", value);
+ send(sock, buf, ret + 1, MSG_WAITALL);
+}
+
+static void
+run_session(struct ynl_sock *ys, struct opts *opts,
+ int server_sock, int comm_sock)
+{
+ enum accept_cfg accept_cfg = ACCEPT_CFG_NONE;
+ struct pollfd pfds[3];
+ size_t data_read = 0;
+ int data_sock = -1;
+
+ while (true) {
+ bool race_close = false;
+ int nfds;
+
+ memset(pfds, 0, sizeof(pfds));
+
+ pfds[0].fd = server_sock;
+ pfds[0].events = POLLIN;
+
+ pfds[1].fd = comm_sock;
+ pfds[1].events = POLLIN;
+
+ nfds = 2;
+ if (data_sock >= 0) {
+ pfds[2].fd = data_sock;
+ pfds[2].events = POLLIN;
+ nfds++;
+ }
+
+ dbg(" ...\n");
+ if (poll(pfds, nfds, -1) < 0) {
+ perror("poll");
+ break;
+ }
+
+ /* data sock */
+ if (pfds[2].revents & POLLIN) {
+ char buf[8192];
+ ssize_t n;
+
+ n = recv(data_sock, buf, sizeof(buf), 0);
+ if (n <= 0) {
+ if (n < 0)
+ perror("data read");
+ close(data_sock);
+ data_sock = -1;
+ dbg("data sock closed\n");
+ } else {
+ data_read += n;
+ dbg("data read %zd\n", data_read);
+ }
+ }
+
+ /* comm sock */
+ if (pfds[1].revents & POLLIN) {
+ static char buf[4096];
+ static ssize_t off;
+ bool consumed;
+ ssize_t n;
+
+ n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0);
+ if (n <= 0) {
+ if (n < 0)
+ perror("comm read");
+ return;
+ }
+
+ off += n;
+ n = off;
+
+#define __consume(sz) \
+ ({ \
+ if (n == (sz)) { \
+ off = 0; \
+ } else { \
+ off -= (sz); \
+ memmove(buf, &buf[(sz)], off); \
+ } \
+ })
+
+#define cmd(_name) \
+ ({ \
+ ssize_t sz = sizeof(_name); \
+ bool match = n >= sz && !memcmp(buf, _name, sz); \
+ \
+ if (match) { \
+ dbg("command: " _name "\n"); \
+ __consume(sz); \
+ } \
+ consumed |= match; \
+ match; \
+ })
+
+ do {
+ consumed = false;
+
+ if (cmd("read len"))
+ send_str(comm_sock, data_read);
+
+ if (cmd("data echo")) {
+ if (data_sock >= 0)
+ send(data_sock, "echo", 5,
+ MSG_WAITALL);
+ else
+ fprintf(stderr, "WARN: echo but no data sock\n");
+ send_ack(comm_sock);
+ }
+ if (cmd("data close")) {
+ if (data_sock >= 0) {
+ close(data_sock);
+ data_sock = -1;
+ send_ack(comm_sock);
+ } else {
+ race_close = true;
+ }
+ }
+ if (cmd("conn psp")) {
+ if (accept_cfg != ACCEPT_CFG_NONE)
+ fprintf(stderr, "WARN: old conn config still set!\n");
+ accept_cfg = ACCEPT_CFG_PSP;
+ send_ack(comm_sock);
+ /* next two bytes are versions */
+ if (off >= 2) {
+ memcpy(&psp_vers, buf, 2);
+ __consume(2);
+ } else {
+ fprintf(stderr, "WARN: short conn psp command!\n");
+ }
+ }
+ if (cmd("conn clr")) {
+ if (accept_cfg != ACCEPT_CFG_NONE)
+ fprintf(stderr, "WARN: old conn config still set!\n");
+ accept_cfg = ACCEPT_CFG_CLEAR;
+ send_ack(comm_sock);
+ }
+ if (cmd("exit"))
+ should_quit = true;
+#undef cmd
+
+ if (!consumed) {
+ fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n",
+ off, buf);
+ }
+ } while (consumed && off);
+ }
+
+ /* server sock */
+ if (pfds[0].revents & POLLIN) {
+ if (data_sock >= 0) {
+ fprintf(stderr, "WARN: new data sock but old one still here\n");
+ close(data_sock);
+ data_sock = -1;
+ }
+ data_sock = accept(server_sock, NULL, NULL);
+ if (data_sock < 0) {
+ perror("accept");
+ continue;
+ }
+ data_read = 0;
+
+ if (accept_cfg == ACCEPT_CFG_CLEAR) {
+ dbg("new data sock: clear\n");
+ /* nothing to do */
+ } else if (accept_cfg == ACCEPT_CFG_PSP) {
+ dbg("new data sock: psp\n");
+ conn_setup_psp(ys, opts, data_sock);
+ } else {
+ fprintf(stderr, "WARN: new data sock but no config\n");
+ }
+ accept_cfg = ACCEPT_CFG_NONE;
+ }
+
+ if (race_close) {
+ if (data_sock >= 0) {
+ /* indeed, ordering problem, handle the close */
+ close(data_sock);
+ data_sock = -1;
+ send_ack(comm_sock);
+ } else {
+ fprintf(stderr, "WARN: close but no data sock\n");
+ send_err(comm_sock);
+ }
+ }
+ }
+ dbg("session ending\n");
+}
+
+static int spawn_server(struct opts *opts)
+{
+ struct sockaddr_in6 addr;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("can't open socket");
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_any;
+ addr.sin6_port = htons(opts->port);
+
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
+ perror("can't bind socket");
+ return -1;
+ }
+
+ if (listen(fd, 5)) {
+ perror("can't listen");
+ return -1;
+ }
+
+ return fd;
+}
+
+static int run_responder(struct ynl_sock *ys, struct opts *opts)
+{
+ int server_sock, comm;
+
+ server_sock = spawn_server(opts);
+ if (server_sock < 0)
+ return 4;
+
+ while (!should_quit) {
+ comm = accept(server_sock, NULL, NULL);
+ if (comm < 0) {
+ perror("accept failed");
+ } else {
+ run_session(ys, opts, server_sock, comm);
+ close(comm);
+ }
+ }
+
+ return 0;
+}
+
+static void usage(const char *name, const char *miss)
+{
+ if (miss)
+ fprintf(stderr, "Missing argument: %s\n", miss);
+
+ fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name);
+ exit(EXIT_FAILURE);
+}
+
+static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "vp:d:")) != -1) {
+ switch (opt) {
+ case 'v':
+ opts->verbose = 1;
+ break;
+ case 'p':
+ opts->port = atoi(optarg);
+ break;
+ case 'd':
+ opts->devid = atoi(optarg);
+ break;
+ default:
+ usage(argv[0], NULL);
+ }
+ }
+}
+
+static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions)
+{
+ struct psp_dev_set_req *sreq;
+ struct psp_dev_set_rsp *srsp;
+
+ fprintf(stderr, "Set PSP enable on device %d to 0x%x\n",
+ dev_id, versions);
+
+ sreq = psp_dev_set_req_alloc();
+
+ psp_dev_set_req_set_id(sreq, dev_id);
+ psp_dev_set_req_set_psp_versions_ena(sreq, versions);
+
+ srsp = psp_dev_set(ys, sreq);
+ psp_dev_set_req_free(sreq);
+ if (!srsp)
+ return 10;
+
+ psp_dev_set_rsp_free(srsp);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct psp_dev_get_list *dev_list;
+ bool devid_found = false;
+ __u32 ver_ena, ver_cap;
+ struct opts opts = {};
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int first_id = 0;
+ int ret;
+
+ parse_cmd_opts(argc, argv, &opts);
+ if (!opts.port)
+ usage(argv[0], "port"); // exits
+
+ ys = ynl_sock_create(&ynl_psp_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ dev_list = psp_dev_get_dump(ys);
+ if (ynl_dump_empty(dev_list)) {
+ if (ys->err.code)
+ goto err_close;
+ fprintf(stderr, "No PSP devices\n");
+ goto err_close_silent;
+ }
+
+ ynl_dump_foreach(dev_list, d) {
+ if (opts.devid) {
+ devid_found = true;
+ ver_ena = d->psp_versions_ena;
+ ver_cap = d->psp_versions_cap;
+ } else if (!first_id) {
+ first_id = d->id;
+ ver_ena = d->psp_versions_ena;
+ ver_cap = d->psp_versions_cap;
+ } else {
+ fprintf(stderr, "Multiple PSP devices found\n");
+ goto err_close_silent;
+ }
+ }
+ psp_dev_get_list_free(dev_list);
+
+ if (opts.devid && !devid_found) {
+ fprintf(stderr, "PSP device %d requested on cmdline, not found\n",
+ opts.devid);
+ goto err_close_silent;
+ } else if (!opts.devid) {
+ opts.devid = first_id;
+ }
+
+ if (ver_ena != ver_cap) {
+ ret = psp_dev_set_ena(ys, opts.devid, ver_cap);
+ if (ret)
+ goto err_close;
+ }
+
+ ret = run_responder(ys, &opts);
+
+ if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena))
+ fprintf(stderr, "WARN: failed to set the PSP versions back\n");
+
+ ynl_sock_destroy(ys);
+
+ return ret;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_close_silent:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 38303da957ee..236005290a33 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -2,13 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
from lib.py import ksft_disruptive, ksft_exit, ksft_run
-from lib.py import ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import cmd, defer, ip
+from lib.py import bkg, cmd, defer, ip
import errno
import glob
-
+import os
+import socket
+import struct
def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -22,6 +24,40 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return None
+def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+ ksft_wait=3):
+
+ rx = tx = False
+
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if not queues:
+ raise KsftSkipEx("Netlink reports no queues")
+
+ for q in queues:
+ if q['id'] == 0:
+ if q['type'] == 'rx':
+ rx = True
+ if q['type'] == 'tx':
+ tx = True
+
+ ksft_eq(q.get('xsk', None), {},
+ comment="xsk attr on queue we configured")
+ else:
+ ksft_not_in('xsk', q,
+ comment="xsk attr on queue we didn't configure")
+
+ ksft_eq(rx, True)
+ ksft_eq(tx, True)
+
+
def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -45,10 +81,9 @@ def addremove_queues(cfg, nl) -> None:
netnl = EthtoolFamily()
channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
- if channels['combined-count'] == 0:
- rx_type = 'rx'
- else:
- rx_type = 'combined'
+ rx_type = 'rx'
+ if channels.get('combined-count', 0) > 0:
+ rx_type = 'combined'
expected = curr_queues - 1
cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
@@ -81,7 +116,8 @@ def check_down(cfg, nl) -> None:
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
- ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
+ ksft_run([get_queues, addremove_queues, check_down, check_xsk],
+ args=(cfg, NetdevFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py
new file mode 100755
index 000000000000..f9530a8b0856
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ring_reconfig.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test channel and ring size configuration via ethtool (-L / -G).
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic
+from lib.py import defer, NlError
+
+
+def channels(cfg) -> None:
+ """
+ Twiddle channel counts in various combinations of parameters.
+ We're only looking for driver adhering to the requested config
+ if the config is accepted and crashes.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx", "tx", "combined"]
+ mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"},
+ {"rx", "tx", "combined"},]
+
+ # Get the set of keys that device actually supports
+ restore = {}
+ supported = set()
+ for key in all_keys:
+ if key + "-max" in chans:
+ supported.add(key)
+ restore |= {key + "-count": chans[key + "-count"]}
+
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+ def test_config(config):
+ try:
+ cfg.eth.channels_set(ehdr | config)
+ get = cfg.eth.channels_get(ehdr)
+ for k, v in config.items():
+ ksft_eq(get.get(k, 0), v)
+ except NlError as e:
+ failed.append(mix)
+ ksft_pr("Can't set", config, e)
+ else:
+ ksft_pr("Okay", config)
+
+ failed = []
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+
+ # Set all the values in the mix to 1, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = 1 if key in mix else 0
+ test_config(config)
+
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+ if mix in failed:
+ continue
+
+ # Set all the values in the mix to max, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = chans[key + '-max'] if key in mix else 0
+ test_config(config)
+
+
+def _configure_min_ring_cnt(cfg) -> None:
+ """ Try to configure a single Rx/Tx ring. """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx-count", "tx-count", "combined-count"]
+ restore = {}
+ config = {}
+ for key in all_keys:
+ if key in chans:
+ restore[key] = chans[key]
+ config[key] = 0
+
+ if chans.get('combined-count', 0) > 1:
+ config['combined-count'] = 1
+ elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1:
+ config['tx-count'] = 1
+ config['rx-count'] = 1
+ else:
+ # looks like we're already on 1 channel
+ return
+
+ cfg.eth.channels_set(ehdr | config)
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+
+def ringparam(cfg) -> None:
+ """
+ Tweak the ringparam configuration. Try to run some traffic over min
+ ring size to make sure it actually functions.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ rings = cfg.eth.rings_get(ehdr)
+
+ restore = {}
+ maxes = {}
+ params = set()
+ for key in rings.keys():
+ if 'max' in key:
+ param = key[:-4]
+ maxes[param] = rings[key]
+ params.add(param)
+ restore[param] = rings[param]
+
+ defer(cfg.eth.rings_set, ehdr | restore)
+
+ # Speed up the reconfig by configuring just one ring
+ _configure_min_ring_cnt(cfg)
+
+ # Try to reach min on all settings
+ for param in params:
+ val = rings[param]
+ while True:
+ try:
+ cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex},
+ param: val // 2})
+ if val == 0:
+ break
+ val //= 2
+ except NlError:
+ break
+
+ get = cfg.eth.rings_get(ehdr)
+ ksft_eq(get[param], val)
+
+ ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})")
+
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+ # Try max across all params, if the driver supports large rings
+ # this may OOM so we ignore errors
+ try:
+ ksft_pr("Applying max settings")
+ config = {p: maxes[p] for p in params}
+ cfg.eth.rings_set(ehdr | config)
+ except NlError as e:
+ ksft_pr("Can't set max params", config, e)
+ else:
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.eth = EthtoolFamily()
+
+ ksft_run([channels,
+ ringparam],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index efcc1e10575b..b08e4d48b15c 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Tests related to standard netdevice statistics.
+"""
+
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
-from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
@@ -18,13 +22,16 @@ rtnl = RtnlFamily()
def check_pause(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support Pause config also report standard
+ pause stats.
+ """
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("pause not supported by the device")
+ raise KsftSkipEx("pause not supported by the device") from e
raise
data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
@@ -33,13 +40,16 @@ def check_pause(cfg) -> None:
def check_fec(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support FEC config also report standard
+ FEC stats.
+ """
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("FEC not supported by the device")
+ raise KsftSkipEx("FEC not supported by the device") from e
raise
data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
@@ -47,16 +57,48 @@ def check_fec(cfg) -> None:
ksft_true(data['stats'], "driver does not report stats")
+def check_fec_hist(cfg) -> None:
+ """
+ Check that drivers which support FEC histogram statistics report
+ reasonable values.
+ """
+
+ try:
+ data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("FEC not supported by the device") from e
+ raise
+ if 'stats' not in data:
+ raise KsftSkipEx("FEC stats not supported by the device")
+ if 'hist' not in data['stats']:
+ raise KsftSkipEx("FEC histogram not supported by the device")
+
+ hist = data['stats']['hist']
+ for fec_bin in hist:
+ for key in ['bin-low', 'bin-high', 'bin-val']:
+ ksft_in(key, fec_bin,
+ "Drivers should always report FEC bin range and value")
+ ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
+ "FEC bin range should be valid")
+ if 'bin-val-per-lane' in fec_bin:
+ ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
+ "FEC bin value should be equal to sum of per-plane values")
+
+
def pkt_byte_sum(cfg) -> None:
- global netfam, rtnl
+ """
+ Check that qstat and interface stats match in value.
+ """
def get_qstat(test):
- global netfam
stats = netfam.qstats_get({}, dump=True)
if stats:
for qs in stats:
if qs["ifindex"]== test.ifindex:
return qs
+ return None
qstat = get_qstat(cfg)
if qstat is None:
@@ -77,15 +119,14 @@ def pkt_byte_sum(cfg) -> None:
for _ in range(10):
rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
if stat_cmp(rtstat, qstat) < 0:
- raise Exception("RTNL stats are lower, fetched later")
+ raise KsftFailEx("RTNL stats are lower, fetched later")
qstat = get_qstat(cfg)
if stat_cmp(rtstat, qstat) > 0:
- raise Exception("Qstats are lower, fetched later")
+ raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
- global netfam
- global rtnl
+ """ Qstats Netlink API tests - querying by ifindex. """
# Construct a map ifindex -> [dump, by-index, dump]
ifindexes = {}
@@ -93,7 +134,7 @@ def qstat_by_ifindex(cfg) -> None:
for entry in stats:
ifindexes[entry['ifindex']] = [entry, None, None]
- for ifindex in ifindexes.keys():
+ for ifindex in ifindexes:
entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
ksft_eq(len(entry), 1)
ifindexes[entry[0]['ifindex']][1] = entry[0]
@@ -145,7 +186,7 @@ def qstat_by_ifindex(cfg) -> None:
# Try to get stats for lowest unused ifindex but not 0
devs = rtnl.getlink({}, dump=True)
- all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ all_ifindexes = set(dev["ifi-index"] for dev in devs)
lowest = 2
while lowest in all_ifindexes:
lowest += 1
@@ -158,18 +199,20 @@ def qstat_by_ifindex(cfg) -> None:
@ksft_disruptive
def check_down(cfg) -> None:
+ """ Test statistics (interface and qstat) are not impacted by ifdown """
+
try:
qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftSkipEx("qstats not supported by the device")
+ raise KsftSkipEx("qstats not supported by the device") from e
raise
ip(f"link set dev {cfg.dev['ifname']} down")
defer(ip, f"link set dev {cfg.dev['ifname']} up")
qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- for k, v in qstat.items():
+ for k in qstat:
ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
# exercise per-queue API to make sure that "device down" state
@@ -220,14 +263,15 @@ def procfs_downup_hammer(cfg) -> None:
Reading stats via procfs only holds the RCU lock, drivers often try
to sleep when reading the stats, or don't protect against races.
"""
- # Max out the queues, we'll flip between max and 1
+ # Set a large number of queues,
+ # we'll flip between min(max_queues, 64) and 1
channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
if channels['combined-count'] == 0:
rx_type = 'rx'
else:
rx_type = 'combined'
cur_queue_cnt = channels[f'{rx_type}-count']
- max_queue_cnt = channels[f'{rx_type}-max']
+ max_queue_cnt = min(channels[f'{rx_type}-max'], 64)
cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
@@ -263,9 +307,12 @@ def procfs_downup_hammer(cfg) -> None:
def main() -> None:
+ """ Ksft boiler plate main """
+
with NetDrvEnv(__file__, queue_count=100) as cfg:
- ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
- check_down, procfs_hammer, procfs_downup_hammer],
+ ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum,
+ qstat_by_ifindex, check_down, procfs_hammer,
+ procfs_downup_hammer],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 2d5a76d99181..1340b3df9c31 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -1,11 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-TEST_PROGS := dev_addr_lists.sh
+TEST_PROGS := \
+ dev_addr_lists.sh \
+ options.sh \
+ propagation.sh \
+# end of TEST_PROGS
TEST_INCLUDES := \
../bonding/lag_lib.sh \
../../../net/forwarding/lib.sh \
- ../../../net/lib.sh
+ ../../../net/in_netns.sh \
+ ../../../net/lib.sh \
+ ../../../net/lib/sh/defer.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index b5e3a3aad4bf..558e1d0cf565 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,5 +1,7 @@
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh
new file mode 100755
index 000000000000..44888f32b513
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/options.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# These tests verify basic set and get functionality of the team
+# driver options over netlink.
+
+# Run in private netns.
+test_dir="$(dirname "$0")"
+if [[ $# -eq 0 ]]; then
+ "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess
+ exit $?
+fi
+
+ALL_TESTS="
+ team_test_options
+"
+
+source "${test_dir}/../../../net/lib.sh"
+
+TEAM_PORT="team0"
+MEMBER_PORT="dummy0"
+
+setup()
+{
+ ip link add name "${MEMBER_PORT}" type dummy
+ ip link add name "${TEAM_PORT}" type team
+}
+
+get_and_check_value()
+{
+ local option_name="$1"
+ local expected_value="$2"
+ local port_flag="$3"
+
+ local value_from_get
+
+ if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \
+ "${port_flag}"); then
+ echo "Could not get option '${option_name}'" >&2
+ return 1
+ fi
+
+ if [[ "${value_from_get}" != "${expected_value}" ]]; then
+ echo "Incorrect value for option '${option_name}'" >&2
+ echo "get (${value_from_get}) != set (${expected_value})" >&2
+ return 1
+ fi
+}
+
+set_and_check_get()
+{
+ local option_name="$1"
+ local option_value="$2"
+ local port_flag="$3"
+
+ local value_from_get
+
+ if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \
+ "${option_value}" "${port_flag}"; then
+ echo "'setoption ${option_name} ${option_value}' failed" >&2
+ return 1
+ fi
+
+ get_and_check_value "${option_name}" "${option_value}" "${port_flag}"
+ return $?
+}
+
+# Get a "port flag" to pass to the `teamnl` command.
+# E.g. $1="dummy0" -> "port=dummy0",
+# $1="" -> ""
+get_port_flag()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ echo "--port=${port_name}"
+ fi
+}
+
+attach_port_if_specified()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ ip link set dev "${port_name}" master "${TEAM_PORT}"
+ return $?
+ fi
+}
+
+detach_port_if_specified()
+{
+ local port_name="$1"
+
+ if [[ -n "${port_name}" ]]; then
+ ip link set dev "${port_name}" nomaster
+ return $?
+ fi
+}
+
+# Test that an option's get value matches its set value.
+# Globals:
+# RET - Used by testing infra like `check_err`.
+# EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+# option_name - The name of the option.
+# value_1 - The first value to try setting.
+# value_2 - The second value to try setting.
+# port_name - The (optional) name of the attached port.
+team_test_option()
+{
+ local option_name="$1"
+ local value_1="$2"
+ local value_2="$3"
+ local possible_values="$2 $3 $2"
+ local port_name="$4"
+ local port_flag
+
+ RET=0
+
+ echo "Setting '${option_name}' to '${value_1}' and '${value_2}'"
+
+ attach_port_if_specified "${port_name}"
+ check_err $? "Couldn't attach ${port_name} to master"
+ port_flag=$(get_port_flag "${port_name}")
+
+ # Set and get both possible values.
+ for value in ${possible_values}; do
+ set_and_check_get "${option_name}" "${value}" "${port_flag}"
+ check_err $? "Failed to set '${option_name}' to '${value}'"
+ done
+
+ detach_port_if_specified "${port_name}"
+ check_err $? "Couldn't detach ${port_name} from its master"
+
+ log_test "Set + Get '${option_name}' test"
+}
+
+# Test that getting a non-existant option fails.
+# Globals:
+# RET - Used by testing infra like `check_err`.
+# EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+# option_name - The name of the option.
+# port_name - The (optional) name of the attached port.
+team_test_get_option_fails()
+{
+ local option_name="$1"
+ local port_name="$2"
+ local port_flag
+
+ RET=0
+
+ attach_port_if_specified "${port_name}"
+ check_err $? "Couldn't attach ${port_name} to master"
+ port_flag=$(get_port_flag "${port_name}")
+
+ # Just confirm that getting the value fails.
+ teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}"
+ check_fail $? "Shouldn't be able to get option '${option_name}'"
+
+ detach_port_if_specified "${port_name}"
+
+ log_test "Get '${option_name}' fails"
+}
+
+team_test_options()
+{
+ # Wrong option name behavior.
+ team_test_get_option_fails fake_option1
+ team_test_get_option_fails fake_option2 "${MEMBER_PORT}"
+
+ # Correct set and get behavior.
+ team_test_option mode activebackup loadbalance
+ team_test_option notify_peers_count 0 5
+ team_test_option notify_peers_interval 0 5
+ team_test_option mcast_rejoin_count 0 5
+ team_test_option mcast_rejoin_interval 0 5
+ team_test_option enabled true false "${MEMBER_PORT}"
+ team_test_option user_linkup true false "${MEMBER_PORT}"
+ team_test_option user_linkup_enabled true false "${MEMBER_PORT}"
+ team_test_option priority 10 20 "${MEMBER_PORT}"
+ team_test_option queue_id 0 1 "${MEMBER_PORT}"
+}
+
+require_command teamnl
+setup
+tests_run
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+ set +e
+ ip link del dummyteam &>/dev/null
+ ip link del team0 &>/dev/null
+ echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+ modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+ # using netdevsim because it supports NETIF_F_LRO
+ NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+ ip link add name team0 type team
+ ip link set $NSIM_LRO_NAME down
+ ip link set dev $NSIM_LRO_NAME master team0
+ ip link set team0 up
+ ethtool -K team0 large-receive-offload off
+
+ ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set team0 promisc on
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+
+ ip link del team0
+ ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+ ip link set team0 promisc on
+
+ # Make sure we can add more L2 addresses without any issues.
+ ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+ ip link set team0.1 up
+
+ ip link del team0.1
+ ip link del team0
+ ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
index 7ec7cd3ab2cc..868ece3fea1f 100644
--- a/tools/testing/selftests/drivers/net/virtio_net/Makefile
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -1,15 +1,12 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = basic_features.sh \
- #
+TEST_PROGS = basic_features.sh
-TEST_FILES = \
- virtio_net_common.sh \
- #
+TEST_FILES = virtio_net_common.sh
TEST_INCLUDES = \
- ../../../net/forwarding/lib.sh \
- ../../../net/lib.sh \
- #
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh \
+# end of TEST_INCLUDES
include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..e54df158dfe9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,779 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftNamedVariant, ksft_variants
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+ """Enum for XDP configuration options."""
+ MODE = 0 # Configures the BPF program for a specific test
+ PORT = 1 # Port configuration to communicate with the remote host
+ ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking
+ ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+ """Enum for XDP actions."""
+ PASS = 0 # Pass the packet up to the stack
+ DROP = 1 # Drop the packet
+ TX = 2 # Route the packet to the remote host
+ TAIL_ADJST = 3 # Adjust the tail of the packet
+ HEAD_ADJST = 4 # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+ """Enum for XDP statistics."""
+ RX = 0 # Count of valid packets received for testing
+ PASS = 1 # Count of packets passed up to the stack
+ DROP = 2 # Count of packets dropped
+ TX = 3 # Count of incoming packets routed to the remote host
+ ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+ """Data class to store information about a BPF program."""
+ name: str # Name of the BPF program
+ file: str # BPF program object file
+ xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags")
+ mtu: int = 1500 # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+ """
+ Exchanges UDP packets between a local and remote host using the socat tool.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ test_string: String that the remote host will send.
+
+ Returns:
+ The string received by the test host.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp_cmd, exit_wait=True) as nc:
+ wait_port_listen(port, proto="udp")
+ cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+ return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+ """
+ Tests UDP packet exchange between a local and remote host.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ size: The length of the test string to be exchanged, default is 256 characters.
+
+ Returns:
+ bool: True if the received string matches the sent string, False otherwise.
+ """
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+ """
+ Loads an XDP program onto a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+
+ Returns:
+ dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+ """
+ abs_path = cfg.net_lib_dir / bpf_info.file
+ prog_info = {}
+
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+ cmd(
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}",
+ shell=True
+ )
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off")
+
+ xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+ prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+ prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+ prog_id = prog_info["id"]
+
+ map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+ prog_info["maps"] = {}
+ for map_id in map_ids:
+ name = bpftool(f"map show id {map_id}", json=True)["name"]
+ prog_info["maps"][name] = map_id
+
+ return prog_info
+
+
+def format_hex_bytes(value):
+ """
+ Helper function that converts an integer into a formatted hexadecimal byte string.
+
+ Args:
+ value: An integer representing the number to be converted.
+
+ Returns:
+ A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+ """
+ hex_str = value.to_bytes(4, byteorder='little', signed=True)
+ return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+ """
+ Updates an XDP map with a given key-value pair using bpftool.
+
+ Args:
+ map_name: The name of the XDP map to update.
+ key: The key to update in the map, formatted as a hexadecimal string.
+ value: The value to associate with the key, formatted as a hexadecimal string.
+ """
+ key_formatted = format_hex_bytes(key)
+ value_formatted = format_hex_bytes(value)
+ bpftool(
+ f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+ )
+
+
+def _get_stats(xdp_map_id):
+ """
+ Retrieves and formats statistics from an XDP map.
+
+ Args:
+ xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+ Returns:
+ A dictionary containing formatted packet statistics for various XDP actions.
+ The keys are based on the XDPStats Enum values.
+
+ Raises:
+ KsftFailEx: If the stats retrieval fails.
+ """
+ stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+ if not stats_dump:
+ raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+ stats_formatted = {}
+ for key in range(0, 5):
+ val = stats_dump[key]["formatted"]["value"]
+ if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+ stats_formatted[XDPStats.RX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+ stats_formatted[XDPStats.PASS.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+ stats_formatted[XDPStats.DROP.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+ stats_formatted[XDPStats.TX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+ stats_formatted[XDPStats.ABORT.value] = val
+
+ return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_PASS action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+ """
+ Tests the XDP_PASS action for single buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+ """
+ Tests the XDP_PASS action for a multi-buff size.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_DROP action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+ """
+ Tests the XDP_DROP action for a signle-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+ """
+ Tests the XDP_DROP action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_drop(cfg, bpf_info, 8000)
+
+
+def _test_xdp_native_tx(cfg, bpf_info, payload_lens):
+ """
+ Tests the XDP_TX action.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing the BPF program metadata.
+ payload_lens: Array of packet lengths to send.
+ """
+ cfg.require_cmd("socat", remote=True)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ expected_pkts = 0
+ for payload_len in payload_lens:
+ test_string = "".join(
+ random.choice(string.ascii_lowercase) for _ in range(payload_len)
+ )
+
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \
+ f"-u UDP-RECV:{port},reuseport STDOUT"
+
+ # Writing zero bytes to stdin gets ignored by socat,
+ # but with the shut-null flag socat generates a zero sized packet
+ # when the socket is closed.
+ tx_cmd_suffix = ",shut-null" if payload_len == 0 else ""
+ tx_udp = f"echo -n {test_string} | socat -t 2 " + \
+ f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+
+ expected_pkts += 1
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch")
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch")
+
+
+def test_xdp_native_tx_sb(cfg):
+ """
+ Tests the XDP_TX action for a single-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ # Ensure there's enough room for an ETH / IP / UDP header
+ pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62
+
+ _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len])
+
+
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o",
+ "xdp.frags", 9000)
+ # The first packet ensures we exercise the fragmented code path.
+ # And the subsequent 0-sized packet ensures the driver
+ # reinitializes xdp_buff correctly.
+ _test_xdp_native_tx(cfg, bpf_info, [8000, 0])
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+ """
+ Validates the result of a test.
+
+ Args:
+ res: The result of the test, which should be a dictionary with a "status" key.
+
+ Raises:
+ KsftFailEx: If the test fails to pass any combination of offset and packet size.
+ """
+ if "status" not in res:
+ raise KsftFailEx("Missing 'status' key in result dictionary")
+
+ # Validate that not a single case was successful
+ if res["status"] == "fail":
+ if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+ raise KsftFailEx(f"{res['reason']}")
+
+ # Get the previous offset and packet size to report the successful run
+ tmp_idx = offset_lst.index(res["offset"])
+ prev_offset = offset_lst[tmp_idx - 1]
+ if tmp_idx == 0:
+ tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+ prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+ else:
+ prev_pkt_sz = res["pkt_sz"]
+
+ # Use these values for error reporting
+ ksft_pr(
+ f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+ f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+ f"Reason: {res['reason']}"
+ )
+
+
+def _check_for_failures(recvd_str, stats):
+ """
+ Checks for common failures while adjusting headroom or tailroom.
+
+ Args:
+ recvd_str: The string received from the remote host after sending a test string.
+ stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+ Returns:
+ str: A string describing the failure reason if a failure is detected, otherwise None.
+ """
+
+ # Any adjustment failure result in an abort hence, we track this counter
+ if stats[XDPStats.ABORT.value] != 0:
+ return "Adjustment failed"
+
+ # Since we are using aggregate stats for a single test across all offsets and packet sizes
+ # we can't use RX stats only to track data exchange failure without taking a previous
+ # snapshot. An easier way is to simply check for non-zero length of received string.
+ if len(recvd_str) == 0:
+ return "Data exchange failed"
+
+ # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+ if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+ return "RX stats mismatch"
+
+ return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP tail adjustment functionality.
+
+ This function loads the appropriate XDP program based on the provided
+ program name and configures the XDP map for tail adjustment. It then
+ validates the tail adjustment by sending and receiving UDP packets
+ with specified packet sizes and offsets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ prog: Name of the XDP program to load.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for tail adjustment.
+
+ Returns:
+ dict: A dictionary with test status and failure details if applicable.
+ """
+ port = rand_port()
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+
+ # Configure the XDP map for tail adjustment
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ for offset in offset_lst:
+ tag = format(random.randint(65, 90), "02x")
+
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+ if offset > 0:
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+ for pkt_sz in pkt_sz_lst:
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ failure = _check_for_failures(recvd_str, stats)
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset > 0:
+ expected_data = test_str + (offset * chr(int(tag, 16)))
+ else:
+ expected_data = test_str[0:pkt_sz + offset]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+ """
+ Tests the XDP tail adjustment by growing packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [1, 16, 32, 64, 128, 256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+ """
+ Tests the XDP tail adjustment by shrinking packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+ """
+ Retrieves the header data split (HDS) threshold for a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ Returns:
+ The HDS threshold value. If the threshold is not supported or an error occurs,
+ a default value of 1500 is returned.
+ """
+ ethnl = cfg.ethnl
+ hds_thresh = 1500
+
+ try:
+ rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' not in rings:
+ ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+ return hds_thresh
+ hds_thresh = rings['hds-thresh']
+ except NlError as e:
+ ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+ return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP head adjustment action for a multi-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ ethnl: Network namespace or link object (not used in this function).
+
+ This function sets up the packet size and offset lists, then performs
+ the head adjustment test by sending and receiving UDP packets.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ hds_thresh = get_hds_thresh(cfg)
+ for offset in offset_lst:
+ for pkt_sz in pkt_sz_lst:
+ # The "head" buffer must contain at least the Ethernet header
+ # after we eat into it. We send large-enough packets, but if HDS
+ # is enabled head will only contain headers. Don't try to eat
+ # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+ l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+ if pkt_sz > hds_thresh and offset > l2_cut_off:
+ ksft_pr(
+ f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+ f"offset {offset} > {l2_cut_off}"
+ )
+ return {"status": "pass"}
+
+ test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ tag = format(random.randint(65, 90), '02x')
+
+ _set_xdp_map("map_xdp_setup",
+ TestConfig.ADJST_OFFSET.value,
+ offset)
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ # Check for failures around adjustment and data exchange
+ failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset < 0:
+ expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+ else:
+ expected_data = test_str[offset:]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+ """
+ Tests the XDP headroom growth support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully extended for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Negative values result in headroom shrinking, resulting in growing of payload
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+ """
+ Tests the XDP headroom shrinking support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully shrunk for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Positive values result in headroom growing, resulting in shrinking of payload
+ offset_lst = [16, 32, 64, 128, 256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+@ksft_variants([
+ KsftNamedVariant("pass", XDPAction.PASS),
+ KsftNamedVariant("drop", XDPAction.DROP),
+ KsftNamedVariant("tx", XDPAction.TX),
+])
+def test_xdp_native_qstats(cfg, act):
+ """
+ Send 1000 messages. Expect XDP action specified in @act.
+ Make sure the packets were counted to interface level qstats
+ (Rx, and Tx if act is TX).
+ """
+
+ cfg.require_cmd("socat")
+
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ # Discard the input, but we need a listener to avoid ICMP errors
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \
+ "/dev/null"
+ # Listener runs on "remote" in case of XDP_TX
+ rx_host = cfg.remote if act == XDPAction.TX else None
+ # We want to spew 1000 packets quickly, bash seems to do a good enough job
+ # Each reopening of the socket gives us a differenot local port (for RSS)
+ tx_udp = "for _ in `seq 20`; do " \
+ f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+ "for i in `seq 50`; do echo a >&5; done; " \
+ "exec 5>&-; done"
+
+ cfg.wait_hw_stats_settle()
+ # Qstats have more clearly defined semantics than rtnetlink.
+ # XDP is the "first layer of the stack" so XDP packets should be counted
+ # as received and sent as if the decision was made in the routing layer.
+ before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ with bkg(rx_udp, host=rx_host, exit_wait=True):
+ wait_port_listen(port, proto="udp", host=rx_host)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ cfg.wait_hw_stats_settle()
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ expected_pkts = 1000
+ ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts)
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts)
+
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
+ if act == XDPAction.TX:
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch")
+
+ # Flip the ring count back and forth to make sure the stats from XDP rings
+ # don't get lost.
+ chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if chans.get('combined-count', 0) > 1:
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': 1})
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': chans['combined-count']})
+ before = after
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'], before['rx-packets'])
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'], before['tx-packets'])
+
+
+def main():
+ """
+ Main function to execute the XDP tests.
+
+ This function runs a series of tests to validate the XDP support for
+ both the single and multi-buffer. It uses the NetDrvEpEnv context
+ manager to manage the network driver environment and the ksft_run
+ function to execute the tests.
+ """
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(
+ [
+ test_xdp_native_pass_sb,
+ test_xdp_native_pass_mb,
+ test_xdp_native_drop_sb,
+ test_xdp_native_drop_mb,
+ test_xdp_native_tx_sb,
+ test_xdp_native_tx_mb,
+ test_xdp_native_adjst_tail_grow_data,
+ test_xdp_native_adjst_tail_shrnk_data,
+ test_xdp_native_adjst_head_grow_data,
+ test_xdp_native_adjst_head_shrnk_data,
+ test_xdp_native_qstats,
+ ],
+ args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
index 3aad311574c4..e6a37214aa46 100644
--- a/tools/testing/selftests/drivers/ntsync/ntsync.c
+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
@@ -12,7 +12,7 @@
#include <time.h>
#include <pthread.h>
#include <linux/ntsync.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
static int read_sem_state(int sem, __u32 *count, __u32 *max)
{
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
index 7ee7492138c6..14df9aa07308 100644
--- a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -14,7 +14,7 @@
#include <asm/uvdevice.h>
-#include "../../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define UV_PATH "/dev/uv"
#define BUFFER_SIZE 0x200
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
index 55bce47e56b7..f2397e75aa7c 100644
--- a/tools/testing/selftests/exec/check-exec.c
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -30,7 +30,7 @@
#define _ASM_GENERIC_FCNTL_H
#include <linux/fcntl.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
char *const envp[], int flags)
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 8fb7395fd35b..d37c068ed5fe 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -21,7 +21,7 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TESTS_EXPECTED 54
#define TEST_NAME_LEN (PATH_MAX * 4)
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index 8257fddba8c8..55fd3732f029 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -6,7 +6,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct Statistics {
unsigned long long load_address;
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
index cd3a34aca93e..14ac36487df5 100644
--- a/tools/testing/selftests/exec/non-regular.c
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -9,7 +9,7 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* Remove a file, ignoring the result if it didn't exist. */
void rm(struct __test_metadata *_metadata, const char *pathname,
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
index c19726e710d1..4940aee5bb38 100644
--- a/tools/testing/selftests/exec/null-argv.c
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -5,7 +5,7 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define FORK(exec) \
do { \
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index 438c8ff2fd26..7b5c4f6d1928 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -23,7 +23,7 @@
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(void)
{
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
index e0319417124d..e397339495f6 100644
--- a/tools/testing/selftests/fchmodat2/fchmodat2_test.c
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -7,7 +7,7 @@
#include <syscall.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
{
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
index a55b79810ab2..ff8d47fc373a 100644
--- a/tools/testing/selftests/filelock/ofdlocks.c
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -6,7 +6,7 @@
#include <stdio.h>
#include <unistd.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int lock_set(int fd, struct flock *fl)
{
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 828b66a10c63..64ac0dfa46b7 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
dnotify_test
devpts_pts
+fclog
file_stressor
+anon_inode_test
+kernfs_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 66305fc34c60..85427d7f19b9 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
new file mode 100644
index 000000000000..94c6c81c2301
--- /dev/null
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(anon_inode_no_chown)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchown(fd_context, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_chmod)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchmod(fd_context, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_exec)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_open)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_GE(dup2(fd_context, 500), 0);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = 500;
+
+ ASSERT_LT(open("/proc/self/fd/500", 0), 0);
+ ASSERT_EQ(errno, ENXIO);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 81db85a5cc16..a1a79a6fef17 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,7 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define DEFAULT_THREADS 4
@@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
"oneway_spam_detection",
"extended_error",
"freeze_notification",
+ "transaction_report",
};
change_mountns(_metadata);
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b1fc9b916ace..54fea349204e 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -11,7 +11,7 @@
#include <asm/ioctls.h>
#include <sys/mount.h>
#include <sys/wait.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static bool terminal_dup2(int duplicate, int original)
{
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index 65ede506305c..8bc57a2ef966 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -11,7 +11,7 @@
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
struct epoll_mtcontext
{
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index 85acb4e3ef00..1b48f267157d 100644
--- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -11,7 +11,7 @@
#include <pthread.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define EVENTFD_TEST_ITERATIONS 100000UL
@@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr)
ASSERT_GE(fd, 0);
flags = fcntl(fd, F_GETFL);
- // since the kernel automatically added O_RDWR.
+ // The kernel automatically adds the O_RDWR flag.
EXPECT_EQ(flags, O_RDWR);
close(fd);
@@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock)
close(fd);
}
-TEST(eventfd_chek_flag_cloexec_and_nonblock)
+TEST(eventfd_check_flag_cloexec_and_nonblock)
{
int fd, flags;
@@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore)
// The semaphore could only be obtained from fdinfo.
ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
if (ret != 0)
- ksft_print_msg("eventfd-semaphore check failed, msg: %s\n",
- err.msg);
+ ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg);
EXPECT_EQ(ret, 0);
close(fd);
diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c
new file mode 100644
index 000000000000..551c4a0f395a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fclog.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2025 SUSE LLC.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mount.h>
+
+#include "kselftest_harness.h"
+
+#define ASSERT_ERRNO(expected, _t, seen) \
+ __EXPECT(expected, #expected, \
+ ({__typeof__(seen) _tmp_seen = (seen); \
+ _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1)
+
+#define ASSERT_ERRNO_EQ(expected, seen) \
+ ASSERT_ERRNO(expected, ==, seen)
+
+#define ASSERT_SUCCESS(seen) \
+ ASSERT_ERRNO(0, <=, seen)
+
+FIXTURE(ns)
+{
+ int host_mntns;
+};
+
+FIXTURE_SETUP(ns)
+{
+ /* Stash the old mntns. */
+ self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_mntns);
+
+ /* Create a new mount namespace and make it private. */
+ ASSERT_SUCCESS(unshare(CLONE_NEWNS));
+ ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
+}
+
+FIXTURE_TEARDOWN(ns)
+{
+ ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS));
+ ASSERT_SUCCESS(close(self->host_mntns));
+}
+
+TEST_F(ns, fscontext_log_enodata)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ /* A brand new fscontext has no log entries. */
+ char buf[128] = {};
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc_after_fsmount)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+ int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID);
+ ASSERT_SUCCESS(mfd);
+ ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH));
+
+ /*
+ * The fscontext log should still contain data even after
+ * FSCONFIG_CMD_CREATE and fsmount().
+ */
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_emsgsize)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ /*
+ * Attempting to read a message with too small a buffer should not
+ * result in the message getting consumed.
+ */
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0));
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1));
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16));
+
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c
index 1136f93a9977..141badd671a9 100644
--- a/tools/testing/selftests/filesystems/file_stressor.c
+++ b/tools/testing/selftests/filesystems/file_stressor.c
@@ -12,7 +12,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <linux/types.h>
#include <linux/mount.h>
@@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2)
ssize_t nr_read;
/*
- * Concurrently read /proc/<pid>/fd/ which rougly does:
+ * Concurrently read /proc/<pid>/fd/ which roughly does:
*
* f = fget_task_next(p, &fd);
* if (!f)
diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore
new file mode 100644
index 000000000000..3e72e742d08e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fuse_mnt
+fusectl_test
diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile
new file mode 100644
index 000000000000..612aad69a93a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := fusectl_test
+TEST_GEN_FILES := fuse_mnt
+
+include ../../lib.mk
+
+VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse
+endif
+
+VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS := -lfuse -pthread
+endif
+
+$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS)
+$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
new file mode 100644
index 000000000000..d12b17f30fad
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fusectl test file-system
+ * Creates a simple FUSE filesystem with a single read-write file (/test)
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static char *content;
+static size_t content_size = 0;
+static const char test_path[] = "/test";
+
+static int test_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ return 0;
+ }
+
+ if (!strcmp(path, test_path)) {
+ st->st_mode = S_IFREG | 0664;
+ st->st_nlink = 1;
+ st->st_size = content_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, test_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int test_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path))
+ return -ENOENT;
+
+ return 0;
+}
+
+static int test_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (!content || content_size == 0)
+ return 0;
+
+ if (offset >= content_size)
+ return 0;
+
+ if (offset + size > content_size)
+ size = content_size - offset;
+
+ memcpy(buf, content + offset, size);
+
+ return size;
+}
+
+static int test_write(const char *path, const char *buf, size_t size,
+ off_t offset, struct fuse_file_info *fi)
+{
+ size_t new_size;
+
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if(offset > content_size)
+ return -EINVAL;
+
+ new_size = MAX(offset + size, content_size);
+
+ if (new_size > content_size)
+ content = realloc(content, new_size);
+
+ content_size = new_size;
+
+ if (!content)
+ return -ENOMEM;
+
+ memcpy(content + offset, buf, size);
+
+ return size;
+}
+
+static int test_truncate(const char *path, off_t size)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (size == 0) {
+ free(content);
+ content = NULL;
+ content_size = 0;
+ return 0;
+ }
+
+ content = realloc(content, size);
+
+ if (!content)
+ return -ENOMEM;
+
+ if (size > content_size)
+ memset(content + content_size, 0, size - content_size);
+
+ content_size = size;
+ return 0;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = test_getattr,
+ .readdir = test_readdir,
+ .open = test_open,
+ .read = test_read,
+ .write = test_write,
+ .truncate = test_truncate,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
new file mode 100644
index 000000000000..0d1d012c35ed
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com>
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sched.h>
+#include <linux/limits.h>
+
+#include "kselftest_harness.h"
+
+#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections"
+#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX"
+#define FUSE_DEVICE "/dev/fuse"
+#define FUSECTL_TEST_VALUE "1"
+
+static void write_file(struct __test_metadata *const _metadata,
+ const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(write(fd, val, len), len);
+ ASSERT_EQ(close(fd), 0);
+}
+
+FIXTURE(fusectl){
+ char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)];
+ int connection;
+};
+
+FIXTURE_SETUP(fusectl)
+{
+ const char *fuse_mnt_prog = "./fuse_mnt";
+ int status, pid;
+ struct stat statbuf;
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+ char buf[32];
+
+ /* Setup userns */
+ ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0);
+ sprintf(buf, "0 %d 1", uid);
+ write_file(_metadata, "/proc/self/uid_map", buf);
+ write_file(_metadata, "/proc/self/setgroups", "deny");
+ sprintf(buf, "0 %d 1", gid);
+ write_file(_metadata, "/proc/self/gid_map", buf);
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT);
+
+ if (!mkdtemp(self->fuse_mountpoint))
+ SKIP(return,
+ "Failed to create FUSE mountpoint %s",
+ strerror(errno));
+
+ if (access(FUSECTL_MOUNTPOINT, F_OK))
+ SKIP(return,
+ "FUSE control filesystem not mounted");
+
+ pid = fork();
+ if (pid < 0)
+ SKIP(return,
+ "Failed to fork FUSE daemon process: %s",
+ strerror(errno));
+
+ if (pid == 0) {
+ execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL);
+ exit(errno);
+ }
+
+ waitpid(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ SKIP(return,
+ "Failed to start FUSE daemon %s",
+ strerror(WEXITSTATUS(status)));
+ }
+
+ if (stat(self->fuse_mountpoint, &statbuf))
+ SKIP(return,
+ "Failed to stat FUSE mountpoint %s",
+ strerror(errno));
+
+ self->connection = statbuf.st_dev;
+}
+
+FIXTURE_TEARDOWN(fusectl)
+{
+ umount2(self->fuse_mountpoint, MNT_DETACH);
+ rmdir(self->fuse_mountpoint);
+}
+
+TEST_F(fusectl, abort)
+{
+ char path_buf[PATH_MAX];
+ int abort_fd, test_fd, ret;
+
+ sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection);
+
+ ASSERT_EQ(0, access(path_buf, F_OK));
+
+ abort_fd = open(path_buf, O_WRONLY);
+ ASSERT_GE(abort_fd, 0);
+
+ sprintf(path_buf, "%s/test", self->fuse_mountpoint);
+
+ test_fd = open(path_buf, O_RDWR);
+ ASSERT_GE(test_fd, 0);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, 0);
+
+ ret = write(test_fd, "test", sizeof("test"));
+ ASSERT_EQ(ret, sizeof("test"));
+
+ ret = lseek(test_fd, 0, SEEK_SET);
+ ASSERT_GE(ret, 0);
+
+ ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE));
+ ASSERT_GT(ret, 0);
+
+ close(abort_fd);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOTCONN);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c
new file mode 100644
index 000000000000..84c2b910a60d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/kernfs_test.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "kselftest_harness.h"
+#include "wrappers.h"
+
+TEST(kernfs_listxattr)
+{
+ int fd;
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(flistxattr(fd, NULL, 0), 0);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST(kernfs_getxattr)
+{
+ int fd;
+ char buf[1];
+
+ /* Read-only file that can never have any extended attributes set. */
+ fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0);
+ ASSERT_EQ(errno, ENODATA);
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
new file mode 100644
index 000000000000..124339ea7845
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
+/*_test_ns
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
new file mode 100644
index 000000000000..836a4eb7be06
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns
+
+include ../../lib.mk
+
+$(OUTPUT)/mount-notify_test: ../utils.c
+$(OUTPUT)/mount-notify_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
new file mode 100644
index 000000000000..6381af6a40e3
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/fanotify.h>
+
+#include "kselftest_harness.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
new file mode 100644
index 000000000000..320ee25dc8a5
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/fanotify.h>
+
+#include "kselftest_harness.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_types[] = {
+ FAN_MARK_FILESYSTEM,
+ FAN_MARK_MOUNT,
+ FAN_MARK_INODE
+};
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int orig_ns_fd;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->orig_ns_fd, 0);
+
+ ret = setup_userns();
+ ASSERT_EQ(ret, 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
+ // Verify that watching tmpfs mounted inside userns is allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
+ FAN_OPEN, AT_FDCWD, "/");
+ ASSERT_EQ(ret, 0);
+ // ...but watching entire orig root filesystem is not allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
+ FAN_OPEN, self->orig_root, ".");
+ ASSERT_NE(ret, 0);
+ close(fan_fd);
+
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ // Verify that watching mntns where group was created is allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // ...but watching orig mntns is not allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->orig_ns_fd, NULL);
+ ASSERT_NE(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index 457cf76f3c5f..61e55dfbf121 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -3,6 +3,8 @@
#define _GNU_SOURCE
#include <fcntl.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
@@ -10,7 +12,7 @@
#include <sys/mount.h>
#include <unistd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define MNT_NS_COUNT 11
#define MNT_NS_LAST_INDEX 10
@@ -146,4 +148,16 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
}
}
+TEST_F(iterate_mount_namespaces, nfs_valid_ioctl)
+{
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_OPENMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_CLOSEMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_READY, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index e8d1adb021af..d3ad4a77db9b 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -1,7 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
+CFLAGS += -Wall
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
-CFLAGS := -Wall -Werror
+LOCAL_HDRS += ../wrappers.h log.h
+
+TEST_GEN_PROGS := dev_in_maps
+TEST_GEN_PROGS += set_layers_via_fds
include ../../lib.mk
+
+$(OUTPUT)/set_layers_via_fds: ../utils.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 3b796264223f..8924cea6aa4b 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -15,9 +15,9 @@
#include <sched.h>
#include <fcntl.h>
-#include "../../kselftest.h"
+#include "kselftest.h"
#include "log.h"
-#include "wrappers.h"
+#include "../wrappers.h"
static long get_file_dev_and_inode(void *addr, struct statx *stx)
{
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 1d0ae785a667..3c0b93183348 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -6,26 +6,40 @@
#include <sched.h>
#include <stdio.h>
#include <string.h>
+#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/mount.h>
#include <unistd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
#include "log.h"
-#include "wrappers.h"
+#include "../utils.h"
+#include "../wrappers.h"
FIXTURE(set_layers_via_fds) {
+ int pidfd;
};
FIXTURE_SETUP(set_layers_via_fds)
{
- ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ self->pidfd = -EBADF;
+ EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
}
FIXTURE_TEARDOWN(set_layers_via_fds)
{
+ if (self->pidfd >= 0) {
+ EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(self->pidfd), 0);
+ }
umount2("/set_layers_via_fds", 0);
- ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+ umount2("/set_layers_via_fds_tmpfs", 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
}
TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -214,4 +228,493 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
ASSERT_EQ(close(fd_overlay), 0);
}
+TEST_F(set_layers_via_fds, set_override_creds)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_invalid)
+{
+ int fd_context, fd_tmpfs, fd_overlay, ret;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int fd_userns1, fd_userns2;
+ int ipc_sockets[2];
+ char c;
+ const unsigned int predictable_fd_context_nr = 123;
+
+ fd_userns1 = get_userns_fd(0, 0, 10000);
+ ASSERT_GE(fd_userns1, 0);
+
+ fd_userns2 = get_userns_fd(0, 1234, 10000);
+ ASSERT_GE(fd_userns2, 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_GE(ret, 0);
+
+ pid = create_child(&self->pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (close(ipc_sockets[0])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!switch_userns(fd_userns2, 0, 0, false)) {
+ TH_LOG("switch_userns should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
+ TH_LOG("read_nointr should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (close(ipc_sockets[1])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have failed");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ASSERT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+ ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = predictable_fd_context_nr;
+ ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
+ ASSERT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid), 0);
+ ASSERT_EQ(close(self->pidfd), 0);
+ self->pidfd = -EBADF;
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
+ ASSERT_EQ(close(layer_fds[i]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(close(fd_userns1), 0);
+ ASSERT_EQ(close(fd_userns2), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_nomknod)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (!cap_down(CAP_MKNOD))
+ _exit(EXIT_FAILURE);
+
+ if (!cap_down(CAP_SYS_ADMIN))
+ _exit(EXIT_FAILURE);
+
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
+ ASSERT_EQ(errno, EPERM);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+ int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ char path[100];
+
+ sprintf(path, "l%d", i);
+ ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+ layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[i], 0);
+ }
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_work, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_upper, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+ fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_lower, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0);
+ ASSERT_EQ(close(fd_work), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0);
+ ASSERT_EQ(close(fd_upper), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+ ASSERT_EQ(close(fd_lower), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+ int layer_fds[] = { [0 ... 8] = -EBADF };
+ bool layers_found[] = { [0 ... 8] = false };
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f_mountinfo;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tmp, 0);
+
+ layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[3], 0);
+
+ layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[4], 0);
+
+ layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[5], 0);
+
+ layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[6], 0);
+
+ layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[7], 0);
+
+ layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[8], 0);
+
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "r");
+ ASSERT_NE(f_mountinfo, NULL);
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ char *haystack = line;
+
+ if (strstr(haystack, "workdir=/tmp/w"))
+ layers_found[0] = true;
+ if (strstr(haystack, "upperdir=/tmp/u"))
+ layers_found[1] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l1"))
+ layers_found[2] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l2"))
+ layers_found[3] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l3"))
+ layers_found[4] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l4"))
+ layers_found[5] = true;
+ if (strstr(haystack, "datadir+=/tmp/d1"))
+ layers_found[6] = true;
+ if (strstr(haystack, "datadir+=/tmp/d2"))
+ layers_found[7] = true;
+ if (strstr(haystack, "datadir+=/tmp/d3"))
+ layers_found[8] = true;
+ }
+ free(line);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(layers_found[i], true);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
deleted file mode 100644
index 071b95fd2ac0..000000000000
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-//
-#ifndef __SELFTEST_OVERLAYFS_WRAPPERS_H__
-#define __SELFTEST_OVERLAYFS_WRAPPERS_H__
-
-#define _GNU_SOURCE
-
-#include <linux/types.h>
-#include <linux/mount.h>
-#include <sys/syscall.h>
-
-static inline int sys_fsopen(const char *fsname, unsigned int flags)
-{
- return syscall(__NR_fsopen, fsname, flags);
-}
-
-static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key,
- const char *value, int aux)
-{
- return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
-}
-
-static inline int sys_fsmount(int fd, unsigned int flags,
- unsigned int attr_flags)
-{
- return syscall(__NR_fsmount, fd, flags, attr_flags);
-}
-
-static inline int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return syscall(__NR_mount, src, tgt, fst, flags, data);
-}
-
-#ifndef MOVE_MOUNT_F_EMPTY_PATH
-#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
-#endif
-
-static inline int sys_move_mount(int from_dfd, const char *from_pathname,
- int to_dfd, const char *to_pathname,
- unsigned int flags)
-{
- return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
- to_pathname, flags);
-}
-
-#endif
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 14ee91a41650..8e354fe99b44 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
include ../../lib.mk
+
+$(OUTPUT)/statmount_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c
index 15f0834f7557..8bc82f38c42f 100644
--- a/tools/testing/selftests/filesystems/statmount/listmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c
@@ -11,7 +11,7 @@
#include <unistd.h>
#include "statmount.h"
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef LISTMOUNT_REVERSE
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index f4294bab9d73..99e5ad082fb1 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -7,6 +7,42 @@
#include <linux/mount.h>
#include <asm/unistd.h>
+#ifndef __NR_statmount
+ #if defined __alpha__
+ #define __NR_statmount 567
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_statmount 4457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_statmount 6457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_statmount 5457
+ #endif
+ #else
+ #define __NR_statmount 457
+ #endif
+#endif
+
+#ifndef __NR_listmount
+ #if defined __alpha__
+ #define __NR_listmount 568
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_listmount 4458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_listmount 6458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_listmount 5458
+ #endif
+ #else
+ #define __NR_listmount 458
+ #endif
+#endif
+
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
@@ -25,7 +61,7 @@ static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
uint64_t last_mnt_id, uint64_t list[], size_t num,
unsigned int flags)
{
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 46d289611ce8..6e53430423d2 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -13,7 +13,7 @@
#include <linux/stat.h>
#include "statmount.h"
-#include "../../kselftest.h"
+#include "kselftest.h"
static const char *const known_fs[] = {
"9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs",
@@ -26,13 +26,12 @@ static const char *const known_fs[] = {
"hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem",
"ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos",
"nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2",
- "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs",
- "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs",
- "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem",
- "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs",
- "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf",
- "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
- "zonefs", NULL };
+ "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc",
+ "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs",
+ "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs",
+ "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs",
+ "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
+ "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index 70cb0c8b21cf..d56d4103182f 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -14,7 +14,8 @@
#include <linux/stat.h>
#include "statmount.h"
-#include "../../kselftest.h"
+#include "../utils.h"
+#include "kselftest.h"
#define NSID_PASS 0
#define NSID_FAIL 1
@@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
return NSID_PASS;
}
-static int get_mnt_id(const char *path, uint64_t *mnt_id)
-{
- struct statx sx;
- int ret;
-
- ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
- if (ret == -1) {
- ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
- strerror(errno));
- return NSID_ERROR;
- }
-
- if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
- ksft_print_msg("no unique mount ID available for %s\n", path);
- return NSID_ERROR;
- }
-
- *mnt_id = sx.stx_mnt_id;
- return NSID_PASS;
-}
-
-static int write_file(const char *path, const char *val)
-{
- int fd = open(path, O_WRONLY);
- size_t len = strlen(val);
- int ret;
-
- if (fd == -1) {
- ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
-
- ret = write(fd, val, len);
- if (ret == -1) {
- ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
- if (ret != len) {
- ksft_print_msg("short write to %s\n", path);
- return NSID_ERROR;
- }
-
- ret = close(fd);
- if (ret == -1) {
- ksft_print_msg("closing %s\n", path);
- return NSID_ERROR;
- }
-
- return NSID_PASS;
-}
-
static int setup_namespace(void)
{
- int ret;
- char buf[32];
- uid_t uid = getuid();
- gid_t gid = getgid();
-
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
- if (ret == -1)
- ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
- strerror(errno));
-
- sprintf(buf, "0 %d 1", uid);
- ret = write_file("/proc/self/uid_map", buf);
- if (ret != NSID_PASS)
- return ret;
- ret = write_file("/proc/self/setgroups", "deny");
- if (ret != NSID_PASS)
- return ret;
- sprintf(buf, "0 %d 1", gid);
- ret = write_file("/proc/self/gid_map", buf);
- if (ret != NSID_PASS)
- return ret;
-
- ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
- if (ret == -1) {
- ksft_print_msg("making mount tree private: %s\n",
- strerror(errno));
+ if (setup_userns() != 0)
return NSID_ERROR;
- }
return NSID_PASS;
}
@@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
return ret;
- ret = get_mnt_id("/", &root_id);
- if (ret != NSID_PASS)
- return ret;
+ root_id = get_unique_mnt_id("/");
+ if (!root_id)
+ return NSID_ERROR;
ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
new file mode 100644
index 000000000000..c9dd5412b37b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/fsuid.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+#include <sys/mount.h>
+
+#include "kselftest.h"
+#include "wrappers.h"
+#include "utils.h"
+
+#define MAX_USERNS_LEVEL 32
+
+#define syserror(format, ...) \
+ ({ \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ (-errno); \
+ })
+
+#define syserror_set(__ret__, format, ...) \
+ ({ \
+ typeof(__ret__) __internal_ret__ = (__ret__); \
+ errno = labs(__ret__); \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ __internal_ret__; \
+ })
+
+#define STRLITERALLEN(x) (sizeof(""x"") - 1)
+
+#define INTTYPE_TO_STRLEN(type) \
+ (2 + (sizeof(type) <= 1 \
+ ? 3 \
+ : sizeof(type) <= 2 \
+ ? 5 \
+ : sizeof(type) <= 4 \
+ ? 10 \
+ : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
+
+#define list_for_each(__iterator, __list) \
+ for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
+
+typedef enum idmap_type_t {
+ ID_TYPE_UID,
+ ID_TYPE_GID
+} idmap_type_t;
+
+struct id_map {
+ idmap_type_t map_type;
+ __u32 nsid;
+ __u32 hostid;
+ __u32 range;
+};
+
+struct list {
+ void *elem;
+ struct list *next;
+ struct list *prev;
+};
+
+struct userns_hierarchy {
+ int fd_userns;
+ int fd_event;
+ unsigned int level;
+ struct list id_map;
+};
+
+static inline void list_init(struct list *list)
+{
+ list->elem = NULL;
+ list->next = list->prev = list;
+}
+
+static inline int list_empty(const struct list *list)
+{
+ return list == list->next;
+}
+
+static inline void __list_add(struct list *new, struct list *prev, struct list *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static inline void list_add_tail(struct list *head, struct list *list)
+{
+ __list_add(list, head->prev, head);
+}
+
+static inline void list_del(struct list *list)
+{
+ struct list *next, *prev;
+
+ next = list->next;
+ prev = list->prev;
+ next->prev = prev;
+ prev->next = next;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ for (;;)
+ pause();
+ _exit(0);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
+{
+ int fd = -EBADF, setgroups_fd = -EBADF;
+ int fret = -1;
+ int ret;
+ char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/setgroups") + 1];
+
+ if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+ ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (setgroups_fd < 0 && errno != ENOENT) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ if (setgroups_fd >= 0) {
+ ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+ if (ret != STRLITERALLEN("deny\n")) {
+ syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+ goto out;
+ }
+ }
+ }
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ ret = write_nointr(fd, buf, buf_size);
+ if (ret != buf_size) {
+ syserror("Failed to write %cid mapping to \"%s\"",
+ map_type == ID_TYPE_UID ? 'u' : 'g', path);
+ goto out;
+ }
+
+ fret = 0;
+out:
+ close(fd);
+ close(setgroups_fd);
+
+ return fret;
+}
+
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
+{
+ int fill, left;
+ char mapbuf[4096] = {};
+ bool had_entry = false;
+ idmap_type_t map_type, u_or_g;
+
+ if (list_empty(idmap))
+ return 0;
+
+ for (map_type = ID_TYPE_UID, u_or_g = 'u';
+ map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+ char *pos = mapbuf;
+ int ret;
+ struct list *iterator;
+
+
+ list_for_each(iterator, idmap) {
+ struct id_map *map = iterator->elem;
+ if (map->map_type != map_type)
+ continue;
+
+ had_entry = true;
+
+ left = 4096 - (pos - mapbuf);
+ fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+ /*
+ * The kernel only takes <= 4k for writes to
+ * /proc/<pid>/{g,u}id_map
+ */
+ if (fill <= 0 || fill >= left)
+ return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
+
+ pos += fill;
+ }
+ if (!had_entry)
+ continue;
+
+ ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
+ if (ret < 0)
+ return syserror("Failed to write mapping: %s", mapbuf);
+
+ memset(mapbuf, 0, sizeof(mapbuf));
+ }
+
+ return 0;
+}
+
+static int get_userns_fd_from_idmap(struct list *idmap)
+{
+ int ret;
+ pid_t pid;
+ char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/ns/user") + 1];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids_from_idmap(idmap, pid);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
+ if (ret < 0 || (size_t)ret >= sizeof(path_ns))
+ ret = -EIO;
+ else
+ ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+ (void)kill(pid, SIGKILL);
+ (void)wait_for_pid(pid);
+ return ret;
+}
+
+int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ struct list head, uid_mapl, gid_mapl;
+ struct id_map uid_map = {
+ .map_type = ID_TYPE_UID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+ struct id_map gid_map = {
+ .map_type = ID_TYPE_GID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+
+ list_init(&head);
+ uid_mapl.elem = &uid_map;
+ gid_mapl.elem = &gid_map;
+ list_add_tail(&head, &uid_mapl);
+ list_add_tail(&head, &gid_mapl);
+
+ return get_userns_fd_from_idmap(&head);
+}
+
+bool switch_ids(uid_t uid, gid_t gid)
+{
+ if (setgroups(0, NULL))
+ return syserror("failure: setgroups");
+
+ if (setresgid(gid, gid, gid))
+ return syserror("failure: setresgid");
+
+ if (setresuid(uid, uid, uid))
+ return syserror("failure: setresuid");
+
+ /* Ensure we can access proc files from processes we can ptrace. */
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
+ return syserror("failure: make dumpable");
+
+ return true;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h);
+
+static int userns_fd_cb(void *data)
+{
+ struct userns_hierarchy *h = data;
+ char c;
+ int ret;
+
+ ret = read_nointr(h->fd_event, &c, 1);
+ if (ret < 0)
+ return syserror("failure: read from socketpair");
+
+ /* Only switch ids if someone actually wrote a mapping for us. */
+ if (c == '1') {
+ if (!switch_ids(0, 0))
+ return syserror("failure: switch ids to 0");
+ }
+
+ ret = write_nointr(h->fd_event, "1", 1);
+ if (ret < 0)
+ return syserror("failure: write to socketpair");
+
+ ret = create_userns_hierarchy(++h);
+ if (ret < 0)
+ return syserror("failure: userns level %d", h->level);
+
+ return 0;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h)
+{
+ int fret = -1;
+ char c;
+ int fd_socket[2];
+ int fd_userns = -EBADF, ret = -1;
+ ssize_t bytes;
+ pid_t pid;
+ char path[256];
+
+ if (h->level == MAX_USERNS_LEVEL)
+ return 0;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
+ if (ret < 0)
+ return syserror("failure: create socketpair");
+
+ /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
+ h->fd_event = fd_socket[1];
+ pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
+ if (pid < 0) {
+ syserror("failure: userns level %d", h->level);
+ goto out_close;
+ }
+
+ ret = map_ids_from_idmap(&h->id_map, pid);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ if (!list_empty(&h->id_map))
+ bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
+ else
+ bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: write to socketpair");
+ goto out_wait;
+ }
+
+ /* Wait for child to set*id() and become dumpable. */
+ bytes = read_nointr(fd_socket[0], &c, 1);
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: read from socketpair");
+ goto out_wait;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_userns = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd_userns < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: open userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ fret = 0;
+
+out_wait:
+ if (!wait_for_pid(pid) && !fret) {
+ h->fd_userns = fd_userns;
+ fd_userns = -EBADF;
+ }
+
+out_close:
+ if (fd_userns >= 0)
+ close(fd_userns);
+ close(fd_socket[0]);
+ close(fd_socket[1]);
+ return fret;
+}
+
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return -1;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return -1;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int setup_userns(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ if (ret) {
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+ return ret;
+ }
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret) {
+ ksft_print_msg("making mount tree private: %s\n", strerror(errno));
+ return ret;
+ }
+
+ return 0;
+}
+
+/* caps_down - lower all effective caps */
+int caps_down(void)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_clear_flag(caps, CAP_EFFECTIVE);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+/* cap_down - lower an effective cap */
+int cap_down(cap_value_t down)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ cap_value_t cap = down;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+uint64_t get_unique_mnt_id(const char *path)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return 0;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return 0;
+ }
+
+ return sx.stx_mnt_id;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
new file mode 100644
index 000000000000..70f7ccc607f4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IDMAP_UTILS_H
+#define __IDMAP_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
+ unsigned long range);
+
+extern int caps_down(void);
+extern int cap_down(cap_value_t down);
+
+extern bool switch_ids(uid_t uid, gid_t gid);
+extern int setup_userns(void);
+
+static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
+{
+ if (setns(fd, CLONE_NEWUSER))
+ return false;
+
+ if (!switch_ids(uid, gid))
+ return false;
+
+ if (drop_caps && !caps_down())
+ return false;
+
+ return true;
+}
+
+extern uint64_t get_unique_mnt_id(const char *path);
+
+#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/filesystems/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h
new file mode 100644
index 000000000000..420ae4f908cf
--- /dev/null
+++ b/tools/testing/selftests/filesystems/wrappers.h
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+#ifndef __SELFTEST_OVERLAYFS_WRAPPERS_H__
+#define __SELFTEST_OVERLAYFS_WRAPPERS_H__
+
+#define _GNU_SOURCE
+
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+
+#ifndef STATX_MNT_ID_UNIQUE
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#endif
+
+static inline int sys_fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key,
+ const char *value, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+}
+
+static inline int sys_fsmount(int fd, unsigned int flags,
+ unsigned int attr_flags)
+{
+ return syscall(__NR_fsmount, fd, flags, attr_flags);
+}
+
+static inline int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return syscall(__NR_mount, src, tgt, fst, flags, data);
+}
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#endif
+
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
+static inline int sys_move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
+ to_pathname, flags);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+#endif
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
index 2659417cb2c7..4d7fcb828850 100644
--- a/tools/testing/selftests/ftrace/.gitignore
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
logs
+poll
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 49d96bb16355..7c12263f8260 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
-TEST_GEN_PROGS = poll
+TEST_GEN_FILES := poll
include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index cce72f8b03dc..3230bd54dba8 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -22,6 +22,7 @@ echo " --fail-unresolved Treat UNRESOLVED as a failure"
echo " -d|--debug Debug mode (trace all shell commands)"
echo " -l|--logdir <dir> Save logs on the <dir>"
echo " If <dir> is -, all logs output in console only"
+echo " --rv Run RV selftests instead of ftrace ones"
exit $1
}
@@ -133,6 +134,10 @@ parse_opts() { # opts
LINK_PTR=
shift 2
;;
+ --rv)
+ RV_TEST=1
+ shift 1
+ ;;
*.tc)
if [ -f "$1" ]; then
OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
@@ -152,9 +157,13 @@ parse_opts() { # opts
;;
esac
done
- if [ ! -z "$OPT_TEST_CASES" ]; then
+ if [ -n "$OPT_TEST_CASES" ]; then
TEST_CASES=$OPT_TEST_CASES
fi
+ if [ -n "$OPT_TEST_DIR" -a -f "$OPT_TEST_DIR"/test.d/functions ]; then
+ TOP_DIR=$OPT_TEST_DIR
+ TEST_DIR=$TOP_DIR/test.d
+ fi
}
# Parameters
@@ -190,10 +199,6 @@ fi
TOP_DIR=`absdir $0`
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
-LOG_TOP_DIR=$TOP_DIR/logs
-LOG_DATE=`date +%Y%m%d-%H%M%S`
-LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
-LINK_PTR=$LOG_TOP_DIR/latest
KEEP_LOG=0
KTAP=0
DEBUG=0
@@ -201,14 +206,23 @@ VERBOSE=0
UNSUPPORTED_RESULT=0
UNRESOLVED_RESULT=0
STOP_FAILURE=0
+RV_TEST=0
# Parse command-line options
parse_opts $*
+LOG_TOP_DIR=$TOP_DIR/logs
+LOG_DATE=`date +%Y%m%d-%H%M%S`
+LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
+LINK_PTR=$LOG_TOP_DIR/latest
+
[ $DEBUG -ne 0 ] && set -x
-# Verify parameters
-if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
- errexit "No ftrace directory found"
+if [ $RV_TEST -ne 0 ]; then
+ TRACING_DIR=$TRACING_DIR/rv
+ if [ ! -d "$TRACING_DIR" ]; then
+ err_ret=$err_skip
+ errexit "rv is not configured in this kernel"
+ fi
fi
# Preparing logs
@@ -419,7 +433,7 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
__run_test() { # testfile
# setup PID and PPID, $$ is not updated.
(cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x;
- checkreq $1; initialize_ftrace; . $1)
+ checkreq $1; initialize_system; . $1)
[ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
}
@@ -496,7 +510,7 @@ for t in $TEST_CASES; do
exit 1
fi
done
-(cd $TRACING_DIR; finish_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_system) # for cleanup
prlog ""
prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
index 8a7ce647a60d..318939451caf 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
@@ -28,7 +28,7 @@ unmount_tracefs() {
local mount_point="$1"
# Need to make sure the mount isn't busy so that we can umount it
- (cd $mount_point; finish_ftrace;)
+ (cd $mount_point; finish_system;)
cleanup
}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
new file mode 100644
index 000000000000..7daf7292209e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic tests on writing to trace_marker_raw
+# requires: trace_marker_raw
+# flags: instance
+
+is_little_endian() {
+ if lscpu | grep -q 'Little Endian'; then
+ echo 1;
+ else
+ echo 0;
+ fi
+}
+
+little=`is_little_endian`
+
+make_str() {
+ id=$1
+ cnt=$2
+
+ if [ $little -eq 1 ]; then
+ val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+ $(($id & 0xff)) \
+ $((($id >> 8) & 0xff)) \
+ $((($id >> 16) & 0xff)) \
+ $((($id >> 24) & 0xff))`
+ else
+ val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+ $((($id >> 24) & 0xff)) \
+ $((($id >> 16) & 0xff)) \
+ $((($id >> 8) & 0xff)) \
+ $(($id & 0xff))`
+ fi
+
+ data=`printf -- 'X%.0s' $(seq $cnt)`
+
+ printf "${val}${data}"
+}
+
+write_buffer() {
+ id=$1
+ size=$2
+
+ # write the string into the raw marker
+ make_str $id $size > trace_marker_raw
+}
+
+
+test_multiple_writes() {
+
+ # Write a bunch of data where the id is the count of
+ # data to write
+ for i in `seq 1 10` `seq 101 110` `seq 1001 1010`; do
+ write_buffer $i $i
+ done
+
+ # add a little buffer
+ echo stop > trace_marker
+
+ # Check to make sure the number of entries is the id (rounded up by 4)
+ awk '/.*: # [0-9a-f]* / {
+ print;
+ cnt = -1;
+ for (i = 0; i < NF; i++) {
+ # The counter is after the "#" marker
+ if ( $i == "#" ) {
+ i++;
+ cnt = strtonum("0x" $i);
+ num = NF - (i + 1);
+ # The number of items is always rounded up by 4
+ cnt2 = int((cnt + 3) / 4) * 4;
+ if (cnt2 != num) {
+ exit 1;
+ }
+ break;
+ }
+ }
+ }
+ // { if (NR > 30) { exit 0; } } ' trace_pipe;
+}
+
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+test_buffer() {
+
+ # The id must be four bytes, test that 3 bytes fails a write
+ if echo -n abc > ./trace_marker_raw ; then
+ echo "Too small of write expected to fail but did not"
+ exit_fail
+ fi
+
+ size=`get_buffer_data_size`
+ echo size = $size
+
+ # Now add a little more than what it can handle
+
+ if write_buffer 0xdeadbeef $size ; then
+ echo "Too big of write expected to fail but did not"
+ exit_fail
+ fi
+}
+
+test_buffer
+test_multiple_writes
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index dc25bcf4f9e2..47067a5e3cb0 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -7,20 +7,76 @@ echo 0 > events/enable
echo > dynamic_events
PLACE=$FUNCTION_FORK
+PLACE2="kmem_cache_free"
+PLACE3="schedule_timeout"
+
+# Some functions may have BPF programs attached, therefore
+# count already enabled_functions before tests start
+ocnt=`cat enabled_functions | wc -l`
echo "f:myevent1 $PLACE" >> dynamic_events
+
echo "f:myevent2 $PLACE%return" >> dynamic_events
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
grep -q myevent1 dynamic_events
grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
test -d events/fprobes/myevent1
test -d events/fprobes/myevent2
+echo 1 > events/fprobes/myevent1/enable
+# Make sure the event is attached.
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -eq $ocnt ]; then
+ exit_fail
+fi
+
+echo 1 > events/fprobes/myevent2/enable
+cnt2=`cat enabled_functions | wc -l`
+
+echo 1 > events/fprobes/myevent3/enable
+# If the function is different, the attached function should be increased
+grep -q $PLACE2 enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -eq $cnt2 ]; then
+ exit_fail
+fi
+
+echo 0 > events/fprobes/myevent2/enable
echo "-:myevent2" >> dynamic_events
grep -q myevent1 dynamic_events
! grep -q myevent2 dynamic_events
+echo 0 > events/fprobes/enable
echo > dynamic_events
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
+echo "f:myevent4 $PLACE" >> dynamic_events
+
+echo 1 > events/fprobes/myevent4/enable
+# Should only have one enabled
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+echo 0 > events/fprobes/enable
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
index 155792eaeee5..f271c4238b72 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
@@ -6,6 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
+SUBSYSTEM=kmem
TRACEPOINT1=kmem_cache_alloc
TRACEPOINT2=kmem_cache_free
@@ -24,4 +25,17 @@ grep -q myevent1 dynamic_events
echo > dynamic_events
+# auto naming check
+echo "t $TRACEPOINT1" >> dynamic_events
+
+test -d events/tracepoints/$TRACEPOINT1
+
+echo > dynamic_events
+
+# SUBSYSTEM is not supported
+echo "t $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM/$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+echo "t:myevent3 $SUBSYSTEM:$TRACEPOINT1" >> dynamic_events && exit_fail ||:
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
index 86c76679c56e..f2048c244526 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
@@ -3,14 +3,18 @@
# description: Generic dynamic event - add/remove/test uprobe events
# requires: uprobe_events
+if ! which readelf > /dev/null 2>&1 ; then
+ echo "No readelf found. skipped."
+ exit_unresolved
+fi
+
echo 0 > events/enable
echo > dynamic_events
REALBIN=`readlink -f /bin/sh`
+ENTRYPOINT=`readelf -h ${REALBIN} | grep Entry | sed -e 's/[^0]*//'`
-echo 'cat /proc/$$/maps' | /bin/sh | \
- grep "r-xp .*${REALBIN}$" | \
- awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events
+echo "p:myevent ${REALBIN}:${ENTRYPOINT}" >> uprobe_events
grep -q myevent uprobe_events
test -d events/uprobes/myevent
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
new file mode 100644
index 000000000000..f656bccb1a14
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Checking dynamic events limitations
+# requires: dynamic_events "imm-value":README
+
+# Max arguments limitation
+MAX_ARGS=128
+EXCEED_ARGS=$((MAX_ARGS + 1))
+
+# bash and dash evaluate variables differently.
+# dash will evaluate '\\' every time it is read whereas bash does not.
+#
+# TEST_STRING="$TEST_STRING \\$i"
+# echo $TEST_STRING
+#
+# With i=123
+# On bash, that will print "\123"
+# but on dash, that will print the escape sequence of \123 as the \ will
+# be interpreted again in the echo.
+#
+# Set a variable "bs" to save a double backslash, then echo that
+# to "ts" to see if $ts changed or not. If it changed, it's dash,
+# if not, it's bash, and then bs can equal a single backslash.
+bs='\\'
+ts=`echo $bs`
+if [ "$ts" = '\\' ]; then
+ # this is bash
+ bs='\'
+fi
+
+check_max_args() { # event_header
+ TEST_STRING=$1
+ # Acceptable
+ for i in `seq 1 $MAX_ARGS`; do
+ TEST_STRING="$TEST_STRING $bs$i"
+ done
+ echo "$TEST_STRING" >> dynamic_events
+ echo > dynamic_events
+ # Error
+ TEST_STRING="$TEST_STRING \\$EXCEED_ARGS"
+ ! echo "$TEST_STRING" >> dynamic_events
+ return 0
+}
+
+# Kprobe max args limitation
+if grep -q "kprobe_events" README; then
+ check_max_args "p vfs_read"
+fi
+
+# Fprobe max args limitation
+if grep -q "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README; then
+ check_max_args "f vfs_read"
+fi
+
+# Tprobe max args limitation
+if grep -q "t[:[<group>/][<event>]] <tracepoint> [<args>]" README; then
+ check_max_args "t kfree"
+fi
+
+# Uprobe max args limitation
+if grep -q "uprobe_events" README; then
+ check_max_args "p /bin/sh:10"
+fi
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
new file mode 100644
index 000000000000..c1f1cafa30f3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - enable/disable tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT=sched_switch
+ENABLEFILE=events/tracepoints/myprobe/enable
+
+:;: "Add tracepoint event on $TRACEPOINT" ;:
+
+echo "t:myprobe ${TRACEPOINT}" >> dynamic_events
+
+:;: "Check enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+:;: "Repeat enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index c9425a34fae3..fee479295e2f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -27,6 +27,7 @@ check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME
check_error 'f:^ vfs_read' # NO_EVENT_NAME
check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+check_error 't kmem^/kfree' # BAD_TP_NAME
check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b7c8f29c09a9..65916bb55dfb 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -14,11 +14,35 @@ fail() { #msg
exit_fail
}
+# As reading trace can last forever, simply look for 3 different
+# events then exit out of reading the file. If there's not 3 different
+# events, then the test has failed.
+check_unique() {
+ cat trace | grep -v '^#' | awk '
+ BEGIN { cnt = 0; }
+ {
+ for (i = 0; i < cnt; i++) {
+ if (event[i] == $5) {
+ break;
+ }
+ }
+ if (i == cnt) {
+ event[cnt++] = $5;
+ if (cnt > 2) {
+ exit;
+ }
+ }
+ }
+ END {
+ printf "%d", cnt;
+ }'
+}
+
echo 'sched:*' > set_event
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +53,7 @@ echo 1 > events/sched/enable
yield
-count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`check_unique`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 118247b8dd84..cfa16aa1f39a 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -20,6 +20,10 @@ sample_events() {
echo 0 > tracing_on
echo 0 > events/enable
+# Clear functions caused by page cache; run sample_events twice
+sample_events
+sample_events
+
echo "Get the most frequently calling function"
echo > trace
sample_events
@@ -80,6 +84,26 @@ if [ $misscnt -gt 0 ]; then
exit_fail
fi
+# Check strings too
+if [ -f events/syscalls/sys_enter_openat/filter ]; then
+ DIRNAME=`basename $TMPDIR`
+ echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_openat/enable
+ echo 1 > tracing_on
+ ls /bin/sh
+ nocnt=`grep openat trace | wc -l`
+ ls $TMPDIR
+ echo 0 > tracing_on
+ hitcnt=`grep openat trace | wc -l`;
+ echo 0 > events/syscalls/sys_enter_openat/enable
+ if [ $nocnt -gt 0 ]; then
+ exit_fail
+ fi
+ if [ $hitcnt -eq 0 ]; then
+ exit_fail
+ fi
+fi
+
reset_events_filter
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
new file mode 100644
index 000000000000..b6d6a312ead5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
+
+# Make sure that function graph filtering works
+
+INSTANCE1="instances/test1_$$"
+INSTANCE2="instances/test2_$$"
+
+WD=`pwd`
+
+do_reset() {
+ cd $WD
+ if [ -d $INSTANCE1 ]; then
+ echo nop > $INSTANCE1/current_tracer
+ rmdir $INSTANCE1
+ fi
+ if [ -d $INSTANCE2 ]; then
+ echo nop > $INSTANCE2/current_tracer
+ rmdir $INSTANCE2
+ fi
+}
+
+mkdir $INSTANCE1
+if ! grep -q function_graph $INSTANCE1/available_tracers; then
+ echo "function_graph not allowed with instances"
+ rmdir $INSTANCE1
+ exit_unsupported
+fi
+
+mkdir $INSTANCE2
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+function_count() {
+ search=$1
+ vsearch=$2
+
+ if [ -z "$search" ]; then
+ cat enabled_functions | wc -l
+ elif [ -z "$vsearch" ]; then
+ grep $search enabled_functions | wc -l
+ else
+ grep $search enabled_functions | grep $vsearch| wc -l
+ fi
+}
+
+set_fgraph() {
+ instance=$1
+ filter="$2"
+ notrace="$3"
+
+ echo "$filter" > $instance/set_ftrace_filter
+ echo "$notrace" > $instance/set_ftrace_notrace
+ echo function_graph > $instance/current_tracer
+}
+
+check_functions() {
+ orig_cnt=$1
+ test=$2
+
+ cnt=`function_count $test`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+check_cnt() {
+ orig_cnt=$1
+ search=$2
+ vsearch=$3
+
+ cnt=`function_count $search $vsearch`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+reset_graph() {
+ instance=$1
+ echo nop > $instance/current_tracer
+}
+
+# get any functions that were enabled before the test
+total_cnt=`function_count`
+sched_cnt=`function_count sched`
+lock_cnt=`function_count lock`
+time_cnt=`function_count time`
+clock_cnt=`function_count clock`
+locks_clock_cnt=`function_count locks clock`
+clock_locks_cnt=`function_count clock locks`
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# Make sure "time" isn't listed
+check_functions $time_cnt 'time'
+instance1_cnt=`function_count`
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+instance1_2_cnt=`function_count`
+
+# Turn off the first instance
+reset_graph $INSTANCE1
+
+# The second instance doesn't trace "clock" functions
+check_functions $clock_cnt 'clock'
+instance2_cnt=`function_count`
+
+# Start from a clean slate
+reset_graph $INSTANCE2
+check_functions $total_cnt
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+
+# This should match the last time instance 2 was by itself
+cnt=`function_count`
+if [ $instance2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "clock" functions
+check_functions $clock_cnt 'clock'
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# This should match the last time both instances were enabled
+cnt=`function_count`
+if [ $instance1_2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# Turn off the second instance
+reset_graph $INSTANCE2
+
+# This should match the last time instance 1 was by itself
+cnt=`function_count`
+if [ $instance1_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "time" functions
+check_functions $time_cnt 'time'
+
+# Start from a clean slate
+reset_graph $INSTANCE1
+check_functions $total_cnt
+
+# Enable all functions but those that have "locks"
+set_fgraph $INSTANCE1 '' '*locks*'
+
+# Enable all functions but those that have "clock"
+set_fgraph $INSTANCE2 '' '*clock*'
+
+# If a function has "locks" it should not have "clock"
+check_cnt $locks_clock_cnt locks clock
+
+# If a function has "clock" it should not have "locks"
+check_cnt $clock_locks_cnt clock locks
+
+reset_graph $INSTANCE1
+reset_graph $INSTANCE2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 4b994b6df5ac..ed81eaf2afd6 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$'
ftrace_filter_check '*pin*lock' '.*pin.*lock$'
# filter by start*mid*
-ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*'
# Advanced full-glob matching feature is recently supported.
# Skip the tests if we are sure the kernel does not support it.
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 84d6a9c7ad67..e8e718139294 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -104,7 +104,7 @@ clear_dynamic_events() { # reset all current dynamic events
done
}
-initialize_ftrace() { # Reset ftrace to initial-state
+initialize_system() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
# no probes, and tracing on.
@@ -134,8 +134,8 @@ initialize_ftrace() { # Reset ftrace to initial-state
enable_tracing
}
-finish_ftrace() {
- initialize_ftrace
+finish_system() {
+ initialize_system
# And recover it to default.
[ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
}
@@ -156,7 +156,13 @@ check_requires() { # Check required files and tracers
exit_unsupported
fi
elif [ "$r" != "$i" ]; then
- if ! grep -Fq "$r" README ; then
+ # If this is an instance, check the top directory
+ if echo $TRACING_DIR | grep -q "/instances/"; then
+ test="$TRACING_DIR/../.."
+ else
+ test=$TRACING_DIR
+ fi
+ if ! grep -Fq "$r" $test/README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 1590d6bfb857..20a35fea13f8 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger expected fail actions
# requires: set_event snapshot "snapshot()":README
+# flags: instance
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index 91339c130832..55ab0270e5f7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onchange action
# requires: set_event "onchange(var)":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -19,4 +20,6 @@ if ! grep -q "changed:" events/sched/sched_waking/hist; then
fail "Failed to create onchange action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 147967e86584..9eb37c2fa417 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger snapshot action
# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program
+# flags: instance
fail() { #msg
echo $1
@@ -27,4 +28,6 @@ if ! grep -q "comm=ping" snapshot; then
fail "Failed to create snapshot action inter-event histogram"
fi
+echo '!hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
index 05ffba299dbf..0ebda2068a00 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test histogram expression parsing
# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+# flags: instance
fail() { #msg
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index fbcbdb6963b3..776ad658f75e 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,11 +1,14 @@
# SPDX-License-Identifier: GPL-2.0-only
+futex_numa_mpol
+futex_priv_hash
+futex_requeue
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
+futex_wait
futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
-futex_wait
-futex_requeue
futex_waitv
+futex_numa
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index f79f9bac7918..490ace1f017e 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
+PKG_CONFIG ?= pkg-config
+LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO")
+
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
-LDLIBS := -lpthread -lrt
+CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1
+LDLIBS := -lpthread -lrt -lnuma
LOCAL_HDRS := \
../include/futextest.h \
- ../include/atomic.h \
- ../include/logging.h
+ ../include/atomic.h
TEST_GEN_PROGS := \
futex_wait_timeout \
futex_wait_wouldblock \
@@ -17,7 +19,10 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
- futex_waitv
+ futex_priv_hash \
+ futex_numa_mpol \
+ futex_waitv \
+ futex_numa
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
new file mode 100644
index 000000000000..e0a33510ccb6
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include "futextest.h"
+#include "futex2test.h"
+
+typedef u_int32_t u32;
+typedef int32_t s32;
+typedef u_int64_t u64;
+
+static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
+static int fnode = FUTEX_NO_NODE;
+
+/* fairly stupid test-and-set lock with a waiter flag */
+
+#define N_LOCK 0x0000001
+#define N_WAITERS 0x0001000
+
+struct futex_numa_32 {
+ union {
+ u64 full;
+ struct {
+ u32 val;
+ u32 node;
+ };
+ };
+};
+
+void futex_numa_32_lock(struct futex_numa_32 *lock)
+{
+ for (;;) {
+ struct futex_numa_32 new, old = {
+ .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
+ };
+
+ for (;;) {
+ new = old;
+ if (old.val == 0) {
+ /* no waiter, no lock -> first lock, set no-node */
+ new.node = fnode;
+ }
+ if (old.val & N_LOCK) {
+ /* contention, set waiter */
+ new.val |= N_WAITERS;
+ }
+ new.val |= N_LOCK;
+
+ /* nothing changed, ready to block */
+ if (old.full == new.full)
+ break;
+
+ /*
+ * Use u64 cmpxchg to set the futex value and node in a
+ * consistent manner.
+ */
+ if (__atomic_compare_exchange_n(&lock->full,
+ &old.full, new.full,
+ /* .weak */ false,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED)) {
+
+ /* if we just set N_LOCK, we own it */
+ if (!(old.val & N_LOCK))
+ return;
+
+ /* go block */
+ break;
+ }
+ }
+
+ futex2_wait(lock, new.val, fflags, NULL, 0);
+ }
+}
+
+void futex_numa_32_unlock(struct futex_numa_32 *lock)
+{
+ u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
+ assert((s32)val >= 0);
+ if (val & N_WAITERS) {
+ int woken = futex2_wake(lock, 1, fflags);
+ assert(val == N_WAITERS);
+ if (!woken) {
+ __atomic_compare_exchange_n(&lock->val, &val, 0U,
+ false, __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED);
+ }
+ }
+}
+
+static long nanos = 50000;
+
+struct thread_args {
+ pthread_t tid;
+ volatile int * done;
+ struct futex_numa_32 *lock;
+ int val;
+ int *val1, *val2;
+ int node;
+};
+
+static void *threadfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+ struct timespec ts = {
+ .tv_nsec = nanos,
+ };
+ int node;
+
+ while (!*args->done) {
+
+ futex_numa_32_lock(args->lock);
+ args->val++;
+
+ assert(*args->val1 == *args->val2);
+ (*args->val1)++;
+ nanosleep(&ts, NULL);
+ (*args->val2)++;
+
+ node = args->lock->node;
+ futex_numa_32_unlock(args->lock);
+
+ if (node != args->node) {
+ args->node = node;
+ printf("node: %d\n", node);
+ }
+
+ nanosleep(&ts, NULL);
+ }
+
+ return NULL;
+}
+
+static void *contendfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+
+ while (!*args->done) {
+ /*
+ * futex2_wait() will take hb-lock, verify *var == val and
+ * queue/abort. By knowingly setting val 'wrong' this will
+ * abort and thereby generate hb-lock contention.
+ */
+ futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
+ args->val++;
+ }
+
+ return NULL;
+}
+
+static volatile int done = 0;
+static struct futex_numa_32 lock = { .val = 0, };
+static int val1, val2;
+
+int main(int argc, char *argv[])
+{
+ struct thread_args *tas[512], *cas[512];
+ int c, t, threads = 2, contenders = 0;
+ int sleeps = 10;
+ int total = 0;
+
+ while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
+ switch (c) {
+ case 'c':
+ contenders = atoi(optarg);
+ break;
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ sleeps = atoi(optarg);
+ break;
+ case 'n':
+ nanos = atoi(optarg);
+ break;
+ case 'N':
+ fflags |= FUTEX2_NUMA;
+ if (optarg)
+ fnode = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ break;
+ }
+ }
+
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, contendfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ cas[t] = args;
+ }
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, threadfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ tas[t] = args;
+ }
+
+ sleep(sleeps);
+
+ done = true;
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = tas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("total: %d\n", total);
+
+ if (contenders) {
+ total = 0;
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = cas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+ // printf("tval: %d\n", args->val);
+ }
+ printf("contenders: %d\n", total);
+ }
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
new file mode 100644
index 000000000000..ab8555752137
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <numa.h>
+#include <numaif.h>
+
+#include <linux/futex.h>
+#include <sys/mman.h>
+
+#include "futextest.h"
+#include "futex2test.h"
+#include "kselftest_harness.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_t threads[MAX_THREADS];
+
+struct thread_args {
+ void *futex_ptr;
+ unsigned int flags;
+ int result;
+};
+
+static struct thread_args thread_args[MAX_THREADS];
+
+#ifndef FUTEX_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+static void *thread_lock_fn(void *arg)
+{
+ struct thread_args *args = arg;
+ int ret;
+
+ pthread_barrier_wait(&barrier_main);
+ ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0);
+ args->result = ret;
+ return NULL;
+}
+
+static void create_max_threads(void *futex_ptr)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ thread_args[i].futex_ptr = futex_ptr;
+ thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA;
+ thread_args[i].result = 0;
+ ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags)
+{
+ int to_wake, ret, i, need_exit = 0;
+
+ pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ create_max_threads(futex_ptr);
+ pthread_barrier_wait(&barrier_main);
+ to_wake = MAX_THREADS;
+
+ do {
+ ret = futex2_wake(futex_ptr, to_wake, futex_flags);
+
+ if (err_value) {
+ if (ret >= 0)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+
+ if (errno != err_value)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n",
+ to_wake, futex_flags, err_value, errno, strerror(errno));
+
+ break;
+ }
+ if (ret < 0) {
+ ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
+ to_wake, futex_flags);
+ }
+ if (!ret)
+ usleep(50);
+ to_wake -= ret;
+
+ } while (to_wake);
+ join_max_threads();
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (err_value && thread_args[i].result != -1) {
+ ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
+ i, thread_args[i].result);
+ need_exit = 1;
+ }
+ if (!err_value && thread_args[i].result != 0) {
+ ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
+ need_exit = 1;
+ }
+ }
+ if (need_exit)
+ ksft_exit_fail_msg("Aborting due to earlier errors.\n");
+}
+
+static void test_futex(void *futex_ptr, int err_value)
+{
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+}
+
+static void test_futex_mpol(void *futex_ptr, int err_value)
+{
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+}
+
+TEST(futex_numa_mpol)
+{
+ struct futex32_numa *futex_numa;
+ void *futex_ptr;
+ int mem_size;
+
+ mem_size = sysconf(_SC_PAGE_SIZE);
+ futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ if (futex_ptr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+
+ /* Create an invalid memory region for the "Memory out of range" test */
+ mprotect(futex_ptr + mem_size, mem_size, PROT_NONE);
+
+ futex_numa = futex_ptr;
+
+ ksft_print_msg("Regular test\n");
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+ test_futex(futex_ptr, 0);
+
+ if (futex_numa->numa == FUTEX_NO_NODE)
+ ksft_exit_fail_msg("NUMA node is left uninitialized\n");
+
+ /* FUTEX2_NUMA futex must be 8-byte aligned */
+ ksft_print_msg("Mis-aligned futex\n");
+ test_futex(futex_ptr + mem_size - 4, EINVAL);
+
+ ksft_print_msg("Memory out of range\n");
+ test_futex(futex_ptr + mem_size, EFAULT);
+
+ futex_numa->numa = FUTEX_NO_NODE;
+ mprotect(futex_ptr, mem_size, PROT_READ);
+ ksft_print_msg("Memory, RO\n");
+ test_futex(futex_ptr, EFAULT);
+
+ mprotect(futex_ptr, mem_size, PROT_NONE);
+ ksft_print_msg("Memory, no access\n");
+ test_futex(futex_ptr, EFAULT);
+
+ mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
+ ksft_print_msg("Memory back to RW\n");
+ test_futex(futex_ptr, 0);
+
+ ksft_test_result_pass("futex2 memory boundary tests passed\n");
+
+ /* MPOL test. Does not work as expected */
+#ifdef LIBNUMA_VER_SUFFICIENT
+ for (int i = 0; i < 4; i++) {
+ unsigned long nodemask;
+ int ret;
+
+ nodemask = 1 << i;
+ ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
+ sizeof(nodemask) * 8, 0);
+ if (ret == 0) {
+ ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno);
+
+ ksft_print_msg("Node %d test\n", i);
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+
+ ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ if (ret < 0)
+ ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
+ if (futex_numa->numa != i) {
+ ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
+ }
+ }
+ }
+ ksft_test_result_pass("futex2 MPOL hints test passed\n");
+#else
+ ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n");
+#endif
+ munmap(futex_ptr, mem_size * 2);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
new file mode 100644
index 000000000000..e8079d7c65e8
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "kselftest_harness.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_mutex_t global_lock;
+static pthread_t threads[MAX_THREADS];
+static int counter;
+
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+#endif
+
+static int futex_hash_slots_set(unsigned int slots)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0);
+}
+
+static int futex_hash_slots_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+}
+
+static void futex_hash_slots_set_verify(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
+ ksft_finished();
+ }
+ ret = futex_hash_slots_get();
+ if (ret != slots) {
+ ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n",
+ slots, ret);
+ ksft_finished();
+ }
+ ksft_test_result_pass("SET and GET slots %d passed\n", slots);
+}
+
+static void futex_hash_slots_set_must_fail(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n",
+ slots);
+}
+
+static void *thread_return_fn(void *arg)
+{
+ return NULL;
+}
+
+static void *thread_lock_fn(void *arg)
+{
+ pthread_barrier_wait(&barrier_main);
+
+ pthread_mutex_lock(&global_lock);
+ counter++;
+ usleep(20);
+ pthread_mutex_unlock(&global_lock);
+ return NULL;
+}
+
+static void create_max_threads(void *(*thread_fn)(void *))
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_create(&threads[i], NULL, thread_fn, NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed: %m\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+#define SEC_IN_NSEC 1000000000
+#define MSEC_IN_NSEC 1000000
+
+static void futex_dummy_op(void)
+{
+ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+ struct timespec timeout;
+ int ret;
+
+ pthread_mutex_lock(&lock);
+ clock_gettime(CLOCK_REALTIME, &timeout);
+ timeout.tv_nsec += 100 * MSEC_IN_NSEC;
+ if (timeout.tv_nsec >= SEC_IN_NSEC) {
+ timeout.tv_nsec -= SEC_IN_NSEC;
+ timeout.tv_sec++;
+ }
+ ret = pthread_mutex_timedlock(&lock, &timeout);
+ if (ret == 0)
+ ksft_exit_fail_msg("Successfully locked an already locked mutex.\n");
+
+ if (ret != ETIMEDOUT)
+ ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
+}
+
+static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
+static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
+
+TEST(priv_hash)
+{
+ int futex_slots1, futex_slotsn, online_cpus;
+ pthread_mutexattr_t mutex_attr_pi;
+ int ret, retry = 20;
+
+ ret = pthread_mutexattr_init(&mutex_attr_pi);
+ ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
+ ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to initialize pthread mutex.\n");
+ }
+ /* First thread, expect to be 0, not yet initialized */
+ ret = futex_hash_slots_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
+
+ ksft_test_result_pass("Basic get slots and immutable status.\n");
+ ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret);
+
+ ret = pthread_join(threads[0], NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
+
+ /* First thread, has to initialize private hash */
+ futex_slots1 = futex_hash_slots_get();
+ if (futex_slots1 <= 0) {
+ ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
+ ksft_exit_fail_msg("%s", test_msg_auto_create);
+ }
+
+ ksft_test_result_pass("%s", test_msg_auto_create);
+
+ online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n");
+
+ ret = pthread_mutex_lock(&global_lock);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n");
+
+ counter = 0;
+ create_max_threads(thread_lock_fn);
+ pthread_barrier_wait(&barrier_main);
+
+ /*
+ * The current default size of hash buckets is 16. The auto increase
+ * works only if more than 16 CPUs are available.
+ */
+ ksft_print_msg("Online CPUs: %d\n", online_cpus);
+ if (online_cpus > 16) {
+retry_getslots:
+ futex_slotsn = futex_hash_slots_get();
+ if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ retry--;
+ /*
+ * Auto scaling on thread creation can be slightly delayed
+ * because it waits for a RCU grace period twice. The new
+ * private hash is assigned upon the first futex operation
+ * after grace period.
+ * To cover all this for testing purposes the function
+ * below will acquire a lock and acquire it again with a
+ * 100ms timeout which must timeout. This ensures we
+ * sleep for 100ms and issue a futex operation.
+ */
+ if (retry > 0) {
+ futex_dummy_op();
+ goto retry_getslots;
+ }
+ ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
+ futex_slots1, futex_slotsn);
+ ksft_exit_fail_msg("%s", test_msg_auto_inc);
+ }
+ ksft_test_result_pass("%s", test_msg_auto_inc);
+ } else {
+ ksft_test_result_skip("%s", test_msg_auto_inc);
+ }
+ ret = pthread_mutex_unlock(&global_lock);
+
+ /* Once the user changes it, it has to be what is set */
+ futex_hash_slots_set_verify(2);
+ futex_hash_slots_set_verify(4);
+ futex_hash_slots_set_verify(8);
+ futex_hash_slots_set_verify(32);
+ futex_hash_slots_set_verify(16);
+
+ ret = futex_hash_slots_set(15);
+ ksft_test_result(ret < 0, "Use 15 slots\n");
+
+ futex_hash_slots_set_verify(2);
+ join_max_threads();
+ ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n",
+ counter, MAX_THREADS);
+ counter = 0;
+ /* Once the user set something, auto resize must be disabled */
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
+ ret);
+
+ futex_hash_slots_set_must_fail(1 << 29);
+ futex_hash_slots_set_verify(4);
+
+ /*
+ * Once the global hash has been requested, then this requested can not
+ * be undone.
+ */
+ ret = futex_hash_slots_set(0);
+ ksft_test_result(ret == 0, "Global hash request\n");
+ if (ret != 0)
+ return;
+
+ futex_hash_slots_set_must_fail(4);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(8);
+ futex_hash_slots_set_must_fail(0);
+ futex_hash_slots_set_must_fail(6);
+
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
index 51485be6eb2f..35d4be23db5d 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -7,24 +7,15 @@
#include <pthread.h>
#include <limits.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-requeue"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
volatile futex_t *f1;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -38,67 +29,49 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(requeue_single)
{
- pthread_t waiter[10];
- int res, ret = RET_PASS;
- int c, i;
volatile futex_t _f1 = 0;
volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res;
f1 = &_f1;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test futex_requeue\n",
- basename(argv[0]));
-
/*
* Requeue a waiter from f1 to f2, and wake f2.
*/
if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Requeuing 1 futex from f1 to f2\n");
+ ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
- if (res != 1) {
+ if (res != 1)
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
- }
-
- info("Waking 1 futex at f2\n");
+ ksft_print_dbg_msg("Waking 1 futex at f2\n");
res = futex_wake(&f2, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue simple succeeds\n");
}
+}
+
+TEST(requeue_multiple)
+{
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res, i;
+ f1 = &_f1;
/*
* Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
@@ -106,31 +79,28 @@ int main(int argc, char *argv[])
*/
for (i = 0; i < 10; i++) {
if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
}
usleep(WAKE_WAIT_US);
- info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
if (res != 10) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
}
- info("Waking INT_MAX futexes at f2\n");
+ ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n");
res = futex_wake(&f2, INT_MAX, 0);
if (res != 7) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue many succeeds\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 215c6cb539b4..46d2858e15a8 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -26,11 +26,11 @@
#include <stdlib.h>
#include <signal.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi"
#define MAX_WAKE_ITERS 1000
#define THREAD_MAX 10
#define SIGNAL_PERIOD_US 100
@@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
futex_t wake_complete = FUTEX_INITIALIZER;
-/* Test option defaults */
-static long timeout_ns;
-static int broadcast;
-static int owner;
-static int locked;
-
struct thread_arg {
long id;
struct timespec *timeout;
@@ -56,18 +50,73 @@ struct thread_arg {
};
#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
-void usage(char *prog)
+FIXTURE(args)
{
- printf("Usage: %s\n", prog);
- printf(" -b Broadcast wakeup (all waiters)\n");
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -l Lock the pi futex across requeue\n");
- printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
- printf(" -t N Timeout in nanoseconds (default: 0)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
+};
+
+FIXTURE_SETUP(args)
+{
+};
+
+FIXTURE_TEARDOWN(args)
+{
+};
+
+FIXTURE_VARIANT(args)
+{
+ long timeout_ns;
+ bool broadcast;
+ bool owner;
+ bool locked;
+};
+
+/*
+ * For a given timeout value, this macro creates a test input with all the
+ * possible combinations of valid arguments
+ */
+#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout) \
+{ \
+ .timeout_ns = timeout, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .owner = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .owner = true, \
+}; \
+
+FIXTURE_VARIANT_ADD_TIMEOUT(0);
+FIXTURE_VARIANT_ADD_TIMEOUT(5000);
+FIXTURE_VARIANT_ADD_TIMEOUT(500000);
+FIXTURE_VARIANT_ADD_TIMEOUT(2000000000);
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
@@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
return -1;
}
ret = pthread_attr_setschedpolicy(&attr, policy);
if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
return -1;
}
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
if (ret) {
- error("pthread_attr_setschedparam\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
return -1;
}
ret = pthread_create(pth, &attr, func, arg);
if (ret) {
- error("pthread_create\n", ret);
+ ksft_exit_fail_msg("pthread_create\n");
return -1;
}
return 0;
@@ -112,7 +161,7 @@ void *waiterfn(void *arg)
struct thread_arg *args = (struct thread_arg *)arg;
futex_t old_val;
- info("Waiter %ld: running\n", args->id);
+ ksft_print_dbg_msg("Waiter %ld: running\n", args->id);
/* Each thread sleeps for a different amount of time
* This is to avoid races, because we don't lock the
* external mutex here */
@@ -120,26 +169,25 @@ void *waiterfn(void *arg)
old_val = f1;
atomic_inc(&waiters_blocked);
- info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
&f1, f1, &f2);
args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
FUTEX_PRIVATE_FLAG);
- info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret,
args->ret < 0 ? strerror(errno) : "");
atomic_inc(&waiters_woken);
if (args->ret < 0) {
if (args->timeout && errno == ETIMEDOUT)
args->ret = 0;
else {
- args->ret = RET_ERROR;
- error("futex_wait_requeue_pi\n", errno);
+ ksft_exit_fail_msg("futex_wait_requeue_pi\n");
}
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret);
pthread_exit((void *)&args->ret);
}
@@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
- info("Waker: Calling broadcast\n");
+ ksft_print_dbg_msg("Waker: Calling broadcast\n");
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
continue_requeue:
@@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg)
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
FUTEX_PRIVATE_FLAG);
if (args->ret < 0) {
- args->ret = RET_ERROR;
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
} else if (++i < MAX_WAKE_ITERS) {
task_count += args->ret;
if (task_count < THREAD_MAX - waiters_woken.val)
goto continue_requeue;
} else {
- error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
- 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
- args->ret = RET_ERROR;
+ ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ MAX_WAKE_ITERS, task_count, THREAD_MAX);
}
futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
@@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg)
if (args->ret > 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
pthread_exit((void *)&args->ret);
}
@@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
- info("task_count: %d, waiters_woken: %d\n",
+ ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n",
task_count, waiters_woken.val);
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
- info("Waker: Calling signal\n");
+ ksft_print_dbg_msg("Waker: Calling signal\n");
/* cond_signal */
old_val = f1;
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
@@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg)
FUTEX_PRIVATE_FLAG);
if (args->ret < 0)
args->ret = -errno;
- info("futex: %x\n", f2);
+ ksft_print_dbg_msg("futex: %x\n", f2);
if (args->lock) {
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
}
- info("futex: %x\n", f2);
- if (args->ret < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- args->ret = RET_ERROR;
- break;
- }
+ ksft_print_dbg_msg("futex: %x\n", f2);
+ if (args->ret < 0)
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
task_count += args->ret;
usleep(SIGNAL_PERIOD_US);
i++;
/* we have to loop at least THREAD_MAX times */
if (i > MAX_WAKE_ITERS + THREAD_MAX) {
- error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
- 0, MAX_WAKE_ITERS + THREAD_MAX);
- args->ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ MAX_WAKE_ITERS + THREAD_MAX);
}
}
@@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg)
if (args->ret >= 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
- info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val);
pthread_exit((void *)&args->ret);
}
@@ -269,35 +310,40 @@ void *third_party_blocker(void *arg)
ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
out:
- if (args->ret || ret2) {
- error("third_party_blocker() futex error", 0);
- args->ret = RET_ERROR;
- }
+ if (args->ret || ret2)
+ ksft_exit_fail_msg("third_party_blocker() futex error");
pthread_exit((void *)&args->ret);
}
-int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+TEST_F(args, futex_requeue_pi)
{
- void *(*wakerfn)(void *) = signal_wakerfn;
struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
pthread_t waiter[THREAD_MAX], waker, blocker;
- struct timespec ts, *tsp = NULL;
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ bool third_party_owner = variant->owner;
+ long timeout_ns = variant->timeout_ns;
+ bool broadcast = variant->broadcast;
struct thread_arg args[THREAD_MAX];
- int *waiter_ret;
- int i, ret = RET_PASS;
+ struct timespec ts, *tsp = NULL;
+ bool lock = variant->locked;
+ int *waiter_ret, i, ret = 0;
+
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, lock, third_party_owner, timeout_ns);
if (timeout_ns) {
time_t secs;
- info("timeout_ns = %ld\n", timeout_ns);
+ ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns);
ret = clock_gettime(CLOCK_MONOTONIC, &ts);
secs = (ts.tv_nsec + timeout_ns) / 1000000000;
ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
ts.tv_sec += secs;
- info("ts.tv_sec = %ld\n", ts.tv_sec);
- info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec);
+ ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec);
tsp = &ts;
}
@@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
if (third_party_owner) {
if (create_rt_thread(&blocker, third_party_blocker,
(void *)&blocker_arg, SCHED_FIFO, 1)) {
- error("Creating third party blocker thread failed\n",
- errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating third party blocker thread failed\n");
}
}
@@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
for (i = 0; i < THREAD_MAX; i++) {
args[i].id = i;
args[i].timeout = tsp;
- info("Starting thread %d\n", i);
+ ksft_print_dbg_msg("Starting thread %d\n", i);
if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
SCHED_FIFO, 1)) {
- error("Creating waiting thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waiting thread failed\n");
}
}
waker_arg.lock = lock;
if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
SCHED_FIFO, 1)) {
- error("Creating waker thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waker thread failed\n");
}
/* Wait for threads to finish */
@@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
pthread_join(blocker, NULL);
pthread_join(waker, NULL);
-out:
if (!ret) {
if (*waiter_ret)
ret = *waiter_ret;
@@ -355,66 +393,8 @@ out:
ret = blocker_arg.ret;
}
- return ret;
+ if (ret)
+ ksft_test_result_fail("fail");
}
-int main(int argc, char *argv[])
-{
- char *test_name;
- int c, ret;
-
- while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
- switch (c) {
- case 'b':
- broadcast = 1;
- break;
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'l':
- locked = 1;
- break;
- case 'o':
- owner = 1;
- locked = 0;
- break;
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
- ksft_print_msg(
- "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
- broadcast, locked, owner, timeout_ns);
-
- ret = asprintf(&test_name,
- "%s broadcast=%d locked=%d owner=%d timeout=%ldns",
- TEST_NAME, broadcast, locked, owner, timeout_ns);
- if (ret < 0) {
- ksft_print_msg("Failed to generate test name\n");
- test_name = TEST_NAME;
- }
-
- /*
- * FIXME: unit_test is obsolete now that we parse options and the
- * various style of runs are done by run.sh - simplify the code and move
- * unit_test into main()
- */
- ret = unit_test(broadcast, locked, owner, timeout_ns);
-
- print_result(test_name, ret);
- return ret;
-}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index d0a4d332ea44..f686e605359c 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -23,67 +23,32 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include "futextest.h"
-#include "logging.h"
-#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+#include "futextest.h"
+#include "kselftest_harness.h"
futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
int child_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *blocking_child(void *arg)
{
child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
if (child_ret < 0) {
child_ret = -errno;
- error("futex_wait\n", errno);
+ ksft_exit_fail_msg("futex_wait\n");
}
return (void *)&child_ret;
}
-int main(int argc, char *argv[])
+TEST(requeue_pi_mismatched_ops)
{
- int ret = RET_PASS;
pthread_t child;
- int c;
+ int ret;
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
- basename(argv[0]));
+ if (pthread_create(&child, NULL, blocking_child, NULL))
+ ksft_exit_fail_msg("pthread_create\n");
- if (pthread_create(&child, NULL, blocking_child, NULL)) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
/* Allow the child to block in the kernel. */
sleep(1);
@@ -102,34 +67,27 @@ int main(int argc, char *argv[])
* FUTEX_WAKE.
*/
ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
- if (ret == 1) {
- ret = RET_PASS;
- } else if (ret < 0) {
- error("futex_wake\n", errno);
- ret = RET_ERROR;
- } else {
- error("futex_wake did not wake the child\n", 0);
- ret = RET_ERROR;
- }
+ if (ret == 1)
+ ret = 0;
+ else if (ret < 0)
+ ksft_exit_fail_msg("futex_wake\n");
+ else
+ ksft_exit_fail_msg("futex_wake did not wake the child\n");
} else {
- error("futex_cmp_requeue_pi\n", errno);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi\n");
}
} else if (ret > 0) {
- fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
- ret = RET_FAIL;
+ ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
} else {
- error("futex_cmp_requeue_pi found no waiters\n", 0);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n");
}
pthread_join(child, NULL);
- if (!ret)
- ret = child_ret;
-
- out:
- /* If the kernel crashes, we shouldn't return at all. */
- print_result(TEST_NAME, ret);
- return ret;
+ if (!ret && !child_ret)
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n");
+ else
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index c6b8f32990c8..a18ccae73eb1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -24,11 +24,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi-signal-restart"
#define DELAY_US 100
futex_t f1 = FUTEX_INITIALIZER;
@@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER;
int waiter_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
{
@@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
memset(&schedp, 0, sizeof(schedp));
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
- if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
ret = pthread_attr_setschedpolicy(&attr, policy);
- if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
- if (ret) {
- error("pthread_attr_setschedparam\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
ret = pthread_create(pth, &attr, func, arg);
- if (ret) {
- error("pthread_create\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
+
return 0;
}
void handle_signal(int signo)
{
- info("signal received %s requeue\n",
+ ksft_print_dbg_msg("signal received %s requeue\n",
requeued.val ? "after" : "prior to");
}
@@ -94,78 +78,46 @@ void *waiterfn(void *arg)
unsigned int old_val;
int res;
- waiter_ret = RET_PASS;
-
- info("Waiter running\n");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Waiter running\n");
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
old_val = f1;
res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
FUTEX_PRIVATE_FLAG);
if (!requeued.val || errno != EWOULDBLOCK) {
- fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
res, strerror(errno));
- info("w2:futex: %x\n", f2);
+ ksft_print_dbg_msg("w2:futex: %x\n", f2);
if (!res)
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- waiter_ret = RET_FAIL;
}
- info("Waiter exiting with %d\n", waiter_ret);
pthread_exit(NULL);
}
-int main(int argc, char *argv[])
+TEST(futex_requeue_pi_signal_restart)
{
unsigned int old_val;
struct sigaction sa;
pthread_t waiter;
- int c, res, ret = RET_PASS;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test signal handling during requeue_pi\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: <none>\n");
+ int res;
sa.sa_handler = handle_signal;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
- if (sigaction(SIGUSR1, &sa, NULL)) {
- error("sigaction\n", errno);
- exit(1);
- }
+ if (sigaction(SIGUSR1, &sa, NULL))
+ ksft_exit_fail_msg("sigaction\n");
- info("m1:f2: %x\n", f2);
- info("Creating waiter\n");
+ ksft_print_dbg_msg("m1:f2: %x\n", f2);
+ ksft_print_dbg_msg("Creating waiter\n");
res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
- if (res) {
- error("Creating waiting thread failed", res);
- ret = RET_ERROR;
- goto out;
- }
+ if (res)
+ ksft_exit_fail_msg("Creating waiting thread failed");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
- info("m2:f2: %x\n", f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("m2:f2: %x\n", f2);
futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
- info("m3:f2: %x\n", f2);
+ ksft_print_dbg_msg("m3:f2: %x\n", f2);
while (1) {
/*
@@ -173,11 +125,11 @@ int main(int argc, char *argv[])
* restart futex_wait_requeue_pi() in the kernel. Wait for the
* waiter to block on f1 again.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
usleep(DELAY_US);
- info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
old_val = f1;
res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
FUTEX_PRIVATE_FLAG);
@@ -191,12 +143,10 @@ int main(int argc, char *argv[])
atomic_set(&requeued, 1);
break;
} else if (res < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
}
}
- info("m4:f2: %x\n", f2);
+ ksft_print_dbg_msg("m4:f2: %x\n", f2);
/*
* Signal the waiter after requeue, waiter should return from
@@ -204,19 +154,14 @@ int main(int argc, char *argv[])
* futex_unlock_pi() can't happen before the signal wakeup is detected
* in the kernel.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
- info("Waiting for waiter to return\n");
+ ksft_print_dbg_msg("Waiting for waiter to return\n");
pthread_join(waiter, NULL);
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("m5:f2: %x\n", f2);
-
- out:
- if (ret == RET_PASS && waiter_ret)
- ret = waiter_ret;
-
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_print_dbg_msg("m5:f2: %x\n", f2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
index 685140d9b93d..0e69c53524c1 100644
--- a/tools/testing/selftests/futex/functional/futex_wait.c
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -9,25 +9,16 @@
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
#define SHM_PATH "futex_shm_file"
void *futex;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
static void *waiterfn(void *arg)
{
struct timespec to;
@@ -45,53 +36,37 @@ static void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_futex)
{
- int res, ret = RET_PASS, fd, c, shm_id;
- u_int32_t f_private = 0, *shared_data;
unsigned int flags = FUTEX_PRIVATE_FLAG;
+ u_int32_t f_private = 0;
pthread_t waiter;
- void *shm;
+ int res;
futex = &f_private;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(3);
- ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
-
/* Testing a private futex */
- info("Calling private futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling private futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
if (res != 1) {
ksft_test_result_fail("futex_wake private returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake private succeeds\n");
}
+}
+
+TEST(anon_page)
+{
+ u_int32_t *shared_data;
+ pthread_t waiter;
+ int res, shm_id;
/* Testing an anon page shared memory */
shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
@@ -105,67 +80,65 @@ int main(int argc, char *argv[])
*shared_data = 0;
futex = shared_data;
- info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
}
+ shmdt(shared_data);
+}
+
+TEST(file_backed)
+{
+ u_int32_t f_private = 0;
+ pthread_t waiter;
+ int res, fd;
+ void *shm;
/* Testing a file backed shared memory */
fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- perror("open");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_msg("open");
- if (ftruncate(fd, sizeof(f_private))) {
- perror("ftruncate");
- exit(1);
- }
+ if (ftruncate(fd, sizeof(f_private)))
+ ksft_exit_fail_msg("ftruncate");
shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (shm == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (shm == MAP_FAILED)
+ ksft_exit_fail_msg("mmap");
memcpy(shm, &f_private, sizeof(f_private));
futex = shm;
- info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex);
res = futex_wake(shm, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
}
- /* Freeing resources */
- shmdt(shared_data);
munmap(shm, sizeof(f_private));
remove(SHM_PATH);
close(fd);
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index fb4148f23fa3..2a749f9b14eb 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -27,10 +27,9 @@
#include <libgen.h>
#include <signal.h>
-#include "logging.h"
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-private-mapped-file"
#define PAGE_SZ 4096
char pad[PAGE_SZ] = {1};
@@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1};
#define WAKE_WAIT_US 3000000
struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *thr_futex_wait(void *arg)
{
int ret;
- info("futex wait\n");
+ ksft_print_dbg_msg("futex wait\n");
ret = futex_wait(&val, 1, &wait_timeout, 0);
- if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
- error("futex error.\n", errno);
- print_result(TEST_NAME, RET_ERROR);
- exit(RET_ERROR);
- }
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT)
+ ksft_exit_fail_msg("futex error.\n");
if (ret && errno == ETIMEDOUT)
- fail("waiter timedout\n");
+ ksft_exit_fail_msg("waiter timedout\n");
- info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+ ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno);
return NULL;
}
-int main(int argc, char **argv)
+TEST(wait_private_mapped_file)
{
pthread_t thr;
- int ret = RET_PASS;
int res;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg(
- "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
- basename(argv[0]));
-
- ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
- if (ret < 0) {
- fprintf(stderr, "pthread_create error\n");
- ret = RET_ERROR;
- goto out;
- }
-
- info("wait a while\n");
+
+ res = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (res < 0)
+ ksft_exit_fail_msg("pthread_create error\n");
+
+ ksft_print_dbg_msg("wait a while\n");
usleep(WAKE_WAIT_US);
val = 2;
res = futex_wake(&val, 1, 0);
- info("futex_wake %d\n", res);
- if (res != 1) {
- fail("FUTEX_WAKE didn't find the waiting thread.\n");
- ret = RET_FAIL;
- }
+ ksft_print_dbg_msg("futex_wake %d\n", res);
+ if (res != 1)
+ ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n");
- info("join\n");
+ ksft_print_dbg_msg("join\n");
pthread_join(thr, NULL);
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_test_result_pass("wait_private_mapped_file");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index d183f878360b..674dd13af421 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -16,26 +16,15 @@
*****************************************************************************/
#include <pthread.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
-
-#define TEST_NAME "futex-wait-timeout"
+#include "kselftest_harness.h"
static long timeout_ns = 100000; /* 100us default timeout */
static futex_t futex_pi;
static pthread_barrier_t barrier;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
/*
* Get a PI lock and hold it forever, so the main thread lock_pi will block
* and we can test the timeout
@@ -47,13 +36,13 @@ void *get_pi_lock(void *arg)
ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
if (ret != 0)
- error("futex_lock_pi failed\n", ret);
+ ksft_exit_fail_msg("futex_lock_pi failed\n");
pthread_barrier_wait(&barrier);
/* Blocks forever */
ret = futex_wait(&lock, 0, NULL, 0);
- error("futex_wait failed\n", ret);
+ ksft_exit_fail_msg("futex_wait failed\n");
return NULL;
}
@@ -61,12 +50,11 @@ void *get_pi_lock(void *arg)
/*
* Check if the function returned the expected error
*/
-static void test_timeout(int res, int *ret, char *test_name, int err)
+static void test_timeout(int res, char *test_name, int err)
{
if (!res || errno != err) {
ksft_test_result_fail("%s returned %d\n", test_name,
res < 0 ? errno : res);
- *ret = RET_FAIL;
} else {
ksft_test_result_pass("%s succeeds\n", test_name);
}
@@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err)
static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
long timeout_ns)
{
- if (clock_gettime(clockid, to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+ if (clock_gettime(clockid, to))
+ ksft_exit_fail_msg("clock_gettime failed\n");
to->tv_nsec += timeout_ns;
@@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
return 0;
}
-int main(int argc, char *argv[])
+TEST(wait_bitset)
{
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
struct timespec to;
- pthread_t thread;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(9);
- ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
-
- pthread_barrier_init(&barrier, NULL, 2);
- pthread_create(&thread, NULL, get_pi_lock, NULL);
+ int res;
/* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
res = futex_wait(&f1, f1, &to, 0);
- test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+ test_timeout(res, "futex_wait relative", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, 0);
- test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT);
+}
+
+TEST(requeue_pi)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res;
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT);
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
- test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+}
+
+TEST(lock_pi)
+{
+ struct timespec to;
+ pthread_t thread;
+ int res;
+
+ /* Create a thread that will lock forever so any waiter will timeout */
+ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
/* Wait until the other thread calls futex_lock_pi() */
pthread_barrier_wait(&barrier);
pthread_barrier_destroy(&barrier);
+
/*
* FUTEX_LOCK_PI with CLOCK_REALTIME
* Due to historical reasons, FUTEX_LOCK_PI supports only realtime
@@ -181,26 +150,38 @@ int main(int argc, char *argv[])
* smaller than realtime and the syscall will timeout immediately.
*/
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_lock_pi(&futex_pi, &to, 0, 0);
- test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT);
/* Test operations that don't support FUTEX_CLOCK_REALTIME */
res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
+ test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS);
+}
+
+TEST(waitv)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ struct timespec to;
+ int res;
/* futex_waitv with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
- test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_waitv monotonic", ETIMEDOUT);
/* futex_waitv with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
-
- ksft_print_cnts();
- return ret;
+ test_timeout(res, "futex_waitv realtime", ETIMEDOUT);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index ed9cd07e31c1..b07d68a67f31 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -29,95 +29,55 @@
#include <linux/futex.h>
#include <libgen.h>
-#include "logging.h"
#include "futextest.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-uninitialized-heap"
#define WAIT_US 5000000
static int child_blocked = 1;
-static int child_ret;
+static bool child_ret;
void *buf;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *wait_thread(void *arg)
{
int res;
- child_ret = RET_PASS;
+ child_ret = true;
res = futex_wait(buf, 1, NULL, 0);
child_blocked = 0;
if (res != 0 && errno != EWOULDBLOCK) {
- error("futex failure\n", errno);
- child_ret = RET_ERROR;
+ ksft_exit_fail_msg("futex failure\n");
+ child_ret = false;
}
pthread_exit(NULL);
}
-int main(int argc, char **argv)
+TEST(futex_wait_uninitialized_heap)
{
- int c, ret = RET_PASS;
long page_size;
pthread_t thr;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
+ int ret;
page_size = sysconf(_SC_PAGESIZE);
buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
- if (buf == (void *)-1) {
- error("mmap\n", errno);
- exit(1);
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
+ if (buf == (void *)-1)
+ ksft_exit_fail_msg("mmap\n");
ret = pthread_create(&thr, NULL, wait_thread, NULL);
- if (ret) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
- info("waiting %dus for child to return\n", WAIT_US);
+ ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US);
usleep(WAIT_US);
- ret = child_ret;
- if (child_blocked) {
- fail("child blocked in kernel\n");
- ret = RET_FAIL;
- }
+ if (child_blocked)
+ ksft_test_result_fail("child blocked in kernel\n");
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ if (!child_ret)
+ ksft_test_result_fail("child error\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 7d7a6a06cdb7..9ff936ecf164 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -21,72 +21,44 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait-wouldblock"
#define timeout_ns 100000
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
-int main(int argc, char *argv[])
+TEST(futex_wait_wouldblock)
{
struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1+1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
+ int res;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
- info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
if (!res || errno != EWOULDBLOCK) {
ksft_test_result_fail("futex_wait returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wait\n");
}
+}
- if (clock_gettime(CLOCK_MONOTONIC, &to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+TEST(futex_waitv_wouldblock)
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1 + 1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ int res;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("clock_gettime failed %d\n", errno);
to.tv_nsec += timeout_ns;
@@ -95,17 +67,15 @@ int main(int argc, char *argv[])
to.tv_nsec -= 1000000000;
}
- info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
if (!res || errno != EWOULDBLOCK) {
- ksft_test_result_pass("futex_waitv returned: %d %s\n",
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
index a94337f677e1..d60876164d4b 100644
--- a/tools/testing/selftests/futex/functional/futex_waitv.c
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -15,25 +15,16 @@
#include <pthread.h>
#include <stdint.h>
#include <sys/shm.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define WAKE_WAIT_US 10000
#define NR_FUTEXES 30
static struct futex_waitv waitv[NR_FUTEXES];
u_int32_t futexes[NR_FUTEXES] = {0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -41,7 +32,7 @@ void *waiterfn(void *arg)
/* setting absolute timeout for futex2 */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -57,34 +48,10 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_waitv)
{
pthread_t waiter;
- int res, ret = RET_PASS;
- struct timespec to;
- int c, i;
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(7);
- ksft_print_msg("%s: Test FUTEX_WAITV\n",
- basename(argv[0]));
+ int res, i;
for (i = 0; i < NR_FUTEXES; i++) {
waitv[i].uaddr = (uintptr_t)&futexes[i];
@@ -95,7 +62,7 @@ int main(int argc, char *argv[])
/* Private waitv */
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -104,10 +71,15 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv private\n");
}
+}
+
+TEST(shared_waitv)
+{
+ pthread_t waiter;
+ int res, i;
/* Shared waitv */
for (i = 0; i < NR_FUTEXES; i++) {
@@ -128,7 +100,7 @@ int main(int argc, char *argv[])
}
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -137,19 +109,24 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake shared returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv shared\n");
}
for (i = 0; i < NR_FUTEXES; i++)
shmdt(u64_to_ptr(waitv[i].uaddr));
+}
+
+TEST(invalid_flag)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter without FUTEX_32 flag */
waitv[0].flags = FUTEX_PRIVATE_FLAG;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -158,17 +135,22 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv without FUTEX_32\n");
}
+}
+
+TEST(unaligned_address)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter with an unaligned address */
waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
waitv[0].uaddr = 1;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -177,16 +159,21 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv with an unaligned address\n");
}
+}
+
+TEST(null_address)
+{
+ struct timespec to;
+ int res;
/* Testing a NULL address for waiters.uaddr */
waitv[0].uaddr = 0x00000000;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -195,14 +182,13 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
}
/* Testing a NULL address for *waiters */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -211,14 +197,19 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
}
+}
+
+TEST(invalid_clockid)
+{
+ struct timespec to;
+ int res;
/* Testing an invalid clockid */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -227,11 +218,9 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv invalid clockid\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 5ccd599da6c3..e88545c06d57 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -18,67 +18,36 @@
#
###############################################################################
-# Test for a color capable console
-if [ -z "$USE_COLOR" ]; then
- tput setf 7 || tput setaf 7
- if [ $? -eq 0 ]; then
- USE_COLOR=1
- tput sgr0
- fi
-fi
-if [ "$USE_COLOR" -eq 1 ]; then
- COLOR="-c"
-fi
-
+echo
+./futex_requeue_pi
echo
-# requeue pi testing
-# without timeouts
-./futex_requeue_pi $COLOR
-./futex_requeue_pi $COLOR -b
-./futex_requeue_pi $COLOR -b -l
-./futex_requeue_pi $COLOR -b -o
-./futex_requeue_pi $COLOR -l
-./futex_requeue_pi $COLOR -o
-# with timeouts
-./futex_requeue_pi $COLOR -b -l -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -l -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-./futex_requeue_pi $COLOR -b -t 5000
-./futex_requeue_pi $COLOR -t 5000
-./futex_requeue_pi $COLOR -b -t 500000
-./futex_requeue_pi $COLOR -t 500000
-./futex_requeue_pi $COLOR -b -o -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -o -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-# with long timeout
-./futex_requeue_pi $COLOR -b -l -t 2000000000
-./futex_requeue_pi $COLOR -l -t 2000000000
+./futex_requeue_pi_mismatched_ops
+echo
+./futex_requeue_pi_signal_restart
echo
-./futex_requeue_pi_mismatched_ops $COLOR
+./futex_wait_timeout
echo
-./futex_requeue_pi_signal_restart $COLOR
+./futex_wait_wouldblock
echo
-./futex_wait_timeout $COLOR
+./futex_wait_uninitialized_heap
+./futex_wait_private_mapped_file
echo
-./futex_wait_wouldblock $COLOR
+./futex_wait
echo
-./futex_wait_uninitialized_heap $COLOR
-./futex_wait_private_mapped_file $COLOR
+./futex_requeue
echo
-./futex_wait $COLOR
+./futex_waitv
echo
-./futex_requeue $COLOR
+./futex_priv_hash
echo
-./futex_waitv $COLOR
+./futex_numa_mpol
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index 9d305520e849..1f625b39948a 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -4,10 +4,58 @@
*
* Copyright 2021 Collabora Ltd.
*/
+#include <linux/time_types.h>
#include <stdint.h>
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+#ifndef __NR_futex_waitv
+#define __NR_futex_waitv 449
+struct futex_waitv {
+ __u64 val;
+ __u64 uaddr;
+ __u32 flags;
+ __u32 __reserved;
+};
+#endif
+
+#ifndef __NR_futex_wake
+#define __NR_futex_wake 454
+#endif
+
+#ifndef __NR_futex_wait
+#define __NR_futex_wait 455
+#endif
+
+#ifndef FUTEX2_SIZE_U32
+#define FUTEX2_SIZE_U32 0x02
+#endif
+
+#ifndef FUTEX2_NUMA
+#define FUTEX2_NUMA 0x04
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+#ifndef FUTEX2_PRIVATE
+#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG
+#endif
+
+#ifndef FUTEX2_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 FUTEX2_SIZE_U32
+#endif
+
+struct futex32_numa {
+ futex_t futex;
+ futex_t numa;
+};
+
/**
* futex_waitv - Wait at multiple futexes, wake on any
* @waiters: Array of waiters
@@ -18,5 +66,33 @@
static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
unsigned long flags, struct timespec *timo, clockid_t clockid)
{
- return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
+}
+
+/*
+ * futex_wait() - block on uaddr with optional timeout
+ * @val: Expected value
+ * @flags: FUTEX2 flags
+ * @timeout: Relative timeout
+ * @clockid: Clock id for the timeout
+ */
+static inline int futex2_wait(void *uaddr, long val, unsigned int flags,
+ struct timespec *timeout, clockid_t clockid)
+{
+ return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid);
+}
+
+/*
+ * futex2_wake() - Wake a number of futexes
+ * @nr: Number of threads to wake at most
+ * @flags: FUTEX2 flags
+ */
+static inline int futex2_wake(void *uaddr, int nr, unsigned int flags)
+{
+ return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags);
}
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index ddbcfc9b7bac..3d48e9789d9f 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -47,6 +47,28 @@ typedef volatile u_int32_t futex_t;
FUTEX_PRIVATE_FLAG)
#endif
+/*
+ * SYS_futex is expected from system C library, in glibc some 32-bit
+ * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have
+ * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as
+ * SYS_futex_time64 in this situation to ensure the compilation and the
+ * compatibility.
+ */
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+#define SYS_futex SYS_futex_time64
+#endif
+
+/*
+ * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if
+ * we are using a newer compiler then the size of the timestamps will be 64bit,
+ * however, the SYS_futex will still point to the 32bit futex system call.
+ */
+#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \
+ defined(_TIME_BITS) && _TIME_BITS == 64
+# undef SYS_futex
+# define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
deleted file mode 100644
index 874c69ce5cce..000000000000
--- a/tools/testing/selftests/futex/include/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/******************************************************************************
- *
- * Copyright © International Business Machines Corp., 2009
- *
- * DESCRIPTION
- * Glibc independent futex library for testing kernel functionality.
- *
- * AUTHOR
- * Darren Hart <dvhart@linux.intel.com>
- *
- * HISTORY
- * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
- *
- *****************************************************************************/
-
-#ifndef _LOGGING_H
-#define _LOGGING_H
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/futex.h>
-#include "kselftest.h"
-
-/*
- * Define PASS, ERROR, and FAIL strings with and without color escape
- * sequences, default to no color.
- */
-#define ESC 0x1B, '['
-#define BRIGHT '1'
-#define GREEN '3', '2'
-#define YELLOW '3', '3'
-#define RED '3', '1'
-#define ESCEND 'm'
-#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
-#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
-#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
-#define RESET_COLOR ESC, '0', 'm'
-static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
- RESET_COLOR, 0};
-static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
- RESET_COLOR, 0};
-static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
- RESET_COLOR, 0};
-static const char INFO_NORMAL[] = " INFO";
-static const char PASS_NORMAL[] = " PASS";
-static const char ERROR_NORMAL[] = "ERROR";
-static const char FAIL_NORMAL[] = " FAIL";
-const char *INFO = INFO_NORMAL;
-const char *PASS = PASS_NORMAL;
-const char *ERROR = ERROR_NORMAL;
-const char *FAIL = FAIL_NORMAL;
-
-/* Verbosity setting for INFO messages */
-#define VQUIET 0
-#define VCRITICAL 1
-#define VINFO 2
-#define VMAX VINFO
-int _verbose = VCRITICAL;
-
-/* Functional test return codes */
-#define RET_PASS 0
-#define RET_ERROR -1
-#define RET_FAIL -2
-
-/**
- * log_color() - Use colored output for PASS, ERROR, and FAIL strings
- * @use_color: use color (1) or not (0)
- */
-void log_color(int use_color)
-{
- if (use_color) {
- PASS = PASS_COLOR;
- ERROR = ERROR_COLOR;
- FAIL = FAIL_COLOR;
- } else {
- PASS = PASS_NORMAL;
- ERROR = ERROR_NORMAL;
- FAIL = FAIL_NORMAL;
- }
-}
-
-/**
- * log_verbosity() - Set verbosity of test output
- * @verbose: Enable (1) verbose output or not (0)
- *
- * Currently setting verbose=1 will enable INFO messages and 0 will disable
- * them. FAIL and ERROR messages are always displayed.
- */
-void log_verbosity(int level)
-{
- if (level > VMAX)
- level = VMAX;
- else if (level < 0)
- level = 0;
- _verbose = level;
-}
-
-/**
- * print_result() - Print standard PASS | ERROR | FAIL results
- * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
- *
- * print_result() is primarily intended for functional tests.
- */
-void print_result(const char *test_name, int ret)
-{
- switch (ret) {
- case RET_PASS:
- ksft_test_result_pass("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_ERROR:
- ksft_test_result_error("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_FAIL:
- ksft_test_result_fail("%s\n", test_name);
- ksft_print_cnts();
- return;
- }
-}
-
-/* log level macros */
-#define info(message, vargs...) \
-do { \
- if (_verbose >= VINFO) \
- fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
-} while (0)
-
-#define error(message, err, args...) \
-do { \
- if (_verbose >= VCRITICAL) {\
- if (err) \
- fprintf(stderr, "\t%s: %s: "message, \
- ERROR, strerror(err), ##args); \
- else \
- fprintf(stderr, "\t%s: "message, ERROR, ##args); \
- } \
-} while (0)
-
-#define fail(message, args...) \
-do { \
- if (_verbose >= VCRITICAL) \
- fprintf(stderr, "\t%s: "message, FAIL, ##args); \
-} while (0)
-
-#endif
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index e0884390447d..7bfe315f7001 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := gpio-mockup.sh gpio-sim.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh gpio-aggregator.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index 409a8532facc..1287abeaac7e 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -2,3 +2,4 @@ CONFIG_GPIOLIB=y
CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
CONFIG_GPIO_SIM=m
+CONFIG_GPIO_AGGREGATOR=m
diff --git a/tools/testing/selftests/gpio/gpio-aggregator.sh b/tools/testing/selftests/gpio/gpio-aggregator.sh
new file mode 100755
index 000000000000..9b6f80ad9f8a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-aggregator.sh
@@ -0,0 +1,727 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Bartosz Golaszewski <brgl@bgdev.pl>
+# Copyright (C) 2025 Koichiro Den <koichiro.den@canonical.com>
+
+BASE_DIR=$(dirname "$0")
+CONFIGFS_SIM_DIR="/sys/kernel/config/gpio-sim"
+CONFIGFS_AGG_DIR="/sys/kernel/config/gpio-aggregator"
+SYSFS_AGG_DIR="/sys/bus/platform/drivers/gpio-aggregator"
+MODULE="gpio-aggregator"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+# gpio-sim
+sim_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+sim_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+sim_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_SIM_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live"
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_SIM_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_SIM_DIR: $remaining"
+ fi
+}
+
+sim_get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+ local CHIP_NAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name" 2> /dev/null) || \
+ fail "Unable to read the chip name from configfs"
+
+ $BASE_DIR/gpio-chip-info "/dev/$CHIP_NAME" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+# gpio-aggregator
+agg_create_chip() {
+ local CHIP=$1
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_remove_chip() {
+ local CHIP=$1
+
+ find "$CONFIGFS_AGG_DIR/$CHIP/" -depth -type d -exec rmdir {} \; || \
+ fail "Unable to remove $CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_create_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_remove_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ rmdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_set_key() {
+ local CHIP=$1
+ local LINE=$2
+ local KEY=$3
+
+ echo "$KEY" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/key" || fail "Unable to set the lookup key"
+}
+
+agg_set_offset() {
+ local CHIP=$1
+ local LINE=$2
+ local OFFSET=$3
+
+ echo "$OFFSET" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/offset" || \
+ fail "Unable to set the lookup offset"
+}
+
+agg_set_line_name() {
+ local CHIP=$1
+ local LINE=$2
+ local NAME=$3
+
+ echo "$NAME" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/name" || fail "Unable to set the line name"
+}
+
+agg_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+agg_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+agg_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_AGG_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live" 2> /dev/null
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_AGG_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_AGG_DIR: $remaining"
+ fi
+}
+
+agg_configfs_dev_name() {
+ local CHIP=$1
+
+ cat "$CONFIGFS_AGG_DIR/$CHIP/dev_name" 2> /dev/null || \
+ fail "Unable to read the device name from configfs"
+}
+
+agg_configfs_chip_name() {
+ local CHIP=$1
+ local DEV_NAME=$(agg_configfs_dev_name "$CHIP")
+ local CHIP_LIST=$(find "/sys/devices/platform/$DEV_NAME" \
+ -maxdepth 1 -type d -name "gpiochip[0-9]*" 2> /dev/null)
+ local CHIP_COUNT=$(echo "$CHIP_LIST" | wc -l)
+
+ if [ -z "$CHIP_LIST" ]; then
+ fail "No gpiochip in /sys/devices/platform/$DEV_NAME/"
+ elif [ "$CHIP_COUNT" -ne 1 ]; then
+ fail "Multiple gpiochips unexpectedly found: $CHIP_LIST"
+ fi
+ basename "$CHIP_LIST"
+}
+
+agg_get_chip_num_lines() {
+ local CHIP=$1
+ local N_DIR=$(ls -d $CONFIGFS_AGG_DIR/$CHIP/line[0-9]* 2> /dev/null | wc -l)
+ local N_LINES
+
+ if [ "$(cat $CONFIGFS_AGG_DIR/$CHIP/live)" = 0 ]; then
+ echo "$N_DIR"
+ else
+ N_LINES=$(
+ $BASE_DIR/gpio-chip-info \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" num-lines
+ ) || fail "Unable to read the number of lines from the character device"
+ if [ $N_DIR != $N_LINES ]; then
+ fail "Discrepancy between two sources for the number of lines"
+ fi
+ echo "$N_LINES"
+ fi
+}
+
+agg_get_chip_label() {
+ local CHIP=$1
+
+ $BASE_DIR/gpio-chip-info "/dev/$(agg_configfs_chip_name "$CHIP")" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+agg_get_line_name() {
+ local CHIP=$1
+ local OFFSET=$2
+ local NAME_CONFIGFS=$(cat "$CONFIGFS_AGG_DIR/$CHIP/line${OFFSET}/name")
+ local NAME_CDEV
+
+ if [ "$(cat "$CONFIGFS_AGG_DIR/$CHIP/live")" = 0 ]; then
+ echo "$NAME_CONFIGFS"
+ else
+ NAME_CDEV=$(
+ $BASE_DIR/gpio-line-name \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" "$OFFSET"
+ ) || fail "Unable to read the line name from the character device"
+ if [ "$NAME_CONFIGFS" != "$NAME_CDEV" ]; then
+ fail "Discrepancy between two sources for the name of line"
+ fi
+ echo "$NAME_CDEV"
+ fi
+}
+
+
+# Load the modules. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+modprobe gpio-aggregator || skip "unable to load the gpio-aggregator module"
+
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in $(seq 5); do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+
+# If the module was already loaded: remove all previous chips
+agg_configfs_cleanup
+sim_configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap "agg_configfs_cleanup 1; sim_configfs_cleanup 1" EXIT
+
+# Use gpio-sim chips as the test backend
+for CHIP in $(seq -f "chip%g" 0 1); do
+ mkdir $CONFIGFS_SIM_DIR/$CHIP
+ for BANK in $(seq -f "bank%g" 0 1); do
+ mkdir -p "$CONFIGFS_SIM_DIR/$CHIP/$BANK"
+ echo "${CHIP}_${BANK}" > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/label" || \
+ fail "unable to set the chip label"
+ echo 16 > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/num_lines" || \
+ fail "unable to set the number of lines"
+ for IDX in $(seq 0 15); do
+ LINE_NAME="${CHIP}${BANK}_${IDX}"
+ LINE_DIR="$CONFIGFS_SIM_DIR/$CHIP/$BANK/line$IDX"
+ mkdir -p $LINE_DIR
+ echo "$LINE_NAME" > "$LINE_DIR/name" || fail "unable to set the line name"
+ done
+ done
+ sim_enable_chip "$CHIP"
+done
+
+echo "1. GPIO aggregator creation/deletion"
+
+echo "1.1. Creation/deletion via configfs"
+
+echo "1.1.1. Minimum creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.2. Complex creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_create_line agg0 line1
+agg_create_line agg0 line2
+agg_create_line agg0 line3
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line2 "$(sim_get_chip_label chip1 bank0)"
+agg_set_key agg0 line3 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 1
+agg_set_offset agg0 line1 3
+agg_set_offset agg0 line2 5
+agg_set_offset agg0 line3 7
+agg_set_line_name agg0 line0 test0
+agg_set_line_name agg0 line1 test1
+agg_set_line_name agg0 line2 test2
+agg_set_line_name agg0 line3 test3
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "4" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "test1" || fail "line name is unset"
+test "$(agg_get_line_name agg0 2)" = "test2" || fail "line name is unset"
+test "$(agg_get_line_name agg0 3)" = "test3" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_line agg0 line1
+agg_remove_line agg0 line2
+agg_remove_line agg0 line3
+agg_remove_chip agg0
+
+echo "1.1.3. Can't instantiate a chip without any line"
+agg_create_chip agg0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_chip agg0
+
+echo "1.1.4. Can't instantiate a chip with invalid configuration"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chipX_bankX"
+agg_set_offset agg0 line0 99
+agg_set_line_name agg0 line0 test0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.5. Can't instantiate a chip asynchronously via deferred probe"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chip0_bank0"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+sim_disable_chip chip0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || \
+ fail "chip unexpectedly transitioned to 'live' state"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.6. Can't instantiate a chip with _sysfs prefix"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs" 2> /dev/null && fail "chip _sysfs unexpectedly created"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.foo" 2> /dev/null && fail "chip _sysfs.foo unexpectedly created"
+
+echo "1.2. Creation/deletion via sysfs"
+
+echo "1.2.1. Minimum creation/deletion"
+echo "chip0_bank0 0" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.2. Complex creation/deletion"
+echo "chip0bank0_0 chip1_bank1 10-11" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "3" || fail "number of lines is not 3"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 2)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.3. Asynchronous creation with deferred probe"
+sim_disable_chip chip0
+echo 'chip0_bank0 0' > $SYSFS_AGG_DIR/new_device
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly remains dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name unexpectedly set"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.4. Can't instantiate a chip with invalid configuration"
+echo "xyz 0" > "$SYSFS_AGG_DIR/new_device"
+test "$(cat $CONFIGFS_AGG_DIR/_sysfs.0/live)" = 0 || fail "chip unexpectedly alive"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+
+echo "2. GPIO aggregator configuration"
+
+echo "2.1. Configuring aggregators instantiated via configfs"
+setup_2_1() {
+ agg_create_chip agg0
+ agg_create_line agg0 line0
+ agg_create_line agg0 line1
+ agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+ agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank0)"
+ agg_set_offset agg0 line0 1
+ agg_set_offset agg0 line1 3
+ agg_set_line_name agg0 line0 test0
+ agg_set_line_name agg0 line1 test1
+ agg_enable_chip agg0
+}
+teardown_2_1() {
+ agg_configfs_cleanup
+}
+
+echo "2.1.1. While offline"
+
+echo "2.1.1.1. Line can be added/removed"
+setup_2_1
+agg_disable_chip agg0
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 5
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.2. Line key can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.3. Line name can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_line_name agg0 line0 new0
+agg_set_line_name agg0 line1 new1
+agg_enable_chip agg0
+test "$(agg_get_line_name agg0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "new1" || fail "line name is unset"
+teardown_2_1
+
+echo "2.1.1.4. Line offset can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 5
+agg_set_offset agg0 line1 7
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.5. Can re-enable a chip after valid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 15
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line0 14
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 13
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.7. Can't re-enable a chip with invalid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+
+echo "2.1.2. While online"
+
+echo "2.1.2.1. Can't add/remove line"
+setup_2_1
+mkdir "$CONFIGFS_AGG_DIR/agg0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/agg0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_1
+
+echo "2.1.2.2. Can't modify line key"
+setup_2_1
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/agg0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.3. Can't modify line name"
+setup_2_1
+echo "new0" > "$CONFIGFS_AGG_DIR/agg0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.4. Can't modify line offset"
+setup_2_1
+echo "5" > "$CONFIGFS_AGG_DIR/agg0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_1
+
+echo "2.2. Configuring aggregators instantiated via sysfs"
+setup_2_2() {
+ echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+}
+teardown_2_2() {
+ echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+}
+
+echo "2.2.1. While online"
+
+echo "2.2.1.1. Can toggle live"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.1.2. Can't add/remove line"
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.1.3. Can't modify line key"
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.4. Can't modify line name"
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.5. Can't modify line offset"
+setup_2_2
+echo "5" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+
+echo "2.2.2. While waiting for deferred probe"
+
+echo "2.2.2.1. Can't add/remove line despite live = 0"
+sim_disable_chip chip0
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.2. Can't modify line key"
+sim_disable_chip chip0
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.3. Can't modify line name"
+sim_disable_chip chip0
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.4. Can't modify line offset"
+sim_disable_chip chip0
+setup_2_2
+echo 5 > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.5. Can't toggle live"
+sim_disable_chip chip0
+setup_2_2
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.3. While offline"
+
+echo "2.2.3.1. Can't add/remove line despite live = 0"
+setup_2_2
+agg_disable_chip _sysfs.0
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.3.2. Line key can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.3. Line name can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_line_name _sysfs.0 line0 new0
+agg_set_line_name _sysfs.0 line1 new1
+agg_enable_chip _sysfs.0
+test "$(agg_get_line_name _sysfs.0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "new1" || fail "line name is unset"
+teardown_2_2
+
+echo "2.2.3.4. Line offset can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 5
+agg_set_offset _sysfs.0 line1 7
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.5. Can re-enable a chip with valid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset _sysfs.0 line0 15
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset _sysfs.0 line0 14
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.6. Can't re-enable a chip with invalid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+
+echo "3. Module unload"
+
+echo "3.1. Can't unload module if there is at least one device created via configfs"
+agg_create_chip agg0
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator || fail "module unexpectedly unloaded"
+agg_remove_chip agg0
+
+echo "3.2. Can unload module if there is no device created via configfs"
+echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator && fail "module unexpectedly remains to be loaded"
+modprobe gpio-aggregator 2> /dev/null
+
+echo "4. GPIO forwarder functional"
+SETTINGS="chip0:bank0:2 chip0:bank1:4 chip1:bank0:6 chip1:bank1:8"
+setup_4() {
+ local OFFSET=0
+ agg_create_chip agg0
+ for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ agg_create_line agg0 "line${OFFSET}"
+ agg_set_key agg0 "line${OFFSET}" "$(sim_get_chip_label "$CHIP" "$BANK")"
+ agg_set_offset agg0 "line${OFFSET}" "$LINE"
+ OFFSET=$(expr $OFFSET + 1)
+ done
+ agg_enable_chip agg0
+}
+teardown_4() {
+ agg_configfs_cleanup
+}
+
+echo "4.1. Forwarding set values"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ test $(cat $VAL_PATH) = "0" || fail "incorrect value read from sysfs"
+ $BASE_DIR/gpio-mockup-cdev -s 1 "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" &
+ mock_pid=$!
+ sleep 0.1 # FIXME Any better way?
+ test "$(cat $VAL_PATH)" = "1" || fail "incorrect value read from sysfs"
+ kill "$mock_pid"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "4.2. Forwarding set config"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ $BASE_DIR/gpio-mockup-cdev -b pull-up "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET"
+ test $(cat "$VAL_PATH") = "1" || fail "incorrect value read from sysfs"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "5. Race condition verification"
+
+echo "5.1. Stress test of new_device/delete_device and module load/unload"
+for _ in $(seq 1000); do
+ {
+ echo "dummy 0" > "$SYSFS_AGG_DIR/new_device"
+ cat "$CONFIGFS_AGG_DIR/_sysfs.0/dev_name" > "$SYSFS_AGG_DIR/delete_device"
+ } 2> /dev/null
+done &
+writer_pid=$!
+while kill -0 "$writer_pid" 2> /dev/null; do
+ {
+ modprobe gpio-aggregator
+ modprobe -r gpio-aggregator
+ } 2> /dev/null
+done
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 0336353bd15f..2839d2612ce3 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw
# $3 - target (assumed to be file); only file name will be emitted;
# $4 - optional extra arg, emitted as-is, if provided.
ifeq ($(V),1)
-Q =
msg =
else
-Q = @
msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
MAKEFLAGS += --no-print-directory
submake_extras := feature_display=0
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index 45b5570441ce..38c51158adf8 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRYPTO_SEQIV=y
@@ -136,6 +135,7 @@ CONFIG_NET_EMATCH=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_MATCH_COMMENT=y
diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h
index f77f69c6657d..e3b267446fa0 100644
--- a/tools/testing/selftests/hid/hid_common.h
+++ b/tools/testing/selftests/hid/hid_common.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2022-2024 Red Hat */
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <fcntl.h>
#include <fnmatch.h>
@@ -230,6 +230,12 @@ static int uhid_event(struct __test_metadata *_metadata, int fd)
break;
case UHID_SET_REPORT:
UHID_LOG("UHID_SET_REPORT from uhid-dev");
+
+ answer.type = UHID_SET_REPORT_REPLY;
+ answer.u.set_report_reply.id = ev.u.set_report.id;
+ answer.u.set_report_reply.err = 0; /* success */
+
+ uhid_write(_metadata, fd, &answer);
break;
default:
TH_LOG("Invalid event from uhid-dev: %u", ev.type);
diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c
index 821db37ba4bb..d625772f8b7c 100644
--- a/tools/testing/selftests/hid/hidraw.c
+++ b/tools/testing/selftests/hid/hidraw.c
@@ -2,6 +2,9 @@
/* Copyright (c) 2022-2024 Red Hat */
#include "hid_common.h"
+#include <linux/input.h>
+#include <string.h>
+#include <sys/ioctl.h>
/* for older kernels */
#ifndef HIDIOCREVOKE
@@ -215,6 +218,476 @@ TEST_F(hidraw, write_event_revoked)
pthread_mutex_unlock(&uhid_output_mtx);
}
+/*
+ * Test HIDIOCGRDESCSIZE ioctl to get report descriptor size
+ */
+TEST_F(hidraw, ioctl_rdescsize)
+{
+ int desc_size = 0;
+ int err;
+
+ /* call HIDIOCGRDESCSIZE ioctl */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESCSIZE ioctl failed");
+
+ /* verify the size matches our test report descriptor */
+ ASSERT_EQ(desc_size, sizeof(rdesc))
+ TH_LOG("expected size %zu, got %d", sizeof(rdesc), desc_size);
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl to get report descriptor data
+ */
+TEST_F(hidraw, ioctl_rdesc)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+
+ /* get the full report descriptor */
+ desc.size = sizeof(rdesc);
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed");
+
+ /* verify the descriptor data matches our test descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, sizeof(rdesc)), 0)
+ TH_LOG("report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRDESC ioctl with smaller buffer size
+ */
+TEST_F(hidraw, ioctl_rdesc_small_buffer)
+{
+ struct hidraw_report_descriptor desc;
+ int err;
+ size_t small_size = sizeof(rdesc) / 2; /* request half the descriptor size */
+
+ /* get partial report descriptor */
+ desc.size = small_size;
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed with small buffer");
+
+ /* verify we got the first part of the descriptor */
+ ASSERT_EQ(memcmp(desc.value, rdesc, small_size), 0)
+ TH_LOG("partial report descriptor data mismatch");
+}
+
+/*
+ * Test HIDIOCGRAWINFO ioctl to get device information
+ */
+TEST_F(hidraw, ioctl_rawinfo)
+{
+ struct hidraw_devinfo devinfo;
+ int err;
+
+ /* get device info */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWINFO, &devinfo);
+ ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRAWINFO ioctl failed");
+
+ /* verify device info matches our test setup */
+ ASSERT_EQ(devinfo.bustype, BUS_USB)
+ TH_LOG("expected bustype 0x03, got 0x%x", devinfo.bustype);
+ ASSERT_EQ(devinfo.vendor, 0x0001)
+ TH_LOG("expected vendor 0x0001, got 0x%x", devinfo.vendor);
+ ASSERT_EQ(devinfo.product, 0x0a37)
+ TH_LOG("expected product 0x0a37, got 0x%x", devinfo.product);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl to get feature report
+ */
+TEST_F(hidraw, ioctl_gfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGFEATURE ioctl failed, got %d", err);
+
+ /* verify we got the expected feature data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect nr bits
+ */
+TEST_F(hidraw, ioctl_invalid_nr)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0), got errno %d", errno);
+
+ /*
+ * craft an ioctl command with wrong _IOC_NR bits
+ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0), got errno %d", errno);
+
+ /* also test with bigger number */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x42, sizeof(buf)); /* 0x42 is not valid as well */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0x42), got errno %d", errno);
+
+ /* also test with bigger number: 0x42 is not valid as well */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x42, sizeof(buf));
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0x42) should have failed");
+ ASSERT_EQ(errno, ENOTTY)
+ TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0x42), got errno %d", errno);
+}
+
+/*
+ * Test ioctl with incorrect type bits
+ */
+TEST_F(hidraw, ioctl_invalid_type)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_TYPE bits
+ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'I', 0x01, sizeof(buf)); /* 'I' should be 'H' */
+
+ /* test the ioctl */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("ioctl with wrong _IOC_TYPE (I) should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_NR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCGFEATURE ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_gfeature_invalid_dir)
+{
+ __u8 buf[10] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGFEATURE should have _IOC_WRITE|_IOC_READ, let's use only _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ /* try to get feature report with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with only _IOC_READ */
+ bad_cmd = _IOC(_IOC_READ, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test read-only ioctl with incorrect _IOC_DIR bits
+ */
+TEST_F(hidraw, ioctl_readonly_invalid_dir)
+{
+ char buf[256] = {0};
+ int err;
+ unsigned int bad_cmd;
+
+ /*
+ * craft an ioctl command with wrong _IOC_DIR bits
+ * HIDIOCGRAWNAME should have _IOC_READ, let's use _IOC_WRITE
+ */
+ bad_cmd = _IOC(_IOC_WRITE, 'H', 0x04, sizeof(buf)); /* should be _IOC_READ */
+
+ /* try to get device name with wrong direction bits */
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+
+ /* also test with _IOC_WRITE|_IOC_READ */
+ bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x04, sizeof(buf)); /* should be only _IOC_READ */
+
+ err = ioctl(self->hidraw_fd, bad_cmd, buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed");
+ ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSFEATURE ioctl to set feature report
+ */
+TEST_F(hidraw, ioctl_sfeature)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare feature report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x42;
+ buf[2] = 0x24;
+
+ /* set feature report */
+ err = ioctl(self->hidraw_fd, HIDIOCSFEATURE(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSFEATURE ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl to get input report
+ */
+TEST_F(hidraw, ioctl_ginput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGINPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected input data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGINPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_ginput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get input report */
+ err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGINPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSINPUT ioctl to set input report
+ */
+TEST_F(hidraw, ioctl_sinput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare input report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x55;
+ buf[2] = 0xAA;
+
+ /* set input report */
+ err = ioctl(self->hidraw_fd, HIDIOCSINPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSINPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl to get output report
+ */
+TEST_F(hidraw, ioctl_goutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set report ID 1 in first byte */
+ buf[0] = 1;
+
+ /* get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGOUTPUT ioctl failed, got %d", err);
+
+ /* verify we got the expected output data */
+ ASSERT_EQ(buf[0], feature_data[0])
+ TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]);
+ ASSERT_EQ(buf[1], feature_data[1])
+ TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]);
+}
+
+/*
+ * Test HIDIOCGOUTPUT ioctl with invalid report ID
+ */
+TEST_F(hidraw, ioctl_goutput_invalid)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* set invalid report ID (not 1) */
+ buf[0] = 2;
+
+ /* try to get output report */
+ err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("HIDIOCGOUTPUT should have failed with invalid report ID");
+ ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno);
+}
+
+/*
+ * Test HIDIOCSOUTPUT ioctl to set output report
+ */
+TEST_F(hidraw, ioctl_soutput)
+{
+ __u8 buf[10] = {0};
+ int err;
+
+ /* prepare output report data */
+ buf[0] = 1; /* report ID */
+ buf[1] = 0x33;
+ buf[2] = 0xCC;
+
+ /* set output report */
+ err = ioctl(self->hidraw_fd, HIDIOCSOUTPUT(3), buf);
+ ASSERT_EQ(err, 3) TH_LOG("HIDIOCSOUTPUT ioctl failed, got %d", err);
+
+ /*
+ * Note: The uhid mock doesn't validate the set report data,
+ * so we just verify the ioctl succeeds
+ */
+}
+
+/*
+ * Test HIDIOCGRAWNAME ioctl to get device name string
+ */
+TEST_F(hidraw, ioctl_rawname)
+{
+ char name[256] = {0};
+ char expected_name[64];
+ int err;
+
+ /* get device name */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(name)), name);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWNAME ioctl failed, got %d", err);
+
+ /* construct expected name based on device id */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify the name matches expected pattern */
+ ASSERT_EQ(strcmp(name, expected_name), 0)
+ TH_LOG("expected name '%s', got '%s'", expected_name, name);
+}
+
+/*
+ * Test HIDIOCGRAWPHYS ioctl to get device physical address string
+ */
+TEST_F(hidraw, ioctl_rawphys)
+{
+ char phys[256] = {0};
+ char expected_phys[64];
+ int err;
+
+ /* get device physical address */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWPHYS(sizeof(phys)), phys);
+ ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWPHYS ioctl failed, got %d", err);
+
+ /* construct expected phys based on device id */
+ snprintf(expected_phys, sizeof(expected_phys), "%d", self->hid.dev_id);
+
+ /* verify the phys matches expected value */
+ ASSERT_EQ(strcmp(phys, expected_phys), 0)
+ TH_LOG("expected phys '%s', got '%s'", expected_phys, phys);
+}
+
+/*
+ * Test HIDIOCGRAWUNIQ ioctl to get device unique identifier string
+ */
+TEST_F(hidraw, ioctl_rawuniq)
+{
+ char uniq[256] = {0};
+ int err;
+
+ /* get device unique identifier */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWUNIQ(sizeof(uniq)), uniq);
+ ASSERT_GE(err, 0) TH_LOG("HIDIOCGRAWUNIQ ioctl failed, got %d", err);
+
+ /* uniq is typically empty in our test setup */
+ ASSERT_EQ(strlen(uniq), 0) TH_LOG("expected empty uniq, got '%s'", uniq);
+}
+
+/*
+ * Test device string ioctls with small buffer sizes
+ */
+TEST_F(hidraw, ioctl_strings_small_buffer)
+{
+ char small_buf[8] = {0};
+ char expected_name[64];
+ int err;
+
+ /* test HIDIOCGRAWNAME with small buffer */
+ err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(small_buf)), small_buf);
+ ASSERT_EQ(err, sizeof(small_buf))
+ TH_LOG("HIDIOCGRAWNAME with small buffer failed, got %d", err);
+
+ /* construct expected truncated name */
+ snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id);
+
+ /* verify we got truncated name (first 8 chars, no null terminator guaranteed) */
+ ASSERT_EQ(strncmp(small_buf, expected_name, sizeof(small_buf)), 0)
+ TH_LOG("expected truncated name to match first %zu chars", sizeof(small_buf));
+
+ /* Note: hidraw driver doesn't guarantee null termination when buffer is too small */
+}
+
int main(int argc, char **argv)
{
return test_harness_run(argc, argv);
diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py
index 3a465768e507..5175cf235b2f 100644
--- a/tools/testing/selftests/hid/tests/base.py
+++ b/tools/testing/selftests/hid/tests/base.py
@@ -5,6 +5,7 @@
# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
# Copyright (c) 2017 Red Hat, Inc.
+import dataclasses
import libevdev
import os
import pytest
@@ -145,6 +146,18 @@ class UHIDTestDevice(BaseDevice):
self.name = name
+@dataclasses.dataclass
+class HidBpf:
+ object_name: str
+ has_rdesc_fixup: bool
+
+
+@dataclasses.dataclass
+class KernelModule:
+ driver_name: str
+ module_name: str
+
+
class BaseTestCase:
class TestUhid(object):
syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore
@@ -155,20 +168,20 @@ class BaseTestCase:
# List of kernel modules to load before starting the test
# if any module is not available (not compiled), the test will skip.
- # Each element is a tuple '(kernel driver name, kernel module)',
- # for example ("playstation", "hid-playstation")
- kernel_modules: List[Tuple[str, str]] = []
+ # Each element is a KernelModule object, for example
+ # KernelModule("playstation", "hid-playstation")
+ kernel_modules: List[KernelModule] = []
# List of in kernel HID-BPF object files to load
# before starting the test
# Any existing pre-loaded HID-BPF module will be removed
# before the ones in this list will be manually loaded.
- # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)',
- # for example '("xppen-ArtistPro16Gen2.bpf.o", True)'
- # If 'rdesc_fixup_present' is True, the test needs to wait
+ # Each Element is a HidBpf object, for example
+ # 'HidBpf("xppen-ArtistPro16Gen2.bpf.o", True)'
+ # If 'has_rdesc_fixup' is True, the test needs to wait
# for one unbind and rebind before it can be sure the kernel is
# ready
- hid_bpfs: List[Tuple[str, bool]] = []
+ hid_bpfs: List[HidBpf] = []
def assertInputEventsIn(self, expected_events, effective_events):
effective_events = effective_events.copy()
@@ -232,25 +245,26 @@ class BaseTestCase:
@pytest.fixture()
def load_kernel_module(self):
- for kernel_driver, kernel_module in self.kernel_modules:
- self._load_kernel_module(kernel_driver, kernel_module)
+ for k in self.kernel_modules:
+ self._load_kernel_module(k.driver_name, k.module_name)
yield
def load_hid_bpfs(self):
+ # this function will only work when run in the kernel tree
script_dir = Path(os.path.dirname(os.path.realpath(__file__)))
root_dir = (script_dir / "../../../../..").resolve()
bpf_dir = root_dir / "drivers/hid/bpf/progs"
+ if not bpf_dir.exists():
+ pytest.skip("looks like we are not in the kernel tree, skipping")
+
udev_hid_bpf = shutil.which("udev-hid-bpf")
if not udev_hid_bpf:
pytest.skip("udev-hid-bpf not found in $PATH, skipping")
- wait = False
- for _, rdesc_fixup in self.hid_bpfs:
- if rdesc_fixup:
- wait = True
+ wait = any(b.has_rdesc_fixup for b in self.hid_bpfs)
- for hid_bpf, _ in self.hid_bpfs:
+ for hid_bpf in self.hid_bpfs:
# We need to start `udev-hid-bpf` in the background
# and dispatch uhid events in case the kernel needs
# to fetch features on the device
@@ -260,13 +274,13 @@ class BaseTestCase:
"--verbose",
"add",
str(self.uhdev.sys_path),
- str(bpf_dir / hid_bpf),
+ str(bpf_dir / hid_bpf.object_name),
],
)
while process.poll() is None:
self.uhdev.dispatch(1)
- if process.poll() != 0:
+ if process.returncode != 0:
pytest.fail(
f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed"
)
diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py
index e0515be97f83..59465c58d94d 100644
--- a/tools/testing/selftests/hid/tests/base_device.py
+++ b/tools/testing/selftests/hid/tests/base_device.py
@@ -18,10 +18,12 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import dataclasses
import fcntl
import functools
import libevdev
import os
+import threading
try:
import pyudev
@@ -104,6 +106,12 @@ class PowerSupply(object):
return self._type.str_value
+@dataclasses.dataclass
+class HidReadiness:
+ is_ready: bool = False
+ count: int = 0
+
+
class HIDIsReady(object):
"""
Companion class that binds to a kernel mechanism
@@ -115,18 +123,18 @@ class HIDIsReady(object):
def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None:
self.uhid = uhid
- def is_ready(self: "HIDIsReady") -> bool:
+ def is_ready(self: "HIDIsReady") -> HidReadiness:
"""
Overwrite in subclasses: should return True or False whether
the attached uhid device is ready or not.
"""
- return False
+ return HidReadiness()
class UdevHIDIsReady(HIDIsReady):
_pyudev_context: ClassVar[Optional[pyudev.Context]] = None
_pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None
- _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {}
+ _uhid_devices: ClassVar[Dict[int, HidReadiness]] = {}
def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None:
super().__init__(uhid)
@@ -157,18 +165,19 @@ class UdevHIDIsReady(HIDIsReady):
id = int(event.sys_path.strip().split(".")[-1], 16)
- device_ready, count = cls._uhid_devices.get(id, (False, 0))
+ readiness = cls._uhid_devices.setdefault(id, HidReadiness())
ready = event.action == "bind"
- if not device_ready and ready:
- count += 1
- cls._uhid_devices[id] = (ready, count)
+ if not readiness.is_ready and ready:
+ readiness.count += 1
+
+ readiness.is_ready = ready
- def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]:
+ def is_ready(self: "UdevHIDIsReady") -> HidReadiness:
try:
return self._uhid_devices[self.uhid.hid_id]
except KeyError:
- return (False, 0)
+ return HidReadiness()
class EvdevMatch(object):
@@ -322,11 +331,11 @@ class BaseDevice(UHIDDevice):
@property
def kernel_is_ready(self: "BaseDevice") -> bool:
- return self._kernel_is_ready.is_ready()[0] and self.started
+ return self._kernel_is_ready.is_ready().is_ready and self.started
@property
def kernel_ready_count(self: "BaseDevice") -> int:
- return self._kernel_is_ready.is_ready()[1]
+ return self._kernel_is_ready.is_ready().count
@property
def input_nodes(self: "BaseDevice") -> List[EvdevDevice]:
@@ -336,10 +345,28 @@ class BaseDevice(UHIDDevice):
if not self.kernel_is_ready or not self.started:
return []
+ # Starting with kernel v6.16, an event is emitted when
+ # userspace opens a kernel device, and for some devices
+ # this translates into a SET_REPORT.
+ # Because EvdevDevice(path) opens every single evdev node
+ # we need to have a separate thread to process the incoming
+ # SET_REPORT or we end up having to wait for the kernel
+ # timeout of 5 seconds.
+ done = False
+
+ def dispatch():
+ while not done:
+ self.dispatch(1)
+
+ t = threading.Thread(target=dispatch)
+ t.start()
+
self._input_nodes = [
EvdevDevice(path)
for path in self.walk_sysfs("input", "input/input*/event*")
]
+ done = True
+ t.join()
return self._input_nodes
def match_evdev_rule(self, application, evdev):
diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
index f81071d46166..0e17588b945c 100644
--- a/tools/testing/selftests/hid/tests/test_apple_keyboard.py
+++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
@@ -8,13 +8,14 @@
from .test_keyboard import ArrayKeyboard, TestArrayKeyboard
from hidtools.util import BusType
+from . import base
import libevdev
import logging
logger = logging.getLogger("hidtools.test.apple-keyboard")
-KERNEL_MODULE = ("apple", "hid-apple")
+KERNEL_MODULE = base.KernelModule("apple", "hid-apple")
class KbdData(object):
diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py
index 8d5b5ffdae49..612197805931 100644
--- a/tools/testing/selftests/hid/tests/test_gamepad.py
+++ b/tools/testing/selftests/hid/tests/test_gamepad.py
@@ -12,6 +12,7 @@ import pytest
from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping
from hidtools.util import BusType
+from .base import HidBpf
import logging
@@ -654,7 +655,7 @@ class TestAsusGamepad(BaseTest.TestGamepad):
class TestRaptorMach2Joystick(BaseTest.TestGamepad):
- hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)]
+ hid_bpfs = [HidBpf("FR-TEC__Raptor-Mach-2.bpf.o", True)]
def create_device(self):
return RaptorMach2Joystick(
diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
index 38550c167bae..f695eaad1648 100644
--- a/tools/testing/selftests/hid/tests/test_ite_keyboard.py
+++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
@@ -11,10 +11,11 @@ from hidtools.util import BusType
import libevdev
import logging
+from . import base
logger = logging.getLogger("hidtools.test.ite-keyboard")
-KERNEL_MODULE = ("itetech", "hid_ite")
+KERNEL_MODULE = base.KernelModule("itetech", "hid_ite")
class KbdData(object):
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
index 66daf7e5975c..eb4e15a0e53b 100644
--- a/tools/testing/selftests/hid/tests/test_mouse.py
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
return 32 # EPIPE
+class BadReportDescriptorMouse(BaseMouse):
+ """
+ This "device" was one autogenerated by syzbot. There are a lot of issues in
+ it, and the most problematic is that it declares features that have no
+ size.
+
+ This leads to report->size being set to 0 and can mess up with usbhid
+ internals. Fortunately, uhid merely passes the incoming buffer, without
+ touching it so a buffer of size 0 will be translated to [] without
+ triggering a kernel oops.
+
+ Because the report descriptor is wrong, no input are created, and we need
+ to tweak a little bit the parameters to make it look correct.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x96, 0x01, 0x00, # Report Count (1) 0
+ 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3
+ # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow
+ 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6
+ 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9
+ 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14
+ 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19
+ 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24
+ ]
+ # fmt: on
+
+ def __init__(
+ self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA)
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.high_resolution_report_called = False
+
+ def get_evdev(self, application=None):
+ assert self._input_nodes is None
+ return (
+ "Ok" # should be a list or None, but both would fail, so abusing the system
+ )
+
+ def next_sync_events(self, application=None):
+ # there are no evdev nodes, so no events
+ return []
+
+ def is_ready(self):
+ # we wait for the SET_REPORT command to come
+ return self.high_resolution_report_called
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x0:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if len(data) != 1:
+ raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'")
+
+ self.high_resolution_report_called = True
+
+ return 0
+
+
class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
# fmt: off
report_descriptor = [
@@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse):
# assert below print out the real error
pass
assert remaining == []
+
+
+class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ return BadReportDescriptorMouse()
+
+ def assertName(self, uhdev):
+ pass
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
index 4265012231c6..ece0ba8e7d34 100644
--- a/tools/testing/selftests/hid/tests/test_multitouch.py
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -17,7 +17,7 @@ import time
logger = logging.getLogger("hidtools.test.multitouch")
-KERNEL_MODULE = ("hid-multitouch", "hid_multitouch")
+KERNEL_MODULE = base.KernelModule("hid-multitouch", "hid_multitouch")
def BIT(x):
@@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch):
assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_mt_confidence_bad_multi_release(self):
+ """Check for the sticky finger being properly detected.
+
+ We first inject 3 fingers, then release only the second.
+ After 100 ms, we should receive a generated event about the
+ 2 missing fingers being released.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # send 3 touches
+ t0 = Touch(1, 50, 10)
+ t1 = Touch(2, 150, 100)
+ t2 = Touch(3, 250, 200)
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # release the second
+ t1.tipswitch = False
+ r = uhdev.event([t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # only the second is released
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+ # wait for the timer to kick in
+ time.sleep(0.2)
+
+ events = uhdev.next_sync_events()
+ self.debug_reports([], uhdev, events)
+
+ # now all 3 fingers are released
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
class TestElanXPS9360(BaseTest.TestWin8Multitouch):
def create_device(self):
return Digitizer(
@@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP):
input_info=(BusType.I2C, 0x06CB, 0xCE08),
rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
)
+
+class Testsynaptics_06cb_ce26(TestWin8TSConfidence):
+ def create_device(self):
+ return PTP(
+ "uhid test synaptics_06cb_ce26",
+ max_contacts=5,
+ input_info=(BusType.I2C, 0x06CB, 0xCE26),
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py
index 7e52c28e59c5..7fd3a8e6137d 100644
--- a/tools/testing/selftests/hid/tests/test_sony.py
+++ b/tools/testing/selftests/hid/tests/test_sony.py
@@ -7,6 +7,7 @@
#
from .base import application_matches
+from .base import KernelModule
from .test_gamepad import BaseTest
from hidtools.device.sony_gamepad import (
PS3Controller,
@@ -24,9 +25,9 @@ import pytest
logger = logging.getLogger("hidtools.test.sony")
-PS3_MODULE = ("sony", "hid_sony")
-PS4_MODULE = ("playstation", "hid_playstation")
-PS5_MODULE = ("playstation", "hid_playstation")
+PS3_MODULE = KernelModule("sony", "hid_sony")
+PS4_MODULE = KernelModule("playstation", "hid_playstation")
+PS5_MODULE = KernelModule("playstation", "hid_playstation")
class SonyBaseTest:
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
index a9e2de1e8861..5b9abb616db4 100644
--- a/tools/testing/selftests/hid/tests/test_tablet.py
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -10,6 +10,7 @@ from . import base
import copy
from enum import Enum
from hidtools.util import BusType
+from .base import HidBpf
import libevdev
import logging
import pytest
@@ -451,6 +452,7 @@ class Pen(object):
def __init__(self, x, y):
self.x = x
self.y = y
+ self.distance = -10
self.tipswitch = False
self.tippressure = 15
self.azimuth = 0
@@ -472,6 +474,7 @@ class Pen(object):
for i in [
"x",
"y",
+ "distance",
"tippressure",
"azimuth",
"width",
@@ -553,6 +556,7 @@ class PenDigitizer(base.UHIDTestDevice):
pen.tipswitch = False
pen.tippressure = 0
pen.azimuth = 0
+ pen.distance = 0
pen.inrange = False
pen.width = 0
pen.height = 0
@@ -867,6 +871,29 @@ class BaseTest:
state machine."""
self._test_states(state_list, scribble, allow_intermediate_states=True)
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Z" not in uhdev.fields,
+ "Device not compatible, missing Z usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()],
+ )
+ def test_z_reported_as_distance(self, state_list, scribble):
+ """Verify stylus Z values are reported as ABS_DISTANCE."""
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ p = Pen(0, 0)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, None)
+ p = Pen(100, 200)
+ uhdev.move_to(p, PenState.PEN_IS_IN_RANGE, None)
+ p.distance = -1
+ events = self.post(uhdev, p, None)
+ assert evdev.value[libevdev.EV_ABS.ABS_DISTANCE] == -1
+
class GXTP_pen(PenDigitizer):
def event(self, pen, test_button):
@@ -1228,9 +1255,9 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
pen.current_state = state
def call_input_event(self, report):
- if report[0] == 0x0a:
+ if report[0] == 0x0A:
# ensures the original second Eraser usage is null
- report[1] &= 0xdf
+ report[1] &= 0xDF
# ensures the original last bit is equal to bit 6 (In Range)
if report[1] & 0x40:
@@ -1291,6 +1318,35 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
return rs
+class Wacom_2d1f_014b(PenDigitizer):
+ """
+ Pen that reports distance values with HID_GD_Z usage.
+ """
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+
+ def match_evdev_rule(self, application, evdev):
+ # there are 2 nodes created by the device, only one matters
+ return evdev.name.endswith("Stylus")
+
+ def event(self, pen, test_button):
+ # this device reports the distance through Z
+ pen.z = pen.distance
+
+ return super().event(pen, test_button)
+
+
################################################################################
#
# Windows 7 compatible devices
@@ -1472,7 +1528,7 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet):
class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet):
- hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)]
+ hid_bpfs = [HidBpf("XPPen__ArtistPro16Gen2.bpf.o", True)]
def create_device(self):
dev = XPPen_ArtistPro16Gen2_28bd_095b(
@@ -1484,7 +1540,7 @@ class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet):
class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet):
- hid_bpfs = [("XPPen__Artist24.bpf.o", True)]
+ hid_bpfs = [HidBpf("XPPen__Artist24.bpf.o", True)]
def create_device(self):
return XPPen_Artist24_28bd_093a(
@@ -1495,7 +1551,7 @@ class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet):
class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
- hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)]
+ hid_bpfs = [HidBpf("Huion__Kamvas-Pro-19.bpf.o", True)]
def create_device(self):
return Huion_Kamvas_Pro_19_256c_006b(
@@ -1503,3 +1559,19 @@ class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
input_info=(BusType.USB, 0x256C, 0x006B),
)
+
+
+################################################################################
+#
+# Devices Reporting Distance
+#
+################################################################################
+
+
+class TestWacom_2d1f_014b(BaseTest.TestTablet):
+ def create_device(self):
+ return Wacom_2d1f_014b(
+ "uhid test Wacom 2d1f_014b",
+ rdesc="05 0d 09 02 a1 01 85 02 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 95 02 81 03 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 45 00 65 00 55 00 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 09 00 75 08 26 ff 00 b1 12 85 03 09 00 95 12 b1 12 85 05 09 00 95 04 b1 02 85 06 09 00 95 24 b1 02 85 16 09 00 15 00 26 ff 00 95 06 b1 02 85 17 09 00 95 0c b1 02 85 19 09 00 95 01 b1 02 85 0a 09 00 75 08 95 01 15 10 26 ff 00 b1 02 85 1e 09 00 95 10 b1 02 c0 06 00 ff 09 00 a1 01 85 09 05 0d 09 20 a1 00 09 00 15 00 26 ff 00 75 08 95 10 81 02 c0 09 00 95 03 b1 12 c0 06 00 ff 09 02 a1 01 85 07 09 00 96 09 01 b1 02 85 08 09 00 95 03 81 02 09 00 b1 02 85 0e 09 00 96 0a 01 b1 02 c0 05 0d 09 02 a1 01 85 1a 09 20 a1 00 09 42 09 44 09 45 09 3c 09 5a 09 32 15 00 25 01 75 01 95 06 81 02 09 38 25 03 75 02 95 01 81 02 05 01 09 30 26 88 3e 46 88 3e 65 11 55 0d 75 10 95 01 81 02 09 31 26 60 53 46 60 53 81 02 05 0d 09 30 26 ff 0f 46 b0 0f 66 11 e1 55 02 81 02 06 00 ff 09 04 75 08 26 ff 00 46 ff 00 65 11 55 0e 81 02 05 0d 09 3d 75 10 16 d8 dc 26 28 23 36 d8 dc 46 28 23 65 14 81 02 09 3e 81 02 05 01 09 32 16 01 ff 25 00 36 01 ff 45 00 65 11 81 02 05 0d 09 56 15 00 27 ff ff 00 00 35 00 47 ff ff 00 00 66 01 10 55 0c 81 02 45 00 65 00 55 00 c0 c0 06 00 ff 09 00 a1 01 85 1b 05 0d 09 20 a1 00 09 00 26 ff 00 75 08 95 10 81 02 c0 c0",
+ input_info=(BusType.USB, 0x2D1F, 0x014B),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py
index b62c7dba6777..2d6d04f0ff80 100644
--- a/tools/testing/selftests/hid/tests/test_wacom_generic.py
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -40,7 +40,7 @@ import logging
logger = logging.getLogger("hidtools.test.wacom")
-KERNEL_MODULE = ("wacom", "wacom")
+KERNEL_MODULE = base.KernelModule("wacom", "wacom")
class ProximityState(Enum):
@@ -892,9 +892,9 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
locations. The value of `t` may be incremented over time to move the
points along a linear path.
"""
- return [ self.make_contact(id, t) for id in range(0, n) ]
+ return [self.make_contact(id, t) for id in range(0, n)]
- def assert_contact(self, uhdev, evdev, contact_ids, t=0):
+ def assert_contact(self, evdev, contact_ids, t=0):
"""
Assert properties of a contact generated by make_contact.
"""
@@ -916,12 +916,12 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x
assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y
- def assert_contacts(self, uhdev, evdev, data, t=0):
+ def assert_contacts(self, evdev, data, t=0):
"""
Assert properties of a list of contacts generated by make_contacts.
"""
for contact_ids in data:
- self.assert_contact(uhdev, evdev, contact_ids, t)
+ self.assert_contact(evdev, contact_ids, t)
def test_contact_id_0(self):
"""
@@ -997,12 +997,16 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
- self.assert_contacts(uhdev, evdev,
- [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None),
- self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0),
- self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None),
- self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1),
- self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ])
+ self.assert_contacts(
+ evdev,
+ [
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ self.ContactIds(contact_id=2, tracking_id=-1, slot_num=None),
+ self.ContactIds(contact_id=3, tracking_id=1, slot_num=1),
+ self.ContactIds(contact_id=4, tracking_id=-1, slot_num=None),
+ ],
+ )
def confidence_change_assert_playback(self, uhdev, evdev, timeline):
"""
@@ -1026,8 +1030,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
events = uhdev.next_sync_events()
self.debug_reports(r, uhdev, events)
- ids = [ x[0] for x in state ]
- self.assert_contacts(uhdev, evdev, ids, t)
+ ids = [x[0] for x in state]
+ self.assert_contacts(evdev, ids, t)
t += 1
@@ -1044,27 +1048,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- self.confidence_change_assert_playback(uhdev, evdev, [
- # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
- # Both fingers confidently in contact
- [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident
- # First finger looses confidence and clears only the tipswitch flag
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
- ])
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the tipswitch flag
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
def test_confidence_loss_b(self):
"""
@@ -1079,27 +1124,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- self.confidence_change_assert_playback(uhdev, evdev, [
- # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
- # Both fingers confidently in contact
- [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger looses confidence and has both flags cleared simultaneously
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
- ])
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and has both flags cleared simultaneously
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
def test_confidence_loss_c(self):
"""
@@ -1113,27 +1199,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- self.confidence_change_assert_playback(uhdev, evdev, [
- # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
- # Both fingers confidently in contact
- [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
- # First finger looses confidence and clears only the confidence flag
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
- # First finger has lost confidence and has both flags cleared
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
- ])
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the confidence flag
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
def test_confidence_gain_a(self):
"""
@@ -1144,27 +1271,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- self.confidence_change_assert_playback(uhdev, evdev, [
- # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident
- # Only second finger is confidently in contact
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False),
- (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
-
- # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
- # First finger gains confidence
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False),
- (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
-
- # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
- # First finger remains confident
- [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
-
- # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident
- # First finger remains confident
- [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)]
- ])
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Only second finger is confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
def test_confidence_gain_b(self):
"""
@@ -1175,24 +1343,65 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
uhdev = self.uhdev
evdev = uhdev.get_evdev()
- self.confidence_change_assert_playback(uhdev, evdev, [
- # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
- # First and second finger confidently in contact
- [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
- # Firtst finger looses confidence
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
- # First finger gains confidence
- [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
-
- # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident
- # First finger goes up
- [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True),
- (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
- ])
+ self.confidence_change_assert_playback(
+ uhdev,
+ evdev,
+ [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First and second finger confidently in contact
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=0, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Firtst finger looses confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ True,
+ False,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=2, slot_num=0),
+ True,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger goes up
+ [
+ (
+ self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0),
+ False,
+ True,
+ ),
+ (
+ self.ContactIds(contact_id=1, tracking_id=1, slot_num=1),
+ True,
+ True,
+ ),
+ ],
+ ],
+ )
diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh
index db534e9099a8..ecbd57f775a0 100755
--- a/tools/testing/selftests/hid/vmtest.sh
+++ b/tools/testing/selftests/hid/vmtest.sh
@@ -1,296 +1,474 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Red Hat
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly HID_BPF_TEST="${SCRIPT_DIR}"/hid_bpf
+readonly HIDRAW_TEST="${SCRIPT_DIR}"/hidraw
+readonly HID_BPF_PROGS="${KERNEL_CHECKOUT}/drivers/hid/bpf/progs"
+readonly SSH_GUEST_PORT=22
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_hid_vmtest_XXXX.pid)
+
+readonly QEMU_OPTS="\
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE=""
+readonly LOG=$(mktemp /tmp/hid_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_hid_bpf vm_hidraw vm_pytest)
+readonly TEST_DESCS=(
+ "Run hid_bpf tests in the VM."
+ "Run hidraw tests in the VM."
+ "Run the hid-tools test-suite in the VM."
+)
+
+VERBOSE=0
+SHELL_MODE=0
+BUILD_HOST=""
+BUILD_HOST_PODMAN_CONTAINER_NAME=""
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]... [-- tests-args]"
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -H: hostname for remote build host (used with -b)"
+ echo " -p: podman container name for remote build host (used with -b)"
+ echo " Example: -H beefyserver -p vng"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -s: start a shell in the VM instead of running tests"
+ echo " -v: more verbose output (can be repeated multiple times)"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
-set -u
-set -e
-
-# This script currently only works for x86_64
-ARCH="$(uname -m)"
-case "${ARCH}" in
-x86_64)
- QEMU_BINARY=qemu-system-x86_64
- BZIMAGE="arch/x86/boot/bzImage"
- ;;
-*)
- echo "Unsupported architecture"
exit 1
- ;;
-esac
-SCRIPT_DIR="$(dirname $(realpath $0))"
-OUTPUT_DIR="$SCRIPT_DIR/results"
-KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}")
-B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py"
-NUM_COMPILE_JOBS="$(nproc)"
-LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")"
-LOG_FILE="${LOG_FILE_BASE}.log"
-EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
-CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1"
-
-TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}"
-DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests"
-
-usage()
-{
- cat <<EOF
-Usage: $0 [-j N] [-s] [-b] [-d <output_dir>] -- [<command>]
-
-<command> is the command you would normally run when you are in
-the source kernel direcory. e.g:
-
- $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-If no command is specified and a debug shell (-s) is not requested,
-"${DEFAULT_COMMAND}" will be run by default.
-
-If you build your kernel using KBUILD_OUTPUT= or O= options, these
-can be passed as environment variables to the script:
-
- O=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-or
-
- KBUILD_OUTPUT=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
-
-Options:
-
- -u) Update the boot2container script to a newer version.
- -d) Update the output directory (default: ${OUTPUT_DIR})
- -b) Run only the build steps for the kernel and the selftests
- -j) Number of jobs for compilation, similar to -j in make
- (default: ${NUM_COMPILE_JOBS})
- -s) Instead of powering off the VM, start an interactive
- shell. If <command> is specified, the shell runs after
- the command finishes executing
-EOF
}
-download()
-{
- local file="$1"
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
- echo "Downloading $file..." >&2
- curl -Lsf "$file" -o "${@:2}"
+vm_ssh() {
+ # vng --ssh-client keeps shouting "Warning: Permanently added 'virtme-ng%22'
+ # (ED25519) to the list of known hosts.",
+ # So replace the command with what's actually called and add the "-q" option
+ stdbuf -oL ssh -q \
+ -F ${HOME}/.cache/virtme-ng/.ssh/virtme-ng-ssh.conf \
+ -l root virtme-ng%${SSH_GUEST_PORT} \
+ "$@"
+ return $?
}
-recompile_kernel()
-{
- local kernel_checkout="$1"
- local make_command="$2"
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
- cd "${kernel_checkout}"
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
- ${make_command} olddefconfig
- ${make_command} headers
- ${make_command}
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh pytest; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${HID_BPF_TEST}") ]]; then
+ printf "skip: %s not found!" "${HID_BPF_TEST}"
+ printf " Please build the kselftest hid_bpf target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+
+ if [[ ! -x $(command -v "${HIDRAW_TEST}") ]]; then
+ printf "skip: %s not found!" "${HIDRAW_TEST}"
+ printf " Please build the kselftest hidraw target.\n"
+ exit "${KSFT_SKIP}"
+ fi
}
-update_selftests()
-{
- local kernel_checkout="$1"
- local selftests_dir="${kernel_checkout}/tools/testing/selftests/hid"
+check_vng() {
+ local tested_versions
+ local version
+ local ok
- cd "${selftests_dir}"
- ${make_command}
+ tested_versions=("1.36" "1.37")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
}
-run_vm()
-{
- local run_dir="$1"
- local b2c="$2"
- local kernel_bzimage="$3"
- local command="$4"
- local post_command=""
-
- cd "${run_dir}"
-
- if ! which "${QEMU_BINARY}" &> /dev/null; then
- cat <<EOF
-Could not find ${QEMU_BINARY}
-Please install qemu or set the QEMU_BINARY environment variable.
-EOF
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
exit 1
fi
- # alpine (used in post-container requires the PATH to have /bin
- export PATH=$PATH:/bin
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
- if [[ "${debug_shell}" != "yes" ]]
- then
- touch ${OUTPUT_DIR}/${LOG_FILE}
- command="mount bpffs -t bpf /sys/fs/bpf/; set -o pipefail ; ${command} 2>&1 | tee ${OUTPUT_DIR}/${LOG_FILE}"
- post_command="cat ${OUTPUT_DIR}/${LOG_FILE}"
- else
- command="mount bpffs -t bpf /sys/fs/bpf/; ${command}"
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
fi
- set +e
- $b2c --command "${command}" \
- --kernel ${kernel_bzimage} \
- --workdir ${OUTPUT_DIR} \
- --image ${CONTAINER_IMAGE}
+ local vng_args=("-v" "--config" "${SCRIPT_DIR}/config" "--build")
- echo $? > ${OUTPUT_DIR}/${EXIT_STATUS_FILE}
+ if [[ -n "${BUILD_HOST}" ]]; then
+ vng_args+=("--build-host" "${BUILD_HOST}")
+ fi
- set -e
+ if [[ -n "${BUILD_HOST_PODMAN_CONTAINER_NAME}" ]]; then
+ vng_args+=("--build-host-exec-prefix" \
+ "podman exec -ti ${BUILD_HOST_PODMAN_CONTAINER_NAME}")
+ fi
- ${post_command}
-}
+ if ! vng "${vng_args[@]}"; then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
-is_rel_path()
-{
- local path="$1"
+ if ! make -j$(nproc) -C "${HID_BPF_PROGS}"; then
+ die "failed to build HID bpf objects from source tree (${HID_BPF_PROGS})"
+ fi
- [[ ${path:0:1} != "/" ]]
+ if ! make -j$(nproc) -C "${SCRIPT_DIR}"; then
+ die "failed to build HID selftests from source tree (${SCRIPT_DIR})"
+ fi
+
+ popd &>/dev/null
}
-do_update_kconfig()
-{
- local kernel_checkout="$1"
- local kconfig_file="$2"
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
- rm -f "$kconfig_file" 2> /dev/null
+ qemu=$(command -v "${QEMU}")
- for config in "${KCONFIG_REL_PATHS[@]}"; do
- local kconfig_src="${config}"
- cat "$kconfig_src" >> "$kconfig_file"
- done
-}
+ if [[ "${VERBOSE}" -eq 2 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
-update_kconfig()
-{
- local kernel_checkout="$1"
- local kconfig_file="$2"
-
- if [[ -f "${kconfig_file}" ]]; then
- local local_modified="$(stat -c %Y "${kconfig_file}")"
-
- for config in "${KCONFIG_REL_PATHS[@]}"; do
- local kconfig_src="${config}"
- local src_modified="$(stat -c %Y "${kconfig_src}")"
- # Only update the config if it has been updated after the
- # previously cached config was created. This avoids
- # unnecessarily compiling the kernel and selftests.
- if [[ "${src_modified}" -gt "${local_modified}" ]]; then
- do_update_kconfig "$kernel_checkout" "$kconfig_file"
- # Once we have found one outdated configuration
- # there is no need to check other ones.
- break
- fi
- done
- else
- do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # If we are running from within the kernel source tree, use the kernel source tree
+ # as the kernel to boot, otherwise use the currently running kernel.
+ if [[ "$(realpath "$(pwd)")" == "${KERNEL_CHECKOUT}"* ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
fi
-}
-main()
-{
- local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
- local kernel_checkout=$(realpath "${script_dir}"/../../../../)
- # By default the script searches for the kernel in the checkout directory but
- # it also obeys environment variables O= and KBUILD_OUTPUT=
- local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
- local command="${DEFAULT_COMMAND}"
- local update_b2c="no"
- local debug_shell="no"
- local build_only="no"
-
- while getopts ':hsud:j:b' opt; do
- case ${opt} in
- u)
- update_b2c="yes"
- ;;
- d)
- OUTPUT_DIR="$OPTARG"
- ;;
- j)
- NUM_COMPILE_JOBS="$OPTARG"
- ;;
- s)
- command="/bin/sh"
- debug_shell="yes"
- ;;
- b)
- build_only="yes"
- ;;
- h)
- usage
- exit 0
- ;;
- \? )
- echo "Invalid Option: -$OPTARG"
- usage
- exit 1
- ;;
- : )
- echo "Invalid Option: -$OPTARG requires an argument"
- usage
- exit 1
- ;;
- esac
- done
- shift $((OPTIND -1))
-
- # trap 'catch "$?"' EXIT
- if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then
- if [[ $# -eq 0 ]]; then
- echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
- else
- command="$@"
-
- if [[ "${command}" == "/bin/bash" || "${command}" == "bash" ]]
- then
- debug_shell="yes"
- fi
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --ssh "${SSH_GUEST_PORT}" \
+ --rw &> ${logfile} &
+
+ local vng_pid=$!
+ local elapsed=0
+
+ while [[ ! -s "${QEMU_PIDFILE}" ]]; do
+ if ! kill -0 "${vng_pid}" 2>/dev/null; then
+ echo "vng process (PID ${vng_pid}) exited early, check logs for details" >&2
+ die "failed to boot VM"
fi
- fi
- local kconfig_file="${OUTPUT_DIR}/latest.config"
- local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+ if [[ ${elapsed} -ge ${WAIT_TOTAL} ]]; then
+ echo "Timed out after ${WAIT_TOTAL} seconds waiting for VM to boot" >&2
+ die "failed to boot VM"
+ fi
- # Figure out where the kernel is being built.
- # O takes precedence over KBUILD_OUTPUT.
- if [[ "${O:=""}" != "" ]]; then
- if is_rel_path "${O}"; then
- O="$(realpath "${PWD}/${O}")"
+ sleep 1
+ elapsed=$((elapsed + 1))
+ done
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
fi
- kernel_bzimage="${O}/${BZIMAGE}"
- make_command="${make_command} O=${O}"
- elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
- if is_rel_path "${KBUILD_OUTPUT}"; then
- KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+ if vm_ssh -- true; then
+ break
fi
- kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
- make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+vm_mount_bpffs() {
+ vm_ssh -- mount bpffs -t bpf /sys/fs/bpf
+}
+
+__log_stdin() {
+ stdbuf -oL awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0; fflush() }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local verbose="$1"
+ shift
+
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${verbose} -le 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
fi
+}
- local b2c="${OUTPUT_DIR}/vm2c.py"
+log_setup() {
+ log $((VERBOSE-1)) "setup" "$@"
+}
- echo "Output directory: ${OUTPUT_DIR}"
+log_host() {
+ local testname=$1
- mkdir -p "${OUTPUT_DIR}"
- update_kconfig "${kernel_checkout}" "${kconfig_file}"
+ shift
+ log $((VERBOSE-1)) "test:${testname}:host" "$@"
+}
- recompile_kernel "${kernel_checkout}" "${make_command}"
- update_selftests "${kernel_checkout}" "${make_command}"
+log_guest() {
+ local testname=$1
- if [[ "${build_only}" == "no" ]]; then
- if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then
- echo "vm2c script not found in ${b2c}"
- update_b2c="yes"
- fi
+ shift
+ log ${VERBOSE} "# test:${testname}" "$@"
+}
- if [[ "${update_b2c}" == "yes" ]]; then
- download $B2C_URL $b2c
- chmod +x $b2c
- fi
+test_vm_hid_bpf() {
+ local testname="${FUNCNAME[0]#test_}"
- run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}"
- if [[ "${debug_shell}" != "yes" ]]; then
- echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
- fi
+ vm_ssh -- "${HID_BPF_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_hidraw() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${HIDRAW_TEST}" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+test_vm_pytest() {
+ local testname="${FUNCNAME[0]#test_}"
- exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE})
+ shift
+
+ vm_ssh -- pytest ${SCRIPT_DIR}/tests --color=yes "$@" \
+ 2>&1 | log_guest "${testname}"
+
+ return ${PIPESTATUS[0]}
+}
+
+run_test() {
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_error_cnt_before=$(vm_ssh -- dmesg --level=err | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}" "$@"
+ rc=$?
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_error_cnt_after=$(vm_ssh -- dmesg --level=err | wc -l)
+ if [[ ${vm_error_cnt_after} -gt ${vm_error_cnt_before} ]]; then
+ echo "FAIL: kernel error detected on vm" | log_host "${name}"
+ vm_ssh -- dmesg --level=err | log_host "${name}"
+ rc=$KSFT_FAIL
fi
+
+ return "${rc}"
}
-main "$@"
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsbq:H:p: o
+do
+ case $o in
+ v) VERBOSE=$((VERBOSE+1));;
+ s) SHELL_MODE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ H) BUILD_HOST=$OPTARG;;
+ p) BUILD_HOST_PODMAN_CONTAINER_NAME=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+PARAMS=""
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=()
+ COUNT=0
+ for arg in $@; do
+ COUNT=$((COUNT+1))
+ if [[ x"$arg" == x"--" ]]; then
+ break
+ fi
+ ARGS+=($arg)
+ done
+ shift $COUNT
+ PARAMS="$@"
+fi
+
+if [[ "${SHELL_MODE}" -eq 0 ]]; then
+ check_args "${ARGS[@]}"
+ echo "1..${#ARGS[@]}"
+fi
+check_deps
+check_vng
+handle_build
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+vm_mount_bpffs
+log_setup "VM booted up"
+
+if [[ "${SHELL_MODE}" -eq 1 ]]; then
+ log_setup "Starting interactive shell in VM"
+ echo "Starting shell in VM. Use 'exit' to quit and shutdown the VM."
+ CURRENT_DIR="$(pwd)"
+ vm_ssh -t -- "cd '${CURRENT_DIR}' && exec bash -l"
+ exit "$KSFT_PASS"
+fi
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}" "${PARAMS}"
+ rc=$?
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+ cnt_total=$(( cnt_total + 1 ))
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index a8acf3996973..953b63e5aa6a 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -11,7 +11,7 @@
#include <errno.h>
#include <string.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MSEC_PER_SEC 1000L
#define NSEC_PER_MSEC 1000000L
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index a1b2b657999d..10e051b6f592 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -13,9 +13,6 @@
static unsigned long HUGEPAGE_SIZE;
-#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
-#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
-
static unsigned long get_huge_page_size(void)
{
char buf[80];
@@ -54,6 +51,8 @@ static __attribute__((constructor)) void setup_sizes(void)
mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
&mfd);
+ assert(mfd_buffer != MAP_FAILED);
+ assert(mfd > 0);
}
FIXTURE(iommufd)
@@ -342,12 +341,14 @@ FIXTURE(iommufd_ioas)
uint32_t hwpt_id;
uint32_t device_id;
uint64_t base_iova;
+ uint32_t device_pasid_id;
};
FIXTURE_VARIANT(iommufd_ioas)
{
unsigned int mock_domains;
unsigned int memory_limit;
+ bool pasid_capable;
};
FIXTURE_SETUP(iommufd_ioas)
@@ -372,6 +373,12 @@ FIXTURE_SETUP(iommufd_ioas)
IOMMU_TEST_DEV_CACHE_DEFAULT);
self->base_iova = MOCK_APERTURE_START;
}
+
+ if (variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ NULL, NULL,
+ &self->device_pasid_id);
}
FIXTURE_TEARDOWN(iommufd_ioas)
@@ -387,6 +394,7 @@ FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain)
FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain)
{
.mock_domains = 1,
+ .pasid_capable = true,
};
FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain)
@@ -439,6 +447,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&test_hwpt_id);
test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
&test_hwpt_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_FAULT_ID_VALID,
+ &test_hwpt_id);
test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
IOMMU_HWPT_ALLOC_NEST_PARENT,
@@ -748,25 +760,51 @@ TEST_F(iommufd_ioas, get_hw_info)
} buffer_smaller;
if (self->device_id) {
+ uint8_t max_pasid = 0;
+
/* Provide a zero-size user_buffer */
- test_cmd_get_hw_info(self->device_id, NULL, 0);
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0);
/* Provide a user_buffer with exact size */
- test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
+
+ /* Request for a wrong data_type, and a correct one */
+ test_err_get_hw_info(EOPNOTSUPP, self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST + 1,
+ &buffer_exact, sizeof(buffer_exact));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact,
+ sizeof(buffer_exact));
/*
* Provide a user_buffer with size larger than the exact size to check if
* kernel zero the trailing bytes.
*/
- test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
/*
* Provide a user_buffer with size smaller than the exact size to check if
* the fields within the size range still gets updated.
*/
- test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller));
+ test_cmd_get_hw_info(self->device_id,
+ IOMMU_HW_INFO_TYPE_DEFAULT,
+ &buffer_smaller, sizeof(buffer_smaller));
+ test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
+ ASSERT_EQ(0, max_pasid);
+ if (variant->pasid_capable) {
+ test_cmd_get_hw_info_pasid(self->device_pasid_id,
+ &max_pasid);
+ ASSERT_EQ(MOCK_PASID_WIDTH, max_pasid);
+ }
} else {
test_err_get_hw_info(ENOENT, self->device_id,
- &buffer_exact, sizeof(buffer_exact));
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact,
+ sizeof(buffer_exact));
test_err_get_hw_info(ENOENT, self->device_id,
- &buffer_larger, sizeof(buffer_larger));
+ IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger,
+ sizeof(buffer_larger));
}
}
@@ -929,6 +967,33 @@ TEST_F(iommufd_ioas, area_auto_iova)
test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
}
+/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */
+TEST_F(iommufd_ioas, reserved_overflow)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved.start = 6,
+ };
+ unsigned int map_len;
+ __u64 iova;
+
+ if (PAGE_SIZE == 4096) {
+ test_cmd.add_reserved.length = 0xffffffffffff8001;
+ map_len = 0x5000;
+ } else {
+ test_cmd.add_reserved.length =
+ 0xffffffffffffffff - MOCK_PAGE_SIZE * 16;
+ map_len = MOCK_PAGE_SIZE * 10;
+ }
+
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova);
+}
+
TEST_F(iommufd_ioas, area_allowed)
{
struct iommu_test_cmd test_cmd = {
@@ -1506,6 +1571,49 @@ TEST_F(iommufd_ioas, copy_sweep)
test_ioctl_destroy(dst_ioas_id);
}
+TEST_F(iommufd_ioas, dmabuf_simple)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u64 iova;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, 0, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, buf_size, buf_size, &iova);
+ test_err_ioctl_ioas_map_file(EINVAL, dfd, 0, buf_size + 1, &iova);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+
+ close(dfd);
+}
+
+TEST_F(iommufd_ioas, dmabuf_revoke)
+{
+ size_t buf_size = PAGE_SIZE*4;
+ __u32 hwpt_id;
+ __u64 iova;
+ __u64 iova2;
+ int dfd;
+
+ test_cmd_get_dmabuf(buf_size, &dfd);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova);
+ test_cmd_revoke_dmabuf(dfd, true);
+
+ if (variant->mock_domains)
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &hwpt_id);
+
+ test_err_ioctl_ioas_map_file(ENODEV, dfd, 0, buf_size, &iova2);
+
+ test_cmd_revoke_dmabuf(dfd, false);
+ test_ioctl_ioas_map_file(dfd, 0, buf_size, &iova2);
+
+ /* Restore the iova back */
+ test_ioctl_ioas_unmap(iova, buf_size);
+ test_ioctl_ioas_map_fixed_file(dfd, 0, buf_size, iova);
+
+ close(dfd);
+}
+
FIXTURE(iommufd_mock_domain)
{
int fd;
@@ -1724,13 +1832,15 @@ TEST_F(iommufd_mock_domain, all_aligns)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -1776,13 +1886,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -1986,6 +2098,13 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_SET,
+ .val64 = 0,
+ };
+ size_t mmap_buffer_size;
unsigned long size;
int mmap_flags;
void *vrc;
@@ -1993,29 +2112,40 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
if (variant->buffer_size < MOCK_PAGE_SIZE) {
SKIP(return,
- "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
+ "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%u",
variant->buffer_size, MOCK_PAGE_SIZE);
}
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
- rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
- if (rc || !self->buffer) {
- SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
- variant->buffer_size, rc);
- }
-
mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ mmap_buffer_size = variant->buffer_size;
if (variant->hugepages) {
/*
* MAP_POPULATE will cause the kernel to fail mmap if THPs are
* not available.
*/
mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+
+ /*
+ * Allocation must be aligned to the HUGEPAGE_SIZE, because the
+ * following mmap() will automatically align the length to be a
+ * multiple of the underlying huge page size. Failing to do the
+ * same at this allocation will result in a memory overwrite by
+ * the mmap().
+ */
+ if (mmap_buffer_size < HUGEPAGE_SIZE)
+ mmap_buffer_size = HUGEPAGE_SIZE;
+ }
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ mmap_buffer_size, rc);
}
assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
- vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
assert(vrc == self->buffer);
@@ -2030,22 +2160,24 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
test_ioctl_ioas_alloc(&self->ioas_id);
- /* Enable 1M mock IOMMU hugepages */
- if (variant->hugepages) {
- test_cmd_mock_domain_flags(self->ioas_id,
- MOCK_FLAGS_DEVICE_HUGE_IOVA,
- &self->stdev_id, &self->hwpt_id,
- &self->idev_id);
- } else {
- test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
- &self->hwpt_id, &self->idev_id);
- }
+
+ /*
+ * For dirty testing it is important that the page size fed into
+ * the iommu page tables matches the size the dirty logic
+ * expects, or set_dirty can touch too much stuff.
+ */
+ cmd.object_id = self->ioas_id;
+ if (!variant->hugepages)
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
}
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
- munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
+ free(self->buffer);
+ free(self->bitmap);
teardown_iommufd(self->fd, _metadata);
}
@@ -2153,8 +2285,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
- test_cmd_get_hw_capabilities(self->idev_id, caps,
- IOMMU_HW_CAP_DIRTY_TRACKING);
+ test_cmd_get_hw_capabilities(self->idev_id, caps);
ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
caps & IOMMU_HW_CAP_DIRTY_TRACKING);
@@ -2165,18 +2296,23 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
{
uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
uint32_t hwpt_id;
- uint32_t ioas_id;
if (variant->hugepages)
page_size = MOCK_HUGE_PAGE_SIZE;
- test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
- test_cmd_hwpt_alloc(self->idev_id, ioas_id,
- IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
test_cmd_set_dirty_tracking(hwpt_id, true);
@@ -2202,18 +2338,24 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
{
uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t ioas_id = self->ioas_id;
uint32_t hwpt_id;
- uint32_t ioas_id;
if (variant->hugepages)
page_size = MOCK_HUGE_PAGE_SIZE;
- test_ioctl_ioas_alloc(&ioas_id);
test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
variant->buffer_size, MOCK_APERTURE_START);
- test_cmd_hwpt_alloc(self->idev_id, ioas_id,
- IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+
+ if (variant->hugepages)
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_HUGE, &hwpt_id);
+ else
+ test_cmd_hwpt_alloc_iommupt(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
+ MOCK_IOMMUPT_DEFAULT, &hwpt_id);
test_cmd_set_dirty_tracking(hwpt_id, true);
@@ -2555,6 +2697,8 @@ TEST_F(vfio_compat_mock_domain, map)
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+ /* Unmap of empty is success */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
/* UNMAP_FLAG_ALL requires 0 iova/size */
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
@@ -2666,7 +2810,7 @@ FIXTURE_SETUP(iommufd_viommu)
/* Allocate a vIOMMU taking refcount of the parent hwpt */
test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
&self->viommu_id);
/* Allocate a regular nested hwpt */
@@ -2705,24 +2849,27 @@ TEST_F(iommufd_viommu, viommu_negative_tests)
if (self->device_id) {
/* Negative test -- invalid hwpt (hwpt_id=0) */
test_err_viommu_alloc(ENOENT, device_id, 0,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
/* Negative test -- not a nesting parent hwpt */
test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id);
test_err_viommu_alloc(EINVAL, device_id, hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
test_ioctl_destroy(hwpt_id);
/* Negative test -- unsupported viommu type */
test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id,
- 0xdead, NULL);
+ 0xdead, NULL, 0, NULL);
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, self->hwpt_id));
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, self->viommu_id));
} else {
test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, NULL);
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ NULL);
}
}
@@ -2736,32 +2883,68 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf)
uint32_t iopf_hwpt_id;
uint32_t fault_id;
uint32_t fault_fd;
+ uint32_t vdev_id;
+
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | (1 << 31),
+ &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id);
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id));
+ test_cmd_trigger_iopf(dev_id, fault_fd);
+
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+}
+
+TEST_F(iommufd_viommu, viommu_alloc_with_data)
+{
+ struct iommu_viommu_selftest data = {
+ .in_data = 0xbeef,
+ };
+ uint32_t *test;
- if (self->device_id) {
- test_ioctl_fault_alloc(&fault_id, &fault_fd);
- test_err_hwpt_alloc_iopf(
- ENOENT, dev_id, viommu_id, UINT32_MAX,
- IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
- IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
- test_err_hwpt_alloc_iopf(
- EOPNOTSUPP, dev_id, viommu_id, fault_id,
- IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id,
- IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
- test_cmd_hwpt_alloc_iopf(
- dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID,
- &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
- sizeof(data));
+ if (!self->device_id)
+ SKIP(return, "Skipping test for variant no_viommu");
- test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
- EXPECT_ERRNO(EBUSY,
- _test_ioctl_destroy(self->fd, iopf_hwpt_id));
- test_cmd_trigger_iopf(dev_id, fault_fd);
+ test_cmd_viommu_alloc(self->device_id, self->hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data),
+ &self->viommu_id);
+ ASSERT_EQ(data.out_data, data.in_data);
- test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
- test_ioctl_destroy(iopf_hwpt_id);
- close(fault_fd);
- test_ioctl_destroy(fault_id);
- }
+ /* Negative mmap tests -- offset and length cannot be changed */
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE);
+ test_err_mmap(ENXIO, data.out_mmap_length,
+ data.out_mmap_offset + PAGE_SIZE * 2);
+ test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset);
+ test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset);
+
+ /* Now do a correct mmap for a loopback test */
+ test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->fd, data.out_mmap_offset);
+ ASSERT_NE(MAP_FAILED, test);
+ ASSERT_EQ(data.in_data, *test);
+
+ /* The owner of the mmap region should be blocked */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id));
+ munmap(test, data.out_mmap_length);
}
TEST_F(iommufd_viommu, vdevice_alloc)
@@ -2769,15 +2952,46 @@ TEST_F(iommufd_viommu, vdevice_alloc)
uint32_t viommu_id = self->viommu_id;
uint32_t dev_id = self->device_id;
uint32_t vdev_id = 0;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+ int prev_seq = -1;
if (dev_id) {
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ self->nested_hwpt_id);
+
+ /* Allocate a vEVENTQ with veventq_depth=2 */
+ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
+ &veventq_id, &veventq_fd);
+ test_err_veventq_alloc(EEXIST, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL);
/* Set vdev_id to 0x99, unset it, and set to 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq);
test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99,
&vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(vdev_id);
+
+ /* Try again with 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ /* Trigger an overflow with three events */
+ test_cmd_trigger_vevents(dev_id, 3);
+ test_err_read_vevents(EOVERFLOW, veventq_fd, 3, 0x88,
+ &prev_seq);
+ /* Overflow must be gone after the previous reads */
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x88, &prev_seq);
+ close(veventq_fd);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(vdev_id);
+ test_ioctl_destroy(veventq_id);
} else {
test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL);
}
@@ -2791,169 +3005,540 @@ TEST_F(iommufd_viommu, vdevice_cache)
uint32_t vdev_id = 0;
uint32_t num_inv;
- if (dev_id) {
- test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+
+ test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Check data_type by passing zero-length array */
+ num_inv = 0;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: Invalid data_type */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: structure size sanity */
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs) + 1, &num_inv);
+ assert(!num_inv);
+
+ num_inv = 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid flag is passed */
+ num_inv = 1;
+ inv_reqs[0].flags = 0xffffffff;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid data_uptr when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, NULL,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid entry_len when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0,
+ &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid cache_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid vdev_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x9;
+ inv_reqs[0].cache_id = 0;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
- test_cmd_dev_check_cache_all(dev_id,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid flags configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0xffffffff;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 1;
+ test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
- /* Check data_type by passing zero-length array */
- num_inv = 0;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+ /*
+ * Invalidate the 1st cache entry but fail the 2nd request
+ * due to invalid cache_id configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 0;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
+ test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
+ IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 2nd cache entry and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 1;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache(dev_id, 0, 0);
+ test_cmd_dev_check_cache(dev_id, 1, 0);
+ test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT);
+ test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT);
+
+ /* Invalidate the 3rd and 4th cache entries and verify */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].cache_id = 2;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].vdev_id = 0x99;
+ inv_reqs[1].cache_id = 3;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 2);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+
+ /* Invalidate all cache entries for nested_dev_id[1] and verify */
+ num_inv = 1;
+ inv_reqs[0].vdev_id = 0x99;
+ inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
+ test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs),
+ &num_inv);
+ assert(num_inv == 1);
+ test_cmd_dev_check_cache_all(dev_id, 0);
+ test_ioctl_destroy(vdev_id);
+}
+
+TEST_F(iommufd_viommu, hw_queue)
+{
+ __u64 iova = MOCK_APERTURE_START, iova2;
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t hw_queue_id[2];
+
+ if (!viommu_id)
+ SKIP(return, "Skipping test for variant no_viommu");
+
+ /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */
+ test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+ /* Fail queue addr and length */
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, 0, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EOVERFLOW, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0,
+ PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail missing iova */
+ test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+
+ /* Map iova */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2);
+
+ /* Fail index=1 and =MAX; must start from index=0 */
+ test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE,
+ &hw_queue_id[0]);
+
+ /* Allocate index=0, declare ownership of the iova */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0,
+ iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail duplicated index */
+ test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST,
+ 0, iova, PAGE_SIZE, &hw_queue_id[0]);
+ /* Fail unmap, due to iova ownership */
+ test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE);
+ /* The 2nd page is not pinned, so it can be unmmap */
+ test_ioctl_ioas_unmap(iova2, PAGE_SIZE);
+
+ /* Allocate index=1, with an unaligned case */
+ test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1,
+ iova + PAGE_SIZE / 2, PAGE_SIZE / 2,
+ &hw_queue_id[1]);
+ /* Fail to destroy, due to dependency */
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0]));
+
+ /* Destroy in descending order */
+ test_ioctl_destroy(hw_queue_id[1]);
+ test_ioctl_destroy(hw_queue_id[0]);
+ /* Now it can unmap the first page */
+ test_ioctl_ioas_unmap(iova, PAGE_SIZE);
+}
- /* Negative test: Invalid data_type */
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+TEST_F(iommufd_viommu, vdevice_tombstone)
+{
+ uint32_t viommu_id = self->viommu_id;
+ uint32_t dev_id = self->device_id;
+ uint32_t vdev_id = 0;
- /* Negative test: structure size sanity */
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs) + 1, &num_inv);
- assert(!num_inv);
+ if (!dev_id)
+ SKIP(return, "Skipping test for variant no_viommu");
- num_inv = 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- 1, &num_inv);
- assert(!num_inv);
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_ioctl_destroy(self->stdev_id);
+ EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id));
+}
- /* Negative test: invalid flag is passed */
- num_inv = 1;
- inv_reqs[0].flags = 0xffffffff;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+FIXTURE(iommufd_device_pasid)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t device_id;
+ uint32_t no_pasid_stdev_id;
+ uint32_t no_pasid_device_id;
+};
- /* Negative test: invalid data_uptr when array is not empty */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EINVAL, viommu_id, NULL,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+FIXTURE_VARIANT(iommufd_device_pasid)
+{
+ bool pasid_capable;
+};
- /* Negative test: invalid entry_len when array is not empty */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- 0, &num_inv);
- assert(!num_inv);
+FIXTURE_SETUP(iommufd_device_pasid)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
- /* Negative test: invalid cache_id */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, &self->hwpt_id,
+ &self->device_id);
+ if (!variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id, 0,
+ &self->no_pasid_stdev_id, NULL,
+ &self->no_pasid_device_id);
+}
- /* Negative test: invalid vdev_id */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x9;
- inv_reqs[0].cache_id = 0;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(!num_inv);
+FIXTURE_TEARDOWN(iommufd_device_pasid)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, no_pasid)
+{
+ .pasid_capable = false,
+};
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, has_pasid)
+{
+ .pasid_capable = true,
+};
+
+TEST_F(iommufd_device_pasid, pasid_attach)
+{
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ uint32_t nested_hwpt_id[3] = {};
+ uint32_t parent_hwpt_id = 0;
+ uint32_t fault_id, fault_fd;
+ uint32_t s2_hwpt_id = 0;
+ uint32_t iopf_hwpt_id;
+ uint32_t pasid = 100;
+ uint32_t viommu_id;
+
+ /*
+ * Negative, detach pasid without attaching, this is not expected.
+ * But it should not result in failure anyway.
+ */
+ test_cmd_pasid_detach(pasid);
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+
+ /* Fault related preparation */
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID,
+ &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate a regular nested hwpt based on viommu */
+ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, viommu_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[2],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &s2_hwpt_id);
+
+ /* Attach RID to non-pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ /* then attach to pasid should fail */
+ test_err_pasid_attach(EINVAL, pasid, s2_hwpt_id);
+
+ /* Attach RID to pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, s2_hwpt_id);
+ /* then attach to pasid should succeed, */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ /* but attach RID to non-pasid compat domain should fail now. */
+ test_err_mock_domain_replace(EINVAL, self->stdev_id, parent_hwpt_id);
+ /*
+ * Detach hwpt from pasid 100, and check if the pasid 100
+ * has null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+ /* RID is attached to pasid-comapt domain, pasid path is not used */
+
+ if (!variant->pasid_capable) {
/*
- * Invalidate the 1st cache entry but fail the 2nd request
- * due to invalid flags configuration in the 2nd request.
+ * PASID-compatible domain can be used by non-PASID-capable
+ * device.
*/
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 0;
- inv_reqs[1].flags = 0xffffffff;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = 1;
- test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, nested_hwpt_id[0]);
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, self->ioas_id);
/*
- * Invalidate the 1st cache entry but fail the 2nd request
- * due to invalid cache_id configuration in the 2nd request.
+ * Attach hwpt to pasid 100 of non-PASID-capable device,
+ * should fail, no matter domain is pasid-comapt or not.
*/
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 0;
- inv_reqs[1].flags = 0;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1;
- test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs,
- IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
- /* Invalidate the 2nd cache entry and verify */
- num_inv = 1;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 1;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache(dev_id, 0, 0);
- test_cmd_dev_check_cache(dev_id, 1, 0);
- test_cmd_dev_check_cache(dev_id, 2,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
- test_cmd_dev_check_cache(dev_id, 3,
- IOMMU_TEST_DEV_CACHE_DEFAULT);
-
- /* Invalidate the 3rd and 4th cache entries and verify */
- num_inv = 2;
- inv_reqs[0].flags = 0;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].cache_id = 2;
- inv_reqs[1].flags = 0;
- inv_reqs[1].vdev_id = 0x99;
- inv_reqs[1].cache_id = 3;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 2);
- test_cmd_dev_check_cache_all(dev_id, 0);
-
- /* Invalidate all cache entries for nested_dev_id[1] and verify */
- num_inv = 1;
- inv_reqs[0].vdev_id = 0x99;
- inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
- test_cmd_viommu_invalidate(viommu_id, inv_reqs,
- sizeof(*inv_reqs), &num_inv);
- assert(num_inv == 1);
- test_cmd_dev_check_cache_all(dev_id, 0);
- test_ioctl_destroy(vdev_id);
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, parent_hwpt_id));
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, s2_hwpt_id));
}
+
+ /*
+ * Attach non pasid compat hwpt to pasid-capable device, should
+ * fail, and have null domain.
+ */
+ test_err_pasid_attach(EINVAL, pasid, parent_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach ioas to pasid 100, should fail, domain should
+ * be null.
+ */
+ test_err_pasid_attach(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach the s2_hwpt to pasid 100, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Try attach pasid 100 with another hwpt, should FAIL
+ * as attach does not allow overwrite, use REPLACE instead.
+ */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /*
+ * Detach hwpt from pasid 100 for next test, should succeed,
+ * and have null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach nested hwpt to pasid 100, should succeed, domain
+ * should be valid.
+ */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /* Attach to pasid 100 which has been attached, should fail. */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /* cleanup pasid 100 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Replace tests */
+
+ pasid = 200;
+ /*
+ * Replace pasid 200 without attaching it, should fail
+ * with -EINVAL.
+ */
+ test_err_pasid_replace(EINVAL, pasid, s2_hwpt_id);
+
+ /*
+ * Attach the s2 hwpt to pasid 200, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 200 with self->ioas_id, should fail
+ * and domain should be the prior s2 hwpt.
+ */
+ test_err_pasid_replace(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace a nested hwpt for pasid 200, should succeed,
+ * and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /*
+ * Replace with another nested hwpt for pasid 200, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[1]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[1]));
+
+ /* cleanup pasid 200 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Negative Tests for pasid replace, use pasid 1024 */
+
+ /*
+ * Attach the s2 hwpt to pasid 1024, should succeed, domain should
+ * be valid.
+ */
+ pasid = 1024;
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 1024 with nested_hwpt_id[0], should fail,
+ * but have the old valid domain. This is a designed
+ * negative case. Normally, this shall succeed.
+ */
+ test_err_pasid_replace(ENOMEM, pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 1024 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Attach to iopf-capable hwpt */
+
+ /*
+ * Attach an iopf hwpt to pasid 2048, should succeed, domain should
+ * be valid.
+ */
+ pasid = 2048;
+ test_cmd_pasid_attach(pasid, iopf_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, iopf_hwpt_id));
+
+ test_cmd_trigger_iopf_pasid(self->device_id, pasid, fault_fd);
+
+ /*
+ * Replace with s2_hwpt_id for pasid 2048, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 2048 */
+ test_cmd_pasid_detach(pasid);
+
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+
+ /* Detach the s2_hwpt_id from RID */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index 64b1f8e1b0cf..45c14323a618 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -113,7 +113,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata,
* necessarily mean a test failure, just that the limit has to be made
* bigger.
*/
- ASSERT_GT(400, nth_state->iteration);
+ ASSERT_GT(1000, nth_state->iteration);
if (nth_state->iteration != 0) {
ssize_t res;
ssize_t res2;
@@ -209,12 +209,16 @@ FIXTURE(basic_fail_nth)
{
int fd;
uint32_t access_id;
+ uint32_t stdev_id;
+ uint32_t pasid;
};
FIXTURE_SETUP(basic_fail_nth)
{
self->fd = -1;
self->access_id = 0;
+ self->stdev_id = 0;
+ self->pasid = 0; //test should use a non-zero value
}
FIXTURE_TEARDOWN(basic_fail_nth)
@@ -226,6 +230,8 @@ FIXTURE_TEARDOWN(basic_fail_nth)
rc = _test_cmd_destroy_access(self->access_id);
assert(rc == 0);
}
+ if (self->pasid && self->stdev_id)
+ _test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid);
teardown_iommufd(self->fd, _metadata);
}
@@ -620,13 +626,15 @@ TEST_FAIL_NTH(basic_fail_nth, device)
};
struct iommu_test_hw_info info;
uint32_t fault_id, fault_fd;
+ uint32_t veventq_id, veventq_fd;
uint32_t fault_hwpt_id;
+ uint32_t test_hwpt_id;
uint32_t ioas_id;
uint32_t ioas_id2;
- uint32_t stdev_id;
uint32_t idev_id;
uint32_t hwpt_id;
uint32_t viommu_id;
+ uint32_t hw_queue_id;
uint32_t vdev_id;
__u64 iova;
@@ -654,35 +662,46 @@ TEST_FAIL_NTH(basic_fail_nth, device)
fail_nth_enable();
- if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL,
- &idev_id))
+ if (_test_cmd_mock_domain_flags(self->fd, ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, NULL, &idev_id))
return -1;
- if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
+ if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT,
+ &info, sizeof(info), NULL, NULL))
return -1;
- if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id,
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID, &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
- if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL))
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, ioas_id2, NULL))
return -1;
- if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL))
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, hwpt_id, NULL))
return -1;
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
- IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID,
+ &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
- if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id,
- IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id))
+ if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0,
+ IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0,
+ &viommu_id))
return -1;
if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id))
return -1;
+ if (_test_cmd_hw_queue_alloc(self->fd, viommu_id,
+ IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova,
+ PAGE_SIZE, &hw_queue_id))
+ return -1;
+
if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd))
return -1;
close(fault_fd);
@@ -692,6 +711,37 @@ TEST_FAIL_NTH(basic_fail_nth, device)
IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)))
return -1;
+ if (_test_cmd_veventq_alloc(self->fd, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id,
+ &veventq_fd))
+ return -1;
+ close(veventq_fd);
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID,
+ &test_hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ /* Tests for pasid attach/replace/detach */
+
+ self->pasid = 200;
+
+ if (_test_cmd_pasid_attach(self->fd, self->stdev_id,
+ self->pasid, hwpt_id)) {
+ self->pasid = 0;
+ return -1;
+ }
+
+ if (_test_cmd_pasid_replace(self->fd, self->stdev_id,
+ self->pasid, test_hwpt_id))
+ return -1;
+
+ if (_test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid))
+ return -1;
+
+ self->pasid = 0;
+
return 0;
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index d979f5b0efe8..5502751d500c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -9,8 +9,9 @@
#include <sys/ioctl.h>
#include <stdint.h>
#include <assert.h>
+#include <poll.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
/* Hack to make assertions more readable */
@@ -55,17 +56,26 @@ static unsigned long PAGE_SIZE;
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+#define test_err_mmap(_errno, length, offset) \
+ EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \
+ MAP_SHARED, self->fd, offset))
+
static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
{
int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
int mfd = memfd_create("buffer", mfd_flags);
+ void *buf = MAP_FAILED;
if (mfd <= 0)
return MAP_FAILED;
if (ftruncate(mfd, length))
- return MAP_FAILED;
+ goto out;
*mfd_p = mfd;
- return mmap(0, length, prot, flags, mfd, 0);
+ buf = mmap(0, length, prot, flags, mfd, 0);
+out:
+ if (buf == MAP_FAILED)
+ close(mfd);
+ return buf;
}
/*
@@ -205,6 +215,18 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
0))
+#define test_cmd_hwpt_alloc_iommupt(device_id, pt_id, flags, iommupt_type, \
+ hwpt_id) \
+ ({ \
+ struct iommu_hwpt_selftest user_cfg = { \
+ .pagetable_type = iommupt_type \
+ }; \
+ \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, 0, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_SELFTEST, \
+ &user_cfg, sizeof(user_cfg))); \
+ })
#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
self->fd, device_id, pt_id, 0, flags, \
@@ -538,6 +560,39 @@ static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \
self->fd, access_id, access_pages_id))
+static int _test_cmd_get_dmabuf(int fd, size_t len, int *out_fd)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_GET,
+ .dmabuf_get = { .length = len, .open_flags = O_CLOEXEC },
+ };
+
+ *out_fd = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (*out_fd < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_get_dmabuf(len, out_fd) \
+ ASSERT_EQ(0, _test_cmd_get_dmabuf(self->fd, len, out_fd))
+
+static int _test_cmd_revoke_dmabuf(int fd, int dmabuf_fd, bool revoked)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DMABUF_REVOKE,
+ .dmabuf_revoke = { .dmabuf_fd = dmabuf_fd, .revoked = revoked },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret < 0)
+ return -1;
+ return 0;
+}
+#define test_cmd_revoke_dmabuf(dmabuf_fd, revoke) \
+ ASSERT_EQ(0, _test_cmd_revoke_dmabuf(self->fd, dmabuf_fd, revoke))
+
static int _test_ioctl_destroy(int fd, unsigned int id)
{
struct iommu_destroy cmd = {
@@ -708,6 +763,17 @@ static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd,
self->fd, ioas_id, mfd, start, length, iova_p, \
IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))
+#define test_ioctl_ioas_map_fixed_file(mfd, start, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, _test_ioctl_ioas_map_file( \
+ self->fd, self->ioas_id, mfd, start, \
+ length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
{
struct iommu_test_cmd memlimit_cmd = {
@@ -756,19 +822,24 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
#endif
/* @data can be NULL */
-static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
- size_t data_len, uint32_t *capabilities)
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type,
+ void *data, size_t data_len,
+ uint32_t *capabilities, uint8_t *max_pasid)
{
struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
struct iommu_hw_info cmd = {
.size = sizeof(cmd),
.dev_id = device_id,
.data_len = data_len,
+ .in_data_type = data_type,
.data_uptr = (uint64_t)data,
.out_capabilities = 0,
};
int ret;
+ if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT)
+ cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE;
+
ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd);
if (ret)
return ret;
@@ -802,22 +873,33 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
assert(!info->flags);
}
+ if (max_pasid)
+ *max_pasid = cmd.out_max_pasid_log2;
+
if (capabilities)
*capabilities = cmd.out_capabilities;
return 0;
}
-#define test_cmd_get_hw_info(device_id, data, data_len) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL))
+#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
+
+#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_get_hw_info(self->fd, device_id, data_type, \
+ data, data_len, NULL, NULL))
-#define test_err_get_hw_info(_errno, device_id, data, data_len) \
- EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL))
+#define test_cmd_get_hw_capabilities(device_id, caps) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
+ 0, &caps, NULL))
-#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
+#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
+ IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \
+ 0, NULL, max_pasid))
static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
{
@@ -842,14 +924,15 @@ static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
ASSERT_NE(0, *(fault_fd)); \
})
-static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
+static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid,
+ __u32 fault_fd)
{
struct iommu_test_cmd trigger_iopf_cmd = {
.size = sizeof(trigger_iopf_cmd),
.op = IOMMU_TEST_OP_TRIGGER_IOPF,
.trigger_iopf = {
.dev_id = device_id,
- .pasid = 0x1,
+ .pasid = pasid,
.grpid = 0x2,
.perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
.addr = 0xdeadbeaf,
@@ -880,10 +963,14 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
}
#define test_cmd_trigger_iopf(device_id, fault_fd) \
- ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd))
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, 0x1, fault_fd))
+#define test_cmd_trigger_iopf_pasid(device_id, pasid, fault_fd) \
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, \
+ pasid, fault_fd))
static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
- __u32 type, __u32 flags, __u32 *viommu_id)
+ __u32 flags, __u32 type, void *data,
+ __u32 data_len, __u32 *viommu_id)
{
struct iommu_viommu_alloc cmd = {
.size = sizeof(cmd),
@@ -891,6 +978,8 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
.type = type,
.dev_id = device_id,
.hwpt_id = hwpt_id,
+ .data_uptr = (uint64_t)data,
+ .data_len = data_len,
};
int ret;
@@ -902,13 +991,15 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
return 0;
}
-#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \
- ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \
- type, 0, viommu_id))
-#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \
- EXPECT_ERRNO(_errno, \
- _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \
- type, 0, viommu_id))
+#define test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \
+ viommu_id) \
+ ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
+#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \
+ data_len, viommu_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \
+ type, data, data_len, viommu_id))
static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
__u64 virt_id, __u32 *vdev_id)
@@ -936,3 +1027,233 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
EXPECT_ERRNO(_errno, \
_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \
virt_id, vdev_id))
+
+static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 idx, __u64 base_addr, __u64 length,
+ __u32 *hw_queue_id)
+{
+ struct iommu_hw_queue_alloc cmd = {
+ .size = sizeof(cmd),
+ .viommu_id = viommu_id,
+ .type = type,
+ .index = idx,
+ .nesting_parent_iova = base_addr,
+ .length = length,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (hw_queue_id)
+ *hw_queue_id = cmd.out_hw_queue_id;
+ return 0;
+}
+
+#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \
+ ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \
+ out_qid) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \
+ base_addr, len, out_qid))
+
+static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 *veventq_id, __u32 *veventq_fd)
+{
+ struct iommu_veventq_alloc cmd = {
+ .size = sizeof(cmd),
+ .type = type,
+ .veventq_depth = 2,
+ .viommu_id = viommu_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_VEVENTQ_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (veventq_id)
+ *veventq_id = cmd.out_veventq_id;
+ if (veventq_fd)
+ *veventq_fd = cmd.out_veventq_fd;
+ return 0;
+}
+
+#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \
+ ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \
+ veventq_fd) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+
+static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
+{
+ struct iommu_test_cmd trigger_vevent_cmd = {
+ .size = sizeof(trigger_vevent_cmd),
+ .op = IOMMU_TEST_OP_TRIGGER_VEVENT,
+ .trigger_vevent = {
+ .dev_id = dev_id,
+ },
+ };
+
+ while (nvevents--) {
+ if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
+ &trigger_vevent_cmd))
+ return -1;
+ }
+ return 0;
+}
+
+#define test_cmd_trigger_vevents(dev_id, nvevents) \
+ ASSERT_EQ(0, _test_cmd_trigger_vevents(self->fd, dev_id, nvevents))
+
+static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents,
+ __u32 virt_id, int *prev_seq)
+{
+ struct pollfd pollfd = { .fd = event_fd, .events = POLLIN };
+ struct iommu_viommu_event_selftest *event;
+ struct iommufd_vevent_header *hdr;
+ ssize_t bytes;
+ void *data;
+ int ret, i;
+
+ ret = poll(&pollfd, 1, 1000);
+ if (ret < 0)
+ return -1;
+
+ data = calloc(nvevents, sizeof(*hdr) + sizeof(*event));
+ if (!data) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ bytes = read(event_fd, data,
+ nvevents * (sizeof(*hdr) + sizeof(*event)));
+ if (bytes <= 0) {
+ errno = EFAULT;
+ ret = -1;
+ goto out_free;
+ }
+
+ for (i = 0; i < nvevents; i++) {
+ hdr = data + i * (sizeof(*hdr) + sizeof(*event));
+
+ if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS ||
+ hdr->sequence - *prev_seq > 1) {
+ *prev_seq = hdr->sequence;
+ errno = EOVERFLOW;
+ ret = -1;
+ goto out_free;
+ }
+ *prev_seq = hdr->sequence;
+ event = data + sizeof(*hdr);
+ if (event->virt_id != virt_id) {
+ errno = EINVAL;
+ ret = -1;
+ goto out_free;
+ }
+ }
+
+ ret = 0;
+out_free:
+ free(data);
+ return ret;
+}
+
+#define test_cmd_read_vevents(event_fd, nvevents, virt_id, prev_seq) \
+ ASSERT_EQ(0, _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+#define test_err_read_vevents(_errno, event_fd, nvevents, virt_id, prev_seq) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+
+static int _test_cmd_pasid_attach(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_attach = {
+ .size = sizeof(test_attach),
+ .op = IOMMU_TEST_OP_PASID_ATTACH,
+ .id = stdev_id,
+ .pasid_attach = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_ATTACH),
+ &test_attach);
+}
+
+#define test_cmd_pasid_attach(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_attach(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_replace(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_replace = {
+ .size = sizeof(test_replace),
+ .op = IOMMU_TEST_OP_PASID_REPLACE,
+ .id = stdev_id,
+ .pasid_replace = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_REPLACE),
+ &test_replace);
+}
+
+#define test_cmd_pasid_replace(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_replace(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_detach(int fd, __u32 stdev_id, __u32 pasid)
+{
+ struct iommu_test_cmd test_detach = {
+ .size = sizeof(test_detach),
+ .op = IOMMU_TEST_OP_PASID_DETACH,
+ .id = stdev_id,
+ .pasid_detach = {
+ .pasid = pasid,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_DETACH),
+ &test_detach);
+}
+
+#define test_cmd_pasid_detach(pasid) \
+ ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, self->stdev_id, pasid))
+
+static int test_cmd_pasid_check_hwpt(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 hwpt_id)
+{
+ struct iommu_test_cmd test_pasid_check = {
+ .size = sizeof(test_pasid_check),
+ .op = IOMMU_TEST_OP_PASID_CHECK_HWPT,
+ .id = stdev_id,
+ .pasid_check = {
+ .pasid = pasid,
+ .hwpt_id = hwpt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_CHECK_HWPT),
+ &test_pasid_check);
+}
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index e9dbb84c100a..e107379d185c 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -7,7 +7,7 @@
#include <sys/msg.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MAX_MSG_SIZE 32
@@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque)
fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
if (fd == -1) {
- printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n");
return -errno;
}
sprintf(buf, "%d", msgque->msq_id);
ret = write(fd, buf, strlen(buf));
if (ret != strlen(buf)) {
- printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n");
return -errno;
}
id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
if (id == -1) {
- printf("Failed to create queue\n");
+ ksft_test_result_fail("Failed to create queue\n");
return -errno;
}
if (id != msgque->msq_id) {
- printf("Restored queue has wrong id (%d instead of %d)\n",
- id, msgque->msq_id);
+ ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n"
+ , id, msgque->msq_id);
ret = -EFAULT;
goto destroy;
}
@@ -66,7 +66,7 @@ int restore_queue(struct msgque_data *msgque)
for (i = 0; i < msgque->qnum; i++) {
if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
msgque->messages[i].msize, IPC_NOWAIT) != 0) {
- printf("msgsnd failed (%m)\n");
+ ksft_test_result_fail("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
}
@@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == ENOMSG)
break;
- printf("Failed to read IPC message: %m\n");
+ ksft_test_result_fail("Failed to read IPC message: %m\n");
ret = -errno;
goto err;
}
if (ret != msgque->messages[cnt].msize) {
- printf("Wrong message size: %d (expected %d)\n", ret,
- msgque->messages[cnt].msize);
+ ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize);
ret = -EINVAL;
goto err;
}
if (message.mtype != msgque->messages[cnt].mtype) {
- printf("Wrong message type\n");
+ ksft_test_result_fail("Wrong message type\n");
ret = -EINVAL;
goto err;
}
if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
- printf("Wrong message content\n");
+ ksft_test_result_fail("Wrong message content\n");
ret = -EINVAL;
goto err;
}
@@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque)
}
if (cnt != msgque->qnum) {
- printf("Wrong message number\n");
+ ksft_test_result_fail("Wrong message number\n");
ret = -EINVAL;
goto err;
}
@@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque)
if (ret < 0) {
if (errno == EINVAL)
continue;
- printf("Failed to get stats for IPC queue with id %d\n",
+ ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n",
kern_id);
return -errno;
}
@@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque)
msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
if (msgque->messages == NULL) {
- printf("Failed to get stats for IPC queue\n");
+ ksft_test_result_fail("Failed to get stats for IPC queue\n");
return -ENOMEM;
}
@@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque)
ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
if (ret < 0) {
- printf("Failed to copy IPC message: %m (%d)\n", errno);
+ ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno);
return -errno;
}
msgque->messages[i].msize = ret;
@@ -178,7 +177,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("First message send failed (%m)\n");
+ ksft_test_result_fail("First message send failed (%m)\n");
return -errno;
}
@@ -186,7 +185,7 @@ int fill_msgque(struct msgque_data *msgque)
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
IPC_NOWAIT) != 0) {
- printf("Second message send failed (%m)\n");
+ ksft_test_result_fail("Second message send failed (%m)\n");
return -errno;
}
return 0;
@@ -202,44 +201,44 @@ int main(int argc, char **argv)
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
- printf("Can't make key: %d\n", -errno);
+ ksft_test_result_fail("Can't make key: %d\n", -errno);
ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
if (msgque.msq_id == -1) {
err = -errno;
- printf("Can't create queue: %d\n", err);
+ ksft_test_result_fail("Can't create queue: %d\n", err);
goto err_out;
}
err = fill_msgque(&msgque);
if (err) {
- printf("Failed to fill queue: %d\n", err);
+ ksft_test_result_fail("Failed to fill queue: %d\n", err);
goto err_destroy;
}
err = dump_queue(&msgque);
if (err) {
- printf("Failed to dump queue: %d\n", err);
+ ksft_test_result_fail("Failed to dump queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to check and destroy queue: %d\n", err);
+ ksft_test_result_fail("Failed to check and destroy queue: %d\n", err);
goto err_out;
}
err = restore_queue(&msgque);
if (err) {
- printf("Failed to restore queue: %d\n", err);
+ ksft_test_result_fail("Failed to restore queue: %d\n", err);
goto err_destroy;
}
err = check_and_destroy_queue(&msgque);
if (err) {
- printf("Failed to test queue: %d\n", err);
+ ksft_test_result_fail("Failed to test queue: %d\n", err);
goto err_out;
}
ksft_exit_pass();
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index f4a15cbdd5ea..adfcf50b1264 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -23,7 +23,7 @@
#include <dirent.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define TEST_SCANCODES 10
#define SYSFS_PATH_MAX 256
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index d7a8e321bb16..79aa438b7479 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -18,7 +18,7 @@
#include <sys/wait.h>
#include <sys/epoll.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
{
diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore
new file mode 100644
index 000000000000..5f3d9e089ae8
--- /dev/null
+++ b/tools/testing/selftests/kexec/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_kexec_jump
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index 67fe7a46cb62..874cfdd3b75b 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
+include ../../../scripts/Makefile.arch
+
+ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
+TEST_PROGS += test_kexec_jump.sh
+TEST_GEN_PROGS := test_kexec_jump
+endif
+
include ../lib.mk
endif
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c
new file mode 100644
index 000000000000..fbce287866f5
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.c
@@ -0,0 +1,72 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+asm(
+ " .code64\n"
+ " .data\n"
+ "purgatory_start:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start_b(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ // Same again
+ "purgatory_start_b:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ "purgatory_end:\n"
+ ".previous"
+);
+extern char purgatory_start[], purgatory_end[];
+
+int main (void)
+{
+ struct kexec_segment segment = {};
+ int ret;
+
+ segment.buf = purgatory_start;
+ segment.bufsz = purgatory_end - purgatory_start;
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh
new file mode 100755
index 000000000000..6ae977054ba2
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Prevent loading a kernel image via the kexec_load syscall when
+# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.)
+
+TEST="$0"
+. ./kexec_common_lib.sh
+
+# kexec requires root privileges
+require_root_privileges
+
+# get the kernel config
+get_kconfig
+
+kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled"
+if [ $? -eq 0 ]; then
+ log_skip "kexec_jump is not enabled"
+fi
+
+kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled"
+ima_appraise=$?
+
+kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \
+ "IMA architecture specific policy enabled"
+arch_policy=$?
+
+get_secureboot_mode
+secureboot=$?
+
+if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then
+ log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled"
+fi
+
+./test_kexec_jump
+if [ $? -eq 0 ]; then
+ log_pass "kexec_jump succeeded"
+else
+ # The more likely failure mode if anything went wrong is that the
+ # kernel just crashes. But if we get back here, sure, whine anyway.
+ log_fail "kexec_jump failed"
+fi
diff --git a/tools/testing/selftests/kho/arm64.conf b/tools/testing/selftests/kho/arm64.conf
new file mode 100644
index 000000000000..ee696807cd35
--- /dev/null
+++ b/tools/testing/selftests/kho/arm64.conf
@@ -0,0 +1,9 @@
+QEMU_CMD="qemu-system-aarch64 -M virt -cpu max"
+QEMU_KCONFIG="
+CONFIG_SERIAL_AMBA_PL010=y
+CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+"
+KERNEL_IMAGE="Image"
+KERNEL_CMDLINE="console=ttyAMA0"
diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c
new file mode 100644
index 000000000000..6d9e91d55d68
--- /dev/null
+++ b/tools/testing/selftests/kho/init.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/mount.h>
+#include <sys/reboot.h>
+#include <linux/kexec.h>
+
+/* from arch/x86/include/asm/setup.h */
+#define COMMAND_LINE_SIZE 2048
+
+#define KHO_FINALIZE "/debugfs/kho/out/finalize"
+#define KERNEL_IMAGE "/kernel"
+
+static int mount_filesystems(void)
+{
+ if (mount("debugfs", "/debugfs", "debugfs", 0, NULL) < 0)
+ return -1;
+
+ return mount("proc", "/proc", "proc", 0, NULL);
+}
+
+static int kho_enable(void)
+{
+ const char enable[] = "1";
+ int fd;
+
+ fd = open(KHO_FINALIZE, O_RDWR);
+ if (fd < 0)
+ return -1;
+
+ if (write(fd, enable, sizeof(enable)) != sizeof(enable))
+ return 1;
+
+ close(fd);
+ return 0;
+}
+
+static long kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len, const char *cmdline,
+ unsigned long flags)
+{
+ return syscall(__NR_kexec_file_load, kernel_fd, initrd_fd, cmdline_len,
+ cmdline, flags);
+}
+
+static int kexec_load(void)
+{
+ char cmdline[COMMAND_LINE_SIZE];
+ ssize_t len;
+ int fd, err;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ len = read(fd, cmdline, sizeof(cmdline));
+ close(fd);
+ if (len < 0)
+ return -1;
+
+ /* replace \n with \0 */
+ cmdline[len - 1] = 0;
+ fd = open(KERNEL_IMAGE, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ err = kexec_file_load(fd, -1, len, cmdline, KEXEC_FILE_NO_INITRAMFS);
+ close(fd);
+
+ return err ? : 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (mount_filesystems())
+ goto err_reboot;
+
+ if (kho_enable())
+ goto err_reboot;
+
+ if (kexec_load())
+ goto err_reboot;
+
+ if (reboot(RB_KEXEC))
+ goto err_reboot;
+
+ return 0;
+
+err_reboot:
+ reboot(RB_AUTOBOOT);
+ return -1;
+}
diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh
new file mode 100755
index 000000000000..49fdac8e8b15
--- /dev/null
+++ b/tools/testing/selftests/kho/vmtest.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -ue
+
+CROSS_COMPILE="${CROSS_COMPILE:-""}"
+
+test_dir=$(realpath "$(dirname "$0")")
+kernel_dir=$(realpath "$test_dir/../../../..")
+
+tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX)
+headers_dir="$tmp_dir/usr"
+initrd="$tmp_dir/initrd.cpio"
+
+source "$test_dir/../kselftest/ktap_helpers.sh"
+
+function usage() {
+ cat <<EOF
+$0 [-d build_dir] [-j jobs] [-t target_arch] [-h]
+Options:
+ -d) path to the kernel build directory
+ -j) number of jobs for compilation, similar to -j in make
+ -t) run test for target_arch, requires CROSS_COMPILE set
+ supported targets: aarch64, x86_64
+ -h) display this help
+EOF
+}
+
+function cleanup() {
+ rm -fr "$tmp_dir"
+ ktap_finished
+}
+trap cleanup EXIT
+
+function skip() {
+ local msg=${1:-""}
+
+ ktap_test_skip "$msg"
+ exit "$KSFT_SKIP"
+}
+
+function fail() {
+ local msg=${1:-""}
+
+ ktap_test_fail "$msg"
+ exit "$KSFT_FAIL"
+}
+
+function build_kernel() {
+ local build_dir=$1
+ local make_cmd=$2
+ local arch_kconfig=$3
+ local kimage=$4
+
+ local kho_config="$tmp_dir/kho.config"
+ local kconfig="$build_dir/.config"
+
+ # enable initrd, KHO and KHO test in kernel configuration
+ tee "$kconfig" > "$kho_config" <<EOF
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
+CONFIG_TEST_KEXEC_HANDOVER=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_VM=y
+$arch_kconfig
+EOF
+
+ make_cmd="$make_cmd -C $kernel_dir O=$build_dir"
+ $make_cmd olddefconfig
+
+ # verify that kernel confiration has all necessary options
+ while read -r opt ; do
+ grep "$opt" "$kconfig" &>/dev/null || skip "$opt is missing"
+ done < "$kho_config"
+
+ $make_cmd "$kimage"
+ $make_cmd headers_install INSTALL_HDR_PATH="$headers_dir"
+}
+
+function mkinitrd() {
+ local kernel=$1
+
+ "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \
+ -fno-asynchronous-unwind-tables -fno-ident \
+ -I "$headers_dir/include" \
+ -I "$kernel_dir/tools/include/nolibc" \
+ -o "$tmp_dir/init" "$test_dir/init.c"
+
+ cat > "$tmp_dir/cpio_list" <<EOF
+dir /dev 0755 0 0
+dir /proc 0755 0 0
+dir /debugfs 0755 0 0
+nod /dev/console 0600 0 0 c 5 1
+file /init $tmp_dir/init 0755 0 0
+file /kernel $kernel 0644 0 0
+EOF
+
+ "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd"
+}
+
+function run_qemu() {
+ local qemu_cmd=$1
+ local cmdline=$2
+ local kernel=$3
+ local serial="$tmp_dir/qemu.serial"
+
+ cmdline="$cmdline kho=on panic=-1"
+
+ $qemu_cmd -m 1G -smp 2 -no-reboot -nographic -nodefaults \
+ -accel kvm -accel hvf -accel tcg \
+ -serial file:"$serial" \
+ -append "$cmdline" \
+ -kernel "$kernel" \
+ -initrd "$initrd"
+
+ grep "KHO restore succeeded" "$serial" &> /dev/null || fail "KHO failed"
+}
+
+function target_to_arch() {
+ local target=$1
+
+ case $target in
+ aarch64) echo "arm64" ;;
+ x86_64) echo "x86" ;;
+ *) skip "architecture $target is not supported"
+ esac
+}
+
+function main() {
+ local build_dir="$kernel_dir/.kho"
+ local jobs=$(($(nproc) * 2))
+ local target="$(uname -m)"
+
+ # skip the test if any of the preparation steps fails
+ set -o errtrace
+ trap skip ERR
+
+ while getopts 'hd:j:t:' opt; do
+ case $opt in
+ d)
+ build_dir="$OPTARG"
+ ;;
+ j)
+ jobs="$OPTARG"
+ ;;
+ t)
+ target="$OPTARG"
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ *)
+ echo Unknown argument "$opt"
+ usage
+ exit 1
+ ;;
+ esac
+ done
+
+ ktap_print_header
+ ktap_set_plan 1
+
+ if [[ "$target" != "$(uname -m)" ]] && [[ -z "$CROSS_COMPILE" ]]; then
+ skip "Cross-platform testing needs to specify CROSS_COMPILE"
+ fi
+
+ mkdir -p "$build_dir"
+ local arch=$(target_to_arch "$target")
+ source "$test_dir/$arch.conf"
+
+ # build the kernel and create initrd
+ # initrd includes the kernel image that will be kexec'ed
+ local make_cmd="make ARCH=$arch CROSS_COMPILE=$CROSS_COMPILE -j$jobs"
+ build_kernel "$build_dir" "$make_cmd" "$QEMU_KCONFIG" "$KERNEL_IMAGE"
+
+ local kernel="$build_dir/arch/$arch/boot/$KERNEL_IMAGE"
+ mkinitrd "$kernel"
+
+ run_qemu "$QEMU_CMD" "$KERNEL_CMDLINE" "$kernel"
+
+ ktap_test_pass "KHO succeeded"
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kho/x86.conf b/tools/testing/selftests/kho/x86.conf
new file mode 100644
index 000000000000..b419e610ca22
--- /dev/null
+++ b/tools/testing/selftests/kho/x86.conf
@@ -0,0 +1,7 @@
+QEMU_CMD=qemu-system-x86_64
+QEMU_KCONFIG="
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+"
+KERNEL_IMAGE="bzImage"
+KERNEL_CMDLINE="console=ttyS0"
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
index 259f4fd6b5e2..1f1e63494af9 100644
--- a/tools/testing/selftests/kmod/config
+++ b/tools/testing/selftests/kmod/config
@@ -1,7 +1,2 @@
CONFIG_TEST_KMOD=m
CONFIG_TEST_LKM=m
-CONFIG_XFS_FS=m
-
-# For the module parameter force_init_test is used
-CONFIG_TUN=m
-CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index cdf91b0ca40f..afbcf8412ae5 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -54,6 +54,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <sys/utsname.h>
@@ -92,6 +93,14 @@
#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -104,6 +113,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
+static bool ksft_debug_enabled;
static inline unsigned int ksft_test_num(void)
{
@@ -175,6 +185,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
va_end(args);
}
+static inline void ksft_print_dbg_msg(const char *msg, ...)
+{
+ va_list args;
+
+ if (!ksft_debug_enabled)
+ return;
+
+ va_start(args, msg);
+ ksft_print_msg(msg, args);
+ va_end(args);
+}
+
static inline void ksft_perror(const char *msg)
{
ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
@@ -444,10 +466,6 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...
static inline int ksft_min_kernel_version(unsigned int min_major,
unsigned int min_minor)
{
-#ifdef NOLIBC
- ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
- return 0;
-#else
unsigned int major, minor;
struct utsname info;
@@ -455,7 +473,6 @@ static inline int ksft_min_kernel_version(unsigned int min_major,
ksft_exit_fail_msg("Can't parse kernel version\n");
return major > min_major || (major == min_major && minor >= min_minor);
-#endif
}
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index fb4733faff12..51fb65159932 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -11,7 +11,7 @@
# SPDX-License-Identifier: GPL-2.0+
# $(dirname $0)/../kselftest/module.sh "description" module_name
#
-# Example: tools/testing/selftests/lib/printf.sh
+# Example: tools/testing/selftests/lib/bitmap.sh
desc="" # Output prefix.
module="" # Filename (without the .ko).
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 2c3c58e65a41..3a62039fa621 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -44,6 +44,12 @@ tap_timeout()
fi
}
+report_failure()
+{
+ echo "not ok $*"
+ echo "$*" >> "$kselftest_failures_file"
+}
+
run_one()
{
DIR="$1"
@@ -105,7 +111,7 @@ run_one()
echo "# $TEST_HDR_MSG"
if [ ! -e "$TEST" ]; then
echo "# Warning: file $TEST is missing!"
- echo "not ok $test_num $TEST_HDR_MSG"
+ report_failure "$test_num $TEST_HDR_MSG"
else
if [ -x /usr/bin/stdbuf ]; then
stdbuf="/usr/bin/stdbuf --output=L "
@@ -123,7 +129,7 @@ run_one()
interpreter=$(head -n 1 "$TEST" | cut -c 3-)
cmd="$stdbuf $interpreter ./$BASENAME_TEST"
else
- echo "not ok $test_num $TEST_HDR_MSG"
+ report_failure "$test_num $TEST_HDR_MSG"
return
fi
fi
@@ -137,9 +143,9 @@ run_one()
echo "ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
echo "#"
- echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
+ report_failure "$test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
else
- echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
+ report_failure "$test_num $TEST_HDR_MSG # exit=$rc"
fi)
cd - >/dev/null
fi
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 666c9fde76da..baae6b7ded41 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -14,7 +14,7 @@
*
* .. code-block:: c
*
- * #include "../kselftest_harness.h"
+ * #include "kselftest_harness.h"
*
* TEST(standalone_test) {
* do_some_stuff;
@@ -56,6 +56,8 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
+#include <linux/unistd.h>
+#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -65,7 +67,6 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
-#include <setjmp.h>
#include "kselftest.h"
@@ -172,14 +173,11 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
- static inline void wrapper_##test_name( \
+ static void wrapper_##test_name( \
struct __test_metadata *_metadata, \
- struct __fixture_variant_metadata *variant) \
+ struct __fixture_variant_metadata __attribute__((unused)) *variant) \
{ \
- _metadata->setup_completed = true; \
- if (setjmp(_metadata->env) == 0) \
- test_name(_metadata); \
- __test_check_assert(_metadata); \
+ test_name(_metadata); \
} \
static struct __test_metadata _##test_name##_object = \
{ .name = #test_name, \
@@ -258,7 +256,7 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_SETUP(fixture_name) \
- void fixture_name##_setup( \
+ static void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -307,7 +305,7 @@
__FIXTURE_TEARDOWN(fixture_name)
#define __FIXTURE_TEARDOWN(fixture_name) \
- void fixture_name##_teardown( \
+ static void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -401,7 +399,7 @@
struct __test_metadata *_metadata, \
FIXTURE_DATA(fixture_name) *self, \
const FIXTURE_VARIANT(fixture_name) *variant); \
- static inline void wrapper_##fixture_name##_##test_name( \
+ static void wrapper_##fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
@@ -410,9 +408,9 @@
pid_t child = 1; \
int status = 0; \
/* Makes sure there is only one teardown, even when child forks again. */ \
- bool *teardown = mmap(NULL, sizeof(*teardown), \
+ _metadata->no_teardown = mmap(NULL, sizeof(*_metadata->no_teardown), \
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
- *teardown = false; \
+ *_metadata->no_teardown = true; \
if (sizeof(*self) > 0) { \
if (fixture_name##_teardown_parent) { \
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
@@ -422,31 +420,26 @@
self = &self_private; \
} \
} \
- if (setjmp(_metadata->env) == 0) { \
- /* _metadata and potentially self are shared with all forks. */ \
- child = fork(); \
- if (child == 0) { \
- fixture_name##_setup(_metadata, self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (_metadata->exit_code) \
- _exit(0); \
- _metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, self, variant->data); \
- } else if (child < 0 || child != waitpid(child, &status, 0)) { \
- ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
- _metadata->exit_code = KSFT_FAIL; \
- } \
- } \
+ _metadata->variant = variant->data; \
+ _metadata->self = self; \
+ /* _metadata and potentially self are shared with all forks. */ \
+ child = fork(); \
if (child == 0) { \
- if (_metadata->setup_completed && !fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
+ fixture_name##_setup(_metadata, self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ *_metadata->no_teardown = false; \
+ fixture_name##_##test_name(_metadata, self, variant->data); \
+ _metadata->teardown_fn(false, _metadata, self, variant->data); \
_exit(0); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
} \
- if (_metadata->setup_completed && fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
- munmap(teardown, sizeof(*teardown)); \
+ _metadata->teardown_fn(true, _metadata, self, variant->data); \
+ munmap(_metadata->no_teardown, sizeof(*_metadata->no_teardown)); \
+ _metadata->no_teardown = NULL; \
if (self && fixture_name##_teardown_parent) \
munmap(self, sizeof(*self)); \
if (WIFEXITED(status)) { \
@@ -456,7 +449,14 @@
/* Forward signal to __wait_for_test(). */ \
kill(getpid(), WTERMSIG(status)); \
} \
- __test_check_assert(_metadata); \
+ } \
+ static void wrapper_##fixture_name##_##test_name##_teardown( \
+ bool in_parent, struct __test_metadata *_metadata, \
+ void *self, const void *variant) \
+ { \
+ if (fixture_name##_teardown_parent == in_parent && \
+ !__atomic_test_and_set(_metadata->no_teardown, __ATOMIC_RELAXED)) \
+ fixture_name##_teardown(_metadata, self, variant); \
} \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
@@ -467,6 +467,7 @@
object->name = #test_name; \
object->fn = &wrapper_##fixture_name##_##test_name; \
object->fixture = &_##fixture_name##_fixture_object; \
+ object->teardown_fn = &wrapper_##fixture_name##_##test_name##_teardown; \
object->termsig = signal; \
object->timeout = tmout; \
_##fixture_name##_##test_name##_object = object; \
@@ -750,7 +751,7 @@
for (; _metadata->trigger; _metadata->trigger = \
__bail(_assert, _metadata))
-#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
+#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
@@ -758,7 +759,7 @@
__typeof__(_seen) __seen = (_seen); \
if (!(__exp _t __seen)) { \
/* Report with actual signedness to avoid weird output. */ \
- switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \
case 0: { \
uintmax_t __exp_print = (uintmax_t)__exp; \
uintmax_t __seen_print = (uintmax_t)__seen; \
@@ -910,14 +911,16 @@ struct __test_metadata {
struct __fixture_variant_metadata *);
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
+ void (*teardown_fn)(bool in_parent, struct __test_metadata *_metadata,
+ void *self, const void *variant);
int termsig;
int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
- bool timed_out; /* did this test timeout instead of exiting? */
bool aborted; /* stopped test due to failed ASSERT */
- bool setup_completed; /* did setup finish? */
- jmp_buf env; /* for exiting out of test early */
+ bool *no_teardown; /* fixture needs teardown */
+ void *self;
+ const void *variant;
struct __test_results *results;
struct __test_metadata *prev, *next;
};
@@ -951,88 +954,60 @@ static inline int __bail(int for_realz, struct __test_metadata *t)
{
/* if this is ASSERT, return immediately. */
if (for_realz) {
- t->aborted = true;
- longjmp(t->env, 1);
+ if (t->teardown_fn)
+ t->teardown_fn(false, t, t->self, t->variant);
+ abort();
}
/* otherwise, end the for loop and continue. */
return 0;
}
-static inline void __test_check_assert(struct __test_metadata *t)
+static void __wait_for_test(struct __test_metadata *t)
{
- if (t->aborted)
- abort();
-}
-
-struct __test_metadata *__active_test;
-static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
-{
- struct __test_metadata *t = __active_test;
-
- /* Sanity check handler execution environment. */
- if (!t) {
- fprintf(TH_LOG_STREAM,
- "# no active test in SIGALRM handler!?\n");
- abort();
- }
- if (sig != SIGALRM || sig != info->si_signo) {
- fprintf(TH_LOG_STREAM,
- "# %s: SIGALRM handler caught signal %d!?\n",
- t->name, sig != SIGALRM ? sig : info->si_signo);
- abort();
- }
-
- t->timed_out = true;
- // signal process group
- kill(-(t->pid), SIGKILL);
-}
-
-void __wait_for_test(struct __test_metadata *t)
-{
- struct sigaction action = {
- .sa_sigaction = __timeout_handler,
- .sa_flags = SA_SIGINFO,
- };
- struct sigaction saved_action;
/*
* Sets status so that WIFEXITED(status) returns true and
* WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
* should never be evaluated because of the waitpid(2) check and
- * SIGALRM handling.
+ * timeout handling.
*/
int status = KSFT_FAIL << 8;
- int child;
+ struct pollfd poll_child;
+ int ret, child, childfd;
+ bool timed_out = false;
- if (sigaction(SIGALRM, &action, &saved_action)) {
+ childfd = syscall(__NR_pidfd_open, t->pid, 0);
+ if (childfd == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to install SIGALRM handler\n",
+ "# %s: unable to open pidfd\n",
t->name);
return;
}
- __active_test = t;
- t->timed_out = false;
- alarm(t->timeout);
- child = waitpid(t->pid, &status, 0);
- if (child == -1 && errno != EINTR) {
+
+ poll_child.fd = childfd;
+ poll_child.events = POLLIN;
+ ret = poll(&poll_child, 1, t->timeout * 1000);
+ if (ret == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Failed to wait for PID %d (errno: %d)\n",
- t->name, t->pid, errno);
+ "# %s: unable to wait on child pidfd\n",
+ t->name);
return;
+ } else if (ret == 0) {
+ timed_out = true;
+ /* signal process group */
+ kill(-(t->pid), SIGKILL);
}
-
- alarm(0);
- if (sigaction(SIGALRM, &saved_action, NULL)) {
+ child = waitpid(t->pid, &status, WNOHANG);
+ if (child == -1 && errno != EINTR) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to uninstall SIGALRM handler\n",
- t->name);
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
return;
}
- __active_test = NULL;
- if (t->timed_out) {
+ if (timed_out) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
@@ -1116,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv)
{
int opt;
- while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) {
switch (opt) {
case 'f':
case 'F':
@@ -1129,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv)
case 'l':
test_harness_list_tests();
return KSFT_SKIP;
+ case 'd':
+ ksft_debug_enabled = true;
+ break;
case 'h':
default:
fprintf(stderr,
- "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n"
+ "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n"
"\t-h print help\n"
"\t-l list all tests\n"
+ "\t-d enable debug prints\n"
"\n"
"\t-t name include test\n"
"\t-T name exclude test\n"
@@ -1167,8 +1146,9 @@ static bool test_enabled(int argc, char **argv,
int opt;
optind = 1;
- while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) {
- has_positive |= islower(opt);
+ while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) {
+ if (opt != 'd')
+ has_positive |= islower(opt);
switch (tolower(opt)) {
case 't':
@@ -1205,9 +1185,9 @@ static bool test_enabled(int argc, char **argv,
return !has_positive;
}
-void __run_test(struct __fixture_metadata *f,
- struct __fixture_variant_metadata *variant,
- struct __test_metadata *t)
+static void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
{
struct __test_xfail *xfail;
char test_name[1024];
@@ -1218,8 +1198,7 @@ void __run_test(struct __fixture_metadata *f,
t->exit_code = KSFT_PASS;
t->trigger = 0;
t->aborted = false;
- t->setup_completed = false;
- memset(t->env, 0, sizeof(t->env));
+ t->no_teardown = NULL;
memset(t->results->reason, 0, sizeof(t->results->reason));
snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
diff --git a/tools/testing/selftests/kselftest_harness/.gitignore b/tools/testing/selftests/kselftest_harness/.gitignore
new file mode 100644
index 000000000000..e4e476a333c9
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/.gitignore
@@ -0,0 +1,2 @@
+/harness-selftest
+/harness-selftest.seen
diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile
new file mode 100644
index 000000000000..d2369c01701a
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS_EXTENDED := harness-selftest
+TEST_PROGS := harness-selftest.sh
+TEST_FILES := harness-selftest.expected
+EXTRA_CLEAN := harness-selftest.seen
+
+include ../lib.mk
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c
new file mode 100644
index 000000000000..7820bb5d0e6d
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+
+#include <sys/resource.h>
+#include <sys/prctl.h>
+
+/* Avoid any inconsistencies */
+#define TH_LOG_STREAM stdout
+
+#include "kselftest_harness.h"
+
+static void test_helper(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(0, 0);
+}
+
+TEST(standalone_pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ EXPECT_EQ(0, 0);
+ test_helper(_metadata);
+ TH_LOG("after");
+}
+
+TEST(standalone_fail) {
+ TH_LOG("before");
+ EXPECT_EQ(0, 0);
+ EXPECT_EQ(0, 1);
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+}
+
+TEST_SIGNAL(signal_pass, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+TEST_SIGNAL(signal_fail, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+FIXTURE(fixture) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN(fixture) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ test_helper(_metadata);
+ standalone_pass(_metadata);
+ TH_LOG("after");
+}
+
+TEST_F(fixture, fail) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ fixture_pass(_metadata, self, variant);
+ TH_LOG("after");
+}
+
+TEST_F_TIMEOUT(fixture, timeout, 1) {
+ TH_LOG("before");
+ sleep(2);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_parent) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_parent) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN_PARENT(fixture_parent) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_parent, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_setup_failure) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_setup_failure) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+ ASSERT_EQ(0, 1);
+}
+
+FIXTURE_TEARDOWN(fixture_setup_failure) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_setup_failure, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+int main(int argc, char **argv)
+{
+ /*
+ * The harness uses abort() to signal assertion failures, which triggers coredumps.
+ * This may be useful to debug real failures but not for this selftest, disable them.
+ */
+ struct rlimit rlimit = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
+ setrlimit(RLIMIT_CORE, &rlimit);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.expected b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
new file mode 100644
index 000000000000..97e1418c1c7e
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
@@ -0,0 +1,64 @@
+TAP version 13
+1..9
+# Starting 9 tests from 4 test cases.
+# RUN global.standalone_pass ...
+# harness-selftest.c:19:standalone_pass:before
+# harness-selftest.c:23:standalone_pass:after
+# OK global.standalone_pass
+ok 1 global.standalone_pass
+# RUN global.standalone_fail ...
+# harness-selftest.c:27:standalone_fail:before
+# harness-selftest.c:29:standalone_fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:30:standalone_fail:Expected 0 (0) == 1 (1)
+# standalone_fail: Test terminated by assertion
+# FAIL global.standalone_fail
+not ok 2 global.standalone_fail
+# RUN global.signal_pass ...
+# harness-selftest.c:35:signal_pass:before
+# harness-selftest.c:37:signal_pass:after
+# OK global.signal_pass
+ok 3 global.signal_pass
+# RUN global.signal_fail ...
+# harness-selftest.c:42:signal_fail:before
+# harness-selftest.c:43:signal_fail:Expected 0 (0) == 1 (1)
+# signal_fail: Test terminated by assertion
+# FAIL global.signal_fail
+not ok 4 global.signal_fail
+# RUN fixture.pass ...
+# harness-selftest.c:53:pass:setup
+# harness-selftest.c:62:pass:before
+# harness-selftest.c:19:pass:before
+# harness-selftest.c:23:pass:after
+# harness-selftest.c:66:pass:after
+# harness-selftest.c:58:pass:teardown same-process=1
+# OK fixture.pass
+ok 5 fixture.pass
+# RUN fixture.fail ...
+# harness-selftest.c:53:fail:setup
+# harness-selftest.c:70:fail:before
+# harness-selftest.c:71:fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:58:fail:teardown same-process=1
+# fail: Test terminated by assertion
+# FAIL fixture.fail
+not ok 6 fixture.fail
+# RUN fixture.timeout ...
+# harness-selftest.c:53:timeout:setup
+# harness-selftest.c:77:timeout:before
+# timeout: Test terminated by timeout
+# FAIL fixture.timeout
+not ok 7 fixture.timeout
+# RUN fixture_parent.pass ...
+# harness-selftest.c:87:pass:setup
+# harness-selftest.c:96:pass:before
+# harness-selftest.c:98:pass:after
+# harness-selftest.c:92:pass:teardown same-process=0
+# OK fixture_parent.pass
+ok 8 fixture_parent.pass
+# RUN fixture_setup_failure.pass ...
+# harness-selftest.c:106:pass:setup
+# harness-selftest.c:108:pass:Expected 0 (0) == 1 (1)
+# pass: Test terminated by assertion
+# FAIL fixture_setup_failure.pass
+not ok 9 fixture_setup_failure.pass
+# FAILED: 4 / 9 tests passed.
+# Totals: pass:4 fail:5 xfail:0 xpass:0 skip:0 error:0
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.sh b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
new file mode 100755
index 000000000000..fe72d16370fe
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Selftest for kselftest_harness.h
+#
+
+set -e
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+"$DIR"/harness-selftest > harness-selftest.seen || true
+
+diff -u "$DIR"/harness-selftest.expected harness-selftest.seen
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 20af35a91d6f..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -3,10 +3,10 @@ top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
- ARCH := x86
+ override ARCH := x86
endif
include Makefile.kvm
else
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4277b983cace..ba5c2b643efa 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -8,6 +8,7 @@ LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
LIBKVM += lib/memstress.c
LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
@@ -47,15 +48,31 @@ LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
@@ -68,8 +85,15 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -91,14 +115,12 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
@@ -116,36 +138,35 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
TEST_GEN_PROGS_x86 += memslot_modification_stress_test
TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hello_el2
+TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
-TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
@@ -154,25 +175,21 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += guest_memfd_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -181,29 +198,37 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += access_tracking_perf_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += dirty_log_perf_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += memslot_modification_stress_test
+TEST_GEN_PROGS_riscv += memslot_perf_test
+TEST_GEN_PROGS_riscv += mmu_stress_test
+TEST_GEN_PROGS_riscv += rseq_test
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch = arch_timer
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
@@ -218,6 +243,7 @@ OVERRIDE_TARGETS = 1
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -271,7 +297,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..b058f27b2141 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -7,9 +7,11 @@
* This test measures the performance effects of KVM's access tracking.
* Access tracking is driven by the MMU notifiers test_young, clear_young, and
* clear_flush_young. These notifiers do not have a direct userspace API,
- * however the clear_young notifier can be triggered by marking a pages as idle
- * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
- * enable access tracking on guest memory.
+ * however the clear_young notifier can be triggered either by
+ * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
*
* To measure performance this test runs a VM with a configurable number of
* vCPUs that each touch every page in disjoint regions of memory. Performance
@@ -17,10 +19,11 @@
* predefined region.
*
* Note that a deterministic correctness test of access tracking is not possible
- * by using page_idle as it exists today. This is for a few reasons:
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
*
- * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
- * means subsequent guest accesses are not guaranteed to see page table
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
* updates made by KVM until some time in the future.
*
* 2. page_idle only operates on LRU pages. Newly allocated pages are not
@@ -47,10 +50,19 @@
#include "memstress.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
+
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
/* Global variable used to synchronize all of the vCPU threads. */
static int iteration;
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
/* Defines what vCPU threads should do during a given iteration. */
static enum {
/* Run the vCPU to access all its memory. */
@@ -65,6 +77,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
/* Whether to overlap the regions of memory vCPUs access. */
static bool overlap_memory_access;
+/*
+ * If the test should only warn if there are too many idle pages (i.e., it is
+ * expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
struct test_params {
/* The backing source for the region of memory. */
enum vm_mem_backing_src_type backing_src;
@@ -123,8 +154,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm,
- struct memstress_vcpu_args *vcpu_args)
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
{
int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t base_gva = vcpu_args->gva;
@@ -177,27 +224,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
- *
- * When running the guest as a nested VM, "warn" instead of asserting
- * as the TLB size is effectively unlimited and the KVM doesn't
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
- * are cached and the guest won't see the "idle" bit cleared.
*/
- if (still_idle >= pages / 10) {
-#ifdef __x86_64__
- TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
- "vCPU%d: Too many pages still idle (%lu out of %lu)",
- vcpu_idx, still_idle, pages);
-#endif
- printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
- "this will affect performance results.\n",
- vcpu_idx, still_idle, pages);
- }
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
close(page_idle_fd);
close(pagemap_fd);
}
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
{
struct ucall uc;
@@ -237,9 +336,9 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_args);
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
break;
- };
+ }
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
@@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus,
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
/*
* Even though this parallelizes the work across vCPUs, this is still a
* very slow operation because page_idle forces the test to mark one pfn
- * at a time and the clear_young notifier serializes on the KVM MMU
+ * at a time and the clear_young notifier may serialize on the KVM MMU
* lock.
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, nr_vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
}
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (uint64_t)getpagesize());
+
memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
/* As a control, read and write to the populated memory first. */
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
@@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memstress_destroy_vm(vm);
}
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
static void help(char *name)
{
puts("");
@@ -342,11 +502,22 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
backing_src_help("-s");
puts("");
exit(0);
}
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
int main(int argc, char *argv[])
{
struct test_params params = {
@@ -354,12 +525,13 @@ int main(int argc, char *argv[])
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.nr_vcpus = 1,
};
+ char *new_cg = NULL;
int page_idle_fd;
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -376,6 +548,11 @@ int main(int argc, char *argv[])
case 's':
params.backing_src = parse_backing_src_type(optarg);
break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -383,12 +560,50 @@ int main(int argc, char *argv[])
}
}
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "CONFIG_IDLE_PAGE_TRACKING is not enabled");
- close(page_idle_fd);
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
- for_each_guest_mode(run_test, &params);
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR,
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index acb2cb596332..cf8fb67104f1 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void)
static int test_migrate_vcpu(unsigned int vcpu_idx)
{
int ret;
- cpu_set_t cpuset;
uint32_t new_pcpu = test_get_pcpu();
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
- ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
- sizeof(cpuset), &cpuset);
+ ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index cef8f7323ceb..713005b6f508 100644
--- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
index eeba1cc87ff8..d592a4515399 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
@@ -165,10 +165,8 @@ static void guest_code(void)
static void test_init_timer_irq(struct kvm_vm *vm)
{
/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
-static int gic_fd;
-
struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
int nr_vcpus = test_args.nr_vcpus;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
vm_init_descriptor_tables(vm);
@@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
@@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void)
void test_vm_cleanup(struct kvm_vm *vm)
{
- close(gic_fd);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..993c9e38e729 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -22,7 +22,8 @@
#include "gic.h"
#include "vgic.h"
-static const uint64_t CVAL_MAX = ~0ULL;
+/* Depends on counter width. */
+static uint64_t CVAL_MAX;
/* tval is a signed 32-bit int. */
static const int32_t TVAL_MAX = INT32_MAX;
static const int32_t TVAL_MIN = INT32_MIN;
@@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */
+static uint64_t DEF_CNT;
/* Number of runs. */
static const uint32_t NR_TEST_ITERS_DEF = 5;
@@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
}
static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
@@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer)
test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
wm);
}
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
- }
}
/*
@@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer)
GUEST_DONE();
}
+static cpu_set_t default_cpuset;
+
static uint32_t next_pcpu(void)
{
uint32_t max = get_nprocs();
uint32_t cur = sched_getcpu();
uint32_t next = cur;
- cpu_set_t cpuset;
+ cpu_set_t cpuset = default_cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
@@ -867,25 +862,6 @@ static uint32_t next_pcpu(void)
return next;
}
-static void migrate_self(uint32_t new_pcpu)
-{
- int ret;
- cpu_set_t cpuset;
- pthread_t thread;
-
- thread = pthread_self();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
- pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
- ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
- TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
- new_pcpu, ret);
-}
-
static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
enum arch_timer timer)
{
@@ -912,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
sched_yield();
break;
case USERSPACE_MIGRATE_SELF:
- migrate_self(next_pcpu());
+ pin_self_to_cpu(next_pcpu());
break;
default:
break;
@@ -924,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
struct ucall uc;
/* Start on CPU 0 */
- migrate_self(0);
+ pin_self_to_cpu(0);
while (true) {
vcpu_run(vcpu);
@@ -948,10 +924,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpu);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpu);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -973,8 +947,15 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
+
sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
}
static void test_print_help(char *name)
@@ -986,7 +967,7 @@ static void test_print_help(char *name)
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
- pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
@@ -1035,6 +1016,17 @@ static bool parse_args(int argc, char *argv[])
return false;
}
+static void set_counter_defaults(void)
+{
+ const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ uint64_t freq = read_sysreg(CNTFRQ_EL0);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -1043,19 +1035,24 @@ int main(int argc, char *argv[])
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
return 0;
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..c8ee6f520734
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+ TEST_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+ "AT "#op": %sexpected fault (par: %lx)1", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ GUEST_SYNC(TEST_ACCESS_FLAG); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ /*
+ * KVM's software PTW makes the implementation choice that the AT
+ * instruction sets the access flag.
+ */
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, forcing KVM to
+ * use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ case TEST_ACCESS_FLAG:
+ TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
+ "Expected access flag to be set (desc: %lu)", *ptep_hva);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index c7fb55c9135b..1d431de8729c 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
/* Reset all bcr/bvr/wcr/wvr registers */
dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0);
for (i = 0; i <= brps; i++) {
write_dbgbcr(i, 0);
write_dbgbvr(i, 0);
}
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0);
for (i = 0; i <= wrps; i++) {
write_dbgwcr(i, 0);
write_dbgwvr(i, 0);
@@ -140,7 +140,7 @@ static void enable_os_lock(void)
static void enable_monitor_debug_exceptions(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
static void install_ss(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt)
static int debug_version(uint64_t id_aa64dfr0)
{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+ return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0);
}
static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
@@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
int b, w, c;
/* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1;
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
/* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+ wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1;
/* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+ ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1;
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
brp_num, wrp_num, ctx_brp_num);
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..d8fe17a6cc59
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_sea_s1ptw_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
+
+ GUEST_DONE();
+}
+
+static noinline void test_s1ptw_abort_guest(void)
+{
+ extern char test_s1ptw_abort_insn;
+
+ WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
+
+ asm volatile("test_s1ptw_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("Load on S1PTW abort should not retire");
+}
+
+static void test_s1ptw_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ u64 *ptep, bad_pa;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
+ expect_sea_s1ptw_handler);
+
+ ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
+ bad_pa = BIT(vm->pa_bits) - vm->page_size;
+
+ *ptep &= ~GENMASK(47, 12);
+ *ptep |= bad_pa;
+
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+ test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d43fb3f49050..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -15,6 +15,12 @@
#include "test_util.h"
#include "processor.h"
+#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
struct feature_id_reg {
__u64 reg;
__u64 id_reg;
@@ -22,37 +28,48 @@ struct feature_id_reg {
__u64 feat_min;
};
-static struct feature_id_reg feat_id_regs[] = {
- {
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 0,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- },
- {
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
+#define FEAT(id, f, v) \
+ .id_reg = SYS_REG(id), \
+ .feat_shift = id ## _ ## f ## _SHIFT, \
+ .feat_min = id ## _ ## f ## _ ## v
+
+#define REG_FEAT(r, id, f, v) \
+ { \
+ .reg = SYS_REG(r), \
+ FEAT(id, f, v) \
}
+
+static struct feature_id_reg feat_id_regs[] = {
+ REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP),
+ REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+ REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
};
bool filter_reg(__u64 reg)
@@ -332,9 +349,21 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -468,6 +497,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
@@ -685,6 +715,67 @@ static __u64 pauth_generic_regs[] = {
ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
};
+static __u64 el2_regs[] = {
+ SYS_REG(VPIDR_EL2),
+ SYS_REG(VMPIDR_EL2),
+ SYS_REG(SCTLR_EL2),
+ SYS_REG(ACTLR_EL2),
+ SYS_REG(SCTLR2_EL2),
+ SYS_REG(HCR_EL2),
+ SYS_REG(MDCR_EL2),
+ SYS_REG(CPTR_EL2),
+ SYS_REG(HSTR_EL2),
+ SYS_REG(HFGRTR_EL2),
+ SYS_REG(HFGWTR_EL2),
+ SYS_REG(HFGITR_EL2),
+ SYS_REG(HACR_EL2),
+ SYS_REG(ZCR_EL2),
+ SYS_REG(HCRX_EL2),
+ SYS_REG(TTBR0_EL2),
+ SYS_REG(TTBR1_EL2),
+ SYS_REG(TCR_EL2),
+ SYS_REG(TCR2_EL2),
+ SYS_REG(VTTBR_EL2),
+ SYS_REG(VTCR_EL2),
+ SYS_REG(VNCR_EL2),
+ SYS_REG(HDFGRTR2_EL2),
+ SYS_REG(HDFGWTR2_EL2),
+ SYS_REG(HFGRTR2_EL2),
+ SYS_REG(HFGWTR2_EL2),
+ SYS_REG(HDFGRTR_EL2),
+ SYS_REG(HDFGWTR_EL2),
+ SYS_REG(HAFGRTR_EL2),
+ SYS_REG(HFGITR2_EL2),
+ SYS_REG(SPSR_EL2),
+ SYS_REG(ELR_EL2),
+ SYS_REG(AFSR0_EL2),
+ SYS_REG(AFSR1_EL2),
+ SYS_REG(ESR_EL2),
+ SYS_REG(FAR_EL2),
+ SYS_REG(HPFAR_EL2),
+ SYS_REG(MAIR_EL2),
+ SYS_REG(PIRE0_EL2),
+ SYS_REG(PIR_EL2),
+ SYS_REG(POR_EL2),
+ SYS_REG(AMAIR_EL2),
+ SYS_REG(VBAR_EL2),
+ SYS_REG(CONTEXTIDR_EL2),
+ SYS_REG(TPIDR_EL2),
+ SYS_REG(CNTVOFF_EL2),
+ SYS_REG(CNTHCTL_EL2),
+ SYS_REG(CNTHP_CTL_EL2),
+ SYS_REG(CNTHP_CVAL_EL2),
+ SYS_REG(CNTHV_CTL_EL2),
+ SYS_REG(CNTHV_CVAL_EL2),
+ SYS_REG(SP_EL2),
+ SYS_REG(VDISR_EL2),
+ SYS_REG(VSESR_EL2),
+};
+
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -711,6 +802,23 @@ static __u64 pauth_generic_regs[] = {
.regs = pauth_generic_regs, \
.regs_n = ARRAY_SIZE(pauth_generic_regs), \
}
+#define EL2_SUBLIST \
+ { \
+ .name = "EL2", \
+ .capability = KVM_CAP_ARM_EL2, \
+ .feature = KVM_ARM_VCPU_HAS_EL2, \
+ .regs = el2_regs, \
+ .regs_n = ARRAY_SIZE(el2_regs), \
+ }
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -760,6 +868,124 @@ static struct vcpu_reg_list pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -767,5 +993,19 @@ struct vcpu_reg_list *vcpu_configs[] = {
&sve_pmu_config,
&pauth_config,
&pauth_pmu_config,
+
+ &el2_vregs_config,
+ &el2_vregs_pmu_config,
+ &el2_sve_config,
+ &el2_sve_pmu_config,
+ &el2_pauth_config,
+ &el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c
new file mode 100644
index 000000000000..bbe6862c6ab1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/hello_el2.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1
+ *
+ * Copyright 2025 Google LLC
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+static void guest_code(void)
+{
+ u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);
+ u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
+ u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4);
+
+ GUEST_ASSERT_EQ(get_current_el(), 2);
+ GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H);
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1),
+ ID_AA64MMFR1_EL1_VH_IMP);
+
+ /*
+ * Traps of the complete ID register space are IMPDEF without FEAT_FGT,
+ * which is really annoying to deal with in KVM describing E2H as RES1.
+ *
+ * If the implementation doesn't honor the trap then expect the register
+ * to return all zeros.
+ */
+ if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP)
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0),
+ ID_AA64MMFR0_EL1_FGT_NI);
+ else
+ GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index ec54ec7726e9..bf038a0371f4 100644
--- a/tools/testing/selftests/kvm/arm64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -21,22 +21,31 @@
#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
struct kvm_fw_reg_info {
uint64_t reg; /* Register definition */
uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ uint64_t reset_val; /* Reset value for the register */
};
#define FW_REG_INFO(r) \
{ \
.reg = r, \
.max_feat_bit = r##_BIT_MAX, \
+ .reset_val = r##_RESET_VAL \
}
static const struct kvm_fw_reg_info fw_reg_info[] = {
FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
};
enum test_stage {
@@ -99,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
memset(&res, 0, sizeof(res));
- smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
switch (stage) {
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
@@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+ uint64_t set_val;
- /* First 'read' should be an upper limit of the features supported */
+ /* First 'read' should be the reset value for the reg */
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
- reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+ TEST_ASSERT(val == reg_info->reset_val,
+ "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, reg_info->reset_val, val);
+
+ if (reg_info->reset_val)
+ set_val = 0;
+ else
+ set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
- /* Test a 'write' by disabling all the features of the register map */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
TEST_ASSERT(ret == 0,
- "Failed to clear all the features of reg: 0x%lx; ret: %d",
- reg_info->reg, errno);
+ "Failed to %s all the features of reg: 0x%lx; ret: %d",
+ (set_val ? "set" : "clear"), reg_info->reg, errno);
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+ TEST_ASSERT(val == set_val,
+ "Expected all the features to be %s for reg: 0x%lx",
+ (set_val ? "set" : "cleared"), reg_info->reg);
+
+ /*
+ * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+ * expects it to be cleared.
+ */
+ if (set_val) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+ }
/*
* Test enabling a feature that's not supported.
diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
new file mode 100644
index 000000000000..b5be9133535a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that nobody has tampered with KVM's UID
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+/*
+ * Do NOT redefine these constants, or try to replace them with some
+ * "common" version. They are hardcoded here to detect any potential
+ * breakage happening in the rest of the kernel.
+ *
+ * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74
+ */
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU
+
+static void guest_code(void)
+{
+ struct arm_smccc_res res = {};
+
+ do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
+ res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+ res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 &&
+ res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3,
+ "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3);
+ GUEST_DONE();
+}
+
+int main (int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
deleted file mode 100644
index 8b7a80a51b1c..000000000000
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
index ebd70430c89d..152c34776981 100644
--- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
@@ -54,7 +54,7 @@ static void guest_code(void)
* Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
* hidden the feature at runtime without any other userspace action.
*/
- __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
read_sysreg(id_aa64pfr0_el1)) == 0,
"GICv3 wrongly advertised");
@@ -163,9 +163,11 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
uint64_t pfr0;
+ test_disable_default_vgic();
+
vm = vm_create_with_one_vcpu(&vcpu, NULL);
pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+ __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),
"GICv3 not supported.");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..4ccbd389d133 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -95,14 +95,14 @@ static bool guest_check_lse(void)
uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
uint64_t atomic;
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);
return atomic >= 2;
}
static bool guest_check_dc_zva(void)
{
uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+ uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid);
return dzp == 0;
}
@@ -195,11 +195,11 @@ static bool guest_set_ha(void)
uint64_t hadbs, tcr;
/* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
index ab491ee9e5f7..98e49f710aef 100644
--- a/tools/testing/selftests/kvm/arm64/psci_test.c
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
0, 0, 0, 0, &res);
return res.a0;
@@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity,
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
0, 0, 0, 0, 0, &res);
return res.a0;
@@ -48,7 +48,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
0, 0, 0, 0, 0, &res);
return res.a0;
@@ -58,7 +58,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -67,7 +67,7 @@ static uint64_t psci_features(uint32_t func_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -89,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
vm = vm_create(2);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*source = aarch64_vcpu_add(vm, 0, &init, guest_code);
*target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..573dd790aeb8
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
+ * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with expected arm_sea info, also test userspace
+ * injecting a synchronous external data abort into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The under-test platform's APEI should be unable to claim SEA. Otherwise
+ * the test will also be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static vm_paddr_t einj_gpa;
+static void *einj_hva;
+static uint64_t einj_hpa;
+static bool far_invalid;
+
+static uint64_t translate_to_host_paddr(unsigned long vaddr)
+{
+ uint64_t pinfo;
+ int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
+ int fd;
+ uint64_t page_addr;
+ uint64_t paddr;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
+ paddr = page_addr + (vaddr & (getpagesize() - 1));
+ return paddr;
+}
+
+static void write_einj_entry(const char *einj_path, uint64_t val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+ cmdfile = popen(cmd, "r");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(uint64_t paddr)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+ ksft_test_result_skip("EINJ table no available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, paddr);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ uint64_t guest_data;
+
+ /* Consumes error will cause a SEA. */
+ guest_data = *(uint64_t *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ uint64_t num_guest_pages;
+ vm_paddr_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*guest_paddr=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ uint64_t guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+ /* Populate certain data before injecting UER. */
+ *(uint64_t *)einj_hva = 0xBAADCAFE;
+ guest_data = *(uint64_t *)einj_hva;
+ ksft_print_msg("Before EINJect: data=%#lx\n",
+ guest_data);
+
+ einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 217541fe6536..c4815d365816 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -123,20 +123,28 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
REG_FTR_END,
};
static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
@@ -146,6 +154,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
@@ -159,7 +170,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0),
REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
@@ -182,6 +195,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
@@ -207,11 +228,13 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1),
TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
};
@@ -224,12 +247,20 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
+ GUEST_REG_SYNC(SYS_MIDR_EL1);
+ GUEST_REG_SYNC(SYS_REVIDR_EL1);
+ GUEST_REG_SYNC(SYS_AIDR_EL1);
GUEST_DONE();
}
@@ -237,7 +268,9 @@ static void guest_code(void)
/* Return a safe value to a given ftr_bits an ftr value */
uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -289,7 +322,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
/* Return an invalid value to a given ftr_bits an ftr value */
uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -537,6 +572,69 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
}
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ uint64_t mte;
+ uint64_t mte_frac;
+ int idx, err;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
+ if (!mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
@@ -578,7 +676,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
/* find the first empty level in the cache hierarchy */
- for (level = 1; level < 7; level++) {
+ for (level = 1; level <= 7; level++) {
if (!CLIDR_CTYPE(clidr, level))
break;
}
@@ -609,18 +707,31 @@ static void test_ctr(struct kvm_vcpu *vcpu)
test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
}
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
{
u64 val;
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+ test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
test_clidr(vcpu);
test_ctr(vcpu);
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
- val++;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+ test_id_reg(vcpu, SYS_MPIDR_EL1);
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_id_reg(vcpu, SYS_MIDR_EL1);
+ test_id_reg(vcpu, SYS_REVIDR_EL1);
+ test_id_reg(vcpu, SYS_AIDR_EL1);
- test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
ksft_test_result_pass("%s\n", __func__);
}
@@ -647,6 +758,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+ test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
ksft_test_result_pass("%s\n", __func__);
}
@@ -657,32 +771,37 @@ int main(void)
struct kvm_vm *vm;
bool aarch64_only;
uint64_t val, el0;
- int test_cnt;
+ int test_cnt, i, j;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
+
+ test_wants_mte();
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();
- test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
- MPAM_IDREG_TEST;
+ test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ for (i = 0; i < ARRAY_SIZE(test_regs); i++)
+ for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++)
+ test_cnt++;
ksft_set_plan(test_cnt);
test_vm_ftr_id_regs(vcpu, aarch64_only);
test_vcpu_ftr_id_regs(vcpu);
+ test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
index 2d189f3da228..1763b9d45400 100644
--- a/tools/testing/selftests/kvm/arm64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
@@ -22,8 +22,20 @@ enum smccc_conduit {
SMC_INSN,
};
+static bool test_runs_at_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ struct kvm_vcpu_init init;
+
+ kvm_get_default_vcpu_target(vm, &init);
+ kvm_vm_free(vm);
+
+ return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
#define for_each_conduit(conduit) \
- for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+ for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \
+ conduit <= SMC_INSN; conduit++)
static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
{
@@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
struct kvm_vm *vm;
vm = vm_create(1);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
/*
* Enable in-kernel emulation of PSCI to ensure that calls are denied
@@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..8d6d3a4ae4db 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
@@ -9,17 +9,18 @@
#include <asm/kvm.h>
#include <asm/kvm_para.h>
+#include <arm64/gic_v3.h>
+
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vgic.h"
+#include "gic_v3.h"
#define NR_VCPUS 4
#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-#define GICR_TYPER 0x8
-
#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
@@ -675,6 +676,44 @@ static void test_v3_its_region(void)
vm_gic_destroy(&v);
}
+static void test_v3_nassgicap(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ bool has_nassgicap;
+ struct vm_gic vm;
+ u32 typer2;
+ int ret;
+
+ vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap;
+
+ typer2 |= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ if (has_nassgicap)
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret));
+ else
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Enabled nASSGIcap even though it's unavailable");
+
+ typer2 &= ~GICD_TYPER2_nASSGIcap;
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ typer2 ^= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ TEST_ASSERT(ret && errno == EBUSY,
+ "Changed nASSGIcap after initializing the VGIC");
+
+ vm_gic_destroy(&vm);
+}
+
/*
* Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
*/
@@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type)
return 0;
}
+struct sr_def {
+ const char *name;
+ u32 encoding;
+};
+
+#define PACK_SR(r) \
+ ((sys_reg_Op0(r) << 14) | \
+ (sys_reg_Op1(r) << 11) | \
+ (sys_reg_CRn(r) << 7) | \
+ (sys_reg_CRm(r) << 3) | \
+ (sys_reg_Op2(r)))
+
+#define SR(r) \
+ { \
+ .name = #r, \
+ .encoding = r, \
+ }
+
+static const struct sr_def sysregs_el1[] = {
+ SR(SYS_ICC_PMR_EL1),
+ SR(SYS_ICC_BPR0_EL1),
+ SR(SYS_ICC_AP0R0_EL1),
+ SR(SYS_ICC_AP0R1_EL1),
+ SR(SYS_ICC_AP0R2_EL1),
+ SR(SYS_ICC_AP0R3_EL1),
+ SR(SYS_ICC_AP1R0_EL1),
+ SR(SYS_ICC_AP1R1_EL1),
+ SR(SYS_ICC_AP1R2_EL1),
+ SR(SYS_ICC_AP1R3_EL1),
+ SR(SYS_ICC_BPR1_EL1),
+ SR(SYS_ICC_CTLR_EL1),
+ SR(SYS_ICC_SRE_EL1),
+ SR(SYS_ICC_IGRPEN0_EL1),
+ SR(SYS_ICC_IGRPEN1_EL1),
+};
+
+static const struct sr_def sysregs_el2[] = {
+ SR(SYS_ICH_AP0R0_EL2),
+ SR(SYS_ICH_AP0R1_EL2),
+ SR(SYS_ICH_AP0R2_EL2),
+ SR(SYS_ICH_AP0R3_EL2),
+ SR(SYS_ICH_AP1R0_EL2),
+ SR(SYS_ICH_AP1R1_EL2),
+ SR(SYS_ICH_AP1R2_EL2),
+ SR(SYS_ICH_AP1R3_EL2),
+ SR(SYS_ICH_HCR_EL2),
+ SR(SYS_ICC_SRE_EL2),
+ SR(SYS_ICH_VTR_EL2),
+ SR(SYS_ICH_VMCR_EL2),
+ SR(SYS_ICH_LR0_EL2),
+ SR(SYS_ICH_LR1_EL2),
+ SR(SYS_ICH_LR2_EL2),
+ SR(SYS_ICH_LR3_EL2),
+ SR(SYS_ICH_LR4_EL2),
+ SR(SYS_ICH_LR5_EL2),
+ SR(SYS_ICH_LR6_EL2),
+ SR(SYS_ICH_LR7_EL2),
+ SR(SYS_ICH_LR8_EL2),
+ SR(SYS_ICH_LR9_EL2),
+ SR(SYS_ICH_LR10_EL2),
+ SR(SYS_ICH_LR11_EL2),
+ SR(SYS_ICH_LR12_EL2),
+ SR(SYS_ICH_LR13_EL2),
+ SR(SYS_ICH_LR14_EL2),
+ SR(SYS_ICH_LR15_EL2),
+};
+
+static void test_sysreg_array(int gic, const struct sr_def *sr, int nr,
+ int (*check)(int, const struct sr_def *, const char *))
+{
+ for (int i = 0; i < nr; i++) {
+ u64 val;
+ u64 attr;
+ int ret;
+
+ /* Assume MPIDR_EL1.Aff*=0 */
+ attr = PACK_SR(sr[i].encoding);
+
+ /*
+ * The API is braindead. A register can be advertised as
+ * available, and yet not be readable or writable.
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and
+ * ICH_APnR{1,2,3}_EL2 do follow suit for consistency.
+ *
+ * On the bright side, no known HW is implementing more than
+ * 5 bits of priority, so we're safe. Sort of...
+ */
+ ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr);
+ TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name);
+
+ /* Check that we can write back what we read */
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name);
+ ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name);
+ }
+}
+
+static u8 get_ctlr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICC_CTLR_EL1), &val);
+ TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable");
+
+ pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICC_AP0R1_EL1:
+ case SYS_ICC_AP1R1_EL1:
+ if (get_ctlr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICC_AP0R2_EL1:
+ case SYS_ICC_AP0R3_EL1:
+ case SYS_ICC_AP1R2_EL1:
+ case SYS_ICC_AP1R3_EL1:
+ if (get_ctlr_pribits(gic) == 7)
+ return 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static u8 get_vtr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICH_VTR_EL2), &val);
+ TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable");
+
+ pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICH_AP0R1_EL2:
+ case SYS_ICH_AP1R1_EL2:
+ if (get_vtr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICH_AP0R2_EL2:
+ case SYS_ICH_AP0R3_EL2:
+ case SYS_ICH_AP1R2_EL2:
+ case SYS_ICH_AP1R3_EL2:
+ if (get_vtr_pribits(gic) == 7)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static void test_v3_sysregs(void)
+{
+ struct kvm_vcpu_init init = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u32 feat = 0;
+ int gic;
+
+ if (kvm_check_cap(KVM_CAP_ARM_EL2))
+ feat |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ vm = vm_create(1);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= feat;
+
+ vcpu = aarch64_vcpu_add(vm, 0, &init, NULL);
+ TEST_ASSERT(vcpu, "Can't create a vcpu?");
+
+ gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ TEST_ASSERT(gic >= 0, "No GIC???");
+
+ kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs);
+ if (feat)
+ test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs);
+ else
+ pr_info("SKIP EL2 registers, not available\n");
+
+ close(gic);
+ kvm_vm_free(vm);
+}
+
void run_tests(uint32_t gic_dev_type)
{
test_vcpus_then_vgic(gic_dev_type);
@@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type)
test_v3_last_bit_single_rdist();
test_v3_redist_ipa_range_check_at_vcpu_run();
test_v3_its_region();
+ test_v3_sysregs();
+ test_v3_nassgicap();
}
}
@@ -739,6 +994,8 @@ int main(int ac, char **av)
int pa_bits;
int cnt_impl = 0;
+ test_disable_default_vgic();
+
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..2fb2c7939fe9 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ uint32_t shared_data;
};
/*
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
+ uint32_t first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
}
local_irq_enable();
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
+ /* Timer PPIs cannot be injected from userspace */
+ static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
+ BIT(30 - MIN_PPI) |
+ BIT(28 - MIN_PPI) |
+ BIT(26 - MIN_PPI));
+
if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -620,18 +643,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
* that no actual interrupt was injected for those cases.
*/
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
- }
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ fd[f] = kvm_new_eventfd();
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- struct kvm_irqfd irqfd = {
- .fd = fd[f],
- .gsi = i - MIN_SPI,
- };
assert(i <= (uint64_t)UINT_MAX);
- vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
@@ -642,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
+ kvm_close(fd[f]);
}
/* handles the valid case: intid=0xffffffff num=1 */
@@ -758,7 +775,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_args_set(vcpu, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
@@ -786,6 +802,221 @@ done:
kvm_vm_free(vm);
}
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ uint32_t intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ vm_vaddr_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void help(const char *name)
{
printf(
@@ -808,6 +1039,9 @@ int main(int argc, char **argv)
int opt;
bool eoi_split = false;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ test_disable_default_vgic();
+
while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
switch (opt) {
case 'n':
@@ -839,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index fc4fe52fb6f8..e857a605f577 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -27,7 +27,7 @@ static vm_paddr_t gpa_base;
static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
+static int its_fd;
static struct test_data {
bool request_vcpus_stop;
@@ -118,11 +118,16 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)
{
guest_setup_gic();
+ local_irq_enable();
GUEST_SYNC(0);
@@ -214,9 +219,6 @@ static void setup_test_data(void)
static void setup_gic(void)
{
- gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
its_fd = vgic_its_setup(vm);
}
@@ -334,7 +336,7 @@ static void setup_vm(void)
{
int i;
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
@@ -355,7 +357,6 @@ static void setup_vm(void)
static void destroy_vm(void)
{
close(its_fd);
- close(gic_fd);
kvm_vm_free(vm);
free(vcpus);
}
@@ -374,6 +375,8 @@ int main(int argc, char **argv)
u32 nr_threads;
int c;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
switch (c) {
case 'v':
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f16b3b27e32e..ae36325c022f 100644
--- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -28,7 +28,6 @@
struct vpmu_vm {
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- int gic_fd;
};
static struct vpmu_vm vpmu_vm;
@@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr)
return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
-{
- u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
static uint64_t get_counters_mask(uint64_t n)
{
uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
@@ -415,10 +409,6 @@ static void create_vpmu_vm(void *guest_code)
.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
.addr = (uint64_t)&irq,
};
- struct kvm_device_attr init_attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_INIT,
- };
/* The test creates the vpmu_vm multiple times. Ensure a clean state */
memset(&vpmu_vm, 0, sizeof(vpmu_vm));
@@ -431,29 +421,25 @@ static void create_vpmu_vm(void *guest_code)
}
/* Create vCPU with PMUv3 */
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
- __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
- "Failed to create vgic-v3, skipping");
+
+ kvm_arch_vm_finalize_vcpus(vpmu_vm.vm);
/* Make sure that PMUv3 support is indicated in the ID register */
dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0);
TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
"Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
- /* Initialize vPMU */
vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
- vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
}
static void destroy_vpmu_vm(void)
{
- close(vpmu_vm.gic_fd);
kvm_vm_free(vpmu_vm.vm);
}
@@ -475,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
}
}
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail)
{
struct kvm_vcpu *vcpu;
- uint64_t pmcr, pmcr_orig;
+ unsigned int prev;
+ int ret;
create_vpmu_vm(guest_code);
vcpu = vpmu_vm.vcpu;
- pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
- pmcr = pmcr_orig;
+ prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)));
- /*
- * Setting a larger value of PMCR.N should not modify the field, and
- * return a success.
- */
- set_pmcr_n(&pmcr, pmcr_n);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
- pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
if (expect_fail)
- TEST_ASSERT(pmcr_orig == pmcr,
- "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
- pmcr, pmcr_n);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded",
+ nr_counters, prev);
else
- TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
- "Failed to update PMCR.N to %lu (received: %lu)",
- pmcr_n, get_pmcr_n(pmcr));
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL);
}
/*
@@ -516,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n)
pr_debug("Test with pmcr_n %lu\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
/* Save the initial sp to restore them later to run the guest again */
- sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+ sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));
run_vcpu(vcpu, pmcr_n);
@@ -528,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n)
* Reset and re-initialize the vCPU, and run the guest code again to
* check if PMCR_EL0.N is preserved.
*/
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
aarch64_vcpu_setup(vcpu, &init);
vcpu_init_descriptor_tables(vcpu);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp);
vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
run_vcpu(vcpu, pmcr_n);
@@ -557,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
uint64_t set_reg_id, clr_reg_id, reg_val;
uint64_t valid_counters_mask, max_counters_mask;
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
valid_counters_mask = get_counters_mask(pmcr_n);
@@ -611,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n)
{
pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, true);
destroy_vpmu_vm();
}
@@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void)
return get_pmcr_n(pmcr);
}
+static bool kvm_supports_nr_counters_attr(void)
+{
+ bool supported;
+
+ create_vpmu_vm(NULL);
+ supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS);
+ destroy_vpmu_vm();
+
+ return supported;
+}
+
int main(void)
{
uint64_t i, pmcr_n;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ TEST_REQUIRE(kvm_supports_nr_counters_attr());
pmcr_n = get_pmcr_n_limit();
for (i = 0; i <= pmcr_n; i++) {
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 8835fed09e9f..96d874b239eb 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,5 +1,6 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
CONFIG_USERFAULTFD=y
CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e79817bd0e29..0a1ea1d1e2d8 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -20,38 +20,6 @@
#include "guest_modes.h"
#include "ucall_common.h"
-#ifdef __aarch64__
-#include "arm64/vgic.h"
-
-static int gic_fd;
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
- /*
- * The test can still run even if hardware does not support GICv3, as it
- * is only an optimization to reduce guest exits.
- */
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
- if (gic_fd > 0)
- close(gic_fd);
-}
-
-#else /* __aarch64__ */
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
-}
-
-#endif
-
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
@@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
dirty_log_manual_caps);
- arch_setup_vm(vm, nr_vcpus);
-
/* Start the iterations */
iteration = 0;
host_quit = false;
@@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
memstress_free_bitmaps(bitmaps, p->slots);
- arch_cleanup_vm(vm);
memstress_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..d58a641b0e6a 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
@@ -75,6 +78,8 @@ static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
@@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(void)
{
uint64_t addr;
- int i;
+
+#ifdef __s390x__
+ uint64_t i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,16 +114,19 @@ static void guest_code(void)
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
GUEST_SYNC(1);
@@ -133,25 +143,18 @@ static uint64_t host_num_pages;
/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread. It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
* sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
-
static bool clear_log_supported(void)
{
return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
/* Should only be called after a GUEST_SYNC */
static void vcpu_handle_sync_stop(void)
{
- if (atomic_read(&vcpu_sync_stop_requested)) {
- /* It means main thread is sleeping waiting */
- atomic_set(&vcpu_sync_stop_requested, false);
+ if (READ_ONCE(vcpu_stop)) {
sem_post(&sem_vcpu_stop);
- sem_wait_until(&sem_vcpu_cont);
+ sem_wait(&sem_vcpu_cont);
}
}
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
-
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
__set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
-{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&sem_vcpu_cont);
-}
-
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ uint32_t count, cleared;
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&sem_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&sem_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
"exit_reason=%s",
@@ -426,7 +407,7 @@ struct log_mode {
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vcpu, ret, err);
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret;
struct kvm_vcpu *vcpu = data;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = __vcpu_run(vcpu);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vcpu, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
+ uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (__test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
+ uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- if (__test_and_clear_bit_le(page, bmap)) {
- bool matched;
-
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- __set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
@@ -674,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
log_mode_create_vm_done(vm);
*vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -688,7 +600,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long *bmap[2];
uint32_t ring_buf_idx = 0;
int sem_val;
@@ -731,12 +643,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_zalloc(host_num_pages);
- host_bmap_track = bitmap_zalloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -757,14 +678,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
- WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ WRITE_ONCE(host_quit, false);
/*
* Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +692,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages,
- &ring_buf_idx);
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
+
+ sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
+
+ /*
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
+ */
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
/*
- * See vcpu_sync_stop_requested definition for details on why
- * we need to stop vcpu when verify data.
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
*/
- atomic_set(&vcpu_sync_stop_requested, true);
- sem_wait_until(&sem_vcpu_stop);
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
/*
* NOTE: for dirty ring, it's possible that we didn't stop at
* GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +788,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the flush of the last page, and since we handle the last
* page specially verification will succeed anyway.
*/
- assert(host_log_mode == LOG_MODE_DIRTY_RING ||
- atomic_read(&vcpu_sync_stop_requested) == false);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
-
- /*
- * Set host_quit before sem_vcpu_cont in the final iteration to
- * ensure that the vCPU worker doesn't resume the guest. As
- * above, the dirty ring test may stop and wait even when not
- * explicitly request to do so, i.e. would hang waiting for a
- * "continue" if it's allowed to resume the guest.
- */
- if (++iteration == p->iterations)
- WRITE_ONCE(host_quit, true);
-
- sem_post(&sem_vcpu_cont);
- sync_global_to_guest(vm, iteration);
}
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -857,7 +837,6 @@ int main(int argc, char *argv[])
.interval = TEST_HOST_LOOP_INTERVAL,
};
int opt, i;
- sigset_t sigset;
sem_init(&sem_vcpu_stop, 0, 0);
sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +887,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
- /* Ensure that vCPU threads start with SIG_IPI blocked. */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
index 91f05f78e824..f4644c9d2d3b 100644
--- a/tools/testing/selftests/kvm/get-reg-list.c
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
}
#ifdef __aarch64__
-static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c,
+ struct kvm_vcpu_init *init)
{
struct vcpu_reg_sublist *s;
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init);
+
for_each_sublist(c, s)
if (s->capability)
init->features[s->feature / 32] |= 1 << (s->feature % 32);
@@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini
static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
{
- struct kvm_vcpu_init init = { .target = -1, };
+ struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
- prepare_vcpu_init(c, &init);
+ prepare_vcpu_init(vm, c, &init);
vcpu = __vm_vcpu_add(vm, 0);
aarch64_vcpu_setup(vcpu, &init);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ce687f8d248f..618c937f3c90 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -13,14 +13,19 @@
#include <linux/bitmap.h>
#include <linux/falloc.h>
+#include <linux/sizes.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "kvm_util.h"
+#include "numaif.h"
#include "test_util.h"
+#include "ucall_common.h"
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
{
char buf[64];
@@ -34,15 +39,177 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap(int fd, size_t page_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+ int ret;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
+ page_size);
+ TEST_ASSERT(!ret, "fallocate the first page should succeed.");
+
+ for (i = 0; i < page_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
+ for (; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ memset(mem, val, page_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = 8;
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
+
+ for (i = 0; i < accessible_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
+}
+
+static void test_mmap_not_supported(int fd, size_t total_size)
{
char *mem;
mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
TEST_ASSERT_EQ(mem, MAP_FAILED);
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
}
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
int ret;
@@ -53,7 +220,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
{
int ret;
@@ -90,7 +257,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
{
struct {
off_t offset;
@@ -120,26 +287,18 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
}
}
-static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
+static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
+ uint64_t guest_memfd_flags)
{
- size_t page_size = getpagesize();
- uint64_t flag;
size_t size;
int fd;
for (size = 1; size < page_size; size++) {
- fd = __vm_create_guest_memfd(vm, size, 0);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
+ fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
"guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
size);
}
-
- for (flag = BIT(0); flag; flag <<= 1) {
- fd = __vm_create_guest_memfd(vm, page_size, flag);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
- "guest_memfd() with flag '0x%lx' should fail with EINVAL",
- flag);
- }
}
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
@@ -147,53 +306,187 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
int fd1, fd2, ret;
struct stat st1, st2;
- fd1 = __vm_create_guest_memfd(vm, 4096, 0);
+ fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
- fd2 = __vm_create_guest_memfd(vm, 8192, 0);
+ fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
ret = fstat(fd2, &st2);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size");
+ TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
close(fd2);
close(fd1);
}
-int main(int argc, char *argv[])
+static void test_guest_memfd_flags(struct kvm_vm *vm)
{
- size_t page_size;
- size_t total_size;
+ uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ uint64_t flag;
int fd;
+
+ for (flag = BIT(0); flag; flag <<= 1) {
+ fd = __vm_create_guest_memfd(vm, page_size, flag);
+ if (flag & valid_flags) {
+ TEST_ASSERT(fd >= 0,
+ "guest_memfd() with flag '0x%lx' should succeed",
+ flag);
+ close(fd);
+ } else {
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
+ "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+ flag);
+ }
+ }
+}
+
+#define gmem_test(__test, __vm, __flags) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \
+ \
+ test_##__test(fd, page_size * 4); \
+ close(fd); \
+} while (0)
+
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
+{
+ test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
+
+ gmem_test(file_read_write, vm, flags);
+
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
+
+ gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
+
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
+
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ uint64_t flags;
+
+ test_guest_memfd_flags(vm);
+
+ __test_guest_memfd(vm, 0);
+
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
+
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code(uint8_t *mem, uint64_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ __GUEST_ASSERT(mem[i] == 0xaa,
+ "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);
+
+ memset(mem, 0xff, size);
+ GUEST_DONE();
+}
+
+static void test_guest_memfd_guest(void)
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables, and low memory contains
+ * the PCI hole and other MMIO regions that need to be avoided.
+ */
+ const uint64_t gpa = SZ_4G;
+ const int slot = 1;
+
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
+ uint8_t *mem;
+ size_t size;
+ int fd, i;
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
+ return;
- page_size = getpagesize();
- total_size = page_size * 4;
+ vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
- vm = vm_create_barebones();
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
- test_create_guest_memfd_invalid(vm);
- test_create_guest_memfd_multiple(vm);
+ size = vm->page_size;
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+ vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ memset(mem, 0xaa, size);
+ kvm_munmap(mem, size);
- fd = vm_create_guest_memfd(vm, total_size, 0);
+ virt_pg_map(vm, gpa, gpa);
+ vcpu_args_set(vcpu, 2, gpa, size);
+ vcpu_run(vcpu);
- test_file_read_write(fd);
- test_mmap(fd, page_size);
- test_file_size(fd, page_size, total_size);
- test_fallocate(fd, page_size, total_size);
- test_invalid_punch_hole(fd, page_size, total_size);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ for (i = 0; i < size; i++)
+ TEST_ASSERT_EQ(mem[i], 0xff);
close(fd);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long vm_types, vm_type;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+
+ page_size = getpagesize();
+
+ /*
+ * Not all architectures support KVM_CAP_VM_TYPES. However, those that
+ * support guest_memfd have that support for the default VM type.
+ */
+ vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
+ if (!vm_types)
+ vm_types = BIT(VM_TYPE_DEFAULT);
+
+ for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
+ test_guest_memfd(vm_type);
+
+ test_guest_memfd_guest();
}
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
index bf461de34785..e2c4e9f0010f 100644
--- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
@@ -155,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
timer_set_tval(timer, msec_to_cycles(msec));
}
+static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
+static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..cc7a7f34ed37 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..58feef3eb386 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index e43a57d99b56..b973bb2c64a6 100644
--- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
@@ -2,6 +2,9 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
-struct kvm_vm_arch {};
+struct kvm_vm_arch {
+ bool has_gic;
+ int gic_fd;
+};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..ff928716574d 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,6 +62,67 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -102,12 +163,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);
@@ -120,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
static inline void cpu_relax(void)
@@ -199,6 +255,16 @@ static inline void local_irq_disable(void)
asm volatile("msr daifset, #3" : : : "memory");
}
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
@@ -235,4 +301,87 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
/* Execute a Wait For Interrupt instruction. */
void wfi(void);
+void test_wants_mte(void);
+void test_disable_default_vgic(void);
+
+bool vm_supports_el2(struct kvm_vm *vm);
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+{
+ return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
+#define MAPPED_EL2_SYSREG(el2, el1) \
+ case SYS_##el1: \
+ if (vcpu_has_el2(vcpu)) \
+ alias = SYS_##el2; \
+ break
+
+
+static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding)
+{
+ u32 alias = encoding;
+
+ BUILD_BUG_ON(!__builtin_constant_p(encoding));
+
+ switch (encoding) {
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1);
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1);
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1);
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1);
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1);
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1);
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1);
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1);
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1);
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1);
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1);
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1);
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1);
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1);
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1);
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1);
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1);
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1);
+ MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1);
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1);
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1);
+ MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1);
+ case SYS_SP_EL1:
+ if (!vcpu_has_el2(vcpu))
+ return ARM64_CORE_REG(sp_el1);
+
+ alias = SYS_SP_EL2;
+ break;
+ default:
+ BUILD_BUG();
+ }
+
+ return KVM_ARM64_SYS_REG(alias);
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init);
+
+static inline unsigned int get_current_el(void)
+{
+ return (read_sysreg(CurrentEL) >> 2) & 0x3;
+}
+
+#define do_smccc(...) \
+do { \
+ if (get_current_el() == 2) \
+ smccc_smc(__VA_ARGS__); \
+ else \
+ smccc_hvc(__VA_ARGS__); \
+} while (0)
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
index c481d0c00a5d..688beccc9436 100644
--- a/tools/testing/selftests/kvm/include/arm64/vgic.h
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
@@ -16,6 +16,9 @@
((uint64_t)(flags) << 12) | \
index)
+bool kvm_supports_vgic_v3(void);
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+void __vgic_v3_init(int fd);
int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
#define VGIC_MAX_RESERVED 1023
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..d4e613162bba
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+#include <sys/syscall.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftest rarely/never specific an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyways.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 4c4e5a847f67..81f4355ff28a 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -18,8 +18,12 @@
#include <asm/atomic.h>
#include <asm/kvm.h>
+#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_syscalls.h"
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -46,6 +50,12 @@ struct userspace_mem_region {
struct hlist_node slot_node;
};
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
struct kvm_vcpu {
struct list_head list;
uint32_t id;
@@ -55,6 +65,10 @@ struct kvm_vcpu {
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+#ifdef __aarch64__
+ struct kvm_vcpu_init init;
+#endif
+ struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
uint32_t fetch_index;
uint32_t dirty_gfns_count;
@@ -99,10 +113,7 @@ struct kvm_vm {
struct kvm_vm_arch arch;
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
+ struct kvm_binary_stats stats;
/*
* KVM region slots. These are the default memslots used by page
@@ -167,12 +178,13 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
NUM_VM_MODES,
};
@@ -208,7 +220,7 @@ extern enum vm_guest_mode vm_mode_default;
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -228,6 +240,11 @@ extern enum vm_guest_mode vm_mode_default;
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
#endif
#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
@@ -243,16 +260,22 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help);
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
-bool get_kvm_param_bool(const char *param);
-bool get_kvm_intel_param_bool(const char *param);
-bool get_kvm_amd_param_bool(const char *param);
+int kvm_get_module_param_integer(const char *module_name, const char *param);
+bool kvm_get_module_param_bool(const char *module_name, const char *param);
+
+static inline bool get_kvm_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm", param);
+}
-int get_kvm_param_integer(const char *param);
-int get_kvm_intel_param_integer(const char *param);
-int get_kvm_amd_param_integer(const char *param);
+static inline int get_kvm_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm", param);
+}
unsigned int kvm_check_cap(long cap);
@@ -261,9 +284,6 @@ static inline bool kvm_has_cap(long cap)
return kvm_check_cap(cap);
}
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -492,6 +512,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
return fd;
}
+static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = eventfd,
+ .gsi = gsi,
+ .flags = flags,
+ .resamplefd = -1,
+ };
+
+ return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+ int fd = eventfd(0, 0);
+
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+ return fd;
+}
+
static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
{
ssize_t ret;
@@ -531,15 +590,53 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
struct kvm_stats_desc *desc, uint64_t *data,
size_t max_elements);
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements);
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+#define __get_stat(stats, stat) \
+({ \
+ uint64_t data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
+
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
{
- uint64_t data;
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
+static inline bool is_smt_on(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
+
+ return false;
}
void vm_create_irqchip(struct kvm_vm *vm);
@@ -576,12 +673,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
uint32_t guest_memfd, uint64_t guest_memfd_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd_fd, uint64_t guest_memfd_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -591,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
@@ -963,7 +1061,36 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+ int r;
+
+ r = __pin_task_to_cpu(task, cpu);
+ TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+ int cpu = sched_getcpu();
+
+ pin_task_to_cpu(task, cpu);
+ return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+ pin_task_to_cpu(pthread_self(), cpu);
+}
+
+static inline int pin_self_to_any_cpu(void)
+{
+ return pin_task_to_any_cpu(pthread_self());
+}
+
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int nr_vcpus);
@@ -1077,6 +1204,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
virt_arch_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
@@ -1137,10 +1265,14 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
*/
void kvm_selftest_arch_init(void);
-void kvm_arch_vm_post_create(struct kvm_vm *vm);
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus);
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm);
+void kvm_arch_vm_release(struct kvm_vm *vm);
bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
uint32_t guest_get_vcpuid(void);
+bool kvm_arch_has_default_irqchip(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..2ed106b32c81
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is constant 100MHZ */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..76840ddda57d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_TI 11 /* Timer interrupt*/
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+ unsigned long prmd;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#define VECTOR_NUM 64
+
+typedef void(*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+ * if the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 5f389166338c..e58282488beb 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -9,8 +9,22 @@
#include <linux/stringify.h>
#include <asm/csr.h>
+#include <asm/vdso/processor.h>
#include "kvm_util.h"
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
uint64_t idx, uint64_t size)
{
@@ -60,7 +74,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
-struct ex_regs {
+struct pt_regs {
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -92,16 +107,19 @@ struct ex_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- unsigned long epc;
+ /* Supervisor/Machine CSRs */
unsigned long status;
+ unsigned long badaddr;
unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#define NR_VECTORS 2
#define NR_EXCEPTIONS 32
#define EC_MASK (NR_EXCEPTIONS - 1)
-typedef void(*exception_handler_fn)(struct ex_regs *);
+typedef void(*exception_handler_fn)(struct pt_regs *);
void vm_init_vector_tables(struct kvm_vm *vm);
void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -22,7 +24,7 @@
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
@@ -78,6 +80,23 @@ do { \
__builtin_unreachable(); \
} while (0)
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
+
size_t parse_size(const char *size);
int64_t timespec_to_ns(struct timespec ts);
@@ -153,6 +172,7 @@ bool is_backing_src_hugetlb(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 3c10c4dc0ae8..72575eadb63a 100644
--- a/tools/testing/selftests/kvm/include/x86/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
@@ -5,8 +5,11 @@
#ifndef SELFTEST_KVM_PMU_H
#define SELFTEST_KVM_PMU_H
+#include <stdbool.h>
#include <stdint.h>
+#include <linux/bits.h>
+
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
/*
@@ -61,6 +64,11 @@
#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02)
+#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00)
+#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01)
+#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02)
+#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01)
#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
@@ -80,6 +88,11 @@ enum intel_pmu_architectural_events {
INTEL_ARCH_BRANCHES_RETIRED_INDEX,
INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX,
+ INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_RETIRING_INDEX,
+ INTEL_ARCH_LBR_INSERTS_INDEX,
NR_INTEL_ARCH_EVENTS,
};
@@ -94,4 +107,17 @@ enum amd_pmu_zen_events {
extern const uint64_t intel_pmu_arch_events[];
extern const uint64_t amd_pmu_zen_events[];
+enum pmu_errata {
+ INSTRUCTIONS_RETIRED_OVERCOUNT,
+ BRANCHES_RETIRED_OVERCOUNT,
+};
+extern uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void);
+
+static inline bool this_pmu_has_errata(enum pmu_errata errata)
+{
+ return pmu_errata_mask & BIT_ULL(errata);
+}
+
#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index d60da8966772..57d62a425109 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -34,6 +34,8 @@ extern uint64_t guest_tsc_khz;
#define NMI_VECTOR 0x02
+const char *ex_str(int vector);
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -183,6 +185,9 @@ struct kvm_x86_cpu_feature {
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -197,8 +202,12 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
/*
* KVM defined paravirt features.
@@ -258,7 +267,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
@@ -285,6 +294,8 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -323,6 +334,11 @@ struct kvm_x86_pmu_feature {
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8)
+#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9)
+#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10)
+#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11)
+#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12)
#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
@@ -1141,7 +1157,6 @@ do { \
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
struct ex_regs {
uint64_t rax, rcx, rdx, rbx;
@@ -1171,6 +1186,12 @@ struct idt_entry {
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+/*
+ * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
+ * used to signal "no expcetion".
+ */
+#define KVM_MAGIC_DE_VECTOR 0xff
+
/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
@@ -1244,7 +1265,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
uint64_t ign_error_code; \
uint8_t vector; \
\
- asm volatile(KVM_ASM_SAFE(insn) \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
: inputs \
: KVM_ASM_SAFE_CLOBBERS); \
@@ -1306,6 +1327,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
bool kvm_is_tdp_enabled(void);
+static inline bool get_kvm_intel_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_intel", param);
+}
+
+static inline bool get_kvm_amd_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_amd", param);
+}
+
+static inline int get_kvm_intel_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_intel", param);
+}
+
+static inline int get_kvm_amd_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_amd", param);
+}
+
static inline bool kvm_is_pmu_enabled(void)
{
return get_kvm_param_bool("enable_pmu");
@@ -1316,6 +1357,16 @@ static inline bool kvm_is_forced_emulation_enabled(void)
return !!get_kvm_param_integer("force_emulation_prefix");
}
+static inline bool kvm_is_unrestricted_guest_enabled(void)
+{
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+static inline bool kvm_is_ignore_msrs(void)
+{
+ return get_kvm_param_bool("ignore_msrs");
+}
+
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level);
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
@@ -1339,6 +1390,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
GUEST_ASSERT(!ret);
}
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
#define vm_xsave_require_permission(xfeature) \
@@ -1350,7 +1441,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
- PG_LEVEL_NUM
+ PG_LEVEL_256T
};
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..008b4169f5e2 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -25,19 +25,51 @@ enum sev_guest_state {
#define SEV_POLICY_NO_DBG (1UL << 0)
#define SEV_POLICY_ES (1UL << 2)
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
#define GHCB_MSR_TERM_REQ 0x100
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
* The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
* instead of a proper struct. The size of the parameter is embedded in the
* ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
@@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
void sev_vm_init(struct kvm_vm *vm);
void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
@@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
}
+static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t hva, uint64_t size, uint8_t type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index edb3c391b982..96e2b4c630a9 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -568,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
new file mode 100644
index 000000000000..5d7590d01868
--- /dev/null
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY 0x20
+#define GSI_BASE_SECONDARY 0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+ int r, i;
+
+ /*
+ * The secondary task can encounter EBADF since the primary can close
+ * the eventfd at any time. And because the primary can recreate the
+ * eventfd, at the safe fd in the file table, the secondary can also
+ * encounter "unexpected" success, e.g. if the close+recreate happens
+ * between the first and second assignments. The secondary's role is
+ * mostly to antagonize KVM, not to detect bugs.
+ */
+ for (i = 0; i < 2; i++) {
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+ TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+ "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+ r, errno);
+
+ /* De-assign should succeed unless the eventfd was closed. */
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ TEST_ASSERT(!r || errno == EBADF,
+ "De-assign should succeed unless the fd was closed");
+ }
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+ while (!READ_ONCE(done)) {
+ juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+ juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+ }
+
+ return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+ int r1, r2;
+
+ /*
+ * At least one of the assigns should fail. KVM disallows assigning a
+ * single eventfd to multiple GSIs (or VMs), so it's possible that both
+ * assignments can fail, too.
+ */
+ r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+ TEST_ASSERT(!r1 || errno == EBUSY,
+ "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+ r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+ TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+ "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+ r1, r2, errno);
+
+ /*
+ * De-assign should always succeed, even if the corresponding assign
+ * failed.
+ */
+ kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t racing_thread;
+ struct kvm_vcpu *unused;
+ int r, i;
+
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
+
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ kvm_irqfd(vm1, 10, __eventfd, 0);
+
+ r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ /*
+ * De-assign all eventfds, along with multiple eventfds that were never
+ * assigned. KVM's ABI is that de-assign is allowed so long as the
+ * eventfd itself is valid.
+ */
+ kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+ close(__eventfd);
+
+ pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+ for (i = 0; i < 10000; i++) {
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ juggle_eventfd_primary(vm1, __eventfd);
+ juggle_eventfd_primary(vm2, __eventfd);
+ close(__eventfd);
+ }
+
+ WRITE_ONCE(done, true);
+ pthread_join(racing_thread, NULL);
+}
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index f02355c3c4c2..b7dbde9c0843 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -239,14 +239,14 @@ int main(int argc, char *argv[])
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
- stats_test(dup(vm_stats_fds));
+ stats_test(kvm_dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
- stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include "test_util.h"
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- /*
- * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
- * an arbitrary number for everything else.
- */
- int nr_fds_wanted = kvm_max_vcpus + 100;
- struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
- /*
- * Check that we're allowed to open nr_fds_wanted file descriptors and
- * try raising the limits if needed.
- */
- TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
- if (rl.rlim_cur < nr_fds_wanted) {
- rl.rlim_cur = nr_fds_wanted;
- if (rl.rlim_max < nr_fds_wanted) {
- int old_rlim_max = rl.rlim_max;
- rl.rlim_max = nr_fds_wanted;
-
- int r = setrlimit(RLIMIT_NOFILE, &rl);
- __TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
- old_rlim_max, nr_fds_wanted);
- } else {
- TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
- }
- }
+ kvm_set_files_rlimit(kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..b023868fe0b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..b6a7e30c3eb1 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..50754a27f493 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
+static void gicv3_set_group(uint32_t intid, bool grp)
+{
+ uint32_t cpu_or_dist;
+ uint32_t val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
+ u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+ /* Disable Group-0 interrupts */
+ write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..7f9fdcf42ae6 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -15,6 +15,8 @@
#include "gic_v3.h"
#include "processor.h"
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
static u64 its_read_u64(unsigned long offset)
{
return readq_relaxed(GITS_BASE_GVA + offset);
@@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
}
+static u64 procnum_to_rdbase(u32 vcpu_id)
+{
+ return vcpu_id << GITS_COLLECTION_TARGET_SHIFT;
+}
+
#define GITS_CMDQ_POLL_ITERATIONS 0
static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
@@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
its_encode_cmd(&cmd, GITS_CMD_MAPC);
its_encode_collection(&cmd, collection_id);
- its_encode_target(&cmd, vcpu_id);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_encode_valid(&cmd, valid);
its_send_cmd(cmdq_base, &cmd);
@@ -246,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..d46e4b13b92c 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -12,6 +12,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
+#include "vgic.h"
#include <linux/bitfield.h>
#include <linux/sizes.h>
@@ -72,13 +73,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
uint64_t pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
@@ -90,12 +91,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
uint64_t pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
@@ -128,7 +129,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t flags)
{
- uint8_t attr_idx = flags & 7;
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
uint64_t *ptep;
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -147,18 +149,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -167,7 +172,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -177,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
uint64_t *ptep;
@@ -187,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 0)
+ return ptep;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 1)
+ break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 2)
+ break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@@ -215,6 +230,11 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return virt_get_pte_hva_at_level(vm, gva, 3);
+}
+
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep = virt_get_pte_hva(vm, gva);
@@ -258,55 +278,73 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
+bool vm_supports_el2(struct kvm_vm *vm)
+{
+ const char *value = getenv("NV");
+
+ if (value && *value == '0')
+ return false;
+
+ return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic;
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init preferred = {};
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+ if (vm_supports_el2(vm))
+ preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ *init = preferred;
+}
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
- if (!init)
+ if (!init) {
+ kvm_get_default_vcpu_target(vm, &default_init);
init = &default_init;
-
- if (init->target == -1) {
- struct kvm_vcpu_init preferred;
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
- init->target = preferred.target;
}
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+ vcpu->init = *init;
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20);
- sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
- tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+ sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1));
/* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
@@ -319,41 +357,47 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ tcr_el1 |= TCR_IPS_52_BITS;
ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+
+ if (!vcpu_has_el2(vcpu))
+ return;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
+ HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H);
}
void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
@@ -387,7 +431,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
aarch64_vcpu_setup(vcpu, init);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size);
return vcpu;
}
@@ -457,7 +501,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
extern char vectors;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@@ -565,15 +609,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val);
*ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val);
*ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
ID_AA64MMFR0_EL1_TGRAN64_IMP);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val);
*ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
@@ -645,3 +689,44 @@ void wfi(void)
{
asm volatile("wfi");
}
+
+static bool request_mte;
+static bool request_vgic = true;
+
+void test_wants_mte(void)
+{
+ request_mte = true;
+}
+
+void test_disable_default_vgic(void)
+{
+ request_vgic = false;
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE))
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+
+ if (request_vgic && kvm_supports_vgic_v3()) {
+ vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64);
+ vm->arch.has_gic = true;
+ }
+}
+
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ __vgic_v3_init(vm->arch.gic_fd);
+}
+
+void kvm_arch_vm_release(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ close(vm->arch.gic_fd);
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
index 4427f43f73ea..d0f7bd0984b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
@@ -15,6 +15,17 @@
#include "gic.h"
#include "gic_v3.h"
+bool kvm_supports_vgic_v3(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ kvm_vm_free(vm);
+
+ return !r;
+}
+
/*
* vGIC-v3 default host setup
*
@@ -30,24 +41,11 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
int gic_fd;
uint64_t attr;
- struct list_head *iter;
- unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
- TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
- /*
- * Make sure that the caller is infact calling this
- * function after all the vCPUs are added.
- */
- list_for_each(iter, &vm->vcpus)
- nr_vcpus_created++;
- TEST_ASSERT(nr_vcpus == nr_vcpus_created,
- "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
- nr_vcpus, nr_vcpus_created);
+ unsigned int nr_gic_pages;
/* Distributor setup */
gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
@@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
@@ -73,10 +68,39 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ return gic_fd;
+}
+
+void __vgic_v3_init(int fd)
+{
+ kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+}
- return gic_fd;
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+ unsigned int nr_vcpus_created = 0;
+ struct list_head *iter;
+ int fd;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs);
+ if (fd < 0)
+ return fd;
+
+ __vgic_v3_init(fd);
+ return fd;
}
/* should only work for level sensitive interrupts */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 33fefeb3ca44..8279b6ced8d2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -23,17 +24,29 @@ uint32_t guest_random_seed;
struct guest_random_state guest_rng;
static uint32_t last_guest_seed;
-static int vcpu_mmap_sz(void);
+static size_t vcpu_mmap_sz(void);
-int open_path_or_exit(const char *path, int flags)
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
int fd;
fd = open(path, flags);
- __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
- TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+ if (fd < 0)
+ goto error;
return fd;
+
+error:
+ if (errno == EACCES || errno == ENOENT)
+ ksft_exit_skip("- Cannot open '%s': %s. %s\n",
+ path, strerror(errno),
+ errno == EACCES ? "Root required?" : enoent_help);
+ TEST_FAIL("Failed to open '%s'", path);
+}
+
+int open_path_or_exit(const char *path, int flags)
+{
+ return __open_path_or_exit(path, flags, "");
}
/*
@@ -47,7 +60,7 @@ int open_path_or_exit(const char *path, int flags)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
- return open_path_or_exit(KVM_DEV_PATH, flags);
+ return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}
int open_kvm_dev_path_or_exit(void)
@@ -63,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param,
ssize_t bytes_read;
int fd, r;
+ /* Verify KVM is loaded, to provide a more helpful SKIP message. */
+ close(open_kvm_dev_path_or_exit());
+
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
module_name, param);
TEST_ASSERT(r < path_size,
@@ -79,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param,
return bytes_read;
}
-static int get_module_param_integer(const char *module_name, const char *param)
+int kvm_get_module_param_integer(const char *module_name, const char *param)
{
/*
* 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
@@ -103,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param)
return atoi_paranoid(value);
}
-static bool get_module_param_bool(const char *module_name, const char *param)
+bool kvm_get_module_param_bool(const char *module_name, const char *param)
{
char value;
ssize_t r;
@@ -119,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param)
TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-bool get_kvm_param_bool(const char *param)
-{
- return get_module_param_bool("kvm", param);
-}
-
-bool get_kvm_intel_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_intel", param);
-}
-
-bool get_kvm_amd_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_amd", param);
-}
-
-int get_kvm_param_integer(const char *param)
-{
- return get_module_param_integer("kvm", param);
-}
-
-int get_kvm_intel_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_intel", param);
-}
-
-int get_kvm_amd_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_amd", param);
-}
-
/*
* Capability
*
@@ -196,6 +182,11 @@ static void vm_open(struct kvm_vm *vm)
vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
const char *vm_guest_mode_string(uint32_t i)
@@ -210,12 +201,13 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
- [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
@@ -236,12 +228,13 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
- [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
@@ -313,27 +306,30 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
case VM_MODE_P36V48_16K:
vm->pgtable_levels = 4;
break;
+ case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
kvm_init_vm_address_properties(vm);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this mode (48-bit virtual address space).
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
@@ -406,6 +402,47 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
return vm_adjust_num_guest_pages(mode, nr_pages);
}
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
+
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+}
+
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
+#ifdef __x86_64__
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
+#endif
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -413,14 +450,24 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
nr_extra_pages);
struct userspace_mem_region *slot0;
struct kvm_vm *vm;
- int i;
+ int i, flags;
+
+ kvm_set_files_rlimit(nr_runnable_vcpus);
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
vm = ____vm_create(shape);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
for (i = 0; i < NR_MEM_REGIONS; i++)
vm->memslots[i] = 0;
@@ -442,7 +489,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
return vm;
}
@@ -480,6 +527,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
for (i = 0; i < nr_vcpus; ++i)
vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -545,15 +593,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
return vm_vcpu_recreate(vm, 0);
}
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
- cpu_set_t mask;
- int r;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
- CPU_ZERO(&mask);
- CPU_SET(pcpu, &mask);
- r = sched_setaffinity(0, sizeof(mask), &mask);
- TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+ return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}
static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
@@ -607,7 +654,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
/* 2. Check if the main worker needs to be pinned. */
if (cpu) {
- kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
cpu = strtok(NULL, delim);
}
@@ -657,6 +704,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
return NULL;
}
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+ if (stats->fd < 0)
+ return;
+
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
+
+ kvm_close(stats->fd);
+ stats->fd = -1;
+}
+
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
@@ -676,19 +737,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
*/
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->run, vcpu_mmap_sz());
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
- ret = close(vcpu->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vcpu->fd);
+ kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -699,35 +756,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
void kvm_vm_release(struct kvm_vm *vmp)
{
struct kvm_vcpu *vcpu, *tmp;
- int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
+
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
- ret = close(vmp->kvm_fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_arch_vm_release(vmp);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
sparsebit_free(&region->protected_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_start, region->mmap_size);
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
- ret = munmap(region->mmap_alias, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_alias, region->mmap_size);
close(region->fd);
}
if (region->region.guest_memfd >= 0)
@@ -748,12 +802,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
- /* Free cached stats metadata and close FD */
- if (vmp->stats_fd) {
- free(vmp->stats_desc);
- close(vmp->stats_fd);
- }
-
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
@@ -771,7 +819,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
int kvm_memfd_alloc(size_t size, bool hugepages)
{
int memfd_flags = MFD_CLOEXEC;
- int fd, r;
+ int fd;
if (hugepages)
memfd_flags |= MFD_HUGETLB;
@@ -779,11 +827,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- r = ftruncate(fd, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
-
- r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
return fd;
}
@@ -900,8 +945,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
@@ -915,30 +960,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
@@ -952,8 +996,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
+ slot, gpa, npages, region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
}
@@ -979,7 +1022,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -990,12 +1033,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1030,8 +1070,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ guest_memfd = kvm_dup(guest_memfd);
}
region->region.guest_memfd = guest_memfd;
@@ -1043,20 +1082,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size,
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
region->region.guest_memfd);
/* Add to quick lookup data structures */
@@ -1066,12 +1103,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1080,10 +1115,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot,
- uint64_t npages, uint32_t flags)
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags)
{
- vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -1149,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
/*
* VM Memory Region Move
*
@@ -1231,14 +1276,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
}
/* Returns the size of a vCPU's kvm_run structure. */
-static int vcpu_mmap_sz(void)
+static size_t vcpu_mmap_sz(void)
{
int dev_fd, ret;
dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
- TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
@@ -1279,12 +1324,15 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
- "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->run));
- vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->run != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
@@ -1401,8 +1449,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
virt_pg_map(vm, vaddr, paddr);
-
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
return vaddr_start;
@@ -1516,7 +1562,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
while (npages--) {
virt_pg_map(vm, vaddr, paddr);
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
vaddr += page_size;
paddr += page_size;
@@ -1635,7 +1680,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
- vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ int r;
+
+ /*
+ * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+ * split model (x86 only) if that fails (KVM x86 allows compiling out
+ * support for KVM_CREATE_IRQCHIP).
+ */
+ r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
vm->has_irqchip = true;
}
@@ -1715,9 +1771,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
- page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
vcpu->dirty_gfns = addr;
vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -1958,9 +2013,9 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -2198,49 +2253,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
desc->name, size, ret);
}
-/*
- * Read the data of the named stat
- *
- * Input Args:
- * vm - the VM for which the stat should be read
- * stat_name - the name of the stat to read
- * max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- * data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements)
{
struct kvm_stats_desc *desc;
size_t size_desc;
int i;
- if (!vm->stats_fd) {
- vm->stats_fd = vm_get_stats_fd(vm);
- read_stats_header(vm->stats_fd, &vm->stats_header);
- vm->stats_desc = read_stats_descriptors(vm->stats_fd,
- &vm->stats_header);
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
}
- size_desc = get_stats_descriptor_size(&vm->stats_header);
+ size_desc = get_stats_descriptor_size(&stats->header);
- for (i = 0; i < vm->stats_header.num_desc; ++i) {
- desc = (void *)vm->stats_desc + (i * size_desc);
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
- if (strcmp(desc->name, stat_name))
+ if (strcmp(desc->name, name))
continue;
- read_stat_data(vm->stats_fd, &vm->stats_header, desc,
- data, max_elements);
-
- break;
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
}
+
+ TEST_FAIL("Unable to find stat '%s'", name);
+}
+
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
}
-__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}
@@ -2248,11 +2296,35 @@ __weak void kvm_selftest_arch_init(void)
{
}
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+ case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+ TEST_FAIL("Unexpected signal %d\n", signum);
+ }
+}
+
void __attribute((constructor)) kvm_selftest_init(void)
{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
@@ -2273,3 +2345,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
pg = paddr >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..3f1e4b67c5ae
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..07c103369ddb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include <asm/kvm.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static vm_paddr_t invalid_pgtable[4];
+static vm_vaddr_t exception_handlers;
+
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ unsigned int shift;
+ uint64_t mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child)
+{
+ uint64_t *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ vm_paddr_t child, table;
+
+ if (vm->pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->pgd = table;
+ vm->pgd_created = true;
+}
+
+static int virt_pte_none(uint64_t *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
+{
+ int level;
+ uint64_t *ptep;
+ vm_paddr_t child;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ child = vm->pgd;
+ level = vm->pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint32_t prot_bits;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, vaddr, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, paddr | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+ uint64_t pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level;
+
+ if (!vm->pgd_created)
+ return;
+
+ level = vm->pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ int vector;
+ unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, uint64_t);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* kernel mode and page enable mode */
+ val = PLV_KERN | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ /* time count start from 0 */
+ val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
+ width = vm->page_shift - 3;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_vaddr_alloc(vm, vm->page_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_vaddr + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (uint64_t)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..fc6cbb50573f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+ TEST_ASSERT(false, "found malformed line after 'memcg ...',"
+ "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * If lru_gen aging should force page table scanning.
+ *
+ * If you want to set this to false, you will need to do eviction
+ * before doing extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+ size_t sz;
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find which generation contains at least @pages pages, assuming that
+ * such a generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 313277486a1d..557c0a0a5658 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data)
int vcpu_idx = vcpu->vcpu_idx;
if (memstress_args.pin_vcpus)
- kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+ pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
WRITE_ONCE(vcpu->running, true);
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
index aa0abd3f35bb..b787b982e922 100644
--- a/tools/testing/selftests/kvm/lib/riscv/handlers.S
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -10,85 +10,88 @@
#include <asm/csr.h>
.macro save_context
- addi sp, sp, (-8*34)
- sd x1, 0(sp)
- sd x2, 8(sp)
- sd x3, 16(sp)
- sd x4, 24(sp)
- sd x5, 32(sp)
- sd x6, 40(sp)
- sd x7, 48(sp)
- sd x8, 56(sp)
- sd x9, 64(sp)
- sd x10, 72(sp)
- sd x11, 80(sp)
- sd x12, 88(sp)
- sd x13, 96(sp)
- sd x14, 104(sp)
- sd x15, 112(sp)
- sd x16, 120(sp)
- sd x17, 128(sp)
- sd x18, 136(sp)
- sd x19, 144(sp)
- sd x20, 152(sp)
- sd x21, 160(sp)
- sd x22, 168(sp)
- sd x23, 176(sp)
- sd x24, 184(sp)
- sd x25, 192(sp)
- sd x26, 200(sp)
- sd x27, 208(sp)
- sd x28, 216(sp)
- sd x29, 224(sp)
- sd x30, 232(sp)
- sd x31, 240(sp)
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
csrr s0, CSR_SEPC
csrr s1, CSR_SSTATUS
- csrr s2, CSR_SCAUSE
- sd s0, 248(sp)
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
sd s1, 256(sp)
sd s2, 264(sp)
+ sd s3, 272(sp)
.endm
.macro restore_context
+ ld s3, 272(sp)
ld s2, 264(sp)
ld s1, 256(sp)
- ld s0, 248(sp)
- csrw CSR_SCAUSE, s2
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
csrw CSR_SSTATUS, s1
csrw CSR_SEPC, s0
- ld x31, 240(sp)
- ld x30, 232(sp)
- ld x29, 224(sp)
- ld x28, 216(sp)
- ld x27, 208(sp)
- ld x26, 200(sp)
- ld x25, 192(sp)
- ld x24, 184(sp)
- ld x23, 176(sp)
- ld x22, 168(sp)
- ld x21, 160(sp)
- ld x20, 152(sp)
- ld x19, 144(sp)
- ld x18, 136(sp)
- ld x17, 128(sp)
- ld x16, 120(sp)
- ld x15, 112(sp)
- ld x14, 104(sp)
- ld x13, 96(sp)
- ld x12, 88(sp)
- ld x11, 80(sp)
- ld x10, 72(sp)
- ld x9, 64(sp)
- ld x8, 56(sp)
- ld x7, 48(sp)
- ld x6, 40(sp)
- ld x5, 32(sp)
- ld x4, 24(sp)
- ld x3, 16(sp)
- ld x2, 8(sp)
- ld x1, 0(sp)
- addi sp, sp, (8*34)
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
.endm
.balign 4
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index dd663bcf0cc0..2eac7d4b59e9 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -402,7 +402,7 @@ struct handlers {
exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
-void route_exception(struct ex_regs *regs)
+void route_exception(struct pt_regs *regs)
{
struct handlers *handlers = (struct handlers *)exception_handlers;
int vector = 0, ec;
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..8ceeb17c819a 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index cfed9d26cc71..a99188f87a38 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -116,7 +116,7 @@
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*
@@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*/
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8ed0b74ae837..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,6 +18,13 @@
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
/*
* Random number generator that is usable from guest code. This is the
* Park-Miller LCG using standard constants.
@@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-bool thp_configured(void)
+static bool test_sysfs_path(const char *path)
{
- int ret;
struct stat statbuf;
+ int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ ret = stat(path, &statbuf);
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "Error in stating /sys/kernel/mm/transparent_hugepage");
+ "Error in stat()ing '%s'", path);
return ret == 0;
}
-size_t get_trans_hugepagesz(void)
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
{
size_t size;
FILE *f;
int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
-
- f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
- TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
ret = fscanf(f, "%ld", &size);
- TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
- fclose(f);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f);
return size;
}
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
size_t get_def_hugetlb_pagesz(void)
{
char buf[64];
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..5bde176cedd5 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
- is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 7f5d62a65c68..0b1f288ad556 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
{
uint64_t start, end;
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
/*
* Identity map the first 4G and the test region with 1G pages so that
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
index f31f0427c17c..34cb57d1d671 100644
--- a/tools/testing/selftests/kvm/lib/x86/pmu.c
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include "kvm_util.h"
+#include "processor.h"
#include "pmu.h"
const uint64_t intel_pmu_arch_events[] = {
@@ -19,6 +20,11 @@ const uint64_t intel_pmu_arch_events[] = {
INTEL_ARCH_BRANCHES_RETIRED,
INTEL_ARCH_BRANCHES_MISPREDICTED,
INTEL_ARCH_TOPDOWN_SLOTS,
+ INTEL_ARCH_TOPDOWN_BE_BOUND,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC,
+ INTEL_ARCH_TOPDOWN_FE_BOUND,
+ INTEL_ARCH_TOPDOWN_RETIRING,
+ INTEL_ARCH_LBR_INSERTS,
};
kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
@@ -29,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = {
AMD_ZEN_BRANCHES_MISPREDICTED,
};
kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+
+/*
+ * For Intel Atom CPUs, the PMU events "Instruction Retired" or
+ * "Branch Instruction Retired" may be overcounted for some certain
+ * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
+ * and complex SGX/SMX/CSTATE instructions/flows.
+ *
+ * The detailed information can be found in the errata (section SRF7):
+ * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
+ *
+ * For the Atom platforms before Sierra Forest (including Sierra Forest),
+ * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
+ * be overcounted on these certain instructions, but for Clearwater Forest
+ * only "Instruction Retired" event is overcounted on these instructions.
+ */
+static uint64_t get_pmu_errata(void)
+{
+ if (!this_cpu_is_intel())
+ return 0;
+
+ if (this_cpu_family() != 0x6)
+ return 0;
+
+ switch (this_cpu_model()) {
+ case 0xDD: /* Clearwater Forest */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
+ case 0xAF: /* Sierra Forest */
+ case 0x4D: /* Avaton, Rangely */
+ case 0x5F: /* Denverton */
+ case 0x86: /* Jacobsville */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
+ BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
+ default:
+ return 0;
+ }
+}
+
+uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void)
+{
+ pmu_errata_mask = get_pmu_errata();
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bd5a802fa7a5..36104d27f3d9 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -6,6 +6,7 @@
#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
+#include "pmu.h"
#include "processor.h"
#include "sev.h"
@@ -23,6 +24,39 @@ bool host_cpu_is_intel;
bool is_forced_emulation_enabled;
uint64_t guest_tsc_khz;
+const char *ex_str(int vector)
+{
+ switch (vector) {
+#define VEC_STR(v) case v##_VECTOR: return "#" #v
+ case DE_VECTOR: return "no exception";
+ case KVM_MAGIC_DE_VECTOR: return "#DE";
+ VEC_STR(DB);
+ VEC_STR(NMI);
+ VEC_STR(BP);
+ VEC_STR(OF);
+ VEC_STR(BR);
+ VEC_STR(UD);
+ VEC_STR(NM);
+ VEC_STR(DF);
+ VEC_STR(TS);
+ VEC_STR(NP);
+ VEC_STR(SS);
+ VEC_STR(GP);
+ VEC_STR(PF);
+ VEC_STR(MF);
+ VEC_STR(AC);
+ VEC_STR(MC);
+ VEC_STR(XM);
+ VEC_STR(VE);
+ VEC_STR(CP);
+ VEC_STR(HV);
+ VEC_STR(VC);
+ VEC_STR(SX);
+ default: return "#??";
+#undef VEC_STR
+ }
+}
+
static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
@@ -124,10 +158,10 @@ bool kvm_is_tdp_enabled(void)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
- /* If needed, create page map l4 table. */
+ /* If needed, create the top-level page table. */
if (!vm->pgd_created) {
vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
@@ -184,11 +218,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % pg_size) == 0,
"Virtual address not aligned,\n"
@@ -209,20 +243,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, pte, vaddr, paddr,
+ current_level, level);
+ if (*pte & PTE_LARGE_MASK)
+ return;
+ }
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -255,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
for (i = 0; i < nr_pages; i++) {
__virt_pg_map(vm, vaddr, paddr, level);
+ sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
+ nr_bytes / PAGE_SIZE);
vaddr += pg_size;
paddr += pg_size;
@@ -276,40 +309,38 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
int *level)
{
- uint64_t *pml4e, *pdpe, *pde;
+ int va_width = 12 + (vm->pgtable_levels) * 9;
+ uint64_t *pte = &vm->pgd;
+ int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
/*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
+ * Check that the vaddr is a sign-extended va_width value.
*/
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
-
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
-
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
+ TEST_ASSERT(vaddr ==
+ (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = vm->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, pte, vaddr, current_level);
+ if (vm_is_target_pte(pte, level, current_level))
+ return pte;
+ }
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K);
}
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
@@ -492,7 +523,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
@@ -506,6 +538,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
@@ -557,7 +591,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
return false;
if (regs->vector == DE_VECTOR)
- return false;
+ regs->vector = KVM_MAGIC_DE_VECTOR;
regs->rip = regs->r11;
regs->r9 = regs->vector;
@@ -625,7 +659,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
}
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
int r;
@@ -638,8 +672,9 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
sync_global_to_guest(vm, host_cpu_is_intel);
sync_global_to_guest(vm, host_cpu_is_amd);
sync_global_to_guest(vm, is_forced_emulation_enabled);
+ sync_global_to_guest(vm, pmu_errata_mask);
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
struct kvm_sev_init init = { 0 };
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
@@ -1156,7 +1191,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
@@ -1264,21 +1299,13 @@ done:
return min(max_gfn, ht_gfn - 1);
}
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- /* Ensure that a KVM vendor-specific module is loaded. */
- if (vm == NULL)
- close(open_kvm_dev_path_or_exit());
-
- return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ kvm_init_pmu_errata();
}
bool sys_clocksource_is_based_on_tsc(void)
@@ -1291,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void)
return ret;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..c3a9838f4806 100644
--- a/tools/testing/selftests/kvm/lib/x86/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -14,7 +14,8 @@
* and find the first range, but that's correct because the condition
* expression would cause us to quit the loop.
*/
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ uint8_t page_type, bool private)
{
const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
const vm_paddr_t gpa_base = region->region.guest_phys_addr;
@@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
if (!sparsebit_any_set(protected_phy_pages))
return;
- sev_register_encrypted_memory(vm, region);
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
sparsebit_for_each_set_range(protected_phy_pages, i, j) {
const uint64_t size = (j - i + 1) * vm->page_size;
const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
- sev_launch_update_data(vm, gpa_base + offset, size);
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (uint64_t)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
+
}
}
void sev_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
@@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm)
void sev_es_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
if (policy & SEV_POLICY_ES)
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
@@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
return vm;
}
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement)
{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
sev_vm_launch(vm, policy);
if (!measurement)
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d4d1208dd023..29b082a58daa 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -401,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
uint16_t index;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
+ "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
nested_paddr);
TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
@@ -534,8 +534,7 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm)
{
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..355ecac30954
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates periodic/one-shot constant timer IRQ using
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+ /* Make sure pending timer IRQ arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ uint32_t cpu = guest_get_vcpuid();
+ uint64_t xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1
+ * On virtual machine, its value counts down from BIT_ULL(48) - 1
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+ " Guest period timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(uint32_t cpu)
+{
+ uint32_t config_iter;
+ uint64_t xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(uint32_t cpu)
+{
+ uint32_t config_iter;
+ unsigned long start, end, prev, us;
+
+ /* Assuming that test case starts to run in 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+ /* test time count growing up always */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ /* must run at first */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index c81a84990eab..3cdfa3b19b85 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -22,6 +22,7 @@
#include "processor.h"
#include "test_util.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define DUMMY_MEMSLOT_INDEX 7
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index e3711beff7f3..5087d082c4b0 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -25,6 +25,7 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <ucall_common.h>
#define MEM_EXTRA_SIZE SZ_64K
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index d9c76b4c0d88..51c070556f3e 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -18,6 +18,7 @@
#include "ucall_common.h"
static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
@@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
/*
* Write to the region while mprotect(PROT_READ) is underway. Keep
- * looping until the memory is guaranteed to be read-only, otherwise
- * vCPUs may complete their writes and advance to the next stage
- * prematurely.
+ * looping until the memory is guaranteed to be read-only and a fault
+ * has occurred, otherwise vCPUs may complete their writes and advance
+ * to the next stage prematurely.
*
* For architectures that support skipping the faulting instruction,
* generate the store via inline assembly to ensure the exact length
@@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
#else
vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
#endif
- } while (!READ_ONCE(mprotect_ro_done));
+ } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
/*
* Only architectures that write the entire range can explicitly sync,
@@ -81,6 +82,7 @@ struct vcpu_info {
static int nr_vcpus;
static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
static void rendezvous_with_boss(void)
{
@@ -148,12 +150,16 @@ static void *vcpu_worker(void *data)
* be stuck on the faulting instruction for other architectures. Go to
* stage 3 without a rendezvous
*/
- do {
- r = _vcpu_run(vcpu);
- } while (!r);
+ r = _vcpu_run(vcpu);
TEST_ASSERT(r == -1 && errno == EFAULT,
"Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+ atomic_inc(&nr_ro_faults);
+ if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+ WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+ sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+ }
+
#if defined(__x86_64__) || defined(__aarch64__)
/*
* Verify *all* writes from the guest hit EFAULT due to the VMA now
@@ -257,8 +263,10 @@ static void calc_default_nr_vcpus(void)
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
errno, strerror(errno));
- nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ nr_vcpus = CPU_COUNT(&possible_mask);
TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
}
int main(int argc, char *argv[])
@@ -333,8 +341,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
- mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
@@ -355,11 +362,9 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
- for (i = 0; i < slot_size; i += SZ_1G)
- __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm->page_size)
- virt_pg_map(vm, gpa + i, gpa + i);
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
#endif
}
@@ -378,7 +383,6 @@ int main(int argc, char *argv[])
rendezvous_with_vcpus(&time_run2, "run 2");
mprotect(mem, slot_size, PROT_READ);
- usleep(10);
mprotect_ro_done = true;
sync_global_to_guest(vm, mprotect_ro_done);
@@ -408,7 +412,7 @@ int main(int argc, char *argv[])
for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
- munmap(mem, slot_size / 2);
+ kvm_munmap(mem, slot_size / 2);
/* Sanity check that the vCPUs actually ran. */
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..93e603d91311 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,19 +10,20 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <pthread.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
#define TEST_SLOT 10
-static void guest_code(uint64_t base_gpa)
+static void guest_code(uint64_t base_gva)
{
volatile uint64_t val __used;
int i;
for (i = 0; i < TEST_NPAGES; i++) {
- uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+ uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
val = *src;
}
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
GUEST_DONE();
}
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
- u64 left)
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ u64 gpa;
+ uint32_t flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
{
struct kvm_pre_fault_memory range = {
- .gpa = gpa,
+ .gpa = base_gpa + offset,
.size = size,
.flags = 0,
};
- u64 prev;
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
+
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
- do {
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
prev = range.size;
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
save_errno = errno;
@@ -49,22 +98,70 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
"%sexpecting range.size to change on %s",
ret < 0 ? "not " : "",
ret < 0 ? "failure" : "success");
- } while (ret >= 0 ? range.size : save_errno == EINTR);
- TEST_ASSERT(range.size == left,
- "Completed with %lld bytes left, expected %" PRId64,
- range.size, left);
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
- if (left == 0)
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
else
- /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
- __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
- "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
{
+ uint64_t gpa, gva, alignment, guest_page_size;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
.type = vm_type,
@@ -74,34 +171,30 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
struct kvm_vm *vm;
struct ucall uc;
- uint64_t guest_test_phys_mem;
- uint64_t guest_test_virt_mem;
- uint64_t alignment, guest_page_size;
-
vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
- guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
#ifdef __s390x__
alignment = max(0x100000UL, guest_page_size);
#else
alignment = SZ_2M;
#endif
- guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
- guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
- private ? KVM_MEM_GUEST_MEMFD : 0);
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
if (private)
- vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
- pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
+
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
- vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 9e370800a6a2..f962fefc48fa 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -15,7 +15,7 @@
static int timer_irq = IRQ_S_TIMER;
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
uint64_t xcnt, xcnt_diff_us, cmp;
unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index cfed6c727bfc..739d17befb5a 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -27,7 +27,7 @@ static void guest_code(void)
GUEST_DONE();
}
-static void guest_breakpoint_handler(struct ex_regs *regs)
+static void guest_breakpoint_handler(struct pt_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8515921dfdbf..cb54a56990a0 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -17,6 +17,15 @@ enum {
VCPU_FEATURE_SBI_EXT,
};
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
@@ -53,8 +62,10 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
@@ -69,9 +80,11 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
@@ -92,6 +105,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
@@ -117,6 +132,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
return true;
@@ -141,6 +158,38 @@ bool check_reject_set(int err)
return err == EINVAL;
}
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ uint64_t feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -170,6 +219,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
if (!s->feature)
continue;
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
switch (s->feature_type) {
case VCPU_FEATURE_ISA_EXT:
feature = RISCV_ISA_EXT_REG(s->feature);
@@ -184,6 +240,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Try to enable the desired extension */
__vcpu_set_reg(vcpu, feature, 1);
+skip:
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
"%s not available, skipping tests", s->name);
@@ -204,6 +261,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)";
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
case KVM_REG_RISCV_CONFIG_REG(marchid):
@@ -408,6 +467,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
@@ -434,8 +522,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
@@ -450,9 +540,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZCF),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFBFMIN),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOP),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
@@ -473,6 +565,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZTSO),
KVM_ISA_EXT_ARR(ZVBB),
KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFBFMIN),
+ KVM_ISA_EXT_ARR(ZVFBFWMA),
KVM_ISA_EXT_ARR(ZVFH),
KVM_ISA_EXT_ARR(ZVFHMIN),
KVM_ISA_EXT_ARR(ZVKB),
@@ -545,6 +639,8 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
};
@@ -601,6 +697,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off)
return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off);
}
+static const char *sbi_fwft_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)";
+ case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)";
+ case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)";
+ case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)";
+ case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)";
+ case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off);
+}
+
static const char *sbi_id_to_str(const char *prefix, __u64 id)
{
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE);
@@ -613,6 +722,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id)
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_STA:
return sbi_sta_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_FWFT:
+ return sbi_fwft_id_to_str(reg_off);
}
return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
@@ -635,6 +746,9 @@ void print_reg(const char *prefix, __u64 id)
case KVM_REG_SIZE_U128:
reg_size = "KVM_REG_SIZE_U128";
break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
default:
printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
(id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
@@ -666,6 +780,10 @@ void print_reg(const char *prefix, __u64 id)
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
reg_size, fp_d_id_to_str(prefix, id));
break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
reg_size, isa_ext_id_to_str(prefix, id));
@@ -691,10 +809,13 @@ void print_reg(const char *prefix, __u64 id)
*/
static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
@@ -770,11 +891,26 @@ static __u64 sbi_sta_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
};
+static __u64 sbi_fwft_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
+};
+
static __u64 zicbom_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM,
};
+static __u64 zicbop_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP,
+};
+
static __u64 zicboz_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ,
@@ -870,6 +1006,48 @@ static __u64 fp_d_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
};
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
@@ -879,8 +1057,13 @@ static __u64 fp_d_regs[] = {
#define SUBLIST_SBI_STA \
{"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
.regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
+#define SUBLIST_SBI_FWFT \
+ {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \
+ .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),}
#define SUBLIST_ZICBOM \
{"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define SUBLIST_ZICBOP \
+ {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),}
#define SUBLIST_ZICBOZ \
{"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
#define SUBLIST_AIA \
@@ -894,6 +1077,9 @@ static __u64 fp_d_regs[] = {
{"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
.regs_n = ARRAY_SIZE(fp_d_regs),}
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
static __u64 regs_##ext[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
@@ -958,10 +1144,13 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
+KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
@@ -974,8 +1163,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
@@ -990,9 +1181,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP);
KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
@@ -1013,6 +1206,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
@@ -1030,10 +1225,13 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_pmu,
&config_sbi_dbcn,
&config_sbi_susp,
+ &config_sbi_mpxy,
+ &config_sbi_fwft,
&config_aia,
&config_fp_f,
&config_fp_d,
&config_h,
+ &config_v,
&config_smnpm,
&config_smstateen,
&config_sscofpmf,
@@ -1045,8 +1243,10 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svnapot,
&config_svpbmt,
&config_svvptc,
+ &config_zaamo,
&config_zabha,
&config_zacas,
+ &config_zalrsc,
&config_zawrs,
&config_zba,
&config_zbb,
@@ -1061,9 +1261,11 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zcf,
&config_zcmop,
&config_zfa,
+ &config_zfbfmin,
&config_zfh,
&config_zfhmin,
&config_zicbom,
+ &config_zicbop,
&config_zicboz,
&config_ziccrse,
&config_zicntr,
@@ -1084,6 +1286,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_ztso,
&config_zvbb,
&config_zvbc,
+ &config_zvfbfmin,
+ &config_zvfbfwma,
&config_zvfh,
&config_zvfhmin,
&config_zvkb,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f45c0ecc902d..924a335d2262 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
unsigned long pmu_csr_read_num(int csr_num)
{
@@ -67,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num)
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
- switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
@@ -118,26 +123,44 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
- __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
- counter, ret.error);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
}
-static void guest_illegal_exception_handler(struct ex_regs *regs)
+static void guest_illegal_exception_handler(struct pt_regs *regs)
{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
- unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +174,6 @@ static void guest_irq_handler(struct ex_regs *regs)
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
- counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
- /* Now start the counter to mimick the real driver behavior */
- start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -479,7 +498,7 @@ static void test_pmu_events_snaphost(void)
static void test_pmu_events_overflow(void)
{
- int num_counters = 0;
+ int num_counters = 0, i = 0;
/* Verify presence of SBI PMU and minimum requrired SBI version */
verify_sbi_requirement_assert();
@@ -496,11 +515,15 @@ static void test_pmu_events_overflow(void)
* Qemu supports overflow for cycle/instruction.
* This test may fail on any platform that do not support overflow for these two events.
*/
- test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
- test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+ vcpu_shared_irq_count = 0;
+
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
GUEST_DONE();
}
@@ -609,7 +632,11 @@ static void test_vm_events_overflow(void *guest_code)
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
run_vcpu(vcpu);
@@ -618,28 +645,38 @@ static void test_vm_events_overflow(void *guest_code)
static void test_print_help(char *name)
{
- pr_info("Usage: %s [-h] [-d <test name>]\n", name);
- pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
- while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
switch (opt) {
- case 'd':
+ case 't':
if (!strncmp("basic", optarg, 5))
- disabled_tests |= SBI_PMU_TEST_BASIC;
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
- disabled_tests |= SBI_PMU_TEST_EVENTS;
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
else
goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
break;
case 'h':
default:
@@ -647,6 +684,15 @@ static bool parse_args(int argc, char *argv[])
}
}
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
return true;
done:
test_print_help(argv[0]);
@@ -655,25 +701,28 @@ done:
int main(int argc, char *argv[])
{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..1375fca80bcd 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,25 +196,27 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -243,6 +245,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +294,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +312,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index 85cc8c18d6e7..e39a724fe860 100644
--- a/tools/testing/selftests/kvm/s390/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm)
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, 0);
}
static struct kvm_vm *create_vm_two_memslots(void)
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
index 27255880dabd..aded795d42be 100644
--- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
@@ -291,7 +291,7 @@ int main(int argc, char *argv[])
ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
free(array);
} else {
- ksft_test_result_skip("%s feature is not avaialable\n",
+ ksft_test_result_skip("%s feature is not available\n",
testlist[idx].subfunc_name);
}
}
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..50bc1c38225a 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
/**
* For virtual cpus that have been created with S390 user controlled
* virtual machines, the resulting vcpu fd can be memory mapped at page
* offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
* the virtual cpu's hardware control block.
*/
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
TH_LOG("VM created %p %p", self->run, self->sie_block);
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
FIXTURE_TEARDOWN(uc_kvm)
{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
close(self->vcpu_fd);
close(self->vm_fd);
close(self->kvm_fd);
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Since user_operexec is the superset it can be used for the
+ * 0 instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enablement after VCPU has been added is automatically tested since
+ * we enable the capability after VCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bc440d5aba57..7fe427ff9b38 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __riscv || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
supported_flags |= KVM_MEM_READONLY;
#endif
@@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
for (slot = 0; slot < max_mem_slots; slot++)
@@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void)
mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
/* Check it cannot be added memory slots beyond the limit */
- mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
(uint64_t)max_mem_slots * MEM_REGION_SIZE,
@@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
- munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index cce2520af720..8edc1fca345b 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -118,7 +118,7 @@ static int64_t smccc(uint32_t func, uint64_t arg)
{
struct arm_smccc_res res;
- smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..8b15a13df939
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
+{
+ uint64_t data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_read_aperf_mperf();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+ * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel.
+ */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+ guest_read_aperf_mperf();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(nested_test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(nested_test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+ uint64_t msr_val;
+ uint8_t vector;
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ uint64_t host_aperf_before, host_mperf_before;
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd, cpu, i;
+
+ /* Sanity check that APERF/MPERF are unsupported by default. */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ kvm_vm_free(vm);
+
+ cpu = pin_self_to_any_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (!has_nested)
+ nested_test_data_gva = NONCANONICAL;
+ else if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+ uint64_t host_aperf_after, host_mperf_after;
+ uint64_t guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..b0d2b04a7ff2 100644
--- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -41,9 +41,9 @@ struct kvm_page_stats {
static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
stats->hugepages = stats->pages_2m + stats->pages_1g;
pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..8926cfe0e209
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define fastop(__insn) \
+ "bt $0, %[bt_val]\n\t" \
+ __insn "\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t"
+
+#define flags_constraint(flags_val) [flags]"=r"(flags_val)
+#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val)
+
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[val]") \
+ : [val]"+r"(__val), flags_constraint(__flags) \
+ : bt_constraint(__val) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : [input]"r"(__input), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : "c"(__shift), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \
+ : "+a"(__a), "+d"(__d), flags_constraint(__flags), \
+ KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : [denom]"rm"(__rm), bt_constraint(__rm) \
+ : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define guest_test_fastop_div(insn, type_t, __val1, __val2) \
+({ \
+ type_t _a = __val1, _d = __val1, rm = __val2; \
+ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \
+ uint64_t flags, ex_flags; \
+ uint8_t v, ex_v; \
+ \
+ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \
+ v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \
+ \
+ GUEST_ASSERT_EQ(v, ex_v); \
+ __GUEST_ASSERT(v == ex_v, \
+ "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \
+ ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \
+ __GUEST_ASSERT(a == ex_a && d == ex_d, \
+ "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\
+ (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \
+ (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \
+ __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \
+ "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\
+})
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+if (sizeof(type_t) != 1) { \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+} \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint8_t, "b");
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..3c21af811d8f 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
{
}
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -64,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
- "function %x is our of supported range",
+ "function %x is out of supported range",
entry->function);
TEST_ASSERT(entry->index == 0,
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
case 0x40000004:
test_val = entry->eax & (1UL << 18);
- TEST_ASSERT(!!test_val == !smt_possible(),
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..130b9ce7e5dd 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr)
if (msr->fault_expected)
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected #GP on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
else
__GUEST_ASSERT(!vector,
- "Expected success on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected success on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
if (vector || is_write_only_msr(msr->idx))
goto done;
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
- output = pgs_gpa + 4096;
+ output = pgs_gpa + PAGE_SIZE;
} else {
input = output = 0;
}
@@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
vector = __hyperv_hypercall(hcall->control, input, output, &res);
if (hcall->ud_expected) {
__GUEST_ASSERT(vector == UD_VECTOR,
- "Expected #UD for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected #UD for control '%lu', got %s",
+ hcall->control, ex_str(vector));
} else {
__GUEST_ASSERT(!vector,
- "Expected no exception for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected no exception for control '%lu', got %s",
+ hcall->control, ex_str(vector));
GUEST_ASSERT_EQ(res, hcall->expect);
}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..ca61836c4e32 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* Signal sender vCPU we're ready */
ipis_rcvd[vcpu_id] = (u64)-1;
- for (;;)
- asm volatile("sti; hlt; cli");
+ for (;;) {
+ safe_halt();
+ cli();
+ }
}
static void guest_ipi_handler(struct ex_regs *regs)
@@ -100,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
ipi->vector = IPI_VECTOR;
ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -114,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 0;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -136,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -158,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -181,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..a3b7ce155981 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -621,7 +621,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NTEST_PAGES; i++) {
pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
}
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..d88500c118eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..e45c028d2a7e 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -27,19 +30,27 @@ do { \
\
if (fault_wanted) \
__GUEST_ASSERT((vector) == UD_VECTOR, \
- "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected #UD on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
else \
__GUEST_ASSERT(!(vector), \
- "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected success on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,65 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..40d918aedce6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+ const struct kvm_x86_cpu_feature feature;
+ const struct kvm_x86_cpu_feature feature2;
+ const char *name;
+ const u64 reset_val;
+ const u64 write_val;
+ const u64 rsvd_val;
+ const u32 index;
+ const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \
+{ \
+ .index = msr, \
+ .name = str, \
+ .write_val = val, \
+ .rsvd_val = rsvd, \
+ .reset_val = reset, \
+ .feature = X86_FEATURE_ ##feat, \
+ .feature2 = X86_FEATURE_ ##f2, \
+ .is_kvm_defined = is_kvm, \
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \
+ ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2) \
+ ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set. This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat) \
+ __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat) \
+ ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features. For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+ /*
+ * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM
+ * is supposed to emulate that behavior based on guest vendor model
+ * (which is the same as the host vendor model for this test).
+ */
+ if (!host_cpu_is_amd)
+ return want;
+
+ switch (msr) {
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_TSC_AUX:
+ return want & GENMASK_ULL(31, 0);
+ default:
+ return want;
+ }
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+ u64 val;
+ u8 vec;
+
+ vec = rdmsr_safe(msr, &val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+ __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+ want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+ u8 vec;
+
+ vec = wrmsr_safe(msr, val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+ ex_str(vec), msr, val);
+ __rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+ __rdmsr(msr->index, msr->reset_val);
+ __wrmsr(msr->index, msr->write_val);
+ GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+ __rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+ u64 val;
+ u8 vec;
+
+ /*
+ * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+ * repair, just skip the unsupported MSR tests.
+ */
+ if (ignore_unsupported_msrs)
+ goto skip_wrmsr_gp;
+
+ /*
+ * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+ * writable only if their associated feature is supported. Skip the
+ * RDMSR #GP test if the secondary feature is supported, but perform
+ * the WRMSR #GP test as the to-be-written value is tied to the primary
+ * feature. For all other MSRs, simply do nothing.
+ */
+ if (this_cpu_has(msr->feature2)) {
+ if (msr->index != MSR_IA32_U_CET &&
+ msr->index != MSR_IA32_S_CET)
+ goto skip_wrmsr_gp;
+
+ goto skip_rdmsr_gp;
+ }
+
+ vec = rdmsr_safe(msr->index, &val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+ msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+ vec = wrmsr_safe(msr->index, msr->write_val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+ GUEST_SYNC(0);
+}
+
+void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+ /* Skip reserved value checks as well, ignore_msrs is trully a mess. */
+ if (ignore_unsupported_msrs)
+ return;
+
+ /*
+ * If the CPU will truncate the written value (e.g. SYSENTER on AMD),
+ * expect success and a truncated value, not #GP.
+ */
+ if (!this_cpu_has(msr->feature) ||
+ msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+ u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+ __GUEST_ASSERT(vec == GP_VECTOR,
+ "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->rsvd_val, ex_str(vec));
+ } else {
+ __wrmsr(msr->index, msr->rsvd_val);
+ __wrmsr(msr->index, msr->reset_val);
+ }
+}
+
+static void guest_main(void)
+{
+ for (;;) {
+ const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+ if (this_cpu_has(msr->feature))
+ guest_test_supported_msr(msr);
+ else
+ guest_test_unsupported_msr(msr);
+
+ if (msr->rsvd_val)
+ guest_test_reserved_val(msr);
+
+ GUEST_SYNC(msr->reset_val);
+ }
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS 1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct {
+ struct kvm_reg_list list;
+ u64 regs[KVM_X86_MAX_NR_REGS];
+ } regs = {};
+ int r, i;
+
+ /*
+ * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+ * regs, then the vCPU obviously doesn't support the reg.
+ */
+ r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ if (!r)
+ return false;
+
+ TEST_ASSERT_EQ(errno, E2BIG);
+
+ /*
+ * KVM x86 is expected to support enumerating a relative small number
+ * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG
+ * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For
+ * simplicity, hardcode the maximum number of regs and manually update
+ * the test as necessary.
+ */
+ TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+ "KVM reports %llu regs, test expects at most %u regs, stale test?",
+ regs.list.n, KVM_X86_MAX_NR_REGS);
+
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ for (i = 0; i < regs.list.n; i++) {
+ if (regs.regs[i] == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+ bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+ u64 reset_val = msrs[idx].reset_val;
+ u64 write_val = msrs[idx].write_val;
+ u64 rsvd_val = msrs[idx].rsvd_val;
+ u32 reg = msrs[idx].index;
+ u64 val;
+ int r;
+
+ if (!use_one_reg)
+ return;
+
+ TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+ if (!has_reg) {
+ r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Expected failure on get_reg(0x%x)", reg);
+ rsvd_val = 0;
+ goto out;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, reg, val);
+
+ vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ write_val, reg, val);
+
+out:
+ r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+ TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+ u64 reset_val = msrs[idx].reset_val;
+ u32 msr = msrs[idx].index;
+ u64 val;
+
+ if (!kvm_cpu_has(msrs[idx].feature))
+ return;
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ guest_val, msr, val);
+
+ if (use_one_reg)
+ vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+ else
+ vcpu_set_msr(vcpu, msr, reset_val);
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ reset_val, msr, val);
+
+ if (!has_one_reg)
+ return;
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ host_test_msr(vcpu, uc.args[1]);
+ return;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_DONE:
+ TEST_FAIL("Unexpected UCALL_DONE");
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+ int i;
+
+ for (i = 0; i < NR_VCPUS; i++)
+ do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+ const struct kvm_msr __msrs[] = {
+ MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+ MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+ MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+ MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+ /*
+ * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add
+ * entries for each features so that TSC_AUX doesn't exists for
+ * the "unsupported" vCPU, and obviously to test both cases.
+ */
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+ MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+ /*
+ * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+ * but KVM doesn't emulate that behavior on emulated writes,
+ * i.e. this test will observe different behavior if the MSR
+ * writes are handed by hardware vs. KVM. KVM's behavior is
+ * intended (though far from ideal), so don't bother testing
+ * non-canonical values.
+ */
+ MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+ MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+ MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+ MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+ MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+ MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+ MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+ };
+
+ const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+ const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+ /*
+ * Create three vCPUs, but run them on the same task, to validate KVM's
+ * context switching of MSR state. Don't pin the task to a pCPU to
+ * also validate KVM's handling of cross-pCPU migration. Use the full
+ * set of features for the first two vCPUs, but clear all features in
+ * third vCPU in order to test both positive and negative paths.
+ */
+ const int NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct kvm_vm *vm;
+ int i;
+
+ kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+ kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+ memcpy(msrs, __msrs, sizeof(__msrs));
+
+ ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+ sync_global_to_guest(vm, msrs);
+ sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+ /*
+ * Clear features in the "unsupported features" vCPU. This needs to be
+ * done before the first vCPU run as KVM's ABI is that guest CPUID is
+ * immutable once the vCPU has been run.
+ */
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ /*
+ * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+ * honor LM=0 for MSRs that are supposed to exist if and only
+ * if the vCPU is a 64-bit model. Ditto for NONE; clearing a
+ * fake feature flag will result in false failures.
+ */
+ if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+ memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+ vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ struct kvm_msr *msr = &msrs[idx];
+
+ if (msr->is_kvm_defined) {
+ for (i = 0; i < NR_VCPUS; i++)
+ host_test_kvm_reg(vcpus[i]);
+ continue;
+ }
+
+ /*
+ * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+ * are consistent with respect to MSRs whose existence is
+ * enumerated via CPUID. Skip the check for FS/GS.base MSRs,
+ * as they aren't reported in the save/restore list since their
+ * state is managed via SREGS.
+ */
+ TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+ kvm_msr_is_in_save_restore_list(msr->index) ==
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+ "%s %s in save/restore list, but %s according to CPUID", msr->name,
+ kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+ "supported" : "unsupported");
+
+ sync_global_to_guest(vm, idx);
+
+ vcpus_run(vcpus, NR_VCPUS);
+ vcpus_run(vcpus, NR_VCPUS);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+ test_msrs();
+
+ if (has_one_reg) {
+ use_one_reg = true;
+ test_msrs();
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..f001cb836bfa 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_close_while_nested
- *
* Copyright (C) 2019, Red Hat, Inc.
*
* Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
PORT_L0_EXIT = 0x2000,
};
+#define L2_GUEST_STACK_SIZE 64
+
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
: : [port] "d" (PORT_L0_EXIT) : "rax");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(0);
}
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
for (;;) {
volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ uint8_t opcode[15];
+ uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ uint32_t insn_len = get_instruction_length(insn);
+ uint32_t exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3eb0313ffa39..3641a42934ac 100644
--- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+ GUEST_ASSERT(!ctrl->int_state);
}
static void l1_svm_code(struct svm_test_data *svm)
@@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+ GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO));
}
static void l1_vmx_code(struct vmx_pages *vmx)
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..a6b6da9cf7fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..2839f650e5c9 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -35,6 +34,8 @@
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -72,42 +73,47 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
@@ -119,16 +125,19 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t nested_gva;
struct kvm_vcpu *vcpu;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..4260c9e4f489 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#include "kselftest.h"
/* L2 is scaled up (from L1's perspective) by this factor */
@@ -79,7 +80,30 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint32_t control;
@@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva = 0;
uint64_t tsc_start, tsc_end;
uint64_t tsc_khz;
@@ -129,7 +161,8 @@ int main(int argc, char *argv[])
uint64_t l1_tsc_freq = 0;
uint64_t l2_tsc_freq = 0;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..c0d84827f736 100644
--- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
int actual_pages_2m;
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
TEST_ASSERT(actual_pages_2m == expected_pages_2m,
"Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
int actual_splits;
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
TEST_ASSERT(actual_splits == expected_splits,
"Unexpected NX huge page split count. Expected %d, got %d",
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..3eaa216b96c0 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 3
+#define NUM_INSNS_PER_LOOP 6
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,10 +29,64 @@
/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
uint8_t pmu_version,
@@ -42,6 +96,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -98,22 +153,28 @@ static uint8_t guest_get_pmu_version(void)
* Sanity check that in all cases, the event doesn't count when it's disabled,
* and that KVM correctly emulates the write of an arbitrary value.
*/
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
uint64_t count;
count = _rdpmc(pmc);
- if (!this_pmu_has(event))
+ if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
break;
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
break;
case INTEL_ARCH_LLC_REFERENCES_INDEX:
case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -123,10 +184,15 @@ static void guest_assert_event_count(uint8_t idx,
fallthrough;
case INTEL_ARCH_CPU_CYCLES_INDEX:
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
+ case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
break;
default:
break;
@@ -160,77 +226,46 @@ do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
+ FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
:: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
- wrmsr(pmc_msr, 0); \
+ wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
static void guest_test_arch_event(uint8_t idx)
{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
@@ -248,7 +283,7 @@ static void guest_test_arch_event(uint8_t idx)
else
base_pmc_msr = MSR_IA32_PERFCTR0;
- gp_event = intel_event_to_feature[idx].gp_event;
+ gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
@@ -262,14 +297,14 @@ static void guest_test_arch_event(uint8_t idx)
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
- fixed_event = intel_event_to_feature[idx].fixed_event;
+ fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
@@ -277,7 +312,7 @@ static void guest_test_arch_event(uint8_t idx)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -294,7 +329,7 @@ static void guest_test_arch_events(void)
}
static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t length, uint8_t unavailable_mask)
+ uint8_t length, uint32_t unavailable_mask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -303,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
if (!pmu_version)
return;
+ unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
+ X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
+
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
pmu_version, perf_capabilities);
@@ -327,13 +365,13 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
- "Expected %s on " #insn "(0x%x), got vector %u", \
- expect_gp ? "#GP" : "no fault", msr, vector) \
+ "Expected %s on " #insn "(0x%x), got %s", \
+ expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected_val, \
+ __GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected_val, val);
+ msr, expected, val);
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
uint64_t expected_val)
@@ -545,7 +583,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void test_intel_counters(void)
{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
@@ -559,6 +596,26 @@ static void test_intel_counters(void)
};
/*
+ * To keep the total runtime reasonable, test only a handful of select,
+ * semi-arbitrary values for the mask of unavailable PMU events. Test
+ * 0 (all events available) and all ones (no events available) as well
+ * as alternating bit sequencues, e.g. to detect if KVM is checking the
+ * wrong bit(s).
+ */
+ const uint32_t unavailable_masks[] = {
+ 0x0,
+ 0xffffffffu,
+ 0xaaaaaaaau,
+ 0x55555555u,
+ 0xf0f0f0f0u,
+ 0x0f0f0f0fu,
+ 0xa0a0a0a0u,
+ 0x0a0a0a0au,
+ 0x50505050u,
+ 0x05050505u,
+ };
+
+ /*
* Test up to PMU v5, which is the current maximum version defined by
* Intel, i.e. is the last version that is guaranteed to be backwards
* compatible with KVM's existing behavior.
@@ -567,18 +624,26 @@ static void test_intel_counters(void)
/*
* Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
*/
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
/*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
*/
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -587,16 +652,7 @@ static void test_intel_counters(void)
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
- /*
- * To keep the total runtime reasonable, test every
- * possible non-zero, non-reserved bitmap combination
- * only with the native PMU version and the full bit
- * vector length.
- */
- if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
- }
+
/*
* Test single bits for all PMU version and lengths up
* the number of events +1 (to verify KVM doesn't do
@@ -604,12 +660,9 @@ static void test_intel_counters(void)
* host length). Explicitly test a mask of '0' and all
* ones i.e. all events being available and unavailable.
*/
- for (j = 0; j <= nr_arch_events + 1; j++) {
- test_arch_events(v, perf_caps[i], j, 0);
- test_arch_events(v, perf_caps[i], j, 0xff);
-
- for (k = 0; k < nr_arch_events; k++)
- test_arch_events(v, perf_caps[i], j, BIT(k));
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
+ for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
+ test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
}
pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..1c5b7611db24 100644
--- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
do { \
uint64_t br = pmc_results.branches_retired; \
uint64_t ir = pmc_results.instructions_retired; \
+ bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
+ br >= NUM_BRANCHES : br == NUM_BRANCHES; \
\
- if (br && br != NUM_BRANCHES) \
+ if (br && !br_matched) \
pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
__func__, br, NUM_BRANCHES); \
TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1969f4ab9b28 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
pthread_t threads[KVM_MAX_VCPUS];
struct kvm_vm *vm;
- int memfd, i, r;
+ int memfd, i;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
* should prevent the VM from being fully destroyed until the last
* reference to the guest_memfd is also put.
*/
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
close(memfd);
}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..b238615196ad 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,6 +28,7 @@
int kvm_fd;
u64 supported_vmsa_features;
bool have_sev_es;
+bool have_snp;
static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
{
@@ -83,6 +84,9 @@ void test_vm_types(void)
if (have_sev_es)
test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
test_init2_invalid(0, &(struct kvm_sev_init){},
"VM type is KVM_X86_DEFAULT_VM");
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
"sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+ "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
test_vm_types();
test_flags(KVM_X86_SEV_VM);
if (have_sev_es)
test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
test_features(KVM_X86_SEV_VM, 0);
if (have_sev_es)
test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index a1a688e75266..86ad1c7d068f 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -16,6 +16,18 @@
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+static void guest_snp_code(void)
+{
+ uint64_t sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
@@ -27,7 +39,7 @@ static void guest_sev_es_code(void)
* force "termination" to signal "done" via the GHCB MSR protocol.
*/
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
+ vmgexit();
}
static void guest_sev_code(void)
@@ -52,7 +64,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
bool bad = false;
for (i = 0; i < 4095; i++) {
if (from_host[i] != from_guest[i]) {
- printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ printf("mismatch at %u | %02hhx %02hhx\n",
+ i, from_host[i], from_guest[i]);
bad = true;
}
}
@@ -61,7 +74,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
abort();
}
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -71,7 +84,7 @@ static void test_sync_vmsa(uint32_t policy)
double x87val = M_PI;
struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
MEM_REGION_TEST_DATA);
hva = addr_gva2hva(vm, gva);
@@ -88,10 +101,10 @@ static void test_sync_vmsa(uint32_t policy)
: "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
vcpu_xsave_set(vcpu, &xsave);
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+ vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
+ memset(hva, 0, PAGE_SIZE);
vcpu_run(vcpu);
@@ -107,14 +120,12 @@ static void test_sync_vmsa(uint32_t policy)
kvm_vm_free(vm);
}
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
/* TODO: Validate the measurement is as expected. */
@@ -123,7 +134,7 @@ static void test_sev(void *guest_code, uint64_t policy)
for (;;) {
vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) {
+ if (is_sev_es_vm(vm)) {
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Wanted SYSTEM_EVENT, got %s",
exit_reason_str(vcpu->run->exit_reason));
@@ -160,16 +171,14 @@ static void guest_shutdown_code(void)
__asm__ __volatile__("ud2");
}
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint32_t type = KVM_X86_SEV_ES_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+ vm_sev_launch(vm, policy, NULL);
vcpu_run(vcpu);
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -179,27 +188,42 @@ static void test_sev_es_shutdown(void)
kvm_vm_free(vm);
}
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy)
{
const u64 xf_mask = XFEATURE_MASK_X87_AVX;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ if (type == KVM_X86_SEV_VM)
+ return;
- test_sev_es_shutdown();
+ test_sev_shutdown(type, policy);
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
}
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..f2c7a1c297e3 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -141,7 +141,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
if (this_cpu_has(X86_FEATURE_XSAVE)) {
uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
+ uint8_t buffer[PAGE_SIZE];
memset(buffer, 0xcc, sizeof(buffer));
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..917b6066cfc1 100644
--- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..1e5e564523b3 100644
--- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
/* Enables interrupt in guest. */
- asm volatile("sti");
+ sti();
/* Let user space inject the first UCNA */
GUEST_SYNC(SYNC_FIRST_UCNA);
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..be7d72f3c029 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
vcpu_regs_set(vcpu, &regs);
}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..8463a9956410 100644
--- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
data = test_rdmsr(MSR_GS_BASE);
GUEST_ASSERT(data == MSR_GS_BASE);
+ /* Access the MSRs again to ensure KVM has disabled interception.*/
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
GUEST_DONE();
}
@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
"Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
run_guest_then_process_ucall_done(vcpu);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index fa512d033205..98cb6bdab3e6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
@@ -120,17 +120,17 @@ static void test_vmx_dirty_log(bool enable_ept)
* GPAs as the EPT enabled case.
*/
if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
+ prepare_eptp(vmx, vm);
nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
}
bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept)
}
TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+ TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..2cae86d9d5e2 100644
--- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -110,7 +110,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+ TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..cf1d2d1f2a8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ vm_paddr_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->pgd, vm->pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..7ff6f62e20a3 100644
--- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
@@ -29,7 +29,7 @@ static union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
- u64 anythread_deprecated:1;
+ u64 pebs_timing_info:1;
};
u64 capabilities;
} host_cap;
@@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = {
.pebs_arch_reg = 1,
.pebs_format = -1,
.pebs_baseline = 1,
+ .pebs_timing_info = 1,
};
static const union perf_capabilities format_caps = {
@@ -56,8 +57,8 @@ static void guest_test_perf_capabilities_gp(uint64_t val)
uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for value '0x%lx', got vector '0x%x'",
- val, vector);
+ "Expected #GP for value '0x%lx', got %s",
+ val, ex_str(vector));
}
static void guest_code(uint64_t current_val)
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..ae4a4b6c05ca 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -255,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
uint64_t hlt_count;
@@ -266,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
@@ -465,6 +465,19 @@ int main(int argc, char *argv[])
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..3b4814c55722 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,7 +18,7 @@ struct xapic_vcpu {
static void xapic_guest_code(void)
{
- asm volatile("cli");
+ cli();
xapic_enable();
@@ -38,7 +38,7 @@ static void xapic_guest_code(void)
static void x2apic_guest_code(void)
{
- asm volatile("cli");
+ cli();
x2apic_enable();
@@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x)
__test_icr(x, icr | i);
/*
- * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
- * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use
+ * vector 0xff.
*/
icr = APIC_INT_ASSERT | 0xff;
for (i = 0; i < 0xff; i++) {
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..d038c1571729 100644
--- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
@@ -81,13 +81,13 @@ static void guest_code(void)
vector = xsetbv_safe(0, XFEATURE_MASK_FP);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(FP), got vector '0x%x'",
- vector);
+ "Expected success on XSETBV(FP), got %s",
+ ex_str(vector));
vector = xsetbv_safe(0, supported_xcr0);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(0x%lx), got vector '0x%x'",
- supported_xcr0, vector);
+ "Expected success on XSETBV(0x%lx), got %s",
+ supported_xcr0, ex_str(vector));
for (i = 0; i < 64; i++) {
if (supported_xcr0 & BIT_ULL(i))
@@ -95,8 +95,8 @@ static void guest_code(void)
vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
- BIT_ULL(i), supported_xcr0, vector);
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s",
+ BIT_ULL(i), supported_xcr0, ex_str(vector));
}
GUEST_DONE();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..23909b501ac2 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -191,10 +191,7 @@ static void guest_code(void)
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
/* Trigger an interrupt injection */
GUEST_SYNC(TEST_INJECT_VECTOR);
@@ -550,15 +547,9 @@ int main(int argc, char *argv[])
int irq_fd[2] = { -1, -1 };
if (do_eventfd_tests) {
- irq_fd[0] = eventfd(0, 0);
- irq_fd[1] = eventfd(0, 0);
+ irq_fd[0] = kvm_new_eventfd();
+ irq_fd[1] = kvm_new_eventfd();
- /* Unexpected, but not a KVM failure */
- if (irq_fd[0] == -1 || irq_fd[1] == -1)
- do_evtchn_tests = do_eventfd_tests = false;
- }
-
- if (do_eventfd_tests) {
irq_routes.info.nr = 2;
irq_routes.entries[0].gsi = 32;
@@ -575,15 +566,8 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
- struct kvm_irqfd ifd = { };
-
- ifd.fd = irq_fd[0];
- ifd.gsi = 32;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- ifd.fd = irq_fd[1];
- ifd.gsi = 33;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
+ kvm_assign_irqfd(vm, 32, irq_fd[0]);
+ kvm_assign_irqfd(vm, 33, irq_fd[1]);
struct sigaction sa = { };
sa.sa_handler = handle_alrm;
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
index 470203a7cd73..a820329cae0d 100644
--- a/tools/testing/selftests/landlock/.gitignore
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -1,2 +1,5 @@
/*_test
+/sandbox-and-launch
/true
+/wait-pipe
+/wait-pipe-sandbox
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index 5cb0828f0514..044b83bde16e 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -4,13 +4,17 @@
CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
-LOCAL_HDRS += common.h
+LOCAL_HDRS += $(wildcard *.h)
src_test := $(wildcard *_test.c)
TEST_GEN_PROGS := $(src_test:.c=)
-TEST_GEN_PROGS_EXTENDED := true sandbox-and-launch wait-pipe
+TEST_GEN_PROGS_EXTENDED := \
+ true \
+ sandbox-and-launch \
+ wait-pipe \
+ wait-pipe-sandbox
# Short targets:
$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
new file mode 100644
index 000000000000..44eb433e9666
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock audit helpers
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+#define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)"
+
+struct audit_filter {
+ __u32 record_type;
+ size_t exe_len;
+ char exe[PATH_MAX];
+};
+
+struct audit_message {
+ struct nlmsghdr header;
+ union {
+ struct audit_status status;
+ struct audit_features features;
+ struct audit_rule_data rule;
+ struct nlmsgerr err;
+ char data[PATH_MAX + 200];
+ };
+};
+
+static const struct timeval audit_tv_dom_drop = {
+ /*
+ * Because domain deallocation is tied to asynchronous credential
+ * freeing, receiving such event may take some time. In practice,
+ * on a small VM, it should not exceed 100k usec, but let's wait up
+ * to 1 second to be safe.
+ */
+ .tv_sec = 1,
+};
+
+static const struct timeval audit_tv_default = {
+ .tv_usec = 1,
+};
+
+static int audit_send(const int fd, const struct audit_message *const msg)
+{
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, msg, msg->header.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0)
+ return -errno;
+
+ if (ret != msg->header.nlmsg_len)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int audit_recv(const int fd, struct audit_message *msg)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ struct audit_message msg_tmp;
+ int err;
+
+ if (!msg)
+ msg = &msg_tmp;
+
+ do {
+ err = recvfrom(fd, msg, sizeof(*msg), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (err < 0 && errno == EINTR);
+
+ if (err < 0)
+ return -errno;
+
+ if (addrlen != sizeof(addr) || addr.nl_pid != 0)
+ return -EINVAL;
+
+ /* Checks Netlink error or end of messages. */
+ if (msg->header.nlmsg_type == NLMSG_ERROR)
+ return msg->err.error;
+
+ return 0;
+}
+
+static int audit_request(const int fd,
+ const struct audit_message *const request,
+ struct audit_message *reply)
+{
+ struct audit_message msg_tmp;
+ bool first_reply = true;
+ int err;
+
+ err = audit_send(fd, request);
+ if (err)
+ return err;
+
+ if (!reply)
+ reply = &msg_tmp;
+
+ do {
+ if (first_reply)
+ first_reply = false;
+ else
+ reply = &msg_tmp;
+
+ err = audit_recv(fd, reply);
+ if (err)
+ return err;
+ } while (reply->header.nlmsg_type != NLMSG_ERROR &&
+ reply->err.msg.nlmsg_type != request->header.nlmsg_type);
+
+ return reply->err.error;
+}
+
+static int audit_filter_exe(const int audit_fd,
+ const struct audit_filter *const filter,
+ const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)) +
+ NLMSG_ALIGN(filter->exe_len),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = filter->record_type,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = filter->exe_len,
+ .buflen = filter->exe_len,
+ }
+ };
+
+ if (filter->record_type != AUDIT_EXE)
+ return -EINVAL;
+
+ memcpy(msg.rule.buf, filter->exe, filter->exe_len);
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_filter_drop(const int audit_fd, const __u16 type)
+{
+ struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.rule)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .rule = {
+ .flags = AUDIT_FILTER_EXCLUDE,
+ .action = AUDIT_NEVER,
+ .field_count = 1,
+ .fields[0] = AUDIT_MSGTYPE,
+ .fieldflags[0] = AUDIT_NOT_EQUAL,
+ .values[0] = AUDIT_LANDLOCK_DOMAIN,
+ }
+ };
+
+ return audit_request(audit_fd, &msg, NULL);
+}
+
+static int audit_set_status(int fd, __u32 key, __u32 val)
+{
+ const struct audit_message msg = {
+ .header = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.status)),
+ .nlmsg_type = AUDIT_SET,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ },
+ .status = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+
+ return audit_request(fd, &msg, NULL);
+}
+
+/* Returns a pointer to the last filled character of @dst, which is `\0`. */
+static __maybe_unused char *regex_escape(const char *const src, char *dst,
+ size_t dst_size)
+{
+ char *d = dst;
+
+ for (const char *s = src; *s; s++) {
+ switch (*s) {
+ case '$':
+ case '*':
+ case '.':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ if (d >= dst + dst_size - 2)
+ return (char *)-ENOMEM;
+
+ *d++ = '\\';
+ *d++ = *s;
+ break;
+ default:
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d++ = *s;
+ }
+ }
+ if (d >= dst + dst_size - 1)
+ return (char *)-ENOMEM;
+
+ *d = '\0';
+ return d;
+}
+
+/*
+ * @domain_id: The domain ID extracted from the audit message (if the first part
+ * of @pattern is REGEX_LANDLOCK_PREFIX). It is set to 0 if the domain ID is
+ * not found.
+ */
+static int audit_match_record(int audit_fd, const __u16 type,
+ const char *const pattern, __u64 *domain_id)
+{
+ struct audit_message msg;
+ int ret, err = 0;
+ bool matches_record = !type;
+ regmatch_t matches[2];
+ regex_t regex;
+
+ ret = regcomp(&regex, pattern, 0);
+ if (ret)
+ return -EINVAL;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err)
+ goto out;
+
+ if (msg.header.nlmsg_type == type)
+ matches_record = true;
+ } while (!matches_record);
+
+ ret = regexec(&regex, msg.data, ARRAY_SIZE(matches), matches, 0);
+ if (ret) {
+ printf("DATA: %s\n", msg.data);
+ printf("ERROR: no match for pattern: %s\n", pattern);
+ err = -ENOENT;
+ }
+
+ if (domain_id) {
+ *domain_id = 0;
+ if (matches[1].rm_so != -1) {
+ int match_len = matches[1].rm_eo - matches[1].rm_so;
+ /* The maximal characters of a 2^64 hexadecimal number is 17. */
+ char dom_id[18];
+
+ if (match_len > 0 && match_len < sizeof(dom_id)) {
+ memcpy(dom_id, msg.data + matches[1].rm_so,
+ match_len);
+ dom_id[match_len] = '\0';
+ if (domain_id)
+ *domain_id = strtoull(dom_id, NULL, 16);
+ }
+ }
+ }
+
+out:
+ regfree(&regex);
+ return err;
+}
+
+static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
+ __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=%d uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, pid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
+}
+
+static int __maybe_unused matches_log_domain_deallocated(
+ int audit_fd, unsigned int num_denials, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=deallocated denials=%u$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ num_denials);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
+}
+
+struct audit_records {
+ size_t access;
+ size_t domain;
+};
+
+static int audit_count_records(int audit_fd, struct audit_records *records)
+{
+ struct audit_message msg;
+ int err;
+
+ records->access = 0;
+ records->domain = 0;
+
+ do {
+ memset(&msg, 0, sizeof(msg));
+ err = audit_recv(audit_fd, &msg);
+ if (err) {
+ if (err == -EAGAIN)
+ return 0;
+ else
+ return err;
+ }
+
+ switch (msg.header.nlmsg_type) {
+ case AUDIT_LANDLOCK_ACCESS:
+ records->access++;
+ break;
+ case AUDIT_LANDLOCK_DOMAIN:
+ records->domain++;
+ break;
+ }
+ } while (true);
+
+ return 0;
+}
+
+static int audit_init(void)
+{
+ int fd, err;
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0)
+ return -errno;
+
+ err = audit_set_status(fd, AUDIT_STATUS_ENABLED, 1);
+ if (err)
+ return err;
+
+ err = audit_set_status(fd, AUDIT_STATUS_PID, getpid());
+ if (err)
+ return err;
+
+ /* Sets a timeout for negative tests. */
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
+ sizeof(audit_tv_default));
+ if (err)
+ return -errno;
+
+ return fd;
+}
+
+static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
+{
+ char *absolute_path = NULL;
+
+ /* It is assume that there is not already filtering rules. */
+ filter->record_type = AUDIT_EXE;
+ if (!path) {
+ int ret = readlink("/proc/self/exe", filter->exe,
+ sizeof(filter->exe) - 1);
+ if (ret < 0)
+ return -errno;
+
+ filter->exe_len = ret;
+ return 0;
+ }
+
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ /* No need for the terminating NULL byte. */
+ filter->exe_len = strlen(absolute_path);
+ if (filter->exe_len > sizeof(filter->exe))
+ return -E2BIG;
+
+ memcpy(filter->exe, absolute_path, filter->exe_len);
+ free(absolute_path);
+ return 0;
+}
+
+static int audit_cleanup(int audit_fd, struct audit_filter *filter)
+{
+ struct audit_filter new_filter;
+
+ if (audit_fd < 0 || !filter) {
+ int err;
+
+ /*
+ * Simulates audit_init_with_exe_filter() when called from
+ * FIXTURE_TEARDOWN_PARENT().
+ */
+ audit_fd = audit_init();
+ if (audit_fd < 0)
+ return audit_fd;
+
+ filter = &new_filter;
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+ }
+
+ /* Filters might not be in place. */
+ audit_filter_exe(audit_fd, filter, AUDIT_DEL_RULE);
+ audit_filter_drop(audit_fd, AUDIT_DEL_RULE);
+
+ /*
+ * Because audit_cleanup() might not be called by the test auditd
+ * process, it might not be possible to explicitly set it. Anyway,
+ * AUDIT_STATUS_ENABLED will implicitly be set to 0 when the auditd
+ * process will exit.
+ */
+ return close(audit_fd);
+}
+
+static int audit_init_with_exe_filter(struct audit_filter *filter)
+{
+ int fd, err;
+
+ fd = audit_init();
+ if (fd < 0)
+ return fd;
+
+ err = audit_init_filter_exe(filter, NULL);
+ if (err)
+ return err;
+
+ err = audit_filter_exe(fd, filter, AUDIT_ADD_RULE);
+ if (err)
+ return err;
+
+ return fd;
+}
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
new file mode 100644
index 000000000000..46d02d49835a
--- /dev/null
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Audit
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/landlock.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "audit.h"
+#include "common.h"
+
+static int matches_log_signal(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid, __u64 *domain_id)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.signal opid=%d ocomm=\"audit_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ domain_id);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN(audit)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, layers)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int status, ruleset_fd, i;
+ __u64(*domain_stack)[16];
+ __u64 prev_dom = 3;
+ pid_t child;
+
+ domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, domain_stack);
+ memset(domain_stack, 0, sizeof(*domain_stack));
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) {
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ /* Creates a denial to get the domain ID. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0,
+ matches_log_signal(_metadata, self->audit_fd,
+ getppid(), &denial_dom));
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Checks that the new domain is younger than the previous one. */
+ EXPECT_GT(allocated_dom, prev_dom);
+ prev_dom = allocated_dom;
+ (*domain_stack)[i] = allocated_dom;
+ }
+
+ /* Checks that we reached the maximum number of layers. */
+ EXPECT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(E2BIG, errno);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Purges log from deallocated domains. */
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
+ __u64 deallocated_dom = 2;
+
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ((*domain_stack)[i], deallocated_dom)
+ {
+ TH_LOG("Failed to match domain %llx (#%d)",
+ (*domain_stack)[i], i);
+ }
+ }
+ EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+struct thread_data {
+ pid_t parent_pid;
+ int ruleset_fd, pipe_child, pipe_parent;
+};
+
+static void *thread_audit_test(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* TGID and TID are different for a second thread. */
+ if (getpid() == gettid()) {
+ err = 1;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 2;
+ goto out;
+ }
+
+ if (close(data->ruleset_fd)) {
+ err = 3;
+ goto out;
+ }
+
+ /* Creates a denial to get the domain ID. */
+ if (kill(data->parent_pid, 0) != -1) {
+ err = 4;
+ goto out;
+ }
+
+ if (EPERM != errno) {
+ err = 5;
+ goto out;
+ }
+
+ /* Signals the parent to read denial logs. */
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ /* Waits for the parent to update audit filters. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 7;
+ goto out;
+ }
+
+out:
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/* Checks that the PID tied to a domain is not a TID but the TGID. */
+TEST_F(audit, thread)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+ __u64 deallocated_dom = 3;
+ pthread_t thread;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ struct thread_data child_data;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ /* TGID and TID are the same for the initial thread . */
+ EXPECT_EQ(getpid(), gettid());
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test,
+ &child_data));
+
+ /* Waits for the child to generate a denial. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Matches the signal log to get the domain ID. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, &denial_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ /* Signals the thread to exit, which will generate a domain deallocation. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, NULL));
+
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ(denial_dom, deallocated_dom);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+}
+
+FIXTURE(audit_flags)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+ __u64 *domain_id;
+};
+
+FIXTURE_VARIANT(audit_flags)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_flags, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_flags)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->domain_id = mmap(NULL, sizeof(*self->domain_id),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->domain_id);
+ /* Domain IDs are greater or equal to 2^32. */
+ *self->domain_id = 1;
+}
+
+FIXTURE_TEARDOWN(audit_flags)
+{
+ EXPECT_EQ(0, munmap(self->domain_id, sizeof(*self->domain_id)));
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit_flags, signal)
+{
+ int status;
+ pid_t child;
+ struct audit_records records;
+ __u64 deallocated_dom = 2;
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int ruleset_fd;
+
+ /* Add filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* First signal checks to test log entries. */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+ EXPECT_EQ(*self->domain_id, 1);
+ } else {
+ __u64 allocated_dom = 3;
+
+ EXPECT_EQ(0, matches_log_signal(
+ _metadata, self->audit_fd,
+ getppid(), self->domain_id));
+
+ /* Checks domain information records. */
+ EXPECT_EQ(0, matches_log_domain_allocated(
+ self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_NE(*self->domain_id, 1);
+ EXPECT_NE(*self->domain_id, 0);
+ EXPECT_EQ(*self->domain_id, allocated_dom);
+ }
+
+ /* Second signal checks to test audit_count_records(). */
+ EXPECT_EQ(-1, kill(getppid(), 0));
+ EXPECT_EQ(EPERM, errno);
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(0, records.access);
+ } else {
+ EXPECT_EQ(1, records.access);
+ }
+ EXPECT_EQ(0, records.domain);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0,
+ audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ if (variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
+ EXPECT_EQ(-EAGAIN,
+ matches_log_domain_deallocated(self->audit_fd, 0,
+ &deallocated_dom));
+ EXPECT_EQ(deallocated_dom, 2);
+ } else {
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop,
+ sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2,
+ &deallocated_dom));
+ EXPECT_NE(deallocated_dom, 2);
+ EXPECT_NE(deallocated_dom, 0);
+ EXPECT_EQ(deallocated_dom, *self->domain_id);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default,
+ sizeof(audit_tv_default)));
+ }
+}
+
+static int matches_log_fs_read_root(int audit_fd)
+{
+ return audit_match_record(
+ audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.read_dir path=\"/\" dev=\"[^\"]\\+\" ino=[0-9]\\+$",
+ NULL);
+}
+
+FIXTURE(audit_exec)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit_exec)
+{
+ const int restrict_flags;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, default) {
+ /* clang-format on */
+ .restrict_flags = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, same_exec_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit_exec, subdomains_off_and_cross_exec_on) {
+ /* clang-format on */
+ .restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON,
+};
+
+FIXTURE_SETUP(audit_exec)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ self->audit_fd = audit_init();
+ EXPECT_LE(0, self->audit_fd)
+ {
+ const char *error_msg;
+
+ /* kill "$(auditctl -s | sed -ne 's/^pid \([0-9]\+\)$/\1/p')" */
+ if (self->audit_fd == -EEXIST)
+ error_msg = "socket already in use (e.g. auditd)";
+ else
+ error_msg = strerror(-self->audit_fd);
+ TH_LOG("Failed to initialize audit: %s", error_msg);
+ }
+
+ /* Applies test filter for the bin_wait_pipe_sandbox program. */
+ EXPECT_EQ(0, audit_init_filter_exe(&self->audit_filter,
+ bin_wait_pipe_sandbox));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_ADD_RULE));
+
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN(audit_exec)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, close(self->audit_fd));
+}
+
+TEST_F(audit_exec, signal_and_open)
+{
+ struct audit_records records;
+ int pipe_child[2], pipe_parent[2];
+ char buf_parent;
+ pid_t child;
+ int status;
+
+ ASSERT_EQ(0, pipe2(pipe_child, 0));
+ ASSERT_EQ(0, pipe2(pipe_parent, 0));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const struct landlock_ruleset_attr layer1 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ char pipe_child_str[12], pipe_parent_str[12];
+ char *const argv[] = { (char *)bin_wait_pipe_sandbox,
+ pipe_child_str, pipe_parent_str, NULL };
+ int ruleset_fd;
+
+ /* Passes the pipe FDs to the executed binary. */
+ EXPECT_EQ(0, close(pipe_child[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ snprintf(pipe_child_str, sizeof(pipe_child_str), "%d",
+ pipe_child[1]);
+ snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d",
+ pipe_parent[0]);
+
+ ruleset_fd =
+ landlock_create_ruleset(&layer1, sizeof(layer1), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create a ruleset");
+ _exit(1);
+ }
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (landlock_restrict_self(ruleset_fd,
+ variant->restrict_flags)) {
+ perror("Failed to restrict self");
+ _exit(1);
+ }
+ close(ruleset_fd);
+
+ ASSERT_EQ(0, execve(argv[0], argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", argv[0],
+ strerror(errno));
+ };
+ _exit(1);
+ return;
+ }
+
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ /* Waits for the child. */
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that there was no denial until now. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ /*
+ * Wait for the child to do a first denied action by layer1 and
+ * sandbox itself with layer2.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /*
+ * Wait for the child to do a second denied action by layer1 and
+ * layer2, and sandbox itself with layer3.
+ */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests that the audit record only matches the child. */
+ if (variant->restrict_flags & LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON) {
+ /* Matches the current domain. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /* Matches the child domain. */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+
+ /* Waits for the child to terminate. */
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(0, WEXITSTATUS(status));
+
+ /* Tests that the audit record only matches the child. */
+ if (!(variant->restrict_flags &
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
+ /*
+ * Matches the child domains, which tests that the
+ * llcred->domain_exec bitmask is correctly updated with a new
+ * domain.
+ */
+ EXPECT_EQ(0, matches_log_fs_read_root(self->audit_fd));
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ getpid(), NULL));
+ }
+
+ /* Checks that we didn't miss anything. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 1bc16fde2e8a..7b69002239d7 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -76,7 +76,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(6, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(7, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
@@ -98,10 +98,54 @@ TEST(abi_version)
ASSERT_EQ(EINVAL, errno);
}
+/*
+ * Old source trees might not have the set of Kselftest fixes related to kernel
+ * UAPI headers.
+ */
+#ifndef LANDLOCK_CREATE_RULESET_ERRATA
+#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1)
+#endif
+
+TEST(errata)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ int errata;
+
+ errata = landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA);
+ /* The errata bitmask will not be backported to tests. */
+ ASSERT_LE(0, errata);
+ TH_LOG("errata: 0x%x", errata);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION |
+ LANDLOCK_CREATE_RULESET_ERRATA));
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_ERRATA |
+ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+}
+
/* Tests ordering of syscall argument checks. */
TEST(create_ruleset_checks_ordering)
{
- const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
+ const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA;
const int invalid_flag = last_flag << 1;
int ruleset_fd;
const struct landlock_ruleset_attr ruleset_attr = {
@@ -233,6 +277,88 @@ TEST(restrict_self_checks_ordering)
ASSERT_EQ(0, close(ruleset_fd));
}
+TEST(restrict_self_fd)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(-1, landlock_restrict_self(fd, 0));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_fd_flags)
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF accepts -1 but not any file
+ * descriptor.
+ */
+ EXPECT_EQ(-1, landlock_restrict_self(
+ fd, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADFD, errno);
+}
+
+TEST(restrict_self_flags)
+{
+ const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF;
+
+ /* Tests invalid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, last_flag << 1));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, -1));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Tests valid flag combinations. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(-1, 0));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1,
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(-1,
+ landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON));
+ EXPECT_EQ(EBADF, errno);
+
+ /* Tests with an invalid ruleset_fd. */
+
+ EXPECT_EQ(-1, landlock_restrict_self(
+ -2, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+ EXPECT_EQ(EBADF, errno);
+
+ EXPECT_EQ(0, landlock_restrict_self(
+ -1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
+}
+
TEST(ruleset_fd_io)
{
struct landlock_ruleset_attr ruleset_attr = {
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index a604ea5d8297..230b75f6015b 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -11,25 +11,23 @@
#include <errno.h>
#include <linux/securebits.h>
#include <sys/capability.h>
+#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "wrappers.h"
#define TMP_DIR "tmp"
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((__unused__))
-#endif
-
/* TEST_F_FORK() should not be used for new tests. */
#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
static const char bin_sandbox_and_launch[] = "./sandbox-and-launch";
static const char bin_wait_pipe[] = "./wait-pipe";
+static const char bin_wait_pipe_sandbox[] = "./wait-pipe-sandbox";
static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
{
@@ -37,10 +35,12 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
/* Only these three capabilities are useful for the tests. */
const cap_value_t caps[] = {
/* clang-format off */
+ CAP_AUDIT_CONTROL,
CAP_DAC_OVERRIDE,
CAP_MKNOD,
CAP_NET_ADMIN,
CAP_NET_BIND_SERVICE,
+ CAP_SETUID,
CAP_SYS_ADMIN,
CAP_SYS_CHROOT,
/* clang-format on */
@@ -204,9 +204,26 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
}
}
+static void __maybe_unused
+drop_access_rights(struct __test_metadata *const _metadata,
+ const struct landlock_ruleset_attr *const ruleset_attr)
+{
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(ruleset_attr, sizeof(*ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
struct protocol_variant {
int domain;
int type;
+ int protocol;
};
struct service_fixture {
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 29af19c4e9f9..8fe9b461b1fd 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,8 +1,12 @@
+CONFIG_AF_UNIX_OOB=y
+CONFIG_AUDIT=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_INET=y
CONFIG_IPV6=y
CONFIG_KEYS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
CONFIG_NET=y
CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index aa6f2c1cbec7..eee814e09dd7 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -41,6 +41,7 @@
#define _ASM_GENERIC_FCNTL_H
#include <linux/fcntl.h>
+#include "audit.h"
#include "common.h"
#ifndef renameat2
@@ -1831,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes)
ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
}
+/*
+ * This test checks that a rule on a directory used as a mount point does not
+ * grant access to the mount covering it. It is a generalization of the bind
+ * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points.
+ */
+TEST_F_FORK(layout1, covered_rule)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s3d2,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Unmount to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Creates a ruleset with the future hidden directory. */
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Covers with a new mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+}
+
enum relative_access {
REL_OPEN,
REL_CHDIR,
@@ -2226,6 +2267,22 @@ static int test_exchange(const char *const oldpath, const char *const newpath)
return 0;
}
+static int test_renameat(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath)
+{
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, 0))
+ return errno;
+ return 0;
+}
+
+static int test_exchangeat(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath)
+{
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_EXCHANGE))
+ return errno;
+ return 0;
+}
+
TEST_F_FORK(layout1, rename_file)
{
const struct rule rules[] = {
@@ -4520,6 +4577,18 @@ TEST_F_FORK(ioctl, handle_file_access_file)
FIXTURE(layout1_bind) {};
/* clang-format on */
+static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
+static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
+
+/* Move targets for disconnected path tests. */
+static const char dir_s4d1[] = TMP_DIR "/s4d1";
+static const char file1_s4d1[] = TMP_DIR "/s4d1/f1";
+static const char file2_s4d1[] = TMP_DIR "/s4d1/f2";
+static const char dir_s4d2[] = TMP_DIR "/s4d1/s4d2";
+static const char file1_s4d2[] = TMP_DIR "/s4d1/s4d2/f1";
+static const char file1_name[] = "f1";
+static const char file2_name[] = "f2";
+
FIXTURE_SETUP(layout1_bind)
{
prepare_layout(_metadata);
@@ -4535,14 +4604,14 @@ FIXTURE_TEARDOWN_PARENT(layout1_bind)
{
/* umount(dir_s2d2)) is handled by namespace lifetime. */
+ remove_path(file1_s4d1);
+ remove_path(file2_s4d1);
+
remove_layout1(_metadata);
cleanup_layout(_metadata);
}
-static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
-static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
-
/*
* layout1_bind hierarchy:
*
@@ -4553,20 +4622,25 @@ static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
* │   └── s1d2
* │   ├── f1
* │   ├── f2
- * │   └── s1d3
+ * │   └── s1d3 [disconnected by path_disconnected]
* │   ├── f1
* │   └── f2
* ├── s2d1
* │   ├── f1
- * │   └── s2d2
+ * │   └── s2d2 [bind mount from s1d2]
* │   ├── f1
* │   ├── f2
* │   └── s1d3
* │   ├── f1
* │   └── f2
- * └── s3d1
- * └── s3d2
- * └── s3d3
+ * ├── s3d1
+ * │   └── s3d2
+ * │   └── s3d3
+ * └── s4d1 [renamed from s1d3 by path_disconnected]
+ *    ├── f1
+ *    ├── f2
+ * └── s4d2
+ * └── f1
*/
TEST_F_FORK(layout1_bind, no_restriction)
@@ -4765,6 +4839,1431 @@ TEST_F_FORK(layout1_bind, reparent_cross_mount)
ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
}
+/*
+ * Make sure access to file through a disconnected path works as expected.
+ * This test moves s1d3 to s4d1.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected)
+{
+ const struct rule layer1_allow_all[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_ALL,
+ },
+ {},
+ };
+ const struct rule layer2_allow_just_f1[] = {
+ {
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer3_only_s1d2[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+
+ /* Landlock should not deny access just because it is disconnected. */
+ int ruleset_fd_l1 =
+ create_ruleset(_metadata, ACCESS_ALL, layer1_allow_all);
+
+ /* Creates the new ruleset now before we move the dir containing the file. */
+ int ruleset_fd_l2 =
+ create_ruleset(_metadata, ACCESS_RW, layer2_allow_just_f1);
+ int ruleset_fd_l3 =
+ create_ruleset(_metadata, ACCESS_RW, layer3_only_s1d2);
+ int bind_s1d3_fd;
+
+ ASSERT_LE(0, ruleset_fd_l1);
+ ASSERT_LE(0, ruleset_fd_l2);
+ ASSERT_LE(0, ruleset_fd_l3);
+
+ enforce_ruleset(_metadata, ruleset_fd_l1);
+ EXPECT_EQ(0, close(ruleset_fd_l1));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+
+ /* Tests access is possible before we move. */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, "..", O_RDONLY | O_DIRECTORY));
+
+ /* Makes it disconnected. */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d1))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d1,
+ strerror(errno));
+ }
+
+ /* Tests that access is still possible. */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+
+ /*
+ * Tests that ".." is not possible (not because of Landlock, but just
+ * because it's disconnected).
+ */
+ EXPECT_EQ(ENOENT,
+ test_open_rel(bind_s1d3_fd, "..", O_RDONLY | O_DIRECTORY));
+
+ /* This should still work with a narrower rule. */
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY));
+ /*
+ * Accessing a file through a disconnected file descriptor can still be
+ * allowed by a rule tied to this file, even if it is no longer visible in
+ * its mount point.
+ */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd_l3);
+ EXPECT_EQ(0, close(ruleset_fd_l3));
+
+ EXPECT_EQ(EACCES, test_open(file1_s4d1, O_RDONLY));
+ /*
+ * Accessing a file through a disconnected file descriptor can still be
+ * allowed by a rule tied to the original mount point, even if it is no
+ * longer visible in its mount point.
+ */
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+}
+
+/*
+ * Test that renameat with disconnected paths works under Landlock. This test
+ * moves s1d3 to s4d2, so that we can have a rule allowing refers on the move
+ * target's immediate parent.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected_rename)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = dir_s4d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {}
+ };
+
+ /* This layer only handles LANDLOCK_ACCESS_FS_READ_FILE. */
+ const struct rule layer2_only_s1d2[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ int ruleset_fd_l1, ruleset_fd_l2;
+ pid_t child_pid;
+ int bind_s1d3_fd, status;
+
+ ASSERT_EQ(0, mkdir(dir_s4d1, 0755))
+ {
+ TH_LOG("Failed to create %s: %s", dir_s4d1, strerror(errno));
+ }
+ ruleset_fd_l1 = create_ruleset(_metadata, ACCESS_ALL, layer1);
+ ruleset_fd_l2 = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer2_only_s1d2);
+ ASSERT_LE(0, ruleset_fd_l1);
+ ASSERT_LE(0, ruleset_fd_l2);
+
+ enforce_ruleset(_metadata, ruleset_fd_l1);
+ EXPECT_EQ(0, close(ruleset_fd_l1));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+
+ /* Tests ENOENT priority over EACCES for disconnected directory. */
+ EXPECT_EQ(EACCES, test_open_rel(bind_s1d3_fd, "..", O_DIRECTORY));
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d2))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d2,
+ strerror(errno));
+ }
+ EXPECT_EQ(ENOENT, test_open_rel(bind_s1d3_fd, "..", O_DIRECTORY));
+
+ /*
+ * The file is no longer under s1d2 but we should still be able to access it
+ * with layer 2 because its mount point is evaluated as the first valid
+ * directory because it was initially a parent. Do a fork to test this so
+ * we don't prevent ourselves from renaming it back later.
+ */
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(EACCES, test_open(file1_s4d2, O_RDONLY));
+
+ /*
+ * Tests that access widening checks indeed prevents us from renaming it
+ * back.
+ */
+ EXPECT_EQ(-1, rename(dir_s4d2, dir_s1d3));
+ EXPECT_EQ(EXDEV, errno);
+
+ /*
+ * Including through the now disconnected fd (but it should return
+ * EXDEV).
+ */
+ EXPECT_EQ(-1, renameat(bind_s1d3_fd, file1_name, AT_FDCWD,
+ file1_s2d2));
+ EXPECT_EQ(EXDEV, errno);
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ ASSERT_EQ(0, rename(dir_s4d2, dir_s1d3))
+ {
+ TH_LOG("Failed to rename %s back to %s: %s", dir_s4d1, dir_s1d3,
+ strerror(errno));
+ }
+
+ /* Now checks that we can access it under l2. */
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /*
+ * Also test that we can rename via a disconnected path. We move the
+ * dir back to the disconnected place first, then we rename file1 to
+ * file2 through our dir fd.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d2))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d2,
+ strerror(errno));
+ }
+ ASSERT_EQ(0,
+ renameat(bind_s1d3_fd, file1_name, bind_s1d3_fd, file2_name))
+ {
+ TH_LOG("Failed to rename %s to %s within disconnected %s: %s",
+ file1_name, file2_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file2_name, O_RDONLY));
+ ASSERT_EQ(0, renameat(bind_s1d3_fd, file2_name, AT_FDCWD, file1_s2d2))
+ {
+ TH_LOG("Failed to rename %s to %s through disconnected %s: %s",
+ file2_name, file1_s2d2, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+
+ /* Move it back using the disconnected path as the target. */
+ ASSERT_EQ(0, renameat(AT_FDCWD, file1_s2d2, bind_s1d3_fd, file1_name))
+ {
+ TH_LOG("Failed to rename %s to %s through disconnected %s: %s",
+ file1_s1d2, file1_name, bind_dir_s1d3, strerror(errno));
+ }
+
+ /* Now make it connected again. */
+ ASSERT_EQ(0, rename(dir_s4d2, dir_s1d3))
+ {
+ TH_LOG("Failed to rename %s back to %s: %s", dir_s4d2, dir_s1d3,
+ strerror(errno));
+ }
+
+ /* Checks again that we can access it under l2. */
+ enforce_ruleset(_metadata, ruleset_fd_l2);
+ EXPECT_EQ(0, close(ruleset_fd_l2));
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+ EXPECT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+}
+
+/*
+ * Test that linkat(2) with disconnected paths works under Landlock. This
+ * test moves s1d3 to s4d1.
+ */
+TEST_F_FORK(layout1_bind, path_disconnected_link)
+{
+ /* Ruleset to be applied after renaming s1d3 to s4d1. */
+ const struct rule layer1[] = {
+ {
+ .path = dir_s4d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {}
+ };
+ int ruleset_fd, bind_s1d3_fd;
+
+ /* Removes unneeded files created by layout1, otherwise it will EEXIST. */
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ bind_s1d3_fd = open(bind_dir_s1d3, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, bind_s1d3_fd);
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY));
+
+ /* Disconnects bind_s1d3_fd. */
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s4d1))
+ {
+ TH_LOG("Failed to rename %s to %s: %s", dir_s1d3, dir_s4d1,
+ strerror(errno));
+ }
+
+ /* Need this later to test different parent link. */
+ ASSERT_EQ(0, mkdir(dir_s4d2, 0755))
+ {
+ TH_LOG("Failed to create %s: %s", dir_s4d2, strerror(errno));
+ }
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_ALL, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* From disconnected to connected. */
+ ASSERT_EQ(0, linkat(bind_s1d3_fd, file1_name, AT_FDCWD, file1_s2d2, 0))
+ {
+ TH_LOG("Failed to link %s to %s via disconnected %s: %s",
+ file1_name, file1_s2d2, bind_dir_s1d3, strerror(errno));
+ }
+
+ /* Tests that we can access via the new link... */
+ EXPECT_EQ(0, test_open(file1_s2d2, O_RDONLY))
+ {
+ TH_LOG("Failed to open newly linked %s: %s", file1_s2d2,
+ strerror(errno));
+ }
+
+ /* ...as well as the old one. */
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY))
+ {
+ TH_LOG("Failed to open original %s: %s", file1_s4d1,
+ strerror(errno));
+ }
+
+ /* From connected to disconnected. */
+ ASSERT_EQ(0, unlink(file1_s4d1));
+ ASSERT_EQ(0, linkat(AT_FDCWD, file1_s2d2, bind_s1d3_fd, file2_name, 0))
+ {
+ TH_LOG("Failed to link %s to %s via disconnected %s: %s",
+ file1_s2d2, file2_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file2_s4d1, O_RDONLY));
+ ASSERT_EQ(0, unlink(file1_s2d2));
+
+ /* From disconnected to disconnected (same parent). */
+ ASSERT_EQ(0,
+ linkat(bind_s1d3_fd, file2_name, bind_s1d3_fd, file1_name, 0))
+ {
+ TH_LOG("Failed to link %s to %s within disconnected %s: %s",
+ file2_name, file1_name, bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s4d1, O_RDONLY))
+ {
+ TH_LOG("Failed to open newly linked %s: %s", file1_s4d1,
+ strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, file1_name, O_RDONLY))
+ {
+ TH_LOG("Failed to open %s through newly created link under disconnected path: %s",
+ file1_name, strerror(errno));
+ }
+ ASSERT_EQ(0, unlink(file2_s4d1));
+
+ /* From disconnected to disconnected (different parent). */
+ ASSERT_EQ(0,
+ linkat(bind_s1d3_fd, file1_name, bind_s1d3_fd, "s4d2/f1", 0))
+ {
+ TH_LOG("Failed to link %s to %s within disconnected %s: %s",
+ file1_name, "s4d2/f1", bind_dir_s1d3, strerror(errno));
+ }
+ EXPECT_EQ(0, test_open(file1_s4d2, O_RDONLY))
+ {
+ TH_LOG("Failed to open %s after link: %s", file1_s4d2,
+ strerror(errno));
+ }
+ EXPECT_EQ(0, test_open_rel(bind_s1d3_fd, "s4d2/f1", O_RDONLY))
+ {
+ TH_LOG("Failed to open %s through disconnected path after link: %s",
+ "s4d2/f1", strerror(errno));
+ }
+}
+
+/*
+ * layout4_disconnected_leafs with bind mount and renames:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the bind mount]
+ * │ ├── s1d31
+ * │   │ └── s1d41 [now renamed beneath s3d1]
+ * │ │ ├── f1
+ * │ │ └── f2
+ * │   └── s1d32
+ * │ └── s1d42 [now renamed beneath s4d1]
+ * │ ├── f3
+ * │ └── f4
+ * ├── s2d1
+ * │   └── s2d2 [bind mount of s1d2]
+ * │ ├── s1d31
+ * │   │ └── s1d41 [opened FD, now renamed beneath s3d1]
+ * │ │ ├── f1
+ * │ │ └── f2
+ * │   └── s1d32
+ * │ └── s1d42 [opened FD, now renamed beneath s4d1]
+ * │ ├── f3
+ * │ └── f4
+ * ├── s3d1
+ * │  └── s1d41 [renamed here]
+ * │ ├── f1
+ * │ └── f2
+ * └── s4d1
+ * └── s1d42 [renamed here]
+ * ├── f3
+ * └── f4
+ */
+/* clang-format off */
+FIXTURE(layout4_disconnected_leafs) {
+ int s2d2_fd;
+};
+/* clang-format on */
+
+FIXTURE_SETUP(layout4_disconnected_leafs)
+{
+ prepare_layout(_metadata);
+
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f1");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f2");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f3");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f4");
+ create_directory(_metadata, TMP_DIR "/s2d1/s2d2");
+ create_directory(_metadata, TMP_DIR "/s3d1");
+ create_directory(_metadata, TMP_DIR "/s4d1");
+
+ self->s2d2_fd =
+ open(TMP_DIR "/s2d1/s2d2", O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s2d2_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(TMP_DIR "/s1d1/s1d2", TMP_DIR "/s2d1/s2d2", NULL,
+ MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout4_disconnected_leafs)
+{
+ /* umount(TMP_DIR "/s2d1") is handled by namespace lifetime. */
+
+ /* Removes files after renames. */
+ remove_path(TMP_DIR "/s3d1/s1d41/f1");
+ remove_path(TMP_DIR "/s3d1/s1d41/f2");
+ remove_path(TMP_DIR "/s4d1/s1d42/f1");
+ remove_path(TMP_DIR "/s4d1/s1d42/f3");
+ remove_path(TMP_DIR "/s4d1/s1d42/f4");
+ remove_path(TMP_DIR "/s4d1/s1d42/f5");
+
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(layout4_disconnected_leafs)
+{
+ /*
+ * Parent of the bind mount source. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d1;
+ /*
+ * Source of bind mount (to s2d2). It should always be enforced when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d2;
+ /*
+ * Original parent of s1d41. It should always be ignored when testing
+ * against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s1d31;
+ /*
+ * Original parent of s1d42. It should always be ignored when testing
+ * against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s1d32;
+ /*
+ * Opened and disconnected source directory. It should always be enforced
+ * when testing against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s1d41;
+ /*
+ * Opened and disconnected source directory. It should always be enforced
+ * when testing against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s1d42;
+ /*
+ * File in the s1d41 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_f1;
+ /*
+ * File in the s1d41 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_f2;
+ /*
+ * File in the s1d42 disconnected directory. It should always be enforced
+ * when testing against itself under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_f3;
+ /*
+ * Parent of the bind mount destination. It should always be enforced when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s2d1;
+ /*
+ * Directory covered by the bind mount. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s2d2;
+ /*
+ * New parent of the renamed s1d41. It should always be ignored when
+ * testing against files under the s1d41 disconnected directory.
+ */
+ const __u64 allowed_s3d1;
+ /*
+ * New parent of the renamed s1d42. It should always be ignored when
+ * testing against files under the s1d42 disconnected directory.
+ */
+ const __u64 allowed_s4d1;
+
+ /* Expected result of the call to open([fd:s1d41]/f1, O_RDONLY). */
+ const int expected_read_result;
+ /* Expected result of the call to renameat([fd:s1d41]/f1, [fd:s1d42]/f1). */
+ const int expected_rename_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d41]/f2, [fd:s1d42]/f3,
+ * RENAME_EXCHANGE).
+ */
+ const int expected_exchange_result;
+ /* Expected result of the call to renameat([fd:s1d42]/f4, [fd:s1d42]/f5). */
+ const int expected_same_dir_rename_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d1_mount_src_parent) {
+ /* clang-format on */
+ .allowed_s1d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_refer) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_create) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d2_mount_src_rename) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d31_s1d32_old_parent) {
+ /* clang-format on */
+ .allowed_s1d31 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d32 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_refer) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_create) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_even) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* The destination directory has more access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_more) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access denied. */
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* The destination directory has less access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s1d41_s1d42_disconnected_rename_less) {
+ /* clang-format on */
+ .allowed_s1d41 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed_s1d42 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access allowed. */
+ .expected_rename_result = 0,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d1_mount_dst_parent_mini) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s2d2_covered_by_mount) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* Tests collect_domain_accesses(). */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_new_parent_refer) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_new_parent_create) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs,
+ s3d1_s4d1_disconnected_rename_even){
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* The destination directory has more access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_disconnected_rename_more) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access denied. */
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* The destination directory has less access right. */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, s3d1_s4d1_disconnected_rename_less) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ /* Access allowed. */
+ .expected_rename_result = 0,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout4_disconnected_leafs, f1_f2_f3) {
+ /* clang-format on */
+ .allowed_f1 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_f2 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .allowed_f3 = LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+TEST_F_FORK(layout4_disconnected_leafs, read_rename_exchange)
+{
+ const __u64 handled_access =
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_MAKE_REG;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR "/s1d1",
+ .access = variant->allowed_s1d1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2",
+ .access = variant->allowed_s1d2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31",
+ .access = variant->allowed_s1d31,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32",
+ .access = variant->allowed_s1d32,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41",
+ .access = variant->allowed_s1d41,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32/s1d42",
+ .access = variant->allowed_s1d42,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f1",
+ .access = variant->allowed_f1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d31/s1d41/f2",
+ .access = variant->allowed_f2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d32/s1d42/f3",
+ .access = variant->allowed_f3,
+ },
+ {
+ .path = TMP_DIR "/s2d1",
+ .access = variant->allowed_s2d1,
+ },
+ /* s2d2_fd */
+ {
+ .path = TMP_DIR "/s3d1",
+ .access = variant->allowed_s3d1,
+ },
+ {
+ .path = TMP_DIR "/s4d1",
+ .access = variant->allowed_s4d1,
+ },
+ {},
+ };
+ int ruleset_fd, s1d41_bind_fd, s1d42_bind_fd;
+
+ ruleset_fd = create_ruleset(_metadata, handled_access, rules);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds rule for the covered directory. */
+ if (variant->allowed_s2d2) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s2d2_fd,
+ .allowed_access =
+ variant->allowed_s2d2,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s2d2_fd));
+
+ s1d41_bind_fd = open(TMP_DIR "/s2d1/s2d2/s1d31/s1d41",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d41_bind_fd);
+ s1d42_bind_fd = open(TMP_DIR "/s2d1/s2d2/s1d32/s1d42",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d42_bind_fd);
+
+ /* Disconnects and checks source and destination directories. */
+ EXPECT_EQ(0, test_open_rel(s1d41_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(0, test_open_rel(s1d42_bind_fd, "..", O_DIRECTORY));
+ /* Renames to make it accessible through s3d1/s1d41 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s1d1/s1d2/s1d31/s1d41",
+ AT_FDCWD, TMP_DIR "/s3d1/s1d41"));
+ /* Renames to make it accessible through s4d1/s1d42 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s1d1/s1d2/s1d32/s1d42",
+ AT_FDCWD, TMP_DIR "/s4d1/s1d42"));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d41_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d42_bind_fd, "..", O_DIRECTORY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(variant->expected_read_result,
+ test_open_rel(s1d41_bind_fd, "f1", O_RDONLY));
+
+ EXPECT_EQ(variant->expected_rename_result,
+ test_renameat(s1d41_bind_fd, "f1", s1d42_bind_fd, "f1"));
+ EXPECT_EQ(variant->expected_exchange_result,
+ test_exchangeat(s1d41_bind_fd, "f2", s1d42_bind_fd, "f3"));
+
+ EXPECT_EQ(variant->expected_same_dir_rename_result,
+ test_renameat(s1d42_bind_fd, "f4", s1d42_bind_fd, "f5"));
+}
+
+/*
+ * layout5_disconnected_branch before rename:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the first bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s2d1
+ * │   └── s2d2 [source of the second bind mount]
+ * │   └── s2d3
+ * │   └── s2d4 [first s1d2 bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s3d1
+ * │   └── s3d2 [second s2d2 bind mount]
+ * │   └── s2d3
+ * │   └── s2d4 [first s1d2 bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * └── s4d1
+ *
+ * After rename:
+ *
+ * tmp
+ * ├── s1d1
+ * │   └── s1d2 [source of the first bind mount]
+ * │   └── s1d3
+ * │   ├── s1d41
+ * │   │   ├── f1
+ * │   │   └── f2
+ * │   └── s1d42
+ * │   ├── f3
+ * │   └── f4
+ * ├── s2d1
+ * │   └── s2d2 [source of the second bind mount]
+ * ├── s3d1
+ * │   └── s3d2 [second s2d2 bind mount]
+ * └── s4d1
+ * └── s2d3 [renamed here]
+ * └── s2d4 [first s1d2 bind mount]
+ * └── s1d3
+ * ├── s1d41
+ * │   ├── f1
+ * │   └── f2
+ * └── s1d42
+ * ├── f3
+ * └── f4
+ *
+ * Decision path for access from the s3d1/s3d2/s2d3/s2d4/s1d3 file descriptor:
+ * 1. first bind mount: s1d3 -> s1d2
+ * 2. second bind mount: s2d3
+ * 3. tmp mount: s4d1 -> tmp [disconnected branch]
+ * 4. second bind mount: s2d2
+ * 5. tmp mount: s3d1 -> tmp
+ * 6. parent mounts: [...] -> /
+ *
+ * The s4d1 directory is evaluated even if it is not in the s2d2 mount.
+ */
+
+/* clang-format off */
+FIXTURE(layout5_disconnected_branch) {
+ int s2d4_fd, s3d2_fd;
+};
+/* clang-format on */
+
+FIXTURE_SETUP(layout5_disconnected_branch)
+{
+ prepare_layout(_metadata);
+
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f1");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f2");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f3");
+ create_file(_metadata, TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f4");
+ create_directory(_metadata, TMP_DIR "/s2d1/s2d2/s2d3/s2d4");
+ create_directory(_metadata, TMP_DIR "/s3d1/s3d2");
+ create_directory(_metadata, TMP_DIR "/s4d1");
+
+ self->s2d4_fd = open(TMP_DIR "/s2d1/s2d2/s2d3/s2d4",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s2d4_fd);
+
+ self->s3d2_fd =
+ open(TMP_DIR "/s3d1/s3d2", O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, self->s3d2_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(TMP_DIR "/s1d1/s1d2", TMP_DIR "/s2d1/s2d2/s2d3/s2d4",
+ NULL, MS_BIND, NULL));
+ ASSERT_EQ(0, mount(TMP_DIR "/s2d1/s2d2", TMP_DIR "/s3d1/s3d2", NULL,
+ MS_BIND | MS_REC, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN_PARENT(layout5_disconnected_branch)
+{
+ /* Bind mounts are handled by namespace lifetime. */
+
+ /* Removes files after renames. */
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f1");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d41/f2");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f1");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f3");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f4");
+ remove_path(TMP_DIR "/s1d1/s1d2/s1d3/s1d42/f5");
+
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(layout5_disconnected_branch)
+{
+ /*
+ * Parent of all files. It should always be enforced when testing against
+ * files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_base;
+ /*
+ * Parent of the first bind mount source. It should always be ignored when
+ * testing against files under the s1d41 or s1d42 disconnected directories.
+ */
+ const __u64 allowed_s1d1;
+ const __u64 allowed_s1d2;
+ const __u64 allowed_s1d3;
+ const __u64 allowed_s2d1;
+ const __u64 allowed_s2d2;
+ const __u64 allowed_s2d3;
+ const __u64 allowed_s2d4;
+ const __u64 allowed_s3d1;
+ const __u64 allowed_s3d2;
+ const __u64 allowed_s4d1;
+
+ /* Expected result of the call to open([fd:s1d3]/s1d41/f1, O_RDONLY). */
+ const int expected_read_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d41/f1,
+ * [fd:s1d3]/s1d42/f1).
+ */
+ const int expected_rename_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d41/f2,
+ * [fd:s1d3]/s1d42/f3, RENAME_EXCHANGE).
+ */
+ const int expected_exchange_result;
+ /*
+ * Expected result of the call to renameat([fd:s1d3]/s1d42/f4,
+ * [fd:s1d3]/s1d42/f5).
+ */
+ const int expected_same_dir_rename_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d1_mount1_src_parent) {
+ /* clang-format on */
+ .allowed_s1d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_refer) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_create) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d2_mount1_src_rename) {
+ /* clang-format on */
+ .allowed_s1d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_refer) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_create) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_rename) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s1d3_fd_full) {
+ /* clang-format on */
+ .allowed_s1d3 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d1_mount2_src_parent) {
+ /* clang-format on */
+ .allowed_s2d1 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_refer) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_create) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d2_mount2_src_rename) {
+ /* clang-format on */
+ .allowed_s2d2 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d3_mount1_dst_parent_rename) {
+ /* clang-format on */
+ .allowed_s2d3 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s2d4_mount1_dst) {
+ /* clang-format on */
+ .allowed_s2d4 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_refer) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_create) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d1_mount2_dst_parent_rename) {
+ /* clang-format on */
+ .allowed_s3d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s3d2_mount1_dst) {
+ /* clang-format on */
+ .allowed_s3d2 = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_refer) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = EACCES,
+ .expected_rename_result = EACCES,
+ .expected_exchange_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_create) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = 0,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = EXDEV,
+ .expected_exchange_result = EXDEV,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout5_disconnected_branch, s4d1_rename_parent_rename) {
+ /* clang-format on */
+ .allowed_s4d1 = LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_MAKE_REG,
+ .expected_read_result = EACCES,
+ .expected_same_dir_rename_result = 0,
+ .expected_rename_result = 0,
+ .expected_exchange_result = 0,
+};
+
+TEST_F_FORK(layout5_disconnected_branch, read_rename_exchange)
+{
+ const __u64 handled_access =
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_MAKE_REG;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR "/s1d1",
+ .access = variant->allowed_s1d1,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2",
+ .access = variant->allowed_s1d2,
+ },
+ {
+ .path = TMP_DIR "/s1d1/s1d2/s1d3",
+ .access = variant->allowed_s1d3,
+ },
+ {
+ .path = TMP_DIR "/s2d1",
+ .access = variant->allowed_s2d1,
+ },
+ {
+ .path = TMP_DIR "/s2d1/s2d2",
+ .access = variant->allowed_s2d2,
+ },
+ {
+ .path = TMP_DIR "/s2d1/s2d2/s2d3",
+ .access = variant->allowed_s2d3,
+ },
+ /* s2d4_fd */
+ {
+ .path = TMP_DIR "/s3d1",
+ .access = variant->allowed_s3d1,
+ },
+ /* s3d2_fd */
+ {
+ .path = TMP_DIR "/s4d1",
+ .access = variant->allowed_s4d1,
+ },
+ {},
+ };
+ int ruleset_fd, s1d3_bind_fd;
+
+ ruleset_fd = create_ruleset(_metadata, handled_access, rules);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds rules for the covered directories. */
+ if (variant->allowed_s2d4) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s2d4_fd,
+ .allowed_access =
+ variant->allowed_s2d4,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s2d4_fd));
+
+ if (variant->allowed_s3d2) {
+ ASSERT_EQ(0, landlock_add_rule(
+ ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &(struct landlock_path_beneath_attr){
+ .parent_fd = self->s3d2_fd,
+ .allowed_access =
+ variant->allowed_s3d2,
+ },
+ 0));
+ }
+ EXPECT_EQ(0, close(self->s3d2_fd));
+
+ s1d3_bind_fd = open(TMP_DIR "/s3d1/s3d2/s2d3/s2d4/s1d3",
+ O_DIRECTORY | O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, s1d3_bind_fd);
+
+ /* Disconnects and checks source and destination directories. */
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "../..", O_DIRECTORY));
+ /* Renames to make it accessible through s3d1/s1d41 */
+ ASSERT_EQ(0, test_renameat(AT_FDCWD, TMP_DIR "/s2d1/s2d2/s2d3",
+ AT_FDCWD, TMP_DIR "/s4d1/s2d3"));
+ EXPECT_EQ(0, test_open_rel(s1d3_bind_fd, "..", O_DIRECTORY));
+ EXPECT_EQ(ENOENT, test_open_rel(s1d3_bind_fd, "../..", O_DIRECTORY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(variant->expected_read_result,
+ test_open_rel(s1d3_bind_fd, "s1d41/f1", O_RDONLY));
+
+ EXPECT_EQ(variant->expected_rename_result,
+ test_renameat(s1d3_bind_fd, "s1d41/f1", s1d3_bind_fd,
+ "s1d42/f1"));
+ EXPECT_EQ(variant->expected_exchange_result,
+ test_exchangeat(s1d3_bind_fd, "s1d41/f2", s1d3_bind_fd,
+ "s1d42/f3"));
+
+ EXPECT_EQ(variant->expected_same_dir_rename_result,
+ test_renameat(s1d3_bind_fd, "s1d42/f4", s1d3_bind_fd,
+ "s1d42/f5"));
+}
+
#define LOWER_BASE TMP_DIR "/lower"
#define LOWER_DATA LOWER_BASE "/data"
static const char lower_fl1[] = LOWER_DATA "/fl1";
@@ -5554,4 +7053,598 @@ TEST_F_FORK(layout3_fs, release_inodes)
ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY));
}
+static int matches_log_fs_extra(struct __test_metadata *const _metadata,
+ int audit_fd, const char *const blockers,
+ const char *const path, const char *const extra)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=fs\\.%s path=\"%s\" dev=\"[^\"]\\+\" ino=[0-9]\\+$";
+ char *absolute_path = NULL;
+ size_t log_match_remaining = sizeof(log_template) + strlen(blockers) +
+ PATH_MAX * 2 +
+ (extra ? strlen(extra) : 0) + 1;
+ char log_match[log_match_remaining];
+ char *log_match_cursor = log_match;
+ size_t chunk_len;
+
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ REGEX_LANDLOCK_PREFIX " blockers=%s path=\"",
+ blockers);
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ /*
+ * It is assume that absolute_path does not contain control characters nor
+ * spaces, see audit_string_contains_control().
+ */
+ absolute_path = realpath(path, NULL);
+ if (!absolute_path)
+ return -errno;
+
+ log_match_remaining -= chunk_len;
+ log_match_cursor += chunk_len;
+ log_match_cursor = regex_escape(absolute_path, log_match_cursor,
+ log_match_remaining);
+ free(absolute_path);
+ if (log_match_cursor < 0)
+ return (long long)log_match_cursor;
+
+ log_match_remaining -= log_match_cursor - log_match;
+ chunk_len = snprintf(log_match_cursor, log_match_remaining,
+ "\" dev=\"[^\"]\\+\" ino=[0-9]\\+%s$",
+ extra ?: "");
+ if (chunk_len < 0 || chunk_len >= log_match_remaining)
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+static int matches_log_fs(struct __test_metadata *const _metadata, int audit_fd,
+ const char *const blockers, const char *const path)
+{
+ return matches_log_fs_extra(_metadata, audit_fd, blockers, path, NULL);
+}
+
+FIXTURE(audit_layout1)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit_layout1)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit_layout1)
+{
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+TEST_F(audit_layout1, execute_make)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.execute",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * Using a set of handled/denied access rights make it possible to check that
+ * only the blocked ones are logged.
+ */
+
+/* clang-format off */
+static const __u64 access_fs_16 =
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_CHAR |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO |
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK |
+ LANDLOCK_ACCESS_FS_MAKE_SYM |
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_IOCTL_DEV;
+/* clang-format on */
+
+TEST_F(audit_layout1, execute_read)
+{
+ struct audit_records records;
+
+ copy_file(_metadata, bin_true, file1_s1d1);
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.execute_read test is
+ * the extra ",fs\\.read_file" blocked by the executable file.
+ */
+ test_execute(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+ test_check_exec(_metadata, EACCES, file1_s1d1);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.execute,fs\\.read_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, write_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.write_file", file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_file",
+ file1_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, read_dir)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_open(dir_s1d1, O_DIRECTORY));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.read_dir",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, remove_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+ EXPECT_EQ(0, unlink(file2_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, rmdir(dir_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_dir", dir_s1d2));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, remove_file)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, unlink(file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_char)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFCHR | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_char",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_dir)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mkdir(file1_s1d3, 0755));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_dir",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_reg)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFREG | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_reg",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sock)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFSOCK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sock",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_fifo)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFIFO | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_fifo",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_block)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, mknod(file1_s1d3, S_IFBLK | 0644, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_block", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, make_sym)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, symlink("target", file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.make_sym",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, refer_handled)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EXDEV, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0,
+ matches_log_domain_allocated(self->audit_fd, getpid(), NULL));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_make)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_REFER,
+ });
+
+ EXPECT_EQ(-1, link(file1_s1d1, file1_s1d3));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.make_reg,fs\\.refer", dir_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_rename)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(EACCES, test_rename(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.refer", dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+TEST_F(audit_layout1, refer_exchange)
+{
+ struct audit_records records;
+
+ EXPECT_EQ(0, unlink(file1_s1d3));
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ /*
+ * The only difference with the previous audit_layout1.refer_rename test is
+ * the extra ",fs\\.make_reg" blocked by the source directory.
+ */
+ EXPECT_EQ(EACCES, test_exchange(file1_s1d2, file1_s2d3));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s1d2));
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.remove_file,fs\\.make_reg,fs\\.refer",
+ dir_s2d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
+/*
+ * This test checks that the audit record is correctly generated when the
+ * operation is only partially denied. This is the case for rename(2) when the
+ * source file is allowed to be referenced but the destination directory is not.
+ *
+ * This is also a regression test for commit d617f0d72d80 ("landlock: Optimize
+ * file path walks and prepare for audit support") and commit 058518c20920
+ * ("landlock: Align partial refer access checks with final ones").
+ */
+TEST_F(audit_layout1, refer_rename_half)
+{
+ struct audit_records records;
+ const struct rule layer1[] = {
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Only half of the request is denied. */
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
+ dir_s1d1));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, truncate)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata, &(struct landlock_ruleset_attr){
+ .handled_access_fs = access_fs_16,
+ });
+
+ EXPECT_EQ(-1, truncate(file1_s1d3, 0));
+ EXPECT_EQ(EACCES, errno);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.truncate",
+ file1_s1d3));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, ioctl_dev)
+{
+ struct audit_records records;
+ int fd;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ access_fs_16 &
+ ~LANDLOCK_ACCESS_FS_READ_FILE,
+ });
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(0, matches_log_fs_extra(_metadata, self->audit_fd,
+ "fs\\.ioctl_dev", "/dev/null",
+ " ioctlcmd=0x541b"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
+TEST_F(audit_layout1, mount)
+{
+ struct audit_records records;
+
+ drop_access_rights(_metadata,
+ &(struct landlock_ruleset_attr){
+ .handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ });
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ EXPECT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd,
+ "fs\\.change_topology", dir_s3d2));
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4e0aeb53b225..2a45208551e6 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -20,6 +20,7 @@
#include <sys/syscall.h>
#include <sys/un.h>
+#include "audit.h"
#include "common.h"
const short sock_port_start = (1 << 10);
@@ -85,18 +86,18 @@ static void setup_loopback(struct __test_metadata *const _metadata)
clear_ambient_cap(_metadata, CAP_NET_ADMIN);
}
+static bool prot_is_tcp(const struct protocol_variant *const prot)
+{
+ return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+ prot->type == SOCK_STREAM &&
+ (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
+}
+
static bool is_restricted(const struct protocol_variant *const prot,
const enum sandbox_type sandbox)
{
- switch (prot->domain) {
- case AF_INET:
- case AF_INET6:
- switch (prot->type) {
- case SOCK_STREAM:
- return sandbox == TCP_SANDBOX;
- }
- break;
- }
+ if (sandbox == TCP_SANDBOX)
+ return prot_is_tcp(prot);
return false;
}
@@ -105,7 +106,7 @@ static int socket_variant(const struct service_fixture *const srv)
int ret;
ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
- 0);
+ srv->protocol.protocol);
if (ret < 0)
return -errno;
return ret;
@@ -290,22 +291,70 @@ FIXTURE_TEARDOWN(protocol)
}
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
@@ -350,22 +399,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
@@ -1772,4 +1869,135 @@ TEST_F(port_specific, bind_connect_1023)
EXPECT_EQ(0, close(bind_fd));
}
+static int matches_log_tcp(const int audit_fd, const char *const blockers,
+ const char *const dir_addr, const char *const addr,
+ const char *const dir_port)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=%s %s=%s %s=1024$";
+ /*
+ * Max strlen(blockers): 16
+ * Max strlen(dir_addr): 5
+ * Max strlen(addr): 12
+ * Max strlen(dir_port): 4
+ */
+ char log_match[sizeof(log_template) + 37];
+ int log_match_len;
+
+ log_match_len = snprintf(log_match, sizeof(log_match), log_template,
+ blockers, dir_addr, addr, dir_port);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct service_fixture srv0;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_VARIANT(audit)
+{
+ const char *const addr;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv4) {
+ /* clang-format on */
+ .addr = "127\\.0\\.0\\.1",
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(audit, ipv6) {
+ /* clang-format on */
+ .addr = "::1",
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(audit)
+{
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ setup_loopback(_metadata);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ disable_caps(_metadata);
+};
+
+FIXTURE_TEARDOWN(audit)
+{
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+TEST_F(audit, bind)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, bind_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.bind_tcp", "saddr",
+ variant->addr, "src"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
+TEST_F(audit, connect)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ struct audit_records records;
+ int ruleset_fd, sock_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ sock_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, sock_fd);
+ EXPECT_EQ(-EACCES, connect_variant(sock_fd, &self->srv0));
+ EXPECT_EQ(0, matches_log_tcp(self->audit_fd, "net\\.connect_tcp",
+ "daddr", variant->addr, "dest"));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(1, records.domain);
+
+ EXPECT_EQ(0, close(sock_fd));
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 8f31b673ff2d..4e356334ecb7 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -4,6 +4,7 @@
*
* Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2019-2020 ANSSI
+ * Copyright © 2024-2025 Microsoft Corporation
*/
#define _GNU_SOURCE
@@ -17,6 +18,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#include "audit.h"
#include "common.h"
/* Copied from security/yama/yama_lsm.c */
@@ -434,4 +436,142 @@ TEST_F(hierarchy, trace)
_metadata->exit_code = KSFT_FAIL;
}
+static int matches_log_ptrace(struct __test_metadata *const _metadata,
+ int audit_fd, const pid_t opid)
+{
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " blockers=ptrace opid=%d ocomm=\"ptrace_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, opid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_ACCESS, log_match,
+ NULL);
+}
+
+FIXTURE(audit)
+{
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(audit)
+{
+ disable_caps(_metadata);
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+}
+
+FIXTURE_TEARDOWN_PARENT(audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+TEST_F(audit, trace)
+{
+ pid_t child;
+ int status;
+ int pipe_child[2], pipe_parent[2];
+ int yama_ptrace_scope;
+ char buf_parent;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ yama_ptrace_scope = get_yama_ptrace_scope();
+ ASSERT_LE(0, yama_ptrace_scope);
+
+ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
+ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
+ yama_ptrace_scope);
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
+ */
+ drop_caps(_metadata);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ ASSERT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+ EXPECT_EQ(-1, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(EPERM, errno);
+ /* We should see the child process. */
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd,
+ getpid()));
+
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ /* Checks for a domain creation. */
+ EXPECT_EQ(1, records.domain);
+
+ /*
+ * Signals that the PTRACE_ATTACH test is done and the
+ * PTRACE_TRACEME test is ongoing.
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits for the parent PTRACE_ATTACH test. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(pipe_child[1]));
+ ASSERT_EQ(0, close(pipe_parent[0]));
+ create_domain(_metadata);
+
+ /* Signals that the parent is in a domain. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /*
+ * Waits for the child to test PTRACE_ATTACH on the parent and start
+ * testing PTRACE_TRACEME.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* The child should not be traced by the parent. */
+ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
+ EXPECT_EQ(ESRCH, errno);
+
+ /* Tests PTRACE_ATTACH on the child. */
+ EXPECT_EQ(-1, ptrace(PTRACE_ATTACH, child, NULL, 0));
+ EXPECT_EQ(EPERM, errno);
+ EXPECT_EQ(0, matches_log_ptrace(_metadata, self->audit_fd, child));
+
+ /* Signals that the parent PTRACE_ATTACH test is done. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
index a6b59d2ab1b4..6825082c079c 100644
--- a/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
+++ b/tools/testing/selftests/landlock/scoped_abstract_unix_test.c
@@ -20,6 +20,7 @@
#include <sys/wait.h>
#include <unistd.h>
+#include "audit.h"
#include "common.h"
#include "scoped_common.h"
@@ -267,6 +268,116 @@ TEST_F(scoped_domains, connect_to_child)
_metadata->exit_code = KSFT_FAIL;
}
+FIXTURE(scoped_audit)
+{
+ struct service_fixture dgram_address;
+ struct audit_filter audit_filter;
+ int audit_fd;
+};
+
+FIXTURE_SETUP(scoped_audit)
+{
+ disable_caps(_metadata);
+
+ memset(&self->dgram_address, 0, sizeof(self->dgram_address));
+ set_unix_address(&self->dgram_address, 1);
+
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ self->audit_fd = audit_init_with_exe_filter(&self->audit_filter);
+ EXPECT_LE(0, self->audit_fd);
+ drop_caps(_metadata);
+}
+
+FIXTURE_TEARDOWN_PARENT(scoped_audit)
+{
+ EXPECT_EQ(0, audit_cleanup(-1, NULL));
+}
+
+/* python -c 'print(b"\0selftests-landlock-abstract-unix-".hex().upper())' */
+#define ABSTRACT_SOCKET_PATH_PREFIX \
+ "0073656C6674657374732D6C616E646C6F636B2D61627374726163742D756E69782D"
+
+/*
+ * Simpler version of scoped_domains.connect_to_child, but with audit tests.
+ */
+TEST_F(scoped_audit, connect_to_child)
+{
+ pid_t child;
+ int err_dgram, status;
+ int pipe_child[2], pipe_parent[2];
+ char buf;
+ int dgram_client;
+ struct audit_records records;
+
+ /* Makes sure there is no superfluous logged records. */
+ EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
+ EXPECT_EQ(0, records.access);
+ EXPECT_EQ(0, records.domain);
+
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int dgram_server;
+
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Waits for the parent to be in a domain. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+
+ dgram_server = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_server);
+ ASSERT_EQ(0, bind(dgram_server, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len));
+
+ /* Signals to the parent that child is listening. */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ /* Waits to connect. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf, 1));
+ EXPECT_EQ(0, close(dgram_server));
+ _exit(_metadata->exit_code);
+ return;
+ }
+ EXPECT_EQ(0, close(pipe_child[1]));
+ EXPECT_EQ(0, close(pipe_parent[0]));
+
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ dgram_client = socket(AF_UNIX, SOCK_DGRAM, 0);
+ ASSERT_LE(0, dgram_client);
+
+ /* Waits for the child to listen */
+ ASSERT_EQ(1, read(pipe_child[0], &buf, 1));
+ err_dgram = connect(dgram_client, &self->dgram_address.unix_addr,
+ self->dgram_address.unix_addr_len);
+ EXPECT_EQ(-1, err_dgram);
+ EXPECT_EQ(EPERM, errno);
+
+ EXPECT_EQ(
+ 0,
+ audit_match_record(
+ self->audit_fd, AUDIT_LANDLOCK_ACCESS,
+ REGEX_LANDLOCK_PREFIX
+ " blockers=scope\\.abstract_unix_socket path=" ABSTRACT_SOCKET_PATH_PREFIX
+ "[0-9A-F]\\+$",
+ NULL));
+
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(dgram_client));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
FIXTURE(scoped_vs_unscoped)
{
struct service_fixture parent_stream_address, parent_dgram_address,
diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c
index 475ee62a832d..d8bf33417619 100644
--- a/tools/testing/selftests/landlock/scoped_signal_test.c
+++ b/tools/testing/selftests/landlock/scoped_signal_test.c
@@ -249,47 +249,67 @@ TEST_F(scoped_domains, check_access_signal)
_metadata->exit_code = KSFT_FAIL;
}
-static int thread_pipe[2];
-
enum thread_return {
THREAD_INVALID = 0,
THREAD_SUCCESS = 1,
THREAD_ERROR = 2,
+ THREAD_TEST_FAILED = 3,
};
-void *thread_func(void *arg)
+static void *thread_sync(void *arg)
{
+ const int pipe_read = *(int *)arg;
char buf;
- if (read(thread_pipe[0], &buf, 1) != 1)
+ if (read(pipe_read, &buf, 1) != 1)
return (void *)THREAD_ERROR;
return (void *)THREAD_SUCCESS;
}
-TEST(signal_scoping_threads)
+TEST(signal_scoping_thread_before)
{
- pthread_t no_sandbox_thread, scoped_thread;
+ pthread_t no_sandbox_thread;
enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
drop_caps(_metadata);
ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
- ASSERT_EQ(0,
- pthread_create(&no_sandbox_thread, NULL, thread_func, NULL));
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync,
+ &thread_pipe[0]));
- /* Restricts the domain after creating the first thread. */
+ /* Enforces restriction after creating the thread. */
create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
- ASSERT_EQ(EPERM, pthread_kill(no_sandbox_thread, 0));
- ASSERT_EQ(1, write(thread_pipe[1], ".", 1));
-
- ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL));
- ASSERT_EQ(0, pthread_kill(scoped_thread, 0));
- ASSERT_EQ(1, write(thread_pipe[1], ".", 1));
+ EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ EXPECT_EQ(0, close(thread_pipe[0]));
+ EXPECT_EQ(0, close(thread_pipe[1]));
+}
+
+TEST(signal_scoping_thread_after)
+{
+ pthread_t scoped_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int thread_pipe[2];
+
+ drop_caps(_metadata);
+ ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC));
+
+ /* Enforces restriction before creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync,
+ &thread_pipe[0]));
+
+ EXPECT_EQ(0, pthread_kill(scoped_thread, 0));
+ EXPECT_EQ(1, write(thread_pipe[1], ".", 1));
+
EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret));
EXPECT_EQ(THREAD_SUCCESS, ret);
@@ -297,6 +317,64 @@ TEST(signal_scoping_threads)
EXPECT_EQ(0, close(thread_pipe[1]));
}
+struct thread_setuid_args {
+ int pipe_read, new_uid;
+};
+
+void *thread_setuid(void *ptr)
+{
+ const struct thread_setuid_args *arg = ptr;
+ char buf;
+
+ if (read(arg->pipe_read, &buf, 1) != 1)
+ return (void *)THREAD_ERROR;
+
+ /* libc's setuid() should update all thread's credentials. */
+ if (getuid() != arg->new_uid)
+ return (void *)THREAD_TEST_FAILED;
+
+ return (void *)THREAD_SUCCESS;
+}
+
+TEST(signal_scoping_thread_setuid)
+{
+ struct thread_setuid_args arg;
+ pthread_t no_sandbox_thread;
+ enum thread_return ret = THREAD_INVALID;
+ int pipe_parent[2];
+ int prev_uid;
+
+ disable_caps(_metadata);
+
+ /* This test does not need to be run as root. */
+ prev_uid = getuid();
+ arg.new_uid = prev_uid + 1;
+ EXPECT_LT(0, arg.new_uid);
+
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ arg.pipe_read = pipe_parent[0];
+
+ /* Capabilities must be set before creating a new thread. */
+ set_cap(_metadata, CAP_SETUID);
+ ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid,
+ &arg));
+
+ /* Enforces restriction after creating the thread. */
+ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL);
+
+ EXPECT_NE(arg.new_uid, getuid());
+ EXPECT_EQ(0, setuid(arg.new_uid));
+ EXPECT_EQ(arg.new_uid, getuid());
+ EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret));
+ EXPECT_EQ(THREAD_SUCCESS, ret);
+
+ clear_cap(_metadata, CAP_SETUID);
+ EXPECT_EQ(0, close(pipe_parent[0]));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+}
+
const short backlog = 10;
static volatile sig_atomic_t signal_received;
diff --git a/tools/testing/selftests/landlock/wait-pipe-sandbox.c b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
new file mode 100644
index 000000000000..87dbc9164430
--- /dev/null
+++ b/tools/testing/selftests/landlock/wait-pipe-sandbox.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Write in a pipe, wait, sandbox itself, test sandboxing, and wait again.
+ *
+ * Used by audit_exec.flags from audit_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/prctl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+static int sync_with(int pipe_child, int pipe_parent)
+{
+ char buf;
+
+ /* Signals that we are waiting. */
+ if (write(pipe_child, ".", 1) != 1) {
+ perror("Failed to write to first argument");
+ return 1;
+ }
+
+ /* Waits for the parent do its test. */
+ if (read(pipe_parent, &buf, 1) != 1) {
+ perror("Failed to write to the second argument");
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ const struct landlock_ruleset_attr layer2 = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ const struct landlock_ruleset_attr layer3 = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ int err, pipe_child, pipe_parent, ruleset_fd;
+
+ /* The first argument must be the file descriptor number of a pipe. */
+ if (argc != 3) {
+ fprintf(stderr, "Wrong number of arguments (not two)\n");
+ return 1;
+ }
+
+ pipe_child = atoi(argv[1]);
+ pipe_parent = atoi(argv[2]);
+ /* PR_SET_NO_NEW_PRIVS already set by parent. */
+
+ /* First step to test parent's layer1. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Second step to test parent's layer1 and our layer2. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer2, sizeof(layer2), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer2 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to send a signal, denied by layer1. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Third step to test our layer2 and layer3. */
+ err = sync_with(pipe_child, pipe_parent);
+ if (err)
+ return err;
+
+ ruleset_fd = landlock_create_ruleset(&layer3, sizeof(layer3), 0);
+ if (ruleset_fd < 0) {
+ perror("Failed to create the layer3 ruleset");
+ return 1;
+ }
+
+ if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to restrict self");
+ return 1;
+ }
+ close(ruleset_fd);
+
+ /* Tries to open ., denied by layer2. */
+ if (open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC) >= 0) {
+ fprintf(stderr, "Successfully opened /");
+ return 1;
+ }
+
+ /* Tries to send a signal, denied by layer3. */
+ if (!kill(getppid(), 0)) {
+ fprintf(stderr, "Successfully sent a signal to the parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index d6edcfcb5be8..f02cc8a2e4ae 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -199,6 +199,9 @@ clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
# Build with _GNU_SOURCE by default
CFLAGS += -D_GNU_SOURCE=
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I${top_srcdir}/tools/testing/selftests
+
# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
# make USERCFLAGS=-Werror USERLDFLAGS=-static
CFLAGS += $(USERCFLAGS)
@@ -228,4 +231,10 @@ $(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif
-.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir
+# Extract the expected header directory
+khdr_output := $(patsubst %/usr/include,%,$(filter %/usr/include,$(KHDR_INCLUDES)))
+
+headers:
+ $(Q)$(MAKE) -f $(top_srcdir)/Makefile -C $(khdr_output) headers
+
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index c52fe3ad8e98..f876bf4744e1 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,5 +4,5 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh
+TEST_PROGS := bitmap.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index dc15aba8d0a3..377b3699ff31 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,5 +1,3 @@
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
deleted file mode 100755
index 370b79a9cb2e..000000000000
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
deleted file mode 100755
index 05f4544e87f9..000000000000
--- a/tools/testing/selftests/lib/printf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
deleted file mode 100755
index b59b8ba561c3..000000000000
--- a/tools/testing/selftests/lib/scanf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the scanf infrastructure using test_scanf kernel module.
-$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 15601402dee6..8ec0cb64ad94 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -10,6 +10,11 @@ SYSFS_KERNEL_DIR="/sys/kernel"
SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch"
SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug"
SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes"
+if [[ -e /sys/kernel/tracing/trace ]]; then
+ SYSFS_TRACING_DIR="$SYSFS_KERNEL_DIR/tracing"
+else
+ SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
+fi
# Kselftest framework requirement - SKIP code is 4
ksft_skip=4
@@ -62,6 +67,9 @@ function push_config() {
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled)
KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled")
+ TRACING_ON=$(cat "$SYSFS_TRACING_DIR/tracing_on")
+ CURRENT_TRACER=$(cat "$SYSFS_TRACING_DIR/current_tracer")
+ FTRACE_FILTER=$(cat "$SYSFS_TRACING_DIR/set_ftrace_filter")
}
function pop_config() {
@@ -74,6 +82,17 @@ function pop_config() {
if [[ -n "$KPROBE_ENABLED" ]]; then
echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled"
fi
+ if [[ -n "$TRACING_ON" ]]; then
+ echo "$TRACING_ON" > "$SYSFS_TRACING_DIR/tracing_on"
+ fi
+ if [[ -n "$CURRENT_TRACER" ]]; then
+ echo "$CURRENT_TRACER" > "$SYSFS_TRACING_DIR/current_tracer"
+ fi
+ if [[ -n "$FTRACE_FILTER" ]]; then
+ echo "$FTRACE_FILTER" \
+ | sed -e "/#### all functions enabled ####/d" \
+ > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ fi
}
function set_dynamic_debug() {
@@ -352,3 +371,37 @@ function check_sysfs_value() {
die "Unexpected value in $path: $expected_value vs. $value"
fi
}
+
+# cleanup_tracing() - stop and clean up function tracing
+function cleanup_tracing() {
+ echo 0 > "$SYSFS_TRACING_DIR/tracing_on"
+ echo "" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo "nop" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "" > "$SYSFS_TRACING_DIR/trace"
+}
+
+# trace_function(function) - start tracing of a function
+# function - to be traced function
+function trace_function() {
+ local function="$1"; shift
+
+ cleanup_tracing
+
+ echo "function" > "$SYSFS_TRACING_DIR/current_tracer"
+ echo "$function" > "$SYSFS_TRACING_DIR/set_ftrace_filter"
+ echo 1 > "$SYSFS_TRACING_DIR/tracing_on"
+}
+
+# check_traced_functions(functions...) - check whether each function appeared in the trace log
+# functions - list of functions to be checked
+function check_traced_functions() {
+ local function
+
+ for function in "$@"; do
+ if ! grep -Fwq "$function" "$SYSFS_TRACING_DIR/trace" ; then
+ die "Function ($function) did not appear in the trace"
+ fi
+ done
+
+ cleanup_tracing
+}
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index fe14f248913a..094176f1a46a 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -61,4 +61,38 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
+# - verify livepatch can load
+# - check if traces have a patched function
+# - reset trace and unload livepatch
+
+start_test "trace livepatched function and check that the live patch remains in effect"
+
+FUNCTION_NAME="livepatch_cmdline_proc_show"
+
+load_lp $MOD_LIVEPATCH
+trace_function "$FUNCTION_NAME"
+
+if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+ log "livepatch: ok"
+fi
+
+check_traced_functions "$FUNCTION_NAME"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+livepatch: ok
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
exit 0
diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh
index 115065156016..b67dfad03d97 100755
--- a/tools/testing/selftests/livepatch/test-kprobe.sh
+++ b/tools/testing/selftests/livepatch/test-kprobe.sh
@@ -5,6 +5,8 @@
. $(dirname $0)/functions.sh
+grep -q kprobe_ftrace_ops /proc/kallsyms || skip "test-kprobe requires CONFIG_KPROBES_ON_FTRACE"
+
MOD_LIVEPATCH=test_klp_livepatch
MOD_KPROBE=test_klp_kprobe
diff --git a/tools/testing/selftests/liveupdate/.gitignore b/tools/testing/selftests/liveupdate/.gitignore
new file mode 100644
index 000000000000..661827083ab6
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/.gitignore
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*
+!/**/
+!*.c
+!*.h
+!*.sh
+!.gitignore
+!config
+!Makefile
diff --git a/tools/testing/selftests/liveupdate/Makefile b/tools/testing/selftests/liveupdate/Makefile
new file mode 100644
index 000000000000..080754787ede
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+LIB_C += luo_test_utils.c
+
+TEST_GEN_PROGS += liveupdate
+
+TEST_GEN_PROGS_EXTENDED += luo_kexec_simple
+TEST_GEN_PROGS_EXTENDED += luo_multi_session
+
+TEST_FILES += do_kexec.sh
+
+include ../lib.mk
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -MD
+
+LIB_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIB_C))
+TEST_O := $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_O += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+
+TEST_DEP_FILES := $(patsubst %.o, %.d, $(LIB_O))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(TEST_O))
+-include $(TEST_DEP_FILES)
+
+$(LIB_O): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/%: %.o $(LIB_O)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIB_O) $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIB_O)
+EXTRA_CLEAN += $(TEST_O)
+EXTRA_CLEAN += $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/liveupdate/config b/tools/testing/selftests/liveupdate/config
new file mode 100644
index 000000000000..91d03f9a6a39
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/config
@@ -0,0 +1,11 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_HANDOVER=y
+CONFIG_KEXEC_HANDOVER_ENABLE_DEFAULT=y
+CONFIG_KEXEC_HANDOVER_DEBUGFS=y
+CONFIG_KEXEC_HANDOVER_DEBUG=y
+CONFIG_LIVEUPDATE=y
+CONFIG_LIVEUPDATE_TEST=y
+CONFIG_MEMFD_CREATE=y
+CONFIG_TMPFS=y
+CONFIG_SHMEM=y
diff --git a/tools/testing/selftests/liveupdate/do_kexec.sh b/tools/testing/selftests/liveupdate/do_kexec.sh
new file mode 100755
index 000000000000..3c7c6cafbef8
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/do_kexec.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+# Use $KERNEL and $INITRAMFS to pass custom Kernel and optional initramfs
+
+KERNEL="${KERNEL:-/boot/bzImage}"
+set -- -l -s --reuse-cmdline "$KERNEL"
+
+INITRAMFS="${INITRAMFS:-/boot/initramfs}"
+if [ -f "$INITRAMFS" ]; then
+ set -- "$@" --initrd="$INITRAMFS"
+fi
+
+kexec "$@"
+kexec -e
diff --git a/tools/testing/selftests/liveupdate/liveupdate.c b/tools/testing/selftests/liveupdate/liveupdate.c
new file mode 100644
index 000000000000..c2878e3d5ef9
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/liveupdate.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+/*
+ * Selftests for the Live Update Orchestrator.
+ * This test suite verifies the functionality and behavior of the
+ * /dev/liveupdate character device and its session management capabilities.
+ *
+ * Tests include:
+ * - Device access: basic open/close, and enforcement of exclusive access.
+ * - Session management: creation of unique sessions, and duplicate name detection.
+ * - Resource preservation: successfully preserving individual and multiple memfds,
+ * verifying contents remain accessible.
+ * - Complex multi-session scenarios involving mixed empty and populated files.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <linux/liveupdate.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define LIVEUPDATE_DEV "/dev/liveupdate"
+
+FIXTURE(liveupdate_device) {
+ int fd1;
+ int fd2;
+};
+
+FIXTURE_SETUP(liveupdate_device)
+{
+ self->fd1 = -1;
+ self->fd2 = -1;
+}
+
+FIXTURE_TEARDOWN(liveupdate_device)
+{
+ if (self->fd1 >= 0)
+ close(self->fd1);
+ if (self->fd2 >= 0)
+ close(self->fd2);
+}
+
+/*
+ * Test Case: Basic Open and Close
+ *
+ * Verifies that the /dev/liveupdate device can be opened and subsequently
+ * closed without errors. Skips if the device does not exist.
+ */
+TEST_F(liveupdate_device, basic_open_close)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ ASSERT_EQ(close(self->fd1), 0);
+ self->fd1 = -1;
+}
+
+/*
+ * Test Case: Exclusive Open Enforcement
+ *
+ * Verifies that the /dev/liveupdate device can only be opened by one process
+ * at a time. It checks that a second attempt to open the device fails with
+ * the EBUSY error code.
+ */
+TEST_F(liveupdate_device, exclusive_open)
+{
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist.", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+ self->fd2 = open(LIVEUPDATE_DEV, O_RDWR);
+ EXPECT_LT(self->fd2, 0);
+ EXPECT_EQ(errno, EBUSY);
+}
+
+/* Helper function to create a LUO session via ioctl. */
+static int create_session(int lu_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session args = {};
+
+ args.size = sizeof(args);
+ strncpy((char *)args.name, name, sizeof(args.name) - 1);
+
+ if (ioctl(lu_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &args))
+ return -errno;
+
+ return args.fd;
+}
+
+/*
+ * Test Case: Create Duplicate Session
+ *
+ * Verifies that attempting to create two sessions with the same name fails
+ * on the second attempt with EEXIST.
+ */
+TEST_F(liveupdate_device, create_duplicate_session)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "duplicate-session-test");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "duplicate-session-test");
+ EXPECT_LT(session_fd2, 0);
+ EXPECT_EQ(-session_fd2, EEXIST);
+
+ ASSERT_EQ(close(session_fd1), 0);
+}
+
+/*
+ * Test Case: Create Distinct Sessions
+ *
+ * Verifies that creating two sessions with different names succeeds.
+ */
+TEST_F(liveupdate_device, create_distinct_sessions)
+{
+ int session_fd1, session_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "distinct-session-1");
+ ASSERT_GE(session_fd1, 0);
+
+ session_fd2 = create_session(self->fd1, "distinct-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+static int preserve_fd(int session_fd, int fd_to_preserve, __u64 token)
+{
+ struct liveupdate_session_preserve_fd args = {};
+
+ args.size = sizeof(args);
+ args.fd = fd_to_preserve;
+ args.token = token;
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &args))
+ return -errno;
+
+ return 0;
+}
+
+/*
+ * Test Case: Preserve MemFD
+ *
+ * Verifies that a valid memfd can be successfully preserved in a session and
+ * that its contents remain intact after the preservation call.
+ */
+TEST_F(liveupdate_device, preserve_memfd)
+{
+ const char *test_str = "hello liveupdate";
+ char read_buf[64] = {};
+ int session_fd, mem_fd;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd = memfd_create("test-memfd", 0);
+ ASSERT_GE(mem_fd, 0);
+
+ ASSERT_EQ(write(mem_fd, test_str, strlen(test_str)), strlen(test_str));
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd, 0x1234), 0);
+ ASSERT_EQ(close(session_fd), 0);
+
+ ASSERT_EQ(lseek(mem_fd, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd, read_buf, sizeof(read_buf)), strlen(test_str));
+ ASSERT_STREQ(read_buf, test_str);
+ ASSERT_EQ(close(mem_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Multiple MemFDs
+ *
+ * Verifies that multiple memfds can be preserved in a single session,
+ * each with a unique token, and that their contents remain distinct and
+ * correct after preservation.
+ */
+TEST_F(liveupdate_device, preserve_multiple_memfds)
+{
+ const char *test_str1 = "data for memfd one";
+ const char *test_str2 = "data for memfd two";
+ char read_buf[64] = {};
+ int session_fd, mem_fd1, mem_fd2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "preserve-multi-memfd-test");
+ ASSERT_GE(session_fd, 0);
+
+ mem_fd1 = memfd_create("test-memfd-1", 0);
+ ASSERT_GE(mem_fd1, 0);
+ mem_fd2 = memfd_create("test-memfd-2", 0);
+ ASSERT_GE(mem_fd2, 0);
+
+ ASSERT_EQ(write(mem_fd1, test_str1, strlen(test_str1)), strlen(test_str1));
+ ASSERT_EQ(write(mem_fd2, test_str2, strlen(test_str2)), strlen(test_str2));
+
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd1, 0xAAAA), 0);
+ ASSERT_EQ(preserve_fd(session_fd, mem_fd2, 0xBBBB), 0);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd1, read_buf, sizeof(read_buf)), strlen(test_str1));
+ ASSERT_STREQ(read_buf, test_str1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd2, read_buf, sizeof(read_buf)), strlen(test_str2));
+ ASSERT_STREQ(read_buf, test_str2);
+
+ ASSERT_EQ(close(mem_fd1), 0);
+ ASSERT_EQ(close(mem_fd2), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+/*
+ * Test Case: Preserve Complex Scenario
+ *
+ * Verifies a more complex scenario with multiple sessions and a mix of empty
+ * and non-empty memfds distributed across them.
+ */
+TEST_F(liveupdate_device, preserve_complex_scenario)
+{
+ const char *data1 = "data for session 1";
+ const char *data2 = "data for session 2";
+ char read_buf[64] = {};
+ int session_fd1, session_fd2;
+ int mem_fd_data1, mem_fd_empty1, mem_fd_data2, mem_fd_empty2;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd1 = create_session(self->fd1, "complex-session-1");
+ ASSERT_GE(session_fd1, 0);
+ session_fd2 = create_session(self->fd1, "complex-session-2");
+ ASSERT_GE(session_fd2, 0);
+
+ mem_fd_data1 = memfd_create("data1", 0);
+ ASSERT_GE(mem_fd_data1, 0);
+ ASSERT_EQ(write(mem_fd_data1, data1, strlen(data1)), strlen(data1));
+
+ mem_fd_empty1 = memfd_create("empty1", 0);
+ ASSERT_GE(mem_fd_empty1, 0);
+
+ mem_fd_data2 = memfd_create("data2", 0);
+ ASSERT_GE(mem_fd_data2, 0);
+ ASSERT_EQ(write(mem_fd_data2, data2, strlen(data2)), strlen(data2));
+
+ mem_fd_empty2 = memfd_create("empty2", 0);
+ ASSERT_GE(mem_fd_empty2, 0);
+
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_data1, 0x1111), 0);
+ ASSERT_EQ(preserve_fd(session_fd1, mem_fd_empty1, 0x2222), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_data2, 0x3333), 0);
+ ASSERT_EQ(preserve_fd(session_fd2, mem_fd_empty2, 0x4444), 0);
+
+ ASSERT_EQ(lseek(mem_fd_data1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data1, read_buf, sizeof(read_buf)), strlen(data1));
+ ASSERT_STREQ(read_buf, data1);
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ASSERT_EQ(lseek(mem_fd_data2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_data2, read_buf, sizeof(read_buf)), strlen(data2));
+ ASSERT_STREQ(read_buf, data2);
+
+ ASSERT_EQ(lseek(mem_fd_empty1, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty1, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(lseek(mem_fd_empty2, 0, SEEK_SET), 0);
+ ASSERT_EQ(read(mem_fd_empty2, read_buf, sizeof(read_buf)), 0);
+
+ ASSERT_EQ(close(mem_fd_data1), 0);
+ ASSERT_EQ(close(mem_fd_empty1), 0);
+ ASSERT_EQ(close(mem_fd_data2), 0);
+ ASSERT_EQ(close(mem_fd_empty2), 0);
+ ASSERT_EQ(close(session_fd1), 0);
+ ASSERT_EQ(close(session_fd2), 0);
+}
+
+/*
+ * Test Case: Preserve Unsupported File Descriptor
+ *
+ * Verifies that attempting to preserve a file descriptor that does not have
+ * a registered Live Update handler fails gracefully.
+ * Uses /dev/null as a representative of a file type (character device)
+ * that is not supported by the orchestrator.
+ */
+TEST_F(liveupdate_device, preserve_unsupported_fd)
+{
+ int session_fd, unsupported_fd;
+ int ret;
+
+ self->fd1 = open(LIVEUPDATE_DEV, O_RDWR);
+ if (self->fd1 < 0 && errno == ENOENT)
+ SKIP(return, "%s does not exist", LIVEUPDATE_DEV);
+ ASSERT_GE(self->fd1, 0);
+
+ session_fd = create_session(self->fd1, "unsupported-fd-test");
+ ASSERT_GE(session_fd, 0);
+
+ unsupported_fd = open("/dev/null", O_RDWR);
+ ASSERT_GE(unsupported_fd, 0);
+
+ ret = preserve_fd(session_fd, unsupported_fd, 0xDEAD);
+ EXPECT_EQ(ret, -ENOENT);
+
+ ASSERT_EQ(close(unsupported_fd), 0);
+ ASSERT_EQ(close(session_fd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/liveupdate/luo_kexec_simple.c b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
new file mode 100644
index 000000000000..d7ac1f3dc4cb
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_kexec_simple.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A simple selftest to validate the end-to-end lifecycle of a LUO session
+ * across a single kexec reboot.
+ */
+
+#include "luo_test_utils.h"
+
+#define TEST_SESSION_NAME "test-session"
+#define TEST_MEMFD_TOKEN 0x1A
+#define TEST_MEMFD_DATA "hello kexec world"
+
+/* Constants for the state-tracking mechanism, specific to this test file. */
+#define STATE_SESSION_NAME "kexec_simple_state"
+#define STATE_MEMFD_TOKEN 999
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int session_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' and preserving memfd...\n",
+ TEST_SESSION_NAME);
+ session_fd = luo_create_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_create_session for '%s'", TEST_SESSION_NAME);
+
+ if (create_and_preserve_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ TEST_MEMFD_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int session_fd, mfd, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2)
+ fail_exit("Expected stage 2, but state file contains %d", stage);
+
+ ksft_print_msg("[STAGE 2] Retrieving session '%s'...\n", TEST_SESSION_NAME);
+ session_fd = luo_retrieve_session(luo_fd, TEST_SESSION_NAME);
+ if (session_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", TEST_SESSION_NAME);
+
+ ksft_print_msg("[STAGE 2] Restoring and verifying memfd (token %#x)...\n",
+ TEST_MEMFD_TOKEN);
+ mfd = restore_and_verify_memfd(session_fd, TEST_MEMFD_TOKEN,
+ TEST_MEMFD_DATA);
+ if (mfd < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", TEST_MEMFD_TOKEN);
+ close(mfd);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+ ksft_print_msg("[STAGE 2] Finalizing test session...\n");
+ if (luo_session_finish(session_fd) < 0)
+ fail_exit("luo_session_finish for test session");
+ close(session_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- SIMPLE KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_multi_session.c b/tools/testing/selftests/liveupdate/luo_multi_session.c
new file mode 100644
index 000000000000..0ee2d795beef
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_multi_session.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * A selftest to validate the end-to-end lifecycle of multiple LUO sessions
+ * across a kexec reboot, including empty sessions and sessions with multiple
+ * files.
+ */
+
+#include "luo_test_utils.h"
+
+#define SESSION_EMPTY_1 "multi-test-empty-1"
+#define SESSION_EMPTY_2 "multi-test-empty-2"
+#define SESSION_FILES_1 "multi-test-files-1"
+#define SESSION_FILES_2 "multi-test-files-2"
+
+#define MFD1_TOKEN 0x1001
+#define MFD2_TOKEN 0x2002
+#define MFD3_TOKEN 0x3003
+
+#define MFD1_DATA "Data for session files 1"
+#define MFD2_DATA "First file for session files 2"
+#define MFD3_DATA "Second file for session files 2"
+
+#define STATE_SESSION_NAME "kexec_multi_state"
+#define STATE_MEMFD_TOKEN 998
+
+/* Stage 1: Executed before the kexec reboot. */
+static void run_stage_1(int luo_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+
+ ksft_print_msg("[STAGE 1] Starting pre-kexec setup for multi-session test...\n");
+
+ ksft_print_msg("[STAGE 1] Creating state file for next stage (2)...\n");
+ create_state_file(luo_fd, STATE_SESSION_NAME, STATE_MEMFD_TOKEN, 2);
+
+ ksft_print_msg("[STAGE 1] Creating empty sessions '%s' and '%s'...\n",
+ SESSION_EMPTY_1, SESSION_EMPTY_2);
+ s_empty1_fd = luo_create_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_create_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_EMPTY_2);
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with one memfd...\n",
+ SESSION_FILES_1);
+
+ s_files1_fd = luo_create_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_1);
+ if (create_and_preserve_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD1_TOKEN);
+ }
+
+ ksft_print_msg("[STAGE 1] Creating session '%s' with two memfds...\n",
+ SESSION_FILES_2);
+
+ s_files2_fd = luo_create_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_create_session for '%s'", SESSION_FILES_2);
+ if (create_and_preserve_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD2_TOKEN);
+ }
+ if (create_and_preserve_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA) < 0) {
+ fail_exit("create_and_preserve_memfd for token %#x",
+ MFD3_TOKEN);
+ }
+
+ close(luo_fd);
+ daemonize_and_wait();
+}
+
+/* Stage 2: Executed after the kexec reboot. */
+static void run_stage_2(int luo_fd, int state_session_fd)
+{
+ int s_empty1_fd, s_empty2_fd, s_files1_fd, s_files2_fd;
+ int mfd1, mfd2, mfd3, stage;
+
+ ksft_print_msg("[STAGE 2] Starting post-kexec verification...\n");
+
+ restore_and_read_stage(state_session_fd, STATE_MEMFD_TOKEN, &stage);
+ if (stage != 2) {
+ fail_exit("Expected stage 2, but state file contains %d",
+ stage);
+ }
+
+ ksft_print_msg("[STAGE 2] Retrieving all sessions...\n");
+ s_empty1_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_1);
+ if (s_empty1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_1);
+
+ s_empty2_fd = luo_retrieve_session(luo_fd, SESSION_EMPTY_2);
+ if (s_empty2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_EMPTY_2);
+
+ s_files1_fd = luo_retrieve_session(luo_fd, SESSION_FILES_1);
+ if (s_files1_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_1);
+
+ s_files2_fd = luo_retrieve_session(luo_fd, SESSION_FILES_2);
+ if (s_files2_fd < 0)
+ fail_exit("luo_retrieve_session for '%s'", SESSION_FILES_2);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_1);
+ mfd1 = restore_and_verify_memfd(s_files1_fd, MFD1_TOKEN, MFD1_DATA);
+ if (mfd1 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD1_TOKEN);
+ close(mfd1);
+
+ ksft_print_msg("[STAGE 2] Verifying contents of session '%s'...\n",
+ SESSION_FILES_2);
+
+ mfd2 = restore_and_verify_memfd(s_files2_fd, MFD2_TOKEN, MFD2_DATA);
+ if (mfd2 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD2_TOKEN);
+ close(mfd2);
+
+ mfd3 = restore_and_verify_memfd(s_files2_fd, MFD3_TOKEN, MFD3_DATA);
+ if (mfd3 < 0)
+ fail_exit("restore_and_verify_memfd for token %#x", MFD3_TOKEN);
+ close(mfd3);
+
+ ksft_print_msg("[STAGE 2] Test data verified successfully.\n");
+
+ ksft_print_msg("[STAGE 2] Finalizing all test sessions...\n");
+ if (luo_session_finish(s_empty1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_1);
+ close(s_empty1_fd);
+
+ if (luo_session_finish(s_empty2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_EMPTY_2);
+ close(s_empty2_fd);
+
+ if (luo_session_finish(s_files1_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_1);
+ close(s_files1_fd);
+
+ if (luo_session_finish(s_files2_fd) < 0)
+ fail_exit("luo_session_finish for '%s'", SESSION_FILES_2);
+ close(s_files2_fd);
+
+ ksft_print_msg("[STAGE 2] Finalizing state session...\n");
+ if (luo_session_finish(state_session_fd) < 0)
+ fail_exit("luo_session_finish for state session");
+ close(state_session_fd);
+
+ ksft_print_msg("\n--- MULTI-SESSION KEXEC TEST PASSED ---\n");
+}
+
+int main(int argc, char *argv[])
+{
+ return luo_test(argc, argv, STATE_SESSION_NAME,
+ run_stage_1, run_stage_2);
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.c b/tools/testing/selftests/liveupdate/luo_test_utils.c
new file mode 100644
index 000000000000..3c8721c505df
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#include "luo_test_utils.h"
+
+int luo_open_device(void)
+{
+ return open(LUO_DEVICE, O_RDWR);
+}
+
+int luo_create_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_create_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_CREATE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int luo_retrieve_session(int luo_fd, const char *name)
+{
+ struct liveupdate_ioctl_retrieve_session arg = { .size = sizeof(arg) };
+
+ snprintf((char *)arg.name, LIVEUPDATE_SESSION_NAME_LENGTH, "%.*s",
+ LIVEUPDATE_SESSION_NAME_LENGTH - 1, name);
+
+ if (ioctl(luo_fd, LIVEUPDATE_IOCTL_RETRIEVE_SESSION, &arg) < 0)
+ return -errno;
+
+ return arg.fd;
+}
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data)
+{
+ struct liveupdate_session_preserve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ mfd = memfd_create("test_mfd", 0);
+ if (mfd < 0)
+ return -errno;
+
+ if (ftruncate(mfd, page_size) != 0)
+ goto out;
+
+ map = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ snprintf(map, page_size, "%s", data);
+ munmap(map, page_size);
+
+ arg.fd = mfd;
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_PRESERVE_FD, &arg) < 0)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret != 0 && errno != 0)
+ ret = -errno;
+ if (mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int restore_and_verify_memfd(int session_fd, int token,
+ const char *expected_data)
+{
+ struct liveupdate_session_retrieve_fd arg = { .size = sizeof(arg) };
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ void *map = MAP_FAILED;
+ int mfd = -1, ret = -1;
+
+ arg.token = token;
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_RETRIEVE_FD, &arg) < 0)
+ return -errno;
+ mfd = arg.fd;
+
+ map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, mfd, 0);
+ if (map == MAP_FAILED)
+ goto out;
+
+ if (expected_data && strcmp(expected_data, map) != 0) {
+ ksft_print_msg("Data mismatch! Expected '%s', Got '%s'\n",
+ expected_data, (char *)map);
+ ret = -EINVAL;
+ goto out_munmap;
+ }
+
+ ret = mfd;
+out_munmap:
+ munmap(map, page_size);
+out:
+ if (ret < 0 && errno != 0)
+ ret = -errno;
+ if (ret < 0 && mfd >= 0)
+ close(mfd);
+ return ret;
+}
+
+int luo_session_finish(int session_fd)
+{
+ struct liveupdate_session_finish arg = { .size = sizeof(arg) };
+
+ if (ioctl(session_fd, LIVEUPDATE_SESSION_FINISH, &arg) < 0)
+ return -errno;
+
+ return 0;
+}
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage)
+{
+ char buf[32];
+ int state_session_fd;
+
+ state_session_fd = luo_create_session(luo_fd, session_name);
+ if (state_session_fd < 0)
+ fail_exit("luo_create_session for state tracking");
+
+ snprintf(buf, sizeof(buf), "%d", next_stage);
+ if (create_and_preserve_memfd(state_session_fd, token, buf) < 0)
+ fail_exit("create_and_preserve_memfd for state tracking");
+
+ /*
+ * DO NOT close session FD, otherwise it is going to be unpreserved
+ */
+}
+
+void restore_and_read_stage(int state_session_fd, int token, int *stage)
+{
+ char buf[32] = {0};
+ int mfd;
+
+ mfd = restore_and_verify_memfd(state_session_fd, token, NULL);
+ if (mfd < 0)
+ fail_exit("failed to restore state memfd");
+
+ if (read(mfd, buf, sizeof(buf) - 1) < 0)
+ fail_exit("failed to read state mfd");
+
+ *stage = atoi(buf);
+
+ close(mfd);
+}
+
+void daemonize_and_wait(void)
+{
+ pid_t pid;
+
+ ksft_print_msg("[STAGE 1] Forking persistent child to hold sessions...\n");
+
+ pid = fork();
+ if (pid < 0)
+ fail_exit("fork failed");
+
+ if (pid > 0) {
+ ksft_print_msg("[STAGE 1] Child PID: %d. Resources are pinned.\n", pid);
+ ksft_print_msg("[STAGE 1] You may now perform kexec reboot.\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Detach from terminal so closing the window doesn't kill us */
+ if (setsid() < 0)
+ fail_exit("setsid failed");
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ /* Change dir to root to avoid locking filesystems */
+ if (chdir("/") < 0)
+ exit(EXIT_FAILURE);
+
+ while (1)
+ sleep(60);
+}
+
+static int parse_stage_args(int argc, char *argv[])
+{
+ static struct option long_options[] = {
+ {"stage", required_argument, 0, 's'},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ int stage = 1;
+ int opt;
+
+ optind = 1;
+ while ((opt = getopt_long(argc, argv, "s:", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 's':
+ stage = atoi(optarg);
+ if (stage != 1 && stage != 2)
+ fail_exit("Invalid stage argument");
+ break;
+ default:
+ fail_exit("Unknown argument");
+ }
+ }
+ return stage;
+}
+
+int luo_test(int argc, char *argv[],
+ const char *state_session_name,
+ luo_test_stage1_fn stage1,
+ luo_test_stage2_fn stage2)
+{
+ int target_stage = parse_stage_args(argc, argv);
+ int luo_fd = luo_open_device();
+ int state_session_fd;
+ int detected_stage;
+
+ if (luo_fd < 0) {
+ ksft_exit_skip("Failed to open %s. Is the luo module loaded?\n",
+ LUO_DEVICE);
+ }
+
+ state_session_fd = luo_retrieve_session(luo_fd, state_session_name);
+ if (state_session_fd == -ENOENT)
+ detected_stage = 1;
+ else if (state_session_fd >= 0)
+ detected_stage = 2;
+ else
+ fail_exit("Failed to check for state session");
+
+ if (target_stage != detected_stage) {
+ ksft_exit_fail_msg("Stage mismatch Requested --stage %d, but system is in stage %d.\n"
+ "(State session %s: %s)\n",
+ target_stage, detected_stage, state_session_name,
+ (detected_stage == 2) ? "EXISTS" : "MISSING");
+ }
+
+ if (target_stage == 1)
+ stage1(luo_fd);
+ else
+ stage2(luo_fd, state_session_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/liveupdate/luo_test_utils.h b/tools/testing/selftests/liveupdate/luo_test_utils.h
new file mode 100644
index 000000000000..90099bf49577
--- /dev/null
+++ b/tools/testing/selftests/liveupdate/luo_test_utils.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ *
+ * Utility functions for LUO kselftests.
+ */
+
+#ifndef LUO_TEST_UTILS_H
+#define LUO_TEST_UTILS_H
+
+#include <errno.h>
+#include <string.h>
+#include <linux/liveupdate.h>
+#include "../kselftest.h"
+
+#define LUO_DEVICE "/dev/liveupdate"
+
+#define fail_exit(fmt, ...) \
+ ksft_exit_fail_msg("[%s:%d] " fmt " (errno: %s)\n", \
+ __func__, __LINE__, ##__VA_ARGS__, strerror(errno))
+
+int luo_open_device(void);
+int luo_create_session(int luo_fd, const char *name);
+int luo_retrieve_session(int luo_fd, const char *name);
+int luo_session_finish(int session_fd);
+
+int create_and_preserve_memfd(int session_fd, int token, const char *data);
+int restore_and_verify_memfd(int session_fd, int token, const char *expected_data);
+
+void create_state_file(int luo_fd, const char *session_name, int token,
+ int next_stage);
+void restore_and_read_stage(int state_session_fd, int token, int *stage);
+
+void daemonize_and_wait(void);
+
+typedef void (*luo_test_stage1_fn)(int luo_fd);
+typedef void (*luo_test_stage2_fn)(int luo_fd, int state_session_fd);
+
+int luo_test(int argc, char *argv[], const char *state_session_name,
+ luo_test_stage1_fn stage1, luo_test_stage2_fn stage2);
+
+#endif /* LUO_TEST_UTILS_H */
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 7afe05e8c4d7..bd09fdaf53e0 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -2,7 +2,7 @@ CONFIG_LKDTM=y
CONFIG_DEBUG_LIST=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_FORTIFY_SOURCE=y
-CONFIG_GCC_PLUGIN_STACKLEAK=y
+CONFIG_KSTACK_ERASE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y
diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
index df215e4aa63f..60caf8528f81 100644
--- a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
@@ -13,7 +13,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp)
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 1cc8a977c711..54d59044ace1 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
TEST(size_null_lsm_list_modules)
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
index 732e89fe99c0..dcb6f8aa772e 100644
--- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "common.h"
TEST(ctx_null_lsm_set_self_attr)
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index 93183a37b133..4396bf2273a4 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,7 +34,7 @@
#include <sys/stat.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 4b9953359e40..6e4a8090a0eb 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,7 +39,7 @@
#include <time.h>
#include <linux/media.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index af89855adb7b..f6d7c44b2288 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_impl.h
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -7,7 +7,7 @@
#include <string.h>
#include <pthread.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static int registrations;
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index e949a43a6145..cdd022c1c497 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -15,8 +15,8 @@
#include <string.h>
#include <fcntl.h>
-#include "../kselftest.h"
-#include "../kselftest_harness.h"
+#include "kselftest.h"
+#include "kselftest_harness.h"
/* Default test file size: 4MB */
#define MB (1UL << 20)
@@ -261,9 +261,6 @@ TEST(check_file_mmap)
TH_LOG("No read-ahead pages found in memory");
}
- EXPECT_LT(i, vec_size) {
- TH_LOG("Read-ahead pages reached the end of the file");
- }
/*
* End of the readahead window. The rest of the pages shouldn't
* be in memory.
@@ -286,8 +283,7 @@ out_free:
/*
* Test mincore() behavior on a page backed by a tmpfs file. This test
- * performs the same steps as the previous one. However, we don't expect
- * any readahead in this case.
+ * performs the same steps as the previous one.
*/
TEST(check_tmpfs_mmap)
{
@@ -298,7 +294,6 @@ TEST(check_tmpfs_mmap)
int page_size;
int fd;
int i;
- int ra_pages = 0;
page_size = sysconf(_SC_PAGESIZE);
vec_size = FILE_SIZE / page_size;
@@ -341,8 +336,7 @@ TEST(check_tmpfs_mmap)
}
/*
- * Touch a page in the middle of the mapping. We expect only
- * that page to be fetched into memory.
+ * Touch a page in the middle of the mapping.
*/
addr[FILE_SIZE / 2] = 1;
retval = mincore(addr, FILE_SIZE, vec);
@@ -351,15 +345,6 @@ TEST(check_tmpfs_mmap)
TH_LOG("Page not found in memory after use");
}
- i = FILE_SIZE / 2 / page_size + 1;
- while (i < vec_size && vec[i]) {
- ra_pages++;
- i++;
- }
- ASSERT_EQ(ra_pages, 0) {
- TH_LOG("Read-ahead pages found in memory");
- }
-
munmap(addr, FILE_SIZE);
close(fd);
free(vec);
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 121000c28c10..c2a8586e51a1 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -20,6 +20,8 @@ mremap_test
on-fault-limit
transhuge-stress
pagemap_ioctl
+pfnmap
+process_madv
*.tmp*
protection_keys
protection_keys_32
@@ -37,9 +39,6 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
memfd_secret
-hugetlb_dio
-pkey_sighandler_tests_32
-pkey_sighandler_tests_64
soft-dirty
split_huge_page_test
ksm_tests
@@ -57,4 +56,7 @@ droppable
hugetlb_dio
pkey_sighandler_tests_32
pkey_sighandler_tests_64
-guard-pages
+guard-regions
+merge
+prctl_thp_disable
+rmap
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 63ce39d024bb..eaf9312097f7 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -34,6 +34,7 @@ endif
MAKEFLAGS += --no-builtin-rules
CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS += -Wunreachable-code
LDLIBS = -lrt -lpthread -lm
# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
@@ -84,6 +85,9 @@ TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += mseal_test
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
+TEST_GEN_FILES += pfnmap
+TEST_GEN_FILES += process_madv
+TEST_GEN_FILES += prctl_thp_disable
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
@@ -97,7 +101,9 @@ TEST_GEN_FILES += hugetlb_fault_after_madv
TEST_GEN_FILES += hugetlb_madv_vs_map
TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
-TEST_GEN_FILES += guard-pages
+TEST_GEN_FILES += guard-regions
+TEST_GEN_FILES += merge
+TEST_GEN_FILES += rmap
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
@@ -225,6 +231,8 @@ $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
$(OUTPUT)/migration: LDLIBS += -lnuma
+$(OUTPUT)/rmap: LDLIBS += -lnuma
+
local_config.mk local_config.h: check_config.sh
/bin/sh ./check_config.sh $(CC)
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 67df7b47087f..e1fe16bcbbe8 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -29,7 +29,7 @@ fi
if [[ $cgroup2 ]]; then
cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup2 none $cgroup_path
do_umount=1
fi
@@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then
else
cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup memory,hugetlb $cgroup_path
do_umount=1
fi
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 2c3a0eb6b22d..30209c40b697 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -16,7 +16,7 @@
#include <unistd.h>
#include <string.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define MAP_SIZE_MB 100
#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index a28baa536332..deba93379c80 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ANON_VMA_NAME=y
+CONFIG_FTRACE=y
+CONFIG_PROFILING=y
+CONFIG_UPROBES=y
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 9446673645eb..accfd198dbda 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -27,7 +27,7 @@
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
#include "../../../../mm/gup_test.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#include "thp_settings.h"
@@ -41,11 +41,6 @@ static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;
-static int sz2ord(size_t size)
-{
- return __builtin_ctzll(size / pagesize);
-}
-
static int detect_thp_sizes(size_t sizes[], int max)
{
int count = 0;
@@ -57,7 +52,7 @@ static int detect_thp_sizes(size_t sizes[], int max)
if (!pmdsize)
return 0;
- orders = 1UL << sz2ord(pmdsize);
+ orders = 1UL << sz2ord(pmdsize, pagesize);
orders |= thp_supported_orders();
for (i = 0; orders && count < max; i++) {
@@ -72,31 +67,6 @@ static int detect_thp_sizes(size_t sizes[], int max)
return count;
}
-static void detect_huge_zeropage(void)
-{
- int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
- O_RDONLY);
- size_t enabled = 0;
- char buf[15];
- int ret;
-
- if (fd < 0)
- return;
-
- ret = pread(fd, buf, sizeof(buf), 0);
- if (ret > 0 && ret < sizeof(buf)) {
- buf[ret] = 0;
-
- enabled = strtoul(buf, NULL, 10);
- if (enabled == 1) {
- has_huge_zeropage = true;
- ksft_print_msg("[INFO] huge zeropage is enabled\n");
- }
- }
-
- close(fd);
-}
-
static bool range_is_swapped(void *addr, size_t size)
{
for (; size; addr += pagesize, size -= pagesize)
@@ -112,9 +82,12 @@ struct comm_pipes {
static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
- if (pipe(comm_pipes->child_ready) < 0)
+ if (pipe(comm_pipes->child_ready) < 0) {
+ ksft_perror("pipe() failed");
return -errno;
+ }
if (pipe(comm_pipes->parent_ready) < 0) {
+ ksft_perror("pipe() failed");
close(comm_pipes->child_ready[0]);
close(comm_pipes->child_ready[1]);
return -errno;
@@ -207,13 +180,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
exit(fn(mem, size, &comm_pipes));
@@ -228,9 +202,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
* write-faults by directly mapping pages writable.
*/
ret = mprotect(mem, size, PROT_READ);
- ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+
+ ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -248,16 +231,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = -EINVAL;
if (!ret) {
- ksft_test_result_pass("No leak from parent into child\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from parent into child\n");
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from parent into child\n");
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
}
close_comm_pipes:
close_comm_pipes(&comm_pipes);
@@ -293,7 +278,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
.iov_base = mem,
.iov_len = size,
};
- ssize_t cur, total, transferred;
+ ssize_t cur, total, transferred = 0;
struct comm_pipes comm_pipes;
char *old, *new;
int ret, fds[2];
@@ -306,26 +291,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free;
}
if (pipe(fds) < 0) {
- ksft_test_result_fail("pipe() failed\n");
+ ksft_perror("pipe() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
if (before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+ ksft_perror("vmsplice() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -339,7 +327,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
if (!before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+ ksft_perror("vmsplice() failed");
+ log_test_result(KSFT_FAIL);
wait(&ret);
goto close_pipe;
}
@@ -348,7 +337,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
;
if (munmap(mem, size) < 0) {
- ksft_test_result_fail("munmap() failed\n");
+ ksft_perror("munmap() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
write(comm_pipes.parent_ready[1], "0", 1);
@@ -356,7 +346,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
/* Wait until the child is done writing. */
wait(&ret);
if (!WIFEXITED(ret)) {
- ksft_test_result_fail("wait() failed\n");
+ ksft_perror("wait() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
@@ -364,22 +355,25 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
for (total = 0; total < transferred; total += cur) {
cur = read(fds[0], new + total, transferred - total);
if (cur < 0) {
- ksft_test_result_fail("read() failed\n");
+ ksft_perror("read() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
if (!memcmp(old, new, transferred)) {
- ksft_test_result_pass("No leak from child into parent\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from child into parent\n");
+ ksft_print_msg("Leak from child into parent\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from child into parent\n");
+ ksft_print_msg("Leak from child into parent\n");
+ log_test_result(KSFT_FAIL);
}
close_pipe:
close(fds[0]);
@@ -416,13 +410,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
fd = fileno(file);
@@ -430,14 +425,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_file;
}
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed\n");
+ ksft_print_msg("io_uring_queue_init() failed\n");
+ log_test_result(KSFT_SKIP);
goto free_tmp;
}
@@ -452,7 +449,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
iov.iov_len = size;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret) {
- ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ ksft_print_msg("io_uring_register_buffers() failed\n");
+ log_test_result(KSFT_SKIP);
goto queue_exit;
}
@@ -463,7 +461,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -483,10 +482,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
* if the page is mapped R/O vs. R/W).
*/
ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ }
+
clear_softdirty();
- ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
}
}
@@ -498,25 +504,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
memset(mem, 0xff, size);
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
- ksft_test_result_fail("io_uring_get_sqe() failed\n");
+ ksft_print_msg("io_uring_get_sqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
ret = io_uring_submit(&ring);
if (ret < 0) {
- ksft_test_result_fail("io_uring_submit() failed\n");
+ ksft_print_msg("io_uring_submit() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret < 0) {
- ksft_test_result_fail("io_uring_wait_cqe() failed\n");
+ ksft_print_msg("io_uring_wait_cqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
if (cqe->res != size) {
- ksft_test_result_fail("write_fixed failed\n");
+ ksft_print_msg("write_fixed failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_cqe_seen(&ring, cqe);
@@ -526,15 +536,20 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
while (total < size) {
cur = pread(fd, tmp + total, size - total, total);
if (cur < 0) {
- ksft_test_result_fail("pread() failed\n");
+ ksft_perror("pread() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
total += cur;
}
/* Finally, check if we read what we expected. */
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/W pin is reliable\n");
+ if (!memcmp(mem, tmp, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Longtom R/W pin is not reliable\n");
+ log_test_result(KSFT_FAIL);
+ }
quit_child:
if (use_fork) {
@@ -582,19 +597,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
int ret;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ log_test_result(KSFT_SKIP);
return;
}
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_perror("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free_tmp;
}
@@ -609,7 +626,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -646,7 +664,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
clear_softdirty();
ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -661,9 +680,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret) {
if (errno == EINVAL)
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_SKIP;
else
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_FAIL;
+ ksft_perror("PIN_LONGTERM_TEST_START failed");
+ log_test_result(ret);
goto wait;
}
@@ -676,22 +697,28 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
tmp_val = (__u64)(uintptr_t)tmp;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
- if (ret)
- ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
- else
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/O pin is reliable\n");
+ if (ret) {
+ ksft_perror("PIN_LONGTERM_TEST_READ failed");
+ log_test_result(KSFT_FAIL);
+ } else {
+ if (!memcmp(mem, tmp, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Longterm R/O pin is not reliable\n");
+ log_test_result(KSFT_FAIL);
+ }
+ }
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
if (ret)
- ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+ ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
switch (test) {
case RO_PIN_TEST_SHARED:
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
if (!WIFEXITED(ret))
- ksft_print_msg("[INFO] wait() failed\n");
+ ksft_perror("wait() failed");
break;
default:
break;
@@ -746,14 +773,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
/* Ignore if not around on a kernel. */
if (ret && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_perror("MADV_NOHUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -763,7 +792,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
if (swapout) {
madvise(mem, pagesize, MADV_PAGEOUT);
if (!pagemap_is_swapped(pagemap_fd, mem)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
}
@@ -775,13 +805,13 @@ munmap:
static void run_with_base_page(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with base page\n", desc);
+ log_test_start("%s ... with base page", desc);
do_run_with_base_page(fn, false);
}
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
+ log_test_start("%s ... with swapped out base page", desc);
do_run_with_base_page(fn, true);
}
@@ -807,7 +837,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -816,7 +847,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
ret = madvise(mem, thpsize, MADV_HUGEPAGE);
if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ ksft_perror("MADV_HUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -826,7 +858,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
mem[0] = 1;
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
- ksft_test_result_skip("Did not get a THP populated\n");
+ ksft_print_msg("Did not get a THP populated\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
memset(mem, 1, thpsize);
@@ -846,12 +879,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -863,7 +898,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
if (ret) {
- ksft_test_result_fail("MADV_DONTNEED failed\n");
+ ksft_perror("MADV_DONTNEED failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = pagesize;
@@ -876,14 +912,16 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mremap_size = thpsize / 2;
mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ if (mremap_mem == MAP_FAILED) {
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
if (tmp != mremap_mem) {
- ksft_test_result_fail("mremap() failed\n");
+ ksft_perror("mremap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = mremap_size;
@@ -896,12 +934,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
} else if (!ret) {
exit(0);
@@ -910,7 +950,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
/* Allow for sharing all pages again. */
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -924,7 +965,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
case THP_RUN_SINGLE_PTE_SWAPOUT:
madvise(mem, size, MADV_PAGEOUT);
if (!range_is_swapped(mem, size)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
break;
@@ -941,56 +983,56 @@ munmap:
static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
+ log_test_start("%s ... with THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD, size);
}
static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}
static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE, size);
}
static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}
static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}
static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}
static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
+ log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}
static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
+ log_test_start("%s ... with partially shared THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
@@ -1000,14 +1042,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
char *mem, *dummy;
- ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1020,7 +1063,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
*/
dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (dummy == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
munmap(dummy, hugetlbsize);
@@ -1167,8 +1211,8 @@ static void run_anon_test_case(struct test_case const *test_case)
size_t size = thpsizes[i];
struct thp_settings settings = *thp_current_settings();
- settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
- settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER;
+ settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
thp_push_settings(&settings);
if (size == pmdsize) {
@@ -1226,7 +1270,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -1236,12 +1280,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
@@ -1250,8 +1296,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before actually COW-sharing the page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
goto close_comm_pipes;
}
break;
@@ -1262,7 +1308,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the upper part of the THP. */
ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1270,7 +1317,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the lower part of the THP. */
ret = madvise(mem, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1280,7 +1328,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
switch (test) {
@@ -1314,7 +1363,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = madvise(mem, size, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1324,8 +1374,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before anyone modified the COW-shared page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1345,7 +1395,12 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
else
ret = -EINVAL;
- ksft_test_result(!ret, "No leak from parent into child\n");
+ if (!ret) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
+ }
close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
@@ -1430,7 +1485,7 @@ static void run_anon_thp_test_cases(void)
for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
struct test_case const *test_case = &anon_thp_test_cases[i];
- ksft_print_msg("[RUN] %s\n", test_case->desc);
+ log_test_start("%s", test_case->desc);
do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
}
}
@@ -1453,8 +1508,12 @@ static void test_cow(char *mem, const char *smem, size_t size)
memset(mem, 0xff, size);
/* See if we still read the old values via the other mapping. */
- ksft_test_result(!memcmp(smem, old, size),
- "Other mapping not modified\n");
+ if (!memcmp(smem, old, size)) {
+ log_test_result(KSFT_PASS);
+ } else {
+ ksft_print_msg("Other mapping modified\n");
+ log_test_result(KSFT_FAIL);
+ }
free(old);
}
@@ -1470,26 +1529,28 @@ static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
- char *mem, *smem, tmp;
+ char *mem, *smem;
- ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
+ log_test_start("%s ... with shared zeropage", desc);
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
/* Read from the page to populate the shared zeropage. */
- tmp = *mem + *smem;
- asm volatile("" : "+r" (tmp));
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);
fn(mem, smem, pagesize);
munmap:
@@ -1500,14 +1561,15 @@ munmap:
static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
- char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
+ char *mem, *smem, *mmap_mem, *mmap_smem;
size_t mmap_size;
int ret;
- ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
+ log_test_start("%s ... with huge zeropage", desc);
if (!has_huge_zeropage) {
- ksft_test_result_skip("Huge zeropage not enabled\n");
+ ksft_print_msg("Huge zeropage not enabled\n");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1516,13 +1578,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
mmap_smem = mmap(NULL, mmap_size, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1531,9 +1595,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
- ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
+ ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1542,8 +1612,8 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
* the first sub-page and test if we get another sub-page populated
* automatically.
*/
- tmp = *mem + *smem;
- asm volatile("" : "+r" (tmp));
+ FORCE_READ(mem);
+ FORCE_READ(smem);
if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
!pagemap_is_populated(pagemap_fd, smem + pagesize)) {
ksft_test_result_skip("Did not get THPs populated\n");
@@ -1559,38 +1629,42 @@ munmap:
static void run_with_memfd(non_anon_test_fn fn, const char *desc)
{
- char *mem, *smem, tmp;
+ char *mem, *smem;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_FAIL);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
/* Fault the page in. */
- tmp = *mem + *smem;
- asm volatile("" : "+r" (tmp));
+ FORCE_READ(mem);
+ FORCE_READ(smem);
fn(mem, smem, pagesize);
munmap:
@@ -1603,45 +1677,50 @@ close:
static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
{
- char *mem, *smem, tmp;
+ char *mem, *smem;
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
return;
}
fd = fileno(file);
if (fd < 0) {
- ksft_test_result_skip("fileno() failed\n");
+ ksft_perror("fileno() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
/* Fault the page in. */
- tmp = *mem + *smem;
- asm volatile("" : "+r" (tmp));
+ FORCE_READ(mem);
+ FORCE_READ(smem);
fn(mem, smem, pagesize);
munmap:
@@ -1656,23 +1735,25 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
size_t hugetlbsize)
{
int flags = MFD_HUGETLB;
- char *mem, *smem, tmp;
+ char *mem, *smem;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, hugetlbsize)) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
@@ -1680,18 +1761,20 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
/* Fault the page in. */
- tmp = *mem + *smem;
- asm volatile("" : "+r" (tmp));
+ FORCE_READ(mem);
+ FORCE_READ(smem);
fn(mem, smem, hugetlbsize);
munmap:
@@ -1771,7 +1854,6 @@ static int tests_per_non_anon_test_case(void)
int main(int argc, char **argv)
{
- int err;
struct thp_settings default_settings;
ksft_print_header();
@@ -1781,7 +1863,7 @@ int main(int argc, char **argv)
if (pmdsize) {
/* Only if THP is supported. */
thp_read_settings(&default_settings);
- default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
+ default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
thp_save_settings();
thp_push_settings(&default_settings);
@@ -1791,7 +1873,7 @@ int main(int argc, char **argv)
}
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
ARRAY_SIZE(hugetlbsizes));
- detect_huge_zeropage();
+ has_huge_zeropage = detect_huge_zeropage();
ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
@@ -1811,9 +1893,5 @@ int main(int argc, char **argv)
thp_restore_settings();
}
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c
index f3d9ecf96890..44940f75c461 100644
--- a/tools/testing/selftests/mm/droppable.c
+++ b/tools/testing/selftests/mm/droppable.c
@@ -13,7 +13,7 @@
#include <sys/mman.h>
#include <linux/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
int main(int argc, char *argv[])
{
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c
deleted file mode 100644
index ece37212a8a2..000000000000
--- a/tools/testing/selftests/mm/guard-pages.c
+++ /dev/null
@@ -1,1320 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#define _GNU_SOURCE
-#include "../kselftest_harness.h"
-#include <asm-generic/mman.h> /* Force the import of the tools version. */
-#include <assert.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/userfaultfd.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <sys/uio.h>
-#include <unistd.h>
-
-/*
- * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
- *
- * "If the signal occurs other than as the result of calling the abort or raise
- * function, the behavior is undefined if the signal handler refers to any
- * object with static storage duration other than by assigning a value to an
- * object declared as volatile sig_atomic_t"
- */
-static volatile sig_atomic_t signal_jump_set;
-static sigjmp_buf signal_jmp_buf;
-
-/*
- * Ignore the checkpatch warning, we must read from x but don't want to do
- * anything with it in order to trigger a read page fault. We therefore must use
- * volatile to stop the compiler from optimising this away.
- */
-#define FORCE_READ(x) (*(volatile typeof(x) *)x)
-
-static int userfaultfd(int flags)
-{
- return syscall(SYS_userfaultfd, flags);
-}
-
-static void handle_fatal(int c)
-{
- if (!signal_jump_set)
- return;
-
- siglongjmp(signal_jmp_buf, c);
-}
-
-static int pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(SYS_pidfd_open, pid, flags);
-}
-
-static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
- size_t n, int advice, unsigned int flags)
-{
- return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags);
-}
-
-/*
- * Enable our signal catcher and try to read/write the specified buffer. The
- * return value indicates whether the read/write succeeds without a fatal
- * signal.
- */
-static bool try_access_buf(char *ptr, bool write)
-{
- bool failed;
-
- /* Tell signal handler to jump back here on fatal signal. */
- signal_jump_set = true;
- /* If a fatal signal arose, we will jump back here and failed is set. */
- failed = sigsetjmp(signal_jmp_buf, 0) != 0;
-
- if (!failed) {
- if (write)
- *ptr = 'x';
- else
- FORCE_READ(ptr);
- }
-
- signal_jump_set = false;
- return !failed;
-}
-
-/* Try and read from a buffer, return true if no fatal signal. */
-static bool try_read_buf(char *ptr)
-{
- return try_access_buf(ptr, false);
-}
-
-/* Try and write to a buffer, return true if no fatal signal. */
-static bool try_write_buf(char *ptr)
-{
- return try_access_buf(ptr, true);
-}
-
-/*
- * Try and BOTH read from AND write to a buffer, return true if BOTH operations
- * succeed.
- */
-static bool try_read_write_buf(char *ptr)
-{
- return try_read_buf(ptr) && try_write_buf(ptr);
-}
-
-FIXTURE(guard_pages)
-{
- unsigned long page_size;
-};
-
-FIXTURE_SETUP(guard_pages)
-{
- struct sigaction act = {
- .sa_handler = &handle_fatal,
- .sa_flags = SA_NODEFER,
- };
-
- sigemptyset(&act.sa_mask);
- if (sigaction(SIGSEGV, &act, NULL))
- ksft_exit_fail_perror("sigaction");
-
- self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
-};
-
-FIXTURE_TEARDOWN(guard_pages)
-{
- struct sigaction act = {
- .sa_handler = SIG_DFL,
- .sa_flags = SA_NODEFER,
- };
-
- sigemptyset(&act.sa_mask);
- sigaction(SIGSEGV, &act, NULL);
-}
-
-TEST_F(guard_pages, basic)
-{
- const unsigned long NUM_PAGES = 10;
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Trivially assert we can touch the first page. */
- ASSERT_TRUE(try_read_write_buf(ptr));
-
- ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
-
- /* Establish that 1st page SIGSEGV's. */
- ASSERT_FALSE(try_read_write_buf(ptr));
-
- /* Ensure we can touch everything else.*/
- for (i = 1; i < NUM_PAGES; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Establish a guard page at the end of the mapping. */
- ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
- MADV_GUARD_INSTALL), 0);
-
- /* Check that both guard pages result in SIGSEGV. */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
-
- /* Remove the first guard page. */
- ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE));
-
- /* Make sure we can touch it. */
- ASSERT_TRUE(try_read_write_buf(ptr));
-
- /* Remove the last guard page. */
- ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
- MADV_GUARD_REMOVE));
-
- /* Make sure we can touch it. */
- ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
-
- /*
- * Test setting a _range_ of pages, namely the first 3. The first of
- * these be faulted in, so this also tests that we can install guard
- * pages over backed pages.
- */
- ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure they are all guard pages. */
- for (i = 0; i < 3; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Make sure the rest are not. */
- for (i = 3; i < NUM_PAGES; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Remove guard pages. */
- ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
-
- /* Now make sure we can touch everything. */
- for (i = 0; i < NUM_PAGES; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /*
- * Now remove all guard pages, make sure we don't remove existing
- * entries.
- */
- ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
-
- for (i = 0; i < NUM_PAGES * page_size; i += page_size) {
- char chr = ptr[i];
-
- ASSERT_EQ(chr, 'x');
- }
-
- ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0);
-}
-
-/* Assert that operations applied across multiple VMAs work as expected. */
-TEST_F(guard_pages, multi_vma)
-{
- const unsigned long page_size = self->page_size;
- char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3;
- int i;
-
- /* Reserve a 100 page region over which we can install VMAs. */
- ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_region, MAP_FAILED);
-
- /* Place a VMA of 10 pages size at the start of the region. */
- ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr1, MAP_FAILED);
-
- /* Place a VMA of 5 pages size 50 pages into the region. */
- ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr2, MAP_FAILED);
-
- /* Place a VMA of 20 pages size at the end of the region. */
- ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr3, MAP_FAILED);
-
- /* Unmap gaps. */
- ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0);
- ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0);
-
- /*
- * We end up with VMAs like this:
- *
- * 0 10 .. 50 55 .. 80 100
- * [---] [---] [---]
- */
-
- /*
- * Now mark the whole range as guard pages and make sure all VMAs are as
- * such.
- */
-
- /*
- * madvise() is certifiable and lets you perform operations over gaps,
- * everything works, but it indicates an error and errno is set to
- * -ENOMEM. Also if anything runs out of memory it is set to
- * -ENOMEM. You are meant to guess which is which.
- */
- ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1);
- ASSERT_EQ(errno, ENOMEM);
-
- for (i = 0; i < 10; i++) {
- char *curr = &ptr1[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- for (i = 0; i < 5; i++) {
- char *curr = &ptr2[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- for (i = 0; i < 20; i++) {
- char *curr = &ptr3[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Now remove guar pages over range and assert the opposite. */
-
- ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1);
- ASSERT_EQ(errno, ENOMEM);
-
- for (i = 0; i < 10; i++) {
- char *curr = &ptr1[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- for (i = 0; i < 5; i++) {
- char *curr = &ptr2[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- for (i = 0; i < 20; i++) {
- char *curr = &ptr3[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Now map incompatible VMAs in the gaps. */
- ptr = mmap(&ptr_region[10 * page_size], 40 * page_size,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
- ptr = mmap(&ptr_region[55 * page_size], 25 * page_size,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /*
- * We end up with VMAs like this:
- *
- * 0 10 .. 50 55 .. 80 100
- * [---][xxxx][---][xxxx][---]
- *
- * Where 'x' signifies VMAs that cannot be merged with those adjacent to
- * them.
- */
-
- /* Multiple VMAs adjacent to one another should result in no error. */
- ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0);
- for (i = 0; i < 100; i++) {
- char *curr = &ptr_region[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
- ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0);
- for (i = 0; i < 100; i++) {
- char *curr = &ptr_region[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0);
-}
-
-/*
- * Assert that batched operations performed using process_madvise() work as
- * expected.
- */
-TEST_F(guard_pages, process_madvise)
-{
- const unsigned long page_size = self->page_size;
- pid_t pid = getpid();
- int pidfd = pidfd_open(pid, 0);
- char *ptr_region, *ptr1, *ptr2, *ptr3;
- ssize_t count;
- struct iovec vec[6];
-
- ASSERT_NE(pidfd, -1);
-
- /* Reserve region to map over. */
- ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_region, MAP_FAILED);
-
- /*
- * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we
- * overwrite existing entries and test this code path against
- * overwriting existing entries.
- */
- ptr1 = mmap(&ptr_region[page_size], 10 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0);
- ASSERT_NE(ptr1, MAP_FAILED);
- /* We want guard markers at start/end of each VMA. */
- vec[0].iov_base = ptr1;
- vec[0].iov_len = page_size;
- vec[1].iov_base = &ptr1[9 * page_size];
- vec[1].iov_len = page_size;
-
- /* 5 pages offset 50 pages into reserve region. */
- ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr2, MAP_FAILED);
- vec[2].iov_base = ptr2;
- vec[2].iov_len = page_size;
- vec[3].iov_base = &ptr2[4 * page_size];
- vec[3].iov_len = page_size;
-
- /* 20 pages offset 79 pages into reserve region. */
- ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr3, MAP_FAILED);
- vec[4].iov_base = ptr3;
- vec[4].iov_len = page_size;
- vec[5].iov_base = &ptr3[19 * page_size];
- vec[5].iov_len = page_size;
-
- /* Free surrounding VMAs. */
- ASSERT_EQ(munmap(ptr_region, page_size), 0);
- ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0);
- ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0);
- ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
-
- /* Now guard in one step. */
- count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0);
-
- /* OK we don't have permission to do this, skip. */
- if (count == -1 && errno == EPERM)
- ksft_exit_skip("No process_madvise() permissions, try running as root.\n");
-
- /* Returns the number of bytes advised. */
- ASSERT_EQ(count, 6 * page_size);
-
- /* Now make sure the guarding was applied. */
-
- ASSERT_FALSE(try_read_write_buf(ptr1));
- ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size]));
-
- ASSERT_FALSE(try_read_write_buf(ptr2));
- ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size]));
-
- ASSERT_FALSE(try_read_write_buf(ptr3));
- ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
-
- /* Now do the same with unguard... */
- count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0);
-
- /* ...and everything should now succeed. */
-
- ASSERT_TRUE(try_read_write_buf(ptr1));
- ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size]));
-
- ASSERT_TRUE(try_read_write_buf(ptr2));
- ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size]));
-
- ASSERT_TRUE(try_read_write_buf(ptr3));
- ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size]));
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
- ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
- ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
- close(pidfd);
-}
-
-/* Assert that unmapping ranges does not leave guard markers behind. */
-TEST_F(guard_pages, munmap)
-{
- const unsigned long page_size = self->page_size;
- char *ptr, *ptr_new1, *ptr_new2;
-
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard first and last pages. */
- ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
- ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0);
-
- /* Assert that they are guarded. */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size]));
-
- /* Unmap them. */
- ASSERT_EQ(munmap(ptr, page_size), 0);
- ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0);
-
- /* Map over them.*/
- ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new1, MAP_FAILED);
- ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new2, MAP_FAILED);
-
- /* Assert that they are now not guarded. */
- ASSERT_TRUE(try_read_write_buf(ptr_new1));
- ASSERT_TRUE(try_read_write_buf(ptr_new2));
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Assert that mprotect() operations have no bearing on guard markers. */
-TEST_F(guard_pages, mprotect)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard the middle of the range. */
- ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
- MADV_GUARD_INSTALL), 0);
-
- /* Assert that it is indeed guarded. */
- ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size]));
- ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size]));
-
- /* Now make these pages read-only. */
- ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0);
-
- /* Make sure the range is still guarded. */
- ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
- ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
-
- /* Make sure we can guard again without issue.*/
- ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
- MADV_GUARD_INSTALL), 0);
-
- /* Make sure the range is, yet again, still guarded. */
- ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
- ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
-
- /* Now unguard the whole range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
-
- /* Make sure the whole range is readable. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Split and merge VMAs and make sure guard pages still behave. */
-TEST_F(guard_pages, split_merge)
-{
- const unsigned long page_size = self->page_size;
- char *ptr, *ptr_new;
- int i;
-
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard the whole range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure the whole range is guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Now unmap some pages in the range so we split. */
- ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0);
- ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0);
- ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0);
-
- /* Make sure the remaining ranges are guarded post-split. */
- for (i = 0; i < 2; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
- for (i = 2; i < 5; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
- for (i = 6; i < 8; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
- for (i = 9; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Now map them again - the unmap will have cleared the guards. */
- ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new, MAP_FAILED);
- ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new, MAP_FAILED);
- ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new, MAP_FAILED);
-
- /* Now make sure guard pages are established. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_write_buf(curr);
- bool expect_true = i == 2 || i == 5 || i == 8;
-
- ASSERT_TRUE(expect_true ? result : !result);
- }
-
- /* Now guard everything again. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure the whole range is guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Now split the range into three. */
- ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
- ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
-
- /* Make sure the whole range is guarded for read. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_buf(curr));
- }
-
- /* Now reset protection bits so we merge the whole thing. */
- ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
- ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
- PROT_READ | PROT_WRITE), 0);
-
- /* Make sure the whole range is still guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Split range into 3 again... */
- ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
- ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
-
- /* ...and unguard the whole range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
-
- /* Make sure the whole range is remedied for read. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_buf(curr));
- }
-
- /* Merge them again. */
- ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
- ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
- PROT_READ | PROT_WRITE), 0);
-
- /* Now ensure the merged range is remedied for read/write. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Assert that MADV_DONTNEED does not remove guard markers. */
-TEST_F(guard_pages, dontneed)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Back the whole range. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- *curr = 'y';
- }
-
- /* Guard every other page. */
- for (i = 0; i < 10; i += 2) {
- char *curr = &ptr[i * page_size];
- int res = madvise(curr, page_size, MADV_GUARD_INSTALL);
-
- ASSERT_EQ(res, 0);
- }
-
- /* Indicate that we don't need any of the range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0);
-
- /* Check to ensure guard markers are still in place. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_buf(curr);
-
- if (i % 2 == 0) {
- ASSERT_FALSE(result);
- } else {
- ASSERT_TRUE(result);
- /* Make sure we really did get reset to zero page. */
- ASSERT_EQ(*curr, '\0');
- }
-
- /* Now write... */
- result = try_write_buf(&ptr[i * page_size]);
-
- /* ...and make sure same result. */
- ASSERT_TRUE(i % 2 != 0 ? result : !result);
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Assert that mlock()'ed pages work correctly with guard markers. */
-TEST_F(guard_pages, mlock)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Populate. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- *curr = 'y';
- }
-
- /* Lock. */
- ASSERT_EQ(mlock(ptr, 10 * page_size), 0);
-
- /* Now try to guard, should fail with EINVAL. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1);
- ASSERT_EQ(errno, EINVAL);
-
- /* OK unlock. */
- ASSERT_EQ(munlock(ptr, 10 * page_size), 0);
-
- /* Guard first half of range, should now succeed. */
- ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure guard works. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_write_buf(curr);
-
- if (i < 5) {
- ASSERT_FALSE(result);
- } else {
- ASSERT_TRUE(result);
- ASSERT_EQ(*curr, 'x');
- }
- }
-
- /*
- * Now lock the latter part of the range. We can't lock the guard pages,
- * as this would result in the pages being populated and the guarding
- * would cause this to error out.
- */
- ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0);
-
- /*
- * Now remove guard pages, we permit mlock()'d ranges to have guard
- * pages removed as it is a non-destructive operation.
- */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
-
- /* Now check that no guard pages remain. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/*
- * Assert that moving, extending and shrinking memory via mremap() retains
- * guard markers where possible.
- *
- * - Moving a mapping alone should retain markers as they are.
- */
-TEST_F(guard_pages, mremap_move)
-{
- const unsigned long page_size = self->page_size;
- char *ptr, *ptr_new;
-
- /* Map 5 pages. */
- ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Place guard markers at both ends of the 5 page span. */
- ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
- ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure the guard pages are in effect. */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
-
- /* Map a new region we will move this range into. Doing this ensures
- * that we have reserved a range to map into.
- */
- ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE,
- -1, 0);
- ASSERT_NE(ptr_new, MAP_FAILED);
-
- ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size,
- MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new);
-
- /* Make sure the guard markers are retained. */
- ASSERT_FALSE(try_read_write_buf(ptr_new));
- ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size]));
-
- /*
- * Clean up - we only need reference the new pointer as we overwrote the
- * PROT_NONE range and moved the existing one.
- */
- munmap(ptr_new, 5 * page_size);
-}
-
-/*
- * Assert that moving, extending and shrinking memory via mremap() retains
- * guard markers where possible.
- *
- * Expanding should retain guard pages, only now in different position. The user
- * will have to remove guard pages manually to fix up (they'd have to do the
- * same if it were a PROT_NONE mapping).
- */
-TEST_F(guard_pages, mremap_expand)
-{
- const unsigned long page_size = self->page_size;
- char *ptr, *ptr_new;
-
- /* Map 10 pages... */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
- /* ...But unmap the last 5 so we can ensure we can expand into them. */
- ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
-
- /* Place guard markers at both ends of the 5 page span. */
- ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
- ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure the guarding is in effect. */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
-
- /* Now expand to 10 pages. */
- ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /*
- * Make sure the guard markers are retained in their original positions.
- */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
-
- /* Reserve a region which we can move to and expand into. */
- ptr_new = mmap(NULL, 20 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr_new, MAP_FAILED);
-
- /* Now move and expand into it. */
- ptr = mremap(ptr, 10 * page_size, 20 * page_size,
- MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new);
- ASSERT_EQ(ptr, ptr_new);
-
- /*
- * Again, make sure the guard markers are retained in their original positions.
- */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
-
- /*
- * A real user would have to remove guard markers, but would reasonably
- * expect all characteristics of the mapping to be retained, including
- * guard markers.
- */
-
- /* Cleanup. */
- munmap(ptr, 20 * page_size);
-}
-/*
- * Assert that moving, extending and shrinking memory via mremap() retains
- * guard markers where possible.
- *
- * Shrinking will result in markers that are shrunk over being removed. Again,
- * if the user were using a PROT_NONE mapping they'd have to manually fix this
- * up also so this is OK.
- */
-TEST_F(guard_pages, mremap_shrink)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- /* Map 5 pages. */
- ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Place guard markers at both ends of the 5 page span. */
- ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
- ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
-
- /* Make sure the guarding is in effect. */
- ASSERT_FALSE(try_read_write_buf(ptr));
- ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
-
- /* Now shrink to 3 pages. */
- ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* We expect the guard marker at the start to be retained... */
- ASSERT_FALSE(try_read_write_buf(ptr));
-
- /* ...But remaining pages will not have guard markers. */
- for (i = 1; i < 3; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /*
- * As with expansion, a real user would have to remove guard pages and
- * fixup. But you'd have to do similar manual things with PROT_NONE
- * mappings too.
- */
-
- /*
- * If we expand back to the original size, the end marker will, of
- * course, no longer be present.
- */
- ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Again, we expect the guard marker at the start to be retained... */
- ASSERT_FALSE(try_read_write_buf(ptr));
-
- /* ...But remaining pages will not have guard markers. */
- for (i = 1; i < 5; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- munmap(ptr, 5 * page_size);
-}
-
-/*
- * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set
- * retain guard pages.
- */
-TEST_F(guard_pages, fork)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- pid_t pid;
- int i;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Establish guard pages in the first 5 pages. */
- ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
-
- pid = fork();
- ASSERT_NE(pid, -1);
- if (!pid) {
- /* This is the child process now. */
-
- /* Assert that the guarding is in effect. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_write_buf(curr);
-
- ASSERT_TRUE(i >= 5 ? result : !result);
- }
-
- /* Now unguard the range.*/
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
-
- exit(0);
- }
-
- /* Parent process. */
-
- /* Parent simply waits on child. */
- waitpid(pid, NULL, 0);
-
- /* Child unguard does not impact parent page table state. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_write_buf(curr);
-
- ASSERT_TRUE(i >= 5 ? result : !result);
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/*
- * Assert expected behaviour after we fork populated ranges of anonymous memory
- * and then guard and unguard the range.
- */
-TEST_F(guard_pages, fork_cow)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- pid_t pid;
- int i;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Populate range. */
- for (i = 0; i < 10 * page_size; i++) {
- char chr = 'a' + (i % 26);
-
- ptr[i] = chr;
- }
-
- pid = fork();
- ASSERT_NE(pid, -1);
- if (!pid) {
- /* This is the child process now. */
-
- /* Ensure the range is as expected. */
- for (i = 0; i < 10 * page_size; i++) {
- char expected = 'a' + (i % 26);
- char actual = ptr[i];
-
- ASSERT_EQ(actual, expected);
- }
-
- /* Establish guard pages across the whole range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
- /* Remove it. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
-
- /*
- * By removing the guard pages, the page tables will be
- * cleared. Assert that we are looking at the zero page now.
- */
- for (i = 0; i < 10 * page_size; i++) {
- char actual = ptr[i];
-
- ASSERT_EQ(actual, '\0');
- }
-
- exit(0);
- }
-
- /* Parent process. */
-
- /* Parent simply waits on child. */
- waitpid(pid, NULL, 0);
-
- /* Ensure the range is unchanged in parent anon range. */
- for (i = 0; i < 10 * page_size; i++) {
- char expected = 'a' + (i % 26);
- char actual = ptr[i];
-
- ASSERT_EQ(actual, expected);
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/*
- * Assert that forking a process with VMAs that do have VM_WIPEONFORK set
- * behave as expected.
- */
-TEST_F(guard_pages, fork_wipeonfork)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- pid_t pid;
- int i;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Mark wipe on fork. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0);
-
- /* Guard the first 5 pages. */
- ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
-
- pid = fork();
- ASSERT_NE(pid, -1);
- if (!pid) {
- /* This is the child process now. */
-
- /* Guard will have been wiped. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_TRUE(try_read_write_buf(curr));
- }
-
- exit(0);
- }
-
- /* Parent process. */
-
- waitpid(pid, NULL, 0);
-
- /* Guard markers should be in effect.*/
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
- bool result = try_read_write_buf(curr);
-
- ASSERT_TRUE(i >= 5 ? result : !result);
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Ensure that MADV_FREE retains guard entries as expected. */
-TEST_F(guard_pages, lazyfree)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Ensure guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Lazyfree range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0);
-
- /* This should leave the guard markers in place. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */
-TEST_F(guard_pages, populate)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Populate read should error out... */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1);
- ASSERT_EQ(errno, EFAULT);
-
- /* ...as should populate write. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1);
- ASSERT_EQ(errno, EFAULT);
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */
-TEST_F(guard_pages, cold_pageout)
-{
- const unsigned long page_size = self->page_size;
- char *ptr;
- int i;
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Guard range. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* Ensured guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Now mark cold. This should have no impact on guard markers. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0);
-
- /* Should remain guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* OK, now page out. This should equally, have no effect on markers. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
-
- /* Should remain guarded. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-/* Ensure that guard pages do not break userfaultd. */
-TEST_F(guard_pages, uffd)
-{
- const unsigned long page_size = self->page_size;
- int uffd;
- char *ptr;
- int i;
- struct uffdio_api api = {
- .api = UFFD_API,
- .features = 0,
- };
- struct uffdio_register reg;
- struct uffdio_range range;
-
- /* Set up uffd. */
- uffd = userfaultfd(0);
- if (uffd == -1 && errno == EPERM)
- ksft_exit_skip("No userfaultfd permissions, try running as root.\n");
- ASSERT_NE(uffd, -1);
-
- ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
-
- /* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- ASSERT_NE(ptr, MAP_FAILED);
-
- /* Register the range with uffd. */
- range.start = (unsigned long)ptr;
- range.len = 10 * page_size;
- reg.range = range;
- reg.mode = UFFDIO_REGISTER_MODE_MISSING;
- ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0);
-
- /* Guard the range. This should not trigger the uffd. */
- ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
-
- /* The guarding should behave as usual with no uffd intervention. */
- for (i = 0; i < 10; i++) {
- char *curr = &ptr[i * page_size];
-
- ASSERT_FALSE(try_read_write_buf(curr));
- }
-
- /* Cleanup. */
- ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0);
- close(uffd);
- ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
-}
-
-TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
new file mode 100644
index 000000000000..dbd21d66d383
--- /dev/null
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -0,0 +1,2326 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "kselftest_harness.h"
+#include <asm-generic/mman.h> /* Force the import of the tools version. */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "vm_util.h"
+
+#include "../pidfd/pidfd.h"
+
+/*
+ * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
+ *
+ * "If the signal occurs other than as the result of calling the abort or raise
+ * function, the behavior is undefined if the signal handler refers to any
+ * object with static storage duration other than by assigning a value to an
+ * object declared as volatile sig_atomic_t"
+ */
+static volatile sig_atomic_t signal_jump_set;
+static sigjmp_buf signal_jmp_buf;
+
+/*
+ * How is the test backing the mapping being tested?
+ */
+enum backing_type {
+ ANON_BACKED,
+ SHMEM_BACKED,
+ LOCAL_FILE_BACKED,
+};
+
+FIXTURE(guard_regions)
+{
+ unsigned long page_size;
+ char path[PATH_MAX];
+ int fd;
+};
+
+FIXTURE_VARIANT(guard_regions)
+{
+ enum backing_type backing;
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, anon)
+{
+ .backing = ANON_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, shmem)
+{
+ .backing = SHMEM_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, file)
+{
+ .backing = LOCAL_FILE_BACKED,
+};
+
+static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ switch (variant->backing) {
+ case ANON_BACKED:
+ case SHMEM_BACKED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void *mmap_(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant,
+ void *addr, size_t length, int prot, int extra_flags,
+ off_t offset)
+{
+ int fd;
+ int flags = extra_flags;
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ flags |= MAP_PRIVATE | MAP_ANON;
+ fd = -1;
+ offset = 0;
+ break;
+ case SHMEM_BACKED:
+ case LOCAL_FILE_BACKED:
+ flags |= MAP_SHARED;
+ fd = self->fd;
+ break;
+ default:
+ ksft_exit_fail();
+ break;
+ }
+
+ return mmap(addr, length, prot, flags, fd, offset);
+}
+
+static int userfaultfd(int flags)
+{
+ return syscall(SYS_userfaultfd, flags);
+}
+
+static void handle_fatal(int c)
+{
+ if (!signal_jump_set)
+ return;
+
+ siglongjmp(signal_jmp_buf, c);
+}
+
+static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
+ size_t n, int advice, unsigned int flags)
+{
+ return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags);
+}
+
+/*
+ * Enable our signal catcher and try to read/write the specified buffer. The
+ * return value indicates whether the read/write succeeds without a fatal
+ * signal.
+ */
+static bool try_access_buf(char *ptr, bool write)
+{
+ bool failed;
+
+ /* Tell signal handler to jump back here on fatal signal. */
+ signal_jump_set = true;
+ /* If a fatal signal arose, we will jump back here and failed is set. */
+ failed = sigsetjmp(signal_jmp_buf, 0) != 0;
+
+ if (!failed) {
+ if (write)
+ *ptr = 'x';
+ else
+ FORCE_READ(*ptr);
+ }
+
+ signal_jump_set = false;
+ return !failed;
+}
+
+/* Try and read from a buffer, return true if no fatal signal. */
+static bool try_read_buf(char *ptr)
+{
+ return try_access_buf(ptr, false);
+}
+
+/* Try and write to a buffer, return true if no fatal signal. */
+static bool try_write_buf(char *ptr)
+{
+ return try_access_buf(ptr, true);
+}
+
+/*
+ * Try and BOTH read from AND write to a buffer, return true if BOTH operations
+ * succeed.
+ */
+static bool try_read_write_buf(char *ptr)
+{
+ return try_read_buf(ptr) && try_write_buf(ptr);
+}
+
+static void setup_sighandler(void)
+{
+ struct sigaction act = {
+ .sa_handler = &handle_fatal,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGSEGV, &act, NULL))
+ ksft_exit_fail_perror("sigaction");
+}
+
+static void teardown_sighandler(void)
+{
+ struct sigaction act = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+}
+
+static int open_file(const char *prefix, char *path)
+{
+ int fd;
+
+ snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix);
+ fd = mkstemp(path);
+ if (fd < 0)
+ ksft_exit_fail_perror("mkstemp");
+
+ return fd;
+}
+
+/* Establish a varying pattern in a buffer. */
+static void set_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ char *ptr2 = &ptr[i * page_size];
+
+ memset(ptr2, 'a' + (i % 26), page_size);
+ }
+}
+
+/*
+ * Check that a buffer contains the pattern set by set_pattern(), starting at a
+ * page offset of pgoff within the buffer.
+ */
+static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size,
+ size_t pgoff)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages * page_size; i++) {
+ size_t offset = pgoff * page_size + i;
+ char actual = ptr[offset];
+ char expected = 'a' + ((offset / page_size) % 26);
+
+ if (actual != expected)
+ return false;
+ }
+
+ return true;
+}
+
+/* Check that a buffer contains the pattern set by set_pattern(). */
+static bool check_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ return check_pattern_offset(ptr, num_pages, page_size, 0);
+}
+
+/* Determine if a buffer contains only repetitions of a specified char. */
+static bool is_buf_eq(char *buf, size_t size, char chr)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (buf[i] != chr)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Some file systems have issues with merging due to changing merge-sensitive
+ * parameters in the .mmap callback, and prior to .mmap_prepare being
+ * implemented everywhere this will now result in an unexpected failure to
+ * merge (e.g. - overlayfs).
+ *
+ * Perform a simple test to see if the local file system suffers from this, if
+ * it does then we can skip test logic that assumes local file system merging is
+ * sane.
+ */
+static bool local_fs_has_sane_mmap(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr2;
+ struct procmap_fd procmap;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ return true;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ if (ptr == MAP_FAILED)
+ return false;
+ /* Unmap the middle. */
+ munmap(&ptr[5 * page_size], page_size);
+
+ /* Map again. */
+ ptr2 = mmap_(self, variant, &ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 5 * page_size);
+
+ if (ptr2 == MAP_FAILED)
+ return false;
+
+ /* Now make sure they all merged. */
+ if (open_self_procmap(&procmap) != 0)
+ return false;
+ if (!find_vma_procmap(&procmap, ptr))
+ return false;
+ if (procmap.query.vma_start != (unsigned long)ptr)
+ return false;
+ if (procmap.query.vma_end != (unsigned long)ptr + 10 * page_size)
+ return false;
+ close_procmap(&procmap);
+
+ return true;
+}
+
+FIXTURE_SETUP(guard_regions)
+{
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+ setup_sighandler();
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ return;
+ case LOCAL_FILE_BACKED:
+ self->fd = open_file("", self->path);
+ break;
+ case SHMEM_BACKED:
+ self->fd = memfd_create(self->path, 0);
+ break;
+ }
+
+ /* We truncate file to at least 100 pages, tests can modify as needed. */
+ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
+};
+
+FIXTURE_TEARDOWN_PARENT(guard_regions)
+{
+ teardown_sighandler();
+
+ if (variant->backing == ANON_BACKED)
+ return;
+
+ if (self->fd >= 0)
+ close(self->fd);
+
+ if (self->path[0] != '\0')
+ unlink(self->path);
+}
+
+TEST_F(guard_regions, basic)
+{
+ const unsigned long NUM_PAGES = 10;
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Trivially assert we can touch the first page. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Establish that 1st page SIGSEGV's. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* Ensure we can touch everything else.*/
+ for (i = 1; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Establish a guard page at the end of the mapping. */
+ ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Check that both guard pages result in SIGSEGV. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /* Remove the first guard page. */
+ ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ /* Remove the last guard page. */
+ ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /*
+ * Test setting a _range_ of pages, namely the first 3. The first of
+ * these be faulted in, so this also tests that we can install guard
+ * pages over backed pages.
+ */
+ ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure they are all guard pages. */
+ for (i = 0; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Make sure the rest are not. */
+ for (i = 3; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Remove guard pages. */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure we can touch everything. */
+ for (i = 0; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * Now remove all guard pages, make sure we don't remove existing
+ * entries.
+ */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < NUM_PAGES * page_size; i += page_size) {
+ char chr = ptr[i];
+
+ ASSERT_EQ(chr, 'x');
+ }
+
+ ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0);
+}
+
+/* Assert that operations applied across multiple VMAs work as expected. */
+TEST_F(guard_regions, multi_vma)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3;
+ int i;
+
+ /* Reserve a 100 page region over which we can install VMAs. */
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /* Place a VMA of 10 pages size at the start of the region. */
+ ptr1 = mmap_(self, variant, ptr_region, 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ /* Place a VMA of 5 pages size 50 pages into the region. */
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Place a VMA of 20 pages size at the end of the region. */
+ ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Unmap gaps. */
+ ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---] [---] [---]
+ */
+
+ /*
+ * Now mark the whole range as guard pages and make sure all VMAs are as
+ * such.
+ */
+
+ /*
+ * madvise() is certifiable and lets you perform operations over gaps,
+ * everything works, but it indicates an error and errno is set to
+ * -ENOMEM. Also if anything runs out of memory it is set to
+ * -ENOMEM. You are meant to guess which is which.
+ */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now remove guar pages over range and assert the opposite. */
+
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Now map incompatible VMAs in the gaps. */
+ ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---][xxxx][---][xxxx][---]
+ *
+ * Where 'x' signifies VMAs that cannot be merged with those adjacent to
+ * them.
+ */
+
+ /* Multiple VMAs adjacent to one another should result in no error. */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0);
+}
+
+/*
+ * Assert that batched operations performed using process_madvise() work as
+ * expected.
+ */
+TEST_F(guard_regions, process_madvise)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_region, *ptr1, *ptr2, *ptr3;
+ ssize_t count;
+ struct iovec vec[6];
+
+ /* Reserve region to map over. */
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /*
+ * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we
+ * overwrite existing entries and test this code path against
+ * overwriting existing entries.
+ */
+ ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+ /* We want guard markers at start/end of each VMA. */
+ vec[0].iov_base = ptr1;
+ vec[0].iov_len = page_size;
+ vec[1].iov_base = &ptr1[9 * page_size];
+ vec[1].iov_len = page_size;
+
+ /* 5 pages offset 50 pages into reserve region. */
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ vec[2].iov_base = ptr2;
+ vec[2].iov_len = page_size;
+ vec[3].iov_base = &ptr2[4 * page_size];
+ vec[3].iov_len = page_size;
+
+ /* 20 pages offset 79 pages into reserve region. */
+ ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+ vec[4].iov_base = ptr3;
+ vec[4].iov_len = page_size;
+ vec[5].iov_base = &ptr3[19 * page_size];
+ vec[5].iov_len = page_size;
+
+ /* Free surrounding VMAs. */
+ ASSERT_EQ(munmap(ptr_region, page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
+
+ /* Now guard in one step. */
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0);
+
+ /* OK we don't have permission to do this, skip. */
+ if (count == -1 && errno == EPERM)
+ SKIP(return, "No process_madvise() permissions, try running as root.\n");
+
+ /* Returns the number of bytes advised. */
+ ASSERT_EQ(count, 6 * page_size);
+
+ /* Now make sure the guarding was applied. */
+
+ ASSERT_FALSE(try_read_write_buf(ptr1));
+ ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr2));
+ ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr3));
+ ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Now do the same with unguard... */
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0);
+
+ /* ...and everything should now succeed. */
+
+ ASSERT_TRUE(try_read_write_buf(ptr1));
+ ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr2));
+ ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr3));
+ ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
+ ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
+}
+
+/* Assert that unmapping ranges does not leave guard markers behind. */
+TEST_F(guard_regions, munmap)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new1, *ptr_new2;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard first and last pages. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Assert that they are guarded. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size]));
+
+ /* Unmap them. */
+ ASSERT_EQ(munmap(ptr, page_size), 0);
+ ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0);
+
+ /* Map over them.*/
+ ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 0);
+ ASSERT_NE(ptr_new1, MAP_FAILED);
+ ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new2, MAP_FAILED);
+
+ /* Assert that they are now not guarded. */
+ ASSERT_TRUE(try_read_write_buf(ptr_new1));
+ ASSERT_TRUE(try_read_write_buf(ptr_new2));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mprotect() operations have no bearing on guard markers. */
+TEST_F(guard_regions, mprotect)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the middle of the range. */
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Assert that it is indeed guarded. */
+ ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size]));
+
+ /* Now make these pages read-only. */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0);
+
+ /* Make sure the range is still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Make sure we can guard again without issue.*/
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the range is, yet again, still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Now unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is readable. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Split and merge VMAs and make sure guard pages still behave. */
+TEST_F(guard_regions, split_merge)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now unmap some pages in the range so we split. */
+ ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0);
+
+ /* Make sure the remaining ranges are guarded post-split. */
+ for (i = 0; i < 2; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 2; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 6; i < 8; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 9; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now map them again - the unmap will have cleared the guards. */
+ ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now make sure guard pages are established. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+ bool expect_true = i == 2 || i == 5 || i == 8;
+
+ ASSERT_TRUE(expect_true ? result : !result);
+ }
+
+ /* Now guard everything again. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now split the range into three. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Make sure the whole range is guarded for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_buf(curr));
+ }
+
+ /* Now reset protection bits so we merge the whole thing. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Make sure the whole range is still guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Split range into 3 again... */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* ...and unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is remedied for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Merge them again. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Now ensure the merged range is remedied for read/write. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that MADV_DONTNEED does not remove guard markers. */
+TEST_F(guard_regions, dontneed)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Back the whole range. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Guard every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *curr = &ptr[i * page_size];
+ int res = madvise(curr, page_size, MADV_GUARD_INSTALL);
+
+ ASSERT_EQ(res, 0);
+ }
+
+ /* Indicate that we don't need any of the range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0);
+
+ /* Check to ensure guard markers are still in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_buf(curr);
+
+ if (i % 2 == 0) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ switch (variant->backing) {
+ case ANON_BACKED:
+ /* If anon, then we get a zero page. */
+ ASSERT_EQ(*curr, '\0');
+ break;
+ default:
+ /* Otherwise, we get the file data. */
+ ASSERT_EQ(*curr, 'y');
+ break;
+ }
+ }
+
+ /* Now write... */
+ result = try_write_buf(&ptr[i * page_size]);
+
+ /* ...and make sure same result. */
+ ASSERT_TRUE(i % 2 != 0 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mlock()'ed pages work correctly with guard markers. */
+TEST_F(guard_regions, mlock)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Lock. */
+ ASSERT_EQ(mlock(ptr, 10 * page_size), 0);
+
+ /* Now try to guard, should fail with EINVAL. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ /* OK unlock. */
+ ASSERT_EQ(munlock(ptr, 10 * page_size), 0);
+
+ /* Guard first half of range, should now succeed. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure guard works. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ if (i < 5) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ ASSERT_EQ(*curr, 'x');
+ }
+ }
+
+ /*
+ * Now lock the latter part of the range. We can't lock the guard pages,
+ * as this would result in the pages being populated and the guarding
+ * would cause this to error out.
+ */
+ ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /*
+ * Now remove guard pages, we permit mlock()'d ranges to have guard
+ * pages removed as it is a non-destructive operation.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now check that no guard pages remain. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * - Moving a mapping alone should retain markers as they are.
+ */
+TEST_F(guard_regions, mremap_move)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 5 pages. */
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guard pages are in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Map a new region we will move this range into. Doing this ensures
+ * that we have reserved a range to map into.
+ */
+ ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new);
+
+ /* Make sure the guard markers are retained. */
+ ASSERT_FALSE(try_read_write_buf(ptr_new));
+ ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size]));
+
+ /*
+ * Clean up - we only need reference the new pointer as we overwrote the
+ * PROT_NONE range and moved the existing one.
+ */
+ munmap(ptr_new, 5 * page_size);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Expanding should retain guard pages, only now in different position. The user
+ * will have to remove guard pages manually to fix up (they'd have to do the
+ * same if it were a PROT_NONE mapping).
+ */
+TEST_F(guard_regions, mremap_expand)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 10 pages... */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /* ...But unmap the last 5 so we can ensure we can expand into them. */
+ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now expand to 10 pages. */
+ ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Reserve a region which we can move to and expand into. */
+ ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now move and expand into it. */
+ ptr = mremap(ptr, 10 * page_size, 20 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new);
+ ASSERT_EQ(ptr, ptr_new);
+
+ /*
+ * Again, make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /*
+ * A real user would have to remove guard markers, but would reasonably
+ * expect all characteristics of the mapping to be retained, including
+ * guard markers.
+ */
+
+ /* Cleanup. */
+ munmap(ptr, 20 * page_size);
+}
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Shrinking will result in markers that are shrunk over being removed. Again,
+ * if the user were using a PROT_NONE mapping they'd have to manually fix this
+ * up also so this is OK.
+ */
+TEST_F(guard_regions, mremap_shrink)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 5 pages. */
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now shrink to 3 pages. */
+ ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * As with expansion, a real user would have to remove guard pages and
+ * fixup. But you'd have to do similar manual things with PROT_NONE
+ * mappings too.
+ */
+
+ /*
+ * If we expand back to the original size, the end marker will, of
+ * course, no longer be present.
+ */
+ ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Again, we expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ munmap(ptr, 5 * page_size);
+}
+
+/*
+ * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set
+ * retain guard pages.
+ */
+TEST_F(guard_regions, fork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish guard pages in the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Assert that the guarding is in effect. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Now unguard the range.*/
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Child unguard does not impact parent page table state. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert expected behaviour after we fork populated ranges of anonymous memory
+ * and then guard and unguard the range.
+ */
+TEST_F(guard_regions, fork_cow)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "CoW only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char chr = 'a' + (i % 26);
+
+ ptr[i] = chr;
+ }
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Ensure the range is as expected. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Establish guard pages across the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+ /* Remove it. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ * By removing the guard pages, the page tables will be
+ * cleared. Assert that we are looking at the zero page now.
+ */
+ for (i = 0; i < 10 * page_size; i++) {
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, '\0');
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Ensure the range is unchanged in parent anon range. */
+ for (i = 0; i < 10 * page_size; i++) {
+ char expected = 'a' + (i % 26);
+ char actual = ptr[i];
+
+ ASSERT_EQ(actual, expected);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that forking a process with VMAs that do have VM_WIPEONFORK set
+ * behave as expected.
+ */
+TEST_F(guard_regions, fork_wipeonfork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "Wipe on fork only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark wipe on fork. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0);
+
+ /* Guard the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Guard will have been wiped. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ waitpid(pid, NULL, 0);
+
+ /* Guard markers should be in effect.*/
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_FREE retains guard entries as expected. */
+TEST_F(guard_regions, lazyfree)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "MADV_FREE only supported on anon mappings");
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensure guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Lazyfree range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0);
+
+ /* This should leave the guard markers in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */
+TEST_F(guard_regions, populate)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Populate read should error out... */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* ...as should populate write. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */
+TEST_F(guard_regions, cold_pageout)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensured guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now mark cold. This should have no impact on guard markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* OK, now page out. This should equally, have no effect on markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that guard pages do not break userfaultd. */
+TEST_F(guard_regions, uffd)
+{
+ const unsigned long page_size = self->page_size;
+ int uffd;
+ char *ptr;
+ int i;
+ struct uffdio_api api = {
+ .api = UFFD_API,
+ .features = 0,
+ };
+ struct uffdio_register reg;
+ struct uffdio_range range;
+
+ if (!is_anon_backed(variant))
+ SKIP(return, "uffd only works on anon backing");
+
+ /* Set up uffd. */
+ uffd = userfaultfd(0);
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ SKIP(return, "No userfaultfd permissions, try running as root.");
+ break;
+ case ENOSYS:
+ SKIP(return, "userfaultfd is not supported/not enabled.");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n",
+ strerror(errno));
+ break;
+ }
+ }
+
+ ASSERT_NE(uffd, -1);
+
+ ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
+
+ /* Map 10 pages. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Register the range with uffd. */
+ range.start = (unsigned long)ptr;
+ range.len = 10 * page_size;
+ reg.range = range;
+ reg.mode = UFFDIO_REGISTER_MODE_MISSING;
+ ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0);
+
+ /* Guard the range. This should not trigger the uffd. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* The guarding should behave as usual with no uffd intervention. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0);
+ close(uffd);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we
+ * aggressively read-ahead, then install guard regions and assert that it
+ * behaves correctly.
+ *
+ * We page out using MADV_PAGEOUT before checking guard regions so we drop page
+ * cache folios, meaning we maximise the possibility of some broken readahead.
+ */
+TEST_F(guard_regions, madvise_sequential)
+{
+ char *ptr;
+ int i;
+ const unsigned long page_size = self->page_size;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern of data in the file. */
+ set_pattern(ptr, 10, page_size);
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Mark it as being accessed sequentially. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0);
+
+ /* Mark every other page a guard page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr2 = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now page it out. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Now make sure pages are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *chrp = &ptr[i * page_size];
+
+ if (i % 2 == 0) {
+ bool result = try_read_write_buf(chrp);
+
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_EQ(*chrp, 'a' + i);
+ }
+ }
+
+ /* Now remove guard pages. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure all data is as expected. */
+ if (!check_pattern(ptr, 10, page_size))
+ ASSERT_TRUE(false);
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Check that file-backed mappings implement guard regions with MAP_PRIVATE
+ * correctly.
+ */
+TEST_F(guard_regions, map_private)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_shared, *ptr_private;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MAP_PRIVATE test specific to file-backed");
+
+ ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr_shared, MAP_FAILED);
+
+ /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */
+ ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0);
+ ASSERT_NE(ptr_private, MAP_FAILED);
+
+ /* Set pattern in shared mapping. */
+ set_pattern(ptr_shared, 10, page_size);
+
+ /* Install guard regions in every other page in the shared mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Install guard regions in every other page in the private mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Every odd private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from shared mapping. */
+ ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Every even private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from private mapping. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Ensure patterns are intact. */
+ ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size));
+ ASSERT_TRUE(check_pattern(ptr_private, 10, page_size));
+
+ /* Now write out every other page to MAP_PRIVATE. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ memset(ptr, 'a' + i, page_size);
+ }
+
+ /*
+ * At this point the mapping is:
+ *
+ * 0123456789
+ * SPSPSPSPSP
+ *
+ * Where S = shared, P = private mappings.
+ */
+
+ /* Now mark the beginning of the mapping guarded. */
+ ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /*
+ * This renders the mapping:
+ *
+ * 0123456789
+ * xxxxxPSPSP
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /* Ensure guard regions as expected. */
+ ASSERT_EQ(try_read_buf(ptr), i >= 5);
+ /* The shared mapping should always succeed. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ }
+
+ /* Remove the guard regions altogether. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ *
+ * We now expect the mapping to be:
+ *
+ * 0123456789
+ * SSSSSPSPSP
+ *
+ * As we removed guard regions, the private pages from the first 5 will
+ * have been zapped, so on fault will reestablish the shared mapping.
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /*
+ * Assert that shared mappings in the MAP_PRIVATE mapping match
+ * the shared mapping.
+ */
+ if (i < 5 || i % 2 == 0) {
+ char *ptr_s = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0);
+ continue;
+ }
+
+ /* Everything else is a private mapping. */
+ ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i));
+ }
+
+ ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0);
+}
+
+/* Test that guard regions established over a read-only mapping function correctly. */
+TEST_F(guard_regions, readonly_file)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ SKIP(return, "Read-only test specific to file-backed");
+
+ /* Map shared so we can populate with pattern, populate it, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ /* Close the fd so we can re-open read-only. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Re-open read-only. */
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_NE(self->fd, -1);
+ /* Re-map read-only. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark every other page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Assert that the guard regions are in place.*/
+ for (i = 0; i < 10; i++) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0);
+ }
+
+ /* Remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure the data is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, fault_around)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Fault-around test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern in the backing file. */
+ set_pattern(ptr, 10, page_size);
+
+ /*
+ * Now drop it from the page cache so we get major faults when next we
+ * map it.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Unmap and remap 'to be sure'. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now fault in every odd page. This should trigger fault-around. */
+ for (i = 1; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Finally, ensure that guard regions are intact as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, truncation)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Establish a pattern in the backing file, just so there is data
+ * there.
+ */
+ set_pattern(ptr, 10, page_size);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to actually used size (initialised to 100). */
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Here the guard regions will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to half the size, then truncate again to the full size. */
+ ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Again, guard pages will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, hole_punch)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ /* Establish pattern in mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+
+ /* Install a guard region in the middle of the mapping. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /*
+ * The buffer will now be:
+ *
+ * 0123456789
+ * ***xxxx***
+ *
+ * Where * is data and x is the guard region.
+ */
+
+ /* Ensure established. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now hole punch the guarded region. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_REMOVE), 0);
+
+ /* Ensure guard regions remain. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now remove guard region throughout. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Check that the pattern exists in non-hole punched region. */
+ ASSERT_TRUE(check_pattern(ptr, 3, page_size));
+ /* Check that hole punched region is zeroed. */
+ ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0'));
+ /* Check that the pattern exists in the remainder of the file. */
+ ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Ensure that a memfd works correctly with guard regions, that we can write
+ * seal it then open the mapping read-only and still establish guard regions
+ * within, remove those guard regions and have everything work correctly.
+ */
+TEST_F(guard_regions, memfd_write_seal)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != SHMEM_BACKED)
+ SKIP(return, "memfd write seal test specific to shmem");
+
+ /* OK, we need a memfd, so close existing one. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Create and truncate memfd. */
+ self->fd = memfd_create("guard_regions_memfd_seals_test",
+ MFD_ALLOW_SEALING);
+ ASSERT_NE(self->fd, -1);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Map, set pattern, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+
+ /* Write-seal the memfd. */
+ ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0);
+
+ /* Now map the memfd readonly. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Ensure write seal intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_write_buf(ptr_p));
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+
+/*
+ * Since we are now permitted to establish guard regions in read-only anonymous
+ * mappings, for the sake of thoroughness, though it probably has no practical
+ * use, test that guard regions function with a mapping to the anonymous zero
+ * page.
+ */
+TEST_F(guard_regions, anon_zeropage)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (!is_anon_backed(variant))
+ SKIP(return, "anon zero page test specific to anon/shmem");
+
+ /* Obtain a read-only i.e. anon zero page mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove all guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Ensure zero page...*/
+ ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0'));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that /proc/$pid/pagemap correctly identifies guard region ranges.
+ */
+TEST_F(guard_regions, pagemap)
+{
+ const unsigned long page_size = self->page_size;
+ int proc_fd;
+ char *ptr;
+ int i;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Read from pagemap, and assert no guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, 0);
+ }
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that PAGEMAP_SCAN correctly reports guard region ranges.
+ */
+TEST_F(guard_regions, pagemap_scan)
+{
+ const unsigned long page_size = self->page_size;
+ struct page_region pm_regs[10];
+ struct pm_scan_arg pm_scan_args = {
+ .size = sizeof(struct pm_scan_arg),
+ .category_anyof_mask = PAGE_IS_GUARD,
+ .return_mask = PAGE_IS_GUARD,
+ .vec = (long)&pm_regs,
+ .vec_len = ARRAY_SIZE(pm_regs),
+ };
+ int proc_fd, i;
+ char *ptr;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ pm_scan_args.start = (long)ptr;
+ pm_scan_args.end = (long)ptr + 10 * page_size;
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /*
+ * Assert ioctl() returns the count of located regions, where each
+ * region spans every other page within the range of 10 pages.
+ */
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 5; i++) {
+ long ptr_p = (long)&ptr[2 * i * page_size];
+
+ ASSERT_EQ(pm_regs[i].start, ptr_p);
+ ASSERT_EQ(pm_regs[i].end, ptr_p + page_size);
+ ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, collapse)
+{
+ const unsigned long page_size = self->page_size;
+ const unsigned long size = 2 * HPAGE_SIZE;
+ const unsigned long num_pages = size / page_size;
+ char *ptr;
+ int i;
+
+ /* Need file to be correct size for tests for non-anon. */
+ if (variant->backing != ANON_BACKED)
+ ASSERT_EQ(ftruncate(self->fd, size), 0);
+
+ /*
+ * We must close and re-open local-file backed as read-only for
+ * CONFIG_READ_ONLY_THP_FOR_FS to work.
+ */
+ if (variant->backing == LOCAL_FILE_BACKED) {
+ ASSERT_EQ(close(self->fd), 0);
+
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_GE(self->fd, 0);
+ }
+
+ ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Prevent being faulted-in as huge. */
+ ASSERT_EQ(madvise(ptr, size, MADV_NOHUGEPAGE), 0);
+ /* Fault in. */
+ ASSERT_EQ(madvise(ptr, size, MADV_POPULATE_READ), 0);
+
+ /* Install guard regions in ever other page. */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_page, page_size, MADV_GUARD_INSTALL), 0);
+ /* Accesses should now fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+
+ /* Allow huge page throughout region. */
+ ASSERT_EQ(madvise(ptr, size, MADV_HUGEPAGE), 0);
+
+ /*
+ * Now collapse the entire region. This should fail in all cases.
+ *
+ * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is
+ * not set for the local file case, but we can't differentiate whether
+ * this occurred or if the collapse was rightly rejected.
+ */
+ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0);
+
+ /*
+ * If we introduce a bug that causes the collapse to succeed, gather
+ * data on whether guard regions are at least preserved. The test will
+ * fail at this point in any case.
+ */
+ for (i = 0; i < num_pages; i += 2) {
+ char *ptr_page = &ptr[i * page_size];
+
+ /* Accesses should still fail. */
+ ASSERT_FALSE(try_read_buf(ptr_page));
+ }
+}
+
+TEST_F(guard_regions, smaps)
+{
+ const unsigned long page_size = self->page_size;
+ struct procmap_fd procmap;
+ char *ptr, *ptr2;
+ int i;
+
+ /* Map a region. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We shouldn't yet see a guard flag. */
+ ASSERT_FALSE(check_vmflag_guard(ptr));
+
+ /* Install a single guard region. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Now we should see a guard flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /*
+ * Removing the guard region should not change things because we simply
+ * cannot accurately track whether a given VMA has had all of its guard
+ * regions removed.
+ */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_REMOVE), 0);
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+
+ /* Install guard regions throughout. */
+ for (i = 0; i < 10; i++) {
+ ASSERT_EQ(madvise(&ptr[i * page_size], page_size, MADV_GUARD_INSTALL), 0);
+ /* We should always see the guard region flag. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ }
+
+ /* Split into two VMAs. */
+ ASSERT_EQ(munmap(&ptr[4 * page_size], page_size), 0);
+
+ /* Both VMAs should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+ ASSERT_TRUE(check_vmflag_guard(&ptr[5 * page_size]));
+
+ /*
+ * If the local file system is unable to merge VMAs due to having
+ * unusual characteristics, there is no point in asserting merge
+ * behaviour.
+ */
+ if (!local_fs_has_sane_mmap(self, variant)) {
+ TH_LOG("local filesystem does not support sane merging skipping merge test");
+ return;
+ }
+
+ /* Map a fresh VMA between the two split VMAs. */
+ ptr2 = mmap_(self, variant, &ptr[4 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 4 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /*
+ * Check the procmap to ensure that this VMA merged with the adjacent
+ * two. The guard region flag is 'sticky' so should not preclude
+ * merging.
+ */
+ ASSERT_EQ(open_self_procmap(&procmap), 0);
+ ASSERT_TRUE(find_vma_procmap(&procmap, ptr));
+ ASSERT_EQ(procmap.query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap.query.vma_end, (unsigned long)ptr + 10 * page_size);
+ ASSERT_EQ(close_procmap(&procmap), 0);
+ /* And, of course, this VMA should have the guard flag set. */
+ ASSERT_TRUE(check_vmflag_guard(ptr));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 9423ad439a61..6279893a0adc 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -27,7 +27,7 @@
#endif /* LOCAL_CONFIG_HAVE_LIBURING */
#include "../../../../mm/gup_test.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
static size_t pagesize;
@@ -93,29 +93,56 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
__fsword_t fs_type = get_fs_type(fd);
bool should_work;
char *mem;
+ int result = KSFT_PASS;
int ret;
+ if (fd < 0) {
+ result = KSFT_FAIL;
+ goto report;
+ }
+
if (ftruncate(fd, size)) {
- ksft_test_result_fail("ftruncate() failed\n");
+ if (errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ } else {
+ ksft_print_msg("ftruncate() failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ goto report;
+ }
return;
}
if (fallocate(fd, 0, 0, size)) {
- if (size == pagesize)
- ksft_test_result_fail("fallocate() failed\n");
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ /*
+ * Some filesystems (eg, NFSv3) don't support
+ * fallocate(), report this as a skip rather than a
+ * test failure.
+ */
+ if (errno == EOPNOTSUPP) {
+ ksft_print_msg("fallocate() not supported by filesystem\n");
+ result = KSFT_SKIP;
+ } else if (size == pagesize) {
+ ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- if (size == pagesize || shared)
- ksft_test_result_fail("mmap() failed\n");
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ if (size == pagesize || shared) {
+ ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
/* Fault in the page such that GUP-fast can pin it directly. */
@@ -130,7 +157,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
*/
ret = mprotect(mem, size, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
goto munmap;
}
/* FALLTHROUGH */
@@ -143,18 +171,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
type == TEST_TYPE_RW_FAST;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ result = KSFT_SKIP;
break;
}
if (rw && shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
return;
}
/*
* R/O pinning or pinning in a private mapping is always
* expected to work. Otherwise, we expect long-term R/W pinning
- * to only succeed for special fielesystems.
+ * to only succeed for special filesystems.
*/
should_work = !shared || !rw ||
fs_supports_writable_longterm_pinning(fs_type);
@@ -165,25 +195,35 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret && errno == EINVAL) {
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ result = KSFT_SKIP;
break;
} else if (ret && errno == EFAULT) {
- ksft_test_result(!should_work, "Should have failed\n");
+ if (should_work)
+ result = KSFT_FAIL;
+ else
+ result = KSFT_PASS;
break;
} else if (ret) {
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
break;
}
if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
- ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n",
+ strerror(errno));
/*
* TODO: if the kernel ever supports long-term R/W pinning on
* some previously unsupported filesystems, we might want to
* perform some additional tests for possible data corruptions.
*/
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work)
+ result = KSFT_PASS;
+ else
+ result = KSFT_FAIL;
break;
}
#ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -193,8 +233,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* io_uring always pins pages writable. */
if (shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
- return;
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
+ goto report;
}
should_work = !shared ||
fs_supports_writable_longterm_pinning(fs_type);
@@ -202,7 +243,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed\n");
+ ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
break;
}
/*
@@ -215,15 +258,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Only new kernels return EFAULT. */
if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
errno == EFAULT)) {
- ksft_test_result(!should_work, "Should have failed\n");
+ if (should_work) {
+ ksft_print_msg("Should have failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ result = KSFT_PASS;
+ }
} else if (ret) {
/*
* We might just lack support or have insufficient
* MEMLOCK limits.
*/
- ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
} else {
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work) {
+ result = KSFT_PASS;
+ } else {
+ ksft_print_msg("Should have worked\n");
+ result = KSFT_FAIL;
+ }
io_uring_unregister_buffers(&ring);
}
@@ -237,6 +293,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
munmap:
munmap(mem, size);
+report:
+ log_test_result(result);
}
typedef void (*test_fn)(int fd, size_t size);
@@ -245,11 +303,12 @@ static void run_with_memfd(test_fn fn, const char *desc)
{
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed\n");
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
return;
}
@@ -262,23 +321,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
- return;
- }
-
- fd = fileno(file);
- if (fd < 0) {
- ksft_test_result_fail("fileno() failed\n");
- goto close;
+ ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+ fd = -1;
+ } else {
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+ }
}
fn(fd, pagesize);
-close:
- fclose(file);
+
+ if (file)
+ fclose(file);
}
static void run_with_local_tmpfile(test_fn fn, const char *desc)
@@ -286,22 +345,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
char filename[] = __FILE__"_tmpfile_XXXXXX";
int fd;
- ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+ log_test_start("%s ... with local tmpfile", desc);
fd = mkstemp(filename);
- if (fd < 0) {
- ksft_test_result_fail("mkstemp() failed\n");
- return;
- }
+ if (fd < 0)
+ ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
if (unlink(filename)) {
- ksft_test_result_fail("unlink() failed\n");
- goto close;
+ ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+ close(fd);
+ fd = -1;
}
fn(fd, pagesize);
-close:
- close(fd);
+
+ if (fd >= 0)
+ close(fd);
}
static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
@@ -310,14 +369,15 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
int flags = MFD_HUGETLB;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed\n");
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
return;
}
@@ -446,7 +506,7 @@ static int tests_per_test_case(void)
int main(int argc, char **argv)
{
- int i, err;
+ int i;
pagesize = getpagesize();
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
@@ -460,9 +520,5 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_test_case(&test_cases[i]);
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index bdeaac67ff9a..fb8f9ae49efa 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -12,14 +12,13 @@
#include <pthread.h>
#include <assert.h>
#include <mm/gup_test.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define MB (1UL << 20)
-/* Just the flags we need, copied from mm.h: */
+/* Just the flags we need, copied from the kernel internals. */
#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_TOUCH 0x02 /* mark page accessed */
#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
@@ -93,7 +92,7 @@ int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
- int flags = MAP_PRIVATE, touch = 0;
+ int flags = MAP_PRIVATE;
char *file = "/dev/zero";
pthread_t *tid;
char *p;
@@ -139,6 +138,8 @@ int main(int argc, char **argv)
break;
case 'n':
nr_pages = atoi(optarg);
+ if (nr_pages < 0)
+ nr_pages = size / psize();
break;
case 't':
thp = 1;
@@ -168,10 +169,6 @@ int main(int argc, char **argv)
case 'H':
flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
- case 'z':
- /* fault pages in gup, do not fault in userland */
- touch = 1;
- break;
default:
ksft_exit_fail_msg("Wrong argument\n");
}
@@ -242,18 +239,9 @@ int main(int argc, char **argv)
else if (thp == 0)
madvise(p, size, MADV_NOHUGEPAGE);
- /*
- * FOLL_TOUCH, in gup_test, is used as an either/or case: either
- * fault pages in from the kernel via FOLL_TOUCH, or fault them
- * in here, from user space. This allows comparison of performance
- * between those two cases.
- */
- if (touch) {
- gup.gup_flags |= FOLL_TOUCH;
- } else {
- for (; (unsigned long)p < gup.addr + size; p += psize())
- p[0] = 0;
- }
+ /* Fault them in here, from user space. */
+ for (; (unsigned long)p < gup.addr + size; p += psize())
+ p[0] = 0;
tid = malloc(sizeof(pthread_t) * nthreads);
assert(tid);
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 141bf63cbe05..e8328c89d855 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -10,7 +10,7 @@
* bugs.
*/
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include <errno.h>
#include <fcntl.h>
@@ -25,6 +25,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
/*
@@ -50,6 +51,8 @@ enum {
HMM_COHERENCE_DEVICE_TWO,
};
+#define ONEKB (1 << 10)
+#define ONEMEG (1 << 20)
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
@@ -207,8 +210,10 @@ static void hmm_buffer_free(struct hmm_buffer *buffer)
if (buffer == NULL)
return;
- if (buffer->ptr)
+ if (buffer->ptr) {
munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
+ }
free(buffer->mirror);
free(buffer);
}
@@ -525,6 +530,8 @@ TEST_F(hmm, anon_write_prot)
/*
* Check that a device writing an anonymous private mapping
* will copy-on-write if a child process inherits the mapping.
+ *
+ * Also verifies after fork() memory the device can be read by child.
*/
TEST_F(hmm, anon_write_child)
{
@@ -532,72 +539,101 @@ TEST_F(hmm, anon_write_child)
unsigned long npages;
unsigned long size;
unsigned long i;
+ void *old_ptr;
+ void *map;
int *ptr;
pid_t pid;
int child_fd;
- int ret;
-
- npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
- ASSERT_NE(npages, 0);
- size = npages << self->page_shift;
-
- buffer = malloc(sizeof(*buffer));
- ASSERT_NE(buffer, NULL);
-
- buffer->fd = -1;
- buffer->size = size;
- buffer->mirror = malloc(size);
- ASSERT_NE(buffer->mirror, NULL);
-
- buffer->ptr = mmap(NULL, size,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- buffer->fd, 0);
- ASSERT_NE(buffer->ptr, MAP_FAILED);
-
- /* Initialize buffer->ptr so we can tell if it is written. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ptr[i] = i;
-
- /* Initialize data that the device will write to buffer->ptr. */
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ptr[i] = -i;
+ int ret, use_thp, migrate;
+
+ for (migrate = 0; migrate < 2; ++migrate) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ npages = ALIGN(use_thp ? TWOMEG : HMM_BUFFER_SIZE,
+ self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size * 2;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size * 2,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ old_ptr = buffer->ptr;
+ if (use_thp) {
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+ }
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ if (migrate) {
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ }
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ continue;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ if (!migrate) {
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
- pid = fork();
- if (pid == -1)
- ASSERT_EQ(pid, 0);
- if (pid != 0) {
- waitpid(pid, &ret, 0);
- ASSERT_EQ(WIFEXITED(ret), 1);
+ /* Check what the device wrote. */
+ if (!migrate) {
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ }
- /* Check that the parent's buffer did not change. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- return;
+ close(child_fd);
+ exit(0);
+ }
}
-
- /* Check that we see the parent's values. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], i);
- for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- /* The child process needs its own mirror to its own mm. */
- child_fd = hmm_open(0);
- ASSERT_GE(child_fd, 0);
-
- /* Simulate a device writing system memory. */
- ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
- ASSERT_EQ(ret, 0);
- ASSERT_EQ(buffer->cpages, npages);
- ASSERT_EQ(buffer->faults, 1);
-
- /* Check what the device wrote. */
- for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
- ASSERT_EQ(ptr[i], -i);
-
- close(child_fd);
- exit(0);
}
/*
@@ -2027,11 +2063,10 @@ TEST_F(hmm, hmm_cow_in_device)
if (pid == -1)
ASSERT_EQ(pid, 0);
if (!pid) {
- /* Child process waitd for SIGTERM from the parent. */
+ /* Child process waits for SIGTERM from the parent. */
while (1) {
}
- perror("Should not reach this\n");
- exit(0);
+ /* Should not reach this */
}
/* Parent process writes to COW pages(s) and gets a
* new copy in system. In case of device private pages,
@@ -2056,4 +2091,765 @@ TEST_F(hmm, hmm_cow_in_device)
hmm_buffer_free(buffer);
}
+
+/*
+ * Migrate private anonymous huge empty page.
+ */
+TEST_F(hmm, migrate_anon_huge_empty)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page.
+ */
+TEST_F(hmm, migrate_anon_huge_zero)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+ int val;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize a read-only zero huge page. */
+ val = *(int *)buffer->ptr;
+ ASSERT_EQ(val, 0);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], 0);
+ /* If it asserts once, it probably will 500,000 times */
+ if (ptr[i] != 0)
+ break;
+ }
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and free.
+ */
+TEST_F(hmm, migrate_anon_huge_free)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Try freeing it. */
+ ret = madvise(map, size, MADV_FREE);
+ ASSERT_EQ(ret, 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge page and fault back to sysmem.
+ */
+TEST_F(hmm, migrate_anon_huge_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate memory and fault back to sysmem after partially unmapping.
+ */
+TEST_F(hmm, migrate_partial_unmap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, 2 * size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(buffer->ptr + offsets[j], ONEMEG);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ if (i * sizeof(int) < offsets[j] ||
+ i * sizeof(int) >= offsets[j] + ONEMEG)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+}
+
+TEST_F(hmm, migrate_remap_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size = TWOMEG;
+ unsigned long i;
+ void *old_ptr, *new_ptr = NULL;
+ void *map;
+ int *ptr;
+ int ret, j, use_thp, dont_unmap, before;
+ int offsets[] = { 0, 512 * ONEKB, ONEMEG };
+
+ for (before = 0; before < 2; ++before) {
+ for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) {
+ for (use_thp = 0; use_thp < 2; ++use_thp) {
+ for (j = 0; j < ARRAY_SIZE(offsets); ++j) {
+ int flags = MREMAP_MAYMOVE | MREMAP_FIXED;
+
+ if (dont_unmap)
+ flags |= MREMAP_DONTUNMAP;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 8 * size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, size);
+
+ buffer->ptr = mmap(NULL, buffer->size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ if (use_thp)
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ else
+ ret = madvise(map, size, MADV_NOHUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ munmap(map + size, size * 2);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ if (before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ if (!before) {
+ new_ptr = mremap((void *)map, size, size, flags,
+ map + size + offsets[j]);
+ ASSERT_NE(new_ptr, MAP_FAILED);
+ buffer->ptr = new_ptr;
+ }
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr;
+ i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ munmap(new_ptr, size);
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Migrate private anonymous huge page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) {
+ ASSERT_EQ(ptr[i], i);
+ if (ptr[i] != i)
+ break;
+ }
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate THP to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /*
+ * Force an allocation error when faulting back a THP resident in the
+ * device.
+ */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 2048);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate private anonymous huge zero page with allocation errors.
+ */
+TEST_F(hmm, migrate_anon_huge_zero_err)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = 2 * size;
+ buffer->mirror = malloc(2 * size);
+ ASSERT_NE(buffer->mirror, NULL);
+ memset(buffer->mirror, 0xFF, 2 * size);
+
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Try faulting back a single (PAGE_SIZE) page. */
+ ptr = buffer->ptr;
+ ASSERT_EQ(ptr[2048], 0);
+
+ /* unmap and remap the region to reset things. */
+ ret = munmap(old_ptr, 2 * size);
+ ASSERT_EQ(ret, 0);
+ old_ptr = mmap(NULL, 2 * size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(old_ptr, MAP_FAILED);
+ map = (void *)ALIGN((uintptr_t)old_ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ buffer->ptr = map;
+
+ /* Initialize buffer in system memory (zero THP page). */
+ ret = ptr[0];
+ ASSERT_EQ(ret, 0);
+
+ /* Migrate memory to device but force a THP allocation error. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer,
+ HMM_DMIRROR_FLAG_FAIL_ALLOC);
+ ASSERT_EQ(ret, 0);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Fault the device memory back and check it. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+struct benchmark_results {
+ double sys_to_dev_time;
+ double dev_to_sys_time;
+ double throughput_s2d;
+ double throughput_d2s;
+};
+
+static double get_time_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size)
+{
+ struct hmm_buffer *buffer;
+
+ buffer = malloc(sizeof(*buffer));
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ memset(buffer->mirror, 0xFF, size);
+ return buffer;
+}
+
+static void print_benchmark_results(const char *test_name, size_t buffer_size,
+ struct benchmark_results *thp,
+ struct benchmark_results *regular)
+{
+ double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) /
+ regular->sys_to_dev_time) * 100.0;
+ double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) /
+ regular->dev_to_sys_time) * 100.0;
+ double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) /
+ regular->throughput_s2d) * 100.0;
+ double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) /
+ regular->throughput_d2s) * 100.0;
+
+ printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0));
+ printf(" | With THP | Without THP | Improvement\n");
+ printf("---------------------------------------------------------------------\n");
+ printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement);
+ printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n",
+ thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement);
+ printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement);
+ printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n",
+ thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement);
+}
+
+/*
+ * Run a single migration benchmark
+ * fd: file descriptor for hmm device
+ * use_thp: whether to use THP
+ * buffer_size: size of buffer to allocate
+ * iterations: number of iterations
+ * results: where to store results
+ */
+static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size,
+ int iterations, struct benchmark_results *results)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE);
+ double start, end;
+ double s2d_total = 0, d2s_total = 0;
+ int ret, i;
+ int *ptr;
+
+ buffer = hmm_buffer_alloc(buffer_size);
+
+ /* Map memory */
+ buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (!buffer->ptr)
+ return -1;
+
+ /* Apply THP hint if requested */
+ if (use_thp)
+ ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE);
+ else
+ ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE);
+
+ if (ret)
+ return ret;
+
+ /* Initialize memory to make sure pages are allocated */
+ ptr = (int *)buffer->ptr;
+ for (i = 0; i < buffer_size / sizeof(int); i++)
+ ptr[i] = i & 0xFF;
+
+ /* Warmup iteration */
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ /* Benchmark iterations */
+ for (i = 0; i < iterations; i++) {
+ /* System to device migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_sys_to_dev(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ s2d_total += (end - start);
+
+ /* Device to system migration */
+ start = get_time_ms();
+
+ ret = hmm_migrate_dev_to_sys(fd, buffer, npages);
+ if (ret)
+ return ret;
+
+ end = get_time_ms();
+ d2s_total += (end - start);
+ }
+
+ /* Calculate average times and throughput */
+ results->sys_to_dev_time = s2d_total / iterations;
+ results->dev_to_sys_time = d2s_total / iterations;
+ results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->sys_to_dev_time / 1000.0);
+ results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) /
+ (results->dev_to_sys_time / 1000.0);
+
+ /* Cleanup */
+ hmm_buffer_free(buffer);
+ return 0;
+}
+
+/*
+ * Benchmark THP migration with different buffer sizes
+ */
+TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120)
+{
+ struct benchmark_results thp_results, regular_results;
+ size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */
+ int iterations = 5;
+
+ printf("\nHMM THP Migration Benchmark\n");
+ printf("---------------------------\n");
+ printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
+
+ /* Test different buffer sizes */
+ size_t test_sizes[] = {
+ thp_size / 4, /* 512KB - smaller than THP */
+ thp_size / 2, /* 1MB - half THP */
+ thp_size, /* 2MB - single THP */
+ thp_size * 2, /* 4MB - two THPs */
+ thp_size * 4, /* 8MB - four THPs */
+ thp_size * 8, /* 16MB - eight THPs */
+ thp_size * 128, /* 256MB - one twenty eight THPs */
+ };
+
+ static const char *const test_names[] = {
+ "Small Buffer (512KB)",
+ "Half THP Size (1MB)",
+ "Single THP Size (2MB)",
+ "Two THP Size (4MB)",
+ "Four THP Size (8MB)",
+ "Eight THP Size (16MB)",
+ "One twenty eight THP Size (256MB)"
+ };
+
+ int num_tests = ARRAY_SIZE(test_sizes);
+
+ /* Run all tests */
+ for (int i = 0; i < num_tests; i++) {
+ /* Test with THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i],
+ iterations, &thp_results), 0);
+
+ /* Test without THP */
+ ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i],
+ iterations, &regular_results), 0);
+
+ /* Print results */
+ print_benchmark_results(test_names[i], test_sizes[i],
+ &thp_results, &regular_results);
+ }
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
index 3b1b532f1cbb..d543419de040 100644
--- a/tools/testing/selftests/mm/hugepage-mmap.c
+++ b/tools/testing/selftests/mm/hugepage-mmap.c
@@ -15,7 +15,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index ada9156cc497..b8f7d92e5a35 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -15,7 +15,7 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h> /* Definition of O_* constants */
@@ -24,7 +24,7 @@
#include <sys/ioctl.h>
#include <string.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define DEFAULT_LENGTH_MB 10UL
@@ -65,10 +65,20 @@ static void register_region_with_uffd(char *addr, size_t len)
struct uffdio_api uffdio_api;
/* Create and enable userfaultfd object. */
-
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1)
- ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno));
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ ksft_exit_skip("Insufficient permissions, try running as root.\n");
+ break;
+ case ENOSYS:
+ ksft_exit_skip("userfaultfd is not supported/not enabled.\n");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno));
+ break;
+ }
+ }
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index e74107185324..05d9d2805ae4 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -19,7 +19,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define MIN_FREE_PAGES 20
#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
@@ -47,14 +47,13 @@ void write_fault_pages(void *addr, unsigned long nr_pages)
void read_fault_pages(void *addr, unsigned long nr_pages)
{
- volatile unsigned long dummy = 0;
unsigned long i;
for (i = 0; i < nr_pages; i++) {
- dummy += *((unsigned long *)(addr + (i * huge_page_size)));
-
+ unsigned long *addr2 =
+ ((unsigned long *)(addr + (i * huge_page_size)));
/* Prevent the compiler from optimizing out the entire loop: */
- asm volatile("" : "+r" (dummy));
+ FORCE_READ(*addr2);
}
}
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
index ba6cc6f9cabc..46230462ad48 100644
--- a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -11,7 +11,7 @@
#include <errno.h>
#include <stdbool.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define PREFIX " ... "
#define ERROR_PREFIX " !!! "
diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c
index f086f0e04756..a8bc02688085 100644
--- a/tools/testing/selftests/mm/hugetlb-soft-offline.c
+++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c
@@ -24,7 +24,7 @@
#include <sys/statfs.h>
#include <sys/types.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
index db63abe5ee5e..9ac62eb4c97d 100644
--- a/tools/testing/selftests/mm/hugetlb_dio.c
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -18,7 +18,7 @@
#include <string.h>
#include <sys/mman.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
{
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
index e2640529dbb2..b4b257775b74 100644
--- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -9,7 +9,7 @@
#include <signal.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define INLOOP_ITER 100
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
index 8f122a0f0828..efd774b41389 100644
--- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -25,7 +25,7 @@
#include <unistd.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define INLOOP_ITER 100
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..0dd31892ff67 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -23,7 +23,7 @@ fi
if [[ $cgroup2 ]]; then
CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
- CGROUP_ROOT=/dev/cgroup/memory
+ CGROUP_ROOT=$(mktemp -d)
mount -t cgroup2 none $CGROUP_ROOT
do_umount=1
fi
@@ -36,7 +36,7 @@ else
do_umount=1
fi
fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -56,10 +56,45 @@ function cleanup() {
rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
rmdir "$CGROUP_ROOT"/a 2>/dev/null
rmdir "$CGROUP_ROOT"/test1 2>/dev/null
- echo 0 >/proc/sys/vm/nr_hugepages
+ echo $nr_hugepgs >/proc/sys/vm/nr_hugepages
set -e
}
+function assert_with_retry() {
+ local actual_path="$1"
+ local expected="$2"
+ local tolerance=$((7 * 1024 * 1024))
+ local timeout=20
+ local interval=1
+ local start_time
+ local now
+ local elapsed
+ local actual
+
+ start_time=$(date +%s)
+
+ while true; do
+ actual="$(cat "$actual_path")"
+
+ if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+ [[ $actual -le $(($expected + $tolerance)) ]]; then
+ return 0
+ fi
+
+ now=$(date +%s)
+ elapsed=$((now - start_time))
+
+ if [[ $elapsed -ge $timeout ]]; then
+ echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+ echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ cleanup
+ exit 1
+ fi
+
+ sleep $interval
+ done
+}
+
function assert_state() {
local expected_a="$1"
local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
expected_b="$3"
expected_b_hugetlb="$4"
fi
- local tolerance=$((5 * 1024 * 1024))
-
- local actual_a
- actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
- if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
- [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
- echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
- echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
- local actual_a_hugetlb
- actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
- [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
- echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
- echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
+ assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+ assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
- exit 1
- fi
-
- if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
- return
- fi
-
- local actual_b
- actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
- if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
- [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
- echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
- echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_b_hugetlb
- actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
- [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
- echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
- echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
+ if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+ assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
cleanup
echo
-echo
echo Test charge, rmdir, uncharge
setup
echo mkdir
@@ -195,7 +184,6 @@ cleanup
echo done
echo
-echo
if [[ ! $cgroup2 ]]; then
echo "Test parent and child hugetlb usage"
setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
- sleep 5
echo Assert memory reparent correctly.
assert_state 0 $(($size * 2))
@@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then
fi
echo
-echo
echo "Test child only hugetlb usage"
echo setup
setup
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
index 8a4d34cce36b..3fe7ef04ac62 100644
--- a/tools/testing/selftests/mm/khugepaged.c
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -394,7 +394,7 @@ static void *file_setup_area(int nr_hpages)
perror("open()");
exit(EXIT_FAILURE);
}
- p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
+ p = mmap(BASE_ADDR, size, PROT_READ,
MAP_PRIVATE, finfo.fd, 0);
if (p == MAP_FAILED || p != BASE_ADDR) {
perror("mmap()");
@@ -561,8 +561,6 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
usleep(TICK);
}
- madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);
-
return timeout == -1;
}
@@ -1190,6 +1188,11 @@ int main(int argc, char **argv)
.read_ahead_kb = 0,
};
+ if (!thp_is_enabled()) {
+ printf("Transparent Hugepages not available\n");
+ return KSFT_SKIP;
+ }
+
parse_test_type(argc, argv);
setbuf(stdout, NULL);
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 66b4e111b5a2..8d874c4754f3 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
@@ -21,7 +21,7 @@
#include <sys/wait.h>
#include <linux/userfaultfd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
#define KiB 1024u
@@ -38,14 +38,13 @@ enum ksm_merge_mode {
};
static int mem_fd;
-static int ksm_fd;
-static int ksm_full_scans_fd;
-static int proc_self_ksm_stat_fd;
-static int proc_self_ksm_merging_pages_fd;
-static int ksm_use_zero_pages_fd;
+static int pages_to_scan_fd;
+static int sleep_millisecs_fd;
static int pagemap_fd;
static size_t pagesize;
+static void init_global_file_handles(void);
+
static bool range_maps_duplicates(char *addr, unsigned long size)
{
unsigned long offs_a, offs_b, pfn_a, pfn_b;
@@ -73,88 +72,6 @@ static bool range_maps_duplicates(char *addr, unsigned long size)
return false;
}
-static long get_my_ksm_zero_pages(void)
-{
- char buf[200];
- char *substr_ksm_zero;
- size_t value_pos;
- ssize_t read_size;
- unsigned long my_ksm_zero_pages;
-
- if (!proc_self_ksm_stat_fd)
- return 0;
-
- read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
- if (read_size < 0)
- return -errno;
-
- buf[read_size] = 0;
-
- substr_ksm_zero = strstr(buf, "ksm_zero_pages");
- if (!substr_ksm_zero)
- return 0;
-
- value_pos = strcspn(substr_ksm_zero, "0123456789");
- my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
-
- return my_ksm_zero_pages;
-}
-
-static long get_my_merging_pages(void)
-{
- char buf[10];
- ssize_t ret;
-
- if (proc_self_ksm_merging_pages_fd < 0)
- return proc_self_ksm_merging_pages_fd;
-
- ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
- if (ret <= 0)
- return -errno;
- buf[ret] = 0;
-
- return strtol(buf, NULL, 10);
-}
-
-static long ksm_get_full_scans(void)
-{
- char buf[10];
- ssize_t ret;
-
- ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
- if (ret <= 0)
- return -errno;
- buf[ret] = 0;
-
- return strtol(buf, NULL, 10);
-}
-
-static int ksm_merge(void)
-{
- long start_scans, end_scans;
-
- /* Wait for two full scans such that any possible merging happened. */
- start_scans = ksm_get_full_scans();
- if (start_scans < 0)
- return start_scans;
- if (write(ksm_fd, "1", 1) != 1)
- return -errno;
- do {
- end_scans = ksm_get_full_scans();
- if (end_scans < 0)
- return end_scans;
- } while (end_scans < start_scans + 2);
-
- return 0;
-}
-
-static int ksm_unmerge(void)
-{
- if (write(ksm_fd, "2", 1) != 1)
- return -errno;
- return 0;
-}
-
static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
enum ksm_merge_mode mode)
{
@@ -163,12 +80,12 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
int ret;
/* Stabilize accounting by disabling KSM completely. */
- if (ksm_unmerge()) {
+ if (ksm_stop() < 0) {
ksft_print_msg("Disabling (unmerging) KSM failed\n");
return err_map;
}
- if (get_my_merging_pages() > 0) {
+ if (ksm_get_self_merging_pages() > 0) {
ksft_print_msg("Still pages merged\n");
return err_map;
}
@@ -218,7 +135,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
}
/* Run KSM to trigger merging and wait. */
- if (ksm_merge()) {
+ if (ksm_start() < 0) {
ksft_print_msg("Running KSM failed\n");
goto unmap;
}
@@ -227,7 +144,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
* Check if anything was merged at all. Ignore the zero page that is
* accounted differently (depending on kernel support).
*/
- if (val && !get_my_merging_pages()) {
+ if (val && !ksm_get_self_merging_pages()) {
ksft_print_msg("No pages got merged\n");
goto unmap;
}
@@ -274,6 +191,7 @@ static void test_unmerge(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -286,15 +204,12 @@ static void test_unmerge_zero_pages(void)
ksft_print_msg("[RUN] %s\n", __func__);
- if (proc_self_ksm_stat_fd < 0) {
- ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
+ if (ksm_get_self_zero_pages() < 0) {
+ ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n");
return;
}
- if (ksm_use_zero_pages_fd < 0) {
- ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
- return;
- }
- if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
+
+ if (ksm_use_zero_pages() < 0) {
ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
return;
}
@@ -306,7 +221,7 @@ static void test_unmerge_zero_pages(void)
/* Check if ksm_zero_pages is updated correctly after KSM merging */
pages_expected = size / pagesize;
- if (pages_expected != get_my_ksm_zero_pages()) {
+ if (pages_expected != ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
goto unmap;
}
@@ -319,7 +234,7 @@ static void test_unmerge_zero_pages(void)
/* Check if ksm_zero_pages is updated correctly after unmerging */
pages_expected /= 2;
- if (pages_expected != get_my_ksm_zero_pages()) {
+ if (pages_expected != ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
goto unmap;
}
@@ -329,7 +244,7 @@ static void test_unmerge_zero_pages(void)
*((unsigned int *)&map[offs]) = offs;
/* Now we should have no zeropages remaining. */
- if (get_my_ksm_zero_pages()) {
+ if (ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
goto unmap;
}
@@ -338,6 +253,7 @@ static void test_unmerge_zero_pages(void)
ksft_test_result(!range_maps_duplicates(map, size),
"KSM zero pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -366,9 +282,11 @@ static void test_unmerge_discarded(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
+#ifdef __NR_userfaultfd
static void test_unmerge_uffd_wp(void)
{
struct uffdio_writeprotect uffd_writeprotect;
@@ -392,9 +310,13 @@ static void test_unmerge_uffd_wp(void)
/* See if UFFD-WP is around. */
uffdio_api.api = UFFD_API;
- uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ uffdio_api.features = 0;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
- ksft_test_result_fail("UFFDIO_API failed\n");
+ if (errno == EINVAL)
+ ksft_test_result_skip("The API version requested is not supported\n");
+ else
+ ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno));
+
goto close_uffd;
}
if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
@@ -402,6 +324,26 @@ static void test_unmerge_uffd_wp(void)
goto close_uffd;
}
+ /*
+ * UFFDIO_API must only be called once to enable features.
+ * So we close the old userfaultfd and create a new one to
+ * actually enable UFFD_FEATURE_PAGEFAULT_FLAG_WP.
+ */
+ close(uffd);
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_fail("__NR_userfaultfd failed\n");
+ goto unmap;
+ }
+
+ /* Now, enable it ("two-step handshake") */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno));
+ goto close_uffd;
+ }
+
/* Register UFFD-WP, no need for an actual handler. */
if (uffd_register(uffd, map, size, false, true, false)) {
ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
@@ -427,8 +369,10 @@ static void test_unmerge_uffd_wp(void)
close_uffd:
close(uffd);
unmap:
+ ksm_stop();
munmap(map, size);
}
+#endif
/* Verify that KSM can be enabled / queried with prctl. */
static void test_prctl(void)
@@ -480,27 +424,30 @@ static int test_child_ksm(void)
/* Test if KSM is enabled for the process. */
if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
- return -1;
+ return 1;
/* Test if merge could really happen. */
map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
if (map == MAP_MERGE_FAIL)
- return -2;
+ return 2;
else if (map == MAP_MERGE_SKIP)
- return -3;
+ return 3;
+ ksm_stop();
munmap(map, size);
return 0;
}
static void test_child_ksm_err(int status)
{
- if (status == -1)
+ if (status == 1)
ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
- else if (status == -2)
+ else if (status == 2)
ksft_test_result_fail("Merge in child failed\n");
- else if (status == -3)
+ else if (status == 3)
ksft_test_result_skip("Merge in child skipped\n");
+ else if (status == 4)
+ ksft_test_result_fail("Binary not found\n");
}
/* Verify that prctl ksm flag is inherited. */
@@ -522,6 +469,7 @@ static void test_prctl_fork(void)
child_pid = fork();
if (!child_pid) {
+ init_global_file_handles();
exit(test_child_ksm());
} else if (child_pid < 0) {
ksft_test_result_fail("fork() failed\n");
@@ -547,6 +495,46 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
+static int start_ksmd_and_set_frequency(char *pages_to_scan, char *sleep_ms)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "1", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, pages_to_scan, strlen(pages_to_scan)) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, sleep_ms, strlen(sleep_ms)) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static int stop_ksmd_and_restore_frequency(void)
+{
+ int ksm_fd;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+
+ if (write(pages_to_scan_fd, "100", 3) <= 0)
+ return -errno;
+
+ if (write(sleep_millisecs_fd, "20", 2) <= 0)
+ return -errno;
+
+ return 0;
+}
+
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -554,6 +542,9 @@ static void test_prctl_fork_exec(void)
ksft_print_msg("[RUN] %s\n", __func__);
+ if (start_ksmd_and_set_frequency("2000", "0"))
+ ksft_test_result_fail("set ksmd's scanning frequency failed\n");
+
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
@@ -569,10 +560,10 @@ static void test_prctl_fork_exec(void)
return;
} else if (child_pid == 0) {
char *prg_name = "./ksm_functional_tests";
- char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
+ char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL };
execv(prg_name, argv_for_program);
- return;
+ exit(4);
}
if (waitpid(child_pid, &status, 0) > 0) {
@@ -596,6 +587,11 @@ static void test_prctl_fork_exec(void)
return;
}
+ if (stop_ksmd_and_restore_frequency()) {
+ ksft_test_result_fail("restore ksmd frequency failed\n");
+ return;
+ }
+
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
@@ -618,6 +614,7 @@ static void test_prctl_unmerge(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -651,6 +648,47 @@ static void test_prot_none(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
+ munmap(map, size);
+}
+
+static void test_fork_ksm_merging_page_count(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ pid_t child_pid;
+ int status;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
+ if (map == MAP_FAILED)
+ return;
+
+ child_pid = fork();
+ if (!child_pid) {
+ init_global_file_handles();
+ exit(ksm_get_self_merging_pages());
+ } else if (child_pid < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto unmap;
+ }
+
+ if (waitpid(child_pid, &status, 0) < 0) {
+ ksft_test_result_fail("waitpid() failed\n");
+ goto unmap;
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ ksft_test_result_fail("ksm_merging_page in child: %d\n", status);
+ goto unmap;
+ }
+
+ ksft_test_result_pass("ksm_merging_pages is not inherited after fork\n");
+
+unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -659,24 +697,27 @@ static void init_global_file_handles(void)
mem_fd = open("/proc/self/mem", O_RDWR);
if (mem_fd < 0)
ksft_exit_fail_msg("opening /proc/self/mem failed\n");
- ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
- if (ksm_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
- ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
- if (ksm_full_scans_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ if (ksm_stop() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n");
+ if (ksm_get_full_scans() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n");
pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
if (pagemap_fd < 0)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
- proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
- proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
- O_RDONLY);
- ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ if (ksm_get_self_merging_pages() < 0)
+ ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n");
+
+ pages_to_scan_fd = open("/sys/kernel/mm/ksm/pages_to_scan", O_RDWR);
+ if (pages_to_scan_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/pages_to_scan failed\n");
+ sleep_millisecs_fd = open("/sys/kernel/mm/ksm/sleep_millisecs", O_RDWR);
+ if (sleep_millisecs_fd < 0)
+ ksft_exit_fail_msg("opening /sys/kernel/mm/ksm/sleep_millisecs failed\n");
}
int main(int argc, char **argv)
{
- unsigned int tests = 8;
+ unsigned int tests = 9;
int err;
if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
@@ -684,7 +725,9 @@ int main(int argc, char **argv)
exit(test_child_ksm());
}
+#ifdef __NR_userfaultfd
tests++;
+#endif
ksft_print_header();
ksft_set_plan(tests);
@@ -696,7 +739,9 @@ int main(int argc, char **argv)
test_unmerge();
test_unmerge_zero_pages();
test_unmerge_discarded();
+#ifdef __NR_userfaultfd
test_unmerge_uffd_wp();
+#endif
test_prot_none();
@@ -704,6 +749,7 @@ int main(int argc, char **argv)
test_prctl_fork();
test_prctl_fork_exec();
test_prctl_unmerge();
+ test_fork_ksm_merging_page_count();
err = ksft_get_fail_cnt();
if (err)
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index dcdd5bb20f3d..a0b48b839d54 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -12,9 +12,10 @@
#include <stdint.h>
#include <err.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include <include/vdso/time64.h>
#include "vm_util.h"
+#include "thp_settings.h"
#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
#define KSM_FP(s) (KSM_SYSFS_PATH s)
@@ -58,40 +59,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val)
{
- FILE *f = fopen(file_path, "w");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
- perror("fprintf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return write_sysfs(file_path, val);
}
static int ksm_read_sysfs(const char *file_path, unsigned long *val)
{
- FILE *f = fopen(file_path, "r");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
- perror("fscanf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return read_sysfs(file_path, val);
}
static void ksm_print_sysfs(void)
@@ -555,6 +528,11 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
unsigned long scan_time_ns;
int pagemap_fd, n_normal_pages, n_huge_pages;
+ if (!thp_is_enabled()) {
+ printf("Transparent Hugepages not available\n");
+ return KSFT_SKIP;
+ }
+
map_size *= MB;
size_t len = map_size;
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index ef7d911da13e..88050e0f829a 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -17,7 +17,7 @@
#include <linux/mman.h>
#include <sys/mman.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "vm_util.h"
/*
@@ -172,12 +172,12 @@ static void test_populate_read(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "read range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
ksft_test_result(!ret, "MADV_POPULATE_READ\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "read range is populated\n");
munmap(addr, SIZE);
}
@@ -194,12 +194,12 @@ static void test_populate_write(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "write range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "write range is populated\n");
munmap(addr, SIZE);
}
@@ -247,40 +247,23 @@ static void test_softdirty(void)
/* Clear any softdirty bits. */
clear_softdirty();
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "cleared range is not softdirty\n");
/* Populating READ should set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
- ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n");
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "range is not softdirty after MADV_POPULATE_READ\n");
/* Populating WRITE should set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
- ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_softdirty(addr, SIZE),
- "range is softdirty\n");
+ "range is softdirty after MADV_POPULATE_WRITE \n");
munmap(addr, SIZE);
}
-static int system_has_softdirty(void)
-{
- /*
- * There is no way to check if the kernel supports soft-dirty, other
- * than by writing to a page and seeing if the bit was set. But the
- * tests are intended to check that the bit gets set when it should, so
- * doing that check would turn a potentially legitimate fail into a
- * skip. Fortunately, we know for sure that arm64 does not support
- * soft-dirty. So for now, let's just use the arch as a corse guide.
- */
-#if defined(__aarch64__)
- return 0;
-#else
- return 1;
-#endif
-}
-
int main(int argc, char **argv)
{
int nr_tests = 16;
@@ -288,7 +271,7 @@ int main(int argc, char **argv)
pagesize = getpagesize();
- if (system_has_softdirty())
+ if (softdirty_supported())
nr_tests += 5;
ksft_print_header();
@@ -300,7 +283,7 @@ int main(int argc, char **argv)
test_holes();
test_populate_read();
test_populate_write();
- if (system_has_softdirty())
+ if (softdirty_supported())
test_softdirty();
err = ksft_get_fail_cnt();
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index d53de2486080..11241edde7fe 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -12,7 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static void dump_maps(void)
{
@@ -96,7 +96,7 @@ int main(void)
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
- ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
+ ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index b47399feab53..aa409107611b 100644
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -11,7 +11,7 @@
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 5c8a53869b1b..712327f4e932 100644
--- a/tools/testing/selftests/mm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -16,7 +16,9 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
+
+#include "vm_util.h"
#define MMAP_SZ 4096
@@ -87,6 +89,9 @@ int main(int argc, char **argv)
BUG_ON(!ftmp, "tmpfile()");
ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ if (ret < 0 && errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ }
BUG_ON(ret, "ftruncate()");
smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index 200bedcdc32e..647779653da0 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -14,7 +14,7 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef __aarch64__
# define PROT_BTI 0
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 74c911aa3aea..aac4f795c327 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -17,17 +17,19 @@
#include <stdlib.h>
#include <string.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+#ifdef __NR_memfd_secret
+
#define PATTERN 0x55
static const int prot = PROT_READ | PROT_WRITE;
@@ -332,3 +334,13 @@ int main(int argc, char *argv[])
ksft_finished();
}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
new file mode 100644
index 000000000000..363c1033cc7d
--- /dev/null
+++ b/tools/testing/selftests/mm/merge.c
@@ -0,0 +1,1174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "kselftest_harness.h"
+#include <linux/prctl.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <linux/perf_event.h>
+#include "vm_util.h"
+#include <linux/mman.h>
+
+FIXTURE(merge)
+{
+ unsigned int page_size;
+ char *carveout;
+ struct procmap_fd procmap;
+};
+
+FIXTURE_SETUP(merge)
+{
+ self->page_size = psize();
+ /* Carve out PROT_NONE region to map over. */
+ self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(self->carveout, MAP_FAILED);
+ /* Setup PROCMAP_QUERY interface. */
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+}