diff options
Diffstat (limited to 'drivers/android')
40 files changed, 21707 insertions, 0 deletions
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig new file mode 100644 index 000000000000..e2e402c9d175 --- /dev/null +++ b/drivers/android/Kconfig @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "Android" + +config ANDROID_BINDER_IPC + bool "Android Binder IPC Driver" + depends on MMU + depends on NET + default n + help + Binder is used in Android for both communication between processes, + and remote method invocation. + + This means one Android process can call a method/routine in another + Android process, using Binder to identify, invoke and pass arguments + between said processes. + +config ANDROID_BINDER_IPC_RUST + bool "Rust version of Android Binder IPC Driver" + depends on RUST && MMU && !ANDROID_BINDER_IPC + help + This enables the Rust implementation of the Binder driver. + + Binder is used in Android for both communication between processes, + and remote method invocation. + + This means one Android process can call a method/routine in another + Android process, using Binder to identify, invoke and pass arguments + between said processes. + +config ANDROID_BINDERFS + bool "Android Binderfs filesystem" + depends on ANDROID_BINDER_IPC + default n + help + Binderfs is a pseudo-filesystem for the Android Binder IPC driver + which can be mounted per-ipc namespace allowing to run multiple + instances of Android. + Each binderfs mount initially only contains a binder-control device. + It can be used to dynamically allocate new binder IPC devices via + ioctls. + +config ANDROID_BINDER_DEVICES + string "Android Binder devices" + depends on ANDROID_BINDER_IPC || ANDROID_BINDER_IPC_RUST + default "binder,hwbinder,vndbinder" + help + Default value for the binder.devices parameter. + + The binder.devices parameter is a comma-separated list of strings + that specifies the names of the binder device nodes that will be + created. Each binder device has its own context manager, and is + therefore logically separated from the other devices. + +config ANDROID_BINDER_ALLOC_KUNIT_TEST + tristate "KUnit Tests for Android Binder Alloc" if !KUNIT_ALL_TESTS + depends on ANDROID_BINDER_IPC && KUNIT + default KUNIT_ALL_TESTS + help + This feature builds the binder alloc KUnit tests. + + Each test case runs using a pared-down binder_alloc struct and + test-specific freelist, which allows this KUnit module to be loaded + for testing without interfering with a running system. + +endmenu diff --git a/drivers/android/Makefile b/drivers/android/Makefile new file mode 100644 index 000000000000..e0c650d3898e --- /dev/null +++ b/drivers/android/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y += -I$(src) # needed for trace events + +obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o +obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o binder_netlink.o +obj-$(CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST) += tests/ +obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += binder/ diff --git a/drivers/android/binder.c b/drivers/android/binder.c new file mode 100644 index 000000000000..535fc881c8da --- /dev/null +++ b/drivers/android/binder.c @@ -0,0 +1,7160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* binder.c + * + * Android IPC Subsystem + * + * Copyright (C) 2007-2008 Google, Inc. + */ + +/* + * Locking overview + * + * There are 3 main spinlocks which must be acquired in the + * order shown: + * + * 1) proc->outer_lock : protects binder_ref + * binder_proc_lock() and binder_proc_unlock() are + * used to acq/rel. + * 2) node->lock : protects most fields of binder_node. + * binder_node_lock() and binder_node_unlock() are + * used to acq/rel + * 3) proc->inner_lock : protects the thread and node lists + * (proc->threads, proc->waiting_threads, proc->nodes) + * and all todo lists associated with the binder_proc + * (proc->todo, thread->todo, proc->delivered_death and + * node->async_todo), as well as thread->transaction_stack + * binder_inner_proc_lock() and binder_inner_proc_unlock() + * are used to acq/rel + * + * Any lock under procA must never be nested under any lock at the same + * level or below on procB. + * + * Functions that require a lock held on entry indicate which lock + * in the suffix of the function name: + * + * foo_olocked() : requires node->outer_lock + * foo_nlocked() : requires node->lock + * foo_ilocked() : requires proc->inner_lock + * foo_oilocked(): requires proc->outer_lock and proc->inner_lock + * foo_nilocked(): requires node->lock and proc->inner_lock + * ... + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/fdtable.h> +#include <linux/file.h> +#include <linux/freezer.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/nsproxy.h> +#include <linux/poll.h> +#include <linux/debugfs.h> +#include <linux/rbtree.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/pid_namespace.h> +#include <linux/security.h> +#include <linux/spinlock.h> +#include <linux/ratelimit.h> +#include <linux/syscalls.h> +#include <linux/task_work.h> +#include <linux/sizes.h> +#include <linux/ktime.h> + +#include <kunit/visibility.h> + +#include <uapi/linux/android/binder.h> + +#include <linux/cacheflush.h> + +#include "binder_netlink.h" +#include "binder_internal.h" +#include "binder_trace.h" + +static HLIST_HEAD(binder_deferred_list); +static DEFINE_MUTEX(binder_deferred_lock); + +static HLIST_HEAD(binder_devices); +static DEFINE_SPINLOCK(binder_devices_lock); + +static HLIST_HEAD(binder_procs); +static DEFINE_MUTEX(binder_procs_lock); + +static HLIST_HEAD(binder_dead_nodes); +static DEFINE_SPINLOCK(binder_dead_nodes_lock); + +static struct dentry *binder_debugfs_dir_entry_root; +static struct dentry *binder_debugfs_dir_entry_proc; +static atomic_t binder_last_id; + +static int proc_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(proc); + +#define FORBIDDEN_MMAP_FLAGS (VM_WRITE) + +enum { + BINDER_DEBUG_USER_ERROR = 1U << 0, + BINDER_DEBUG_FAILED_TRANSACTION = 1U << 1, + BINDER_DEBUG_DEAD_TRANSACTION = 1U << 2, + BINDER_DEBUG_OPEN_CLOSE = 1U << 3, + BINDER_DEBUG_DEAD_BINDER = 1U << 4, + BINDER_DEBUG_DEATH_NOTIFICATION = 1U << 5, + BINDER_DEBUG_READ_WRITE = 1U << 6, + BINDER_DEBUG_USER_REFS = 1U << 7, + BINDER_DEBUG_THREADS = 1U << 8, + BINDER_DEBUG_TRANSACTION = 1U << 9, + BINDER_DEBUG_TRANSACTION_COMPLETE = 1U << 10, + BINDER_DEBUG_FREE_BUFFER = 1U << 11, + BINDER_DEBUG_INTERNAL_REFS = 1U << 12, + BINDER_DEBUG_PRIORITY_CAP = 1U << 13, + BINDER_DEBUG_SPINLOCKS = 1U << 14, +}; +static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | + BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; +module_param_named(debug_mask, binder_debug_mask, uint, 0644); + +char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +module_param_named(devices, binder_devices_param, charp, 0444); + +static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); +static int binder_stop_on_user_error; + +static int binder_set_stop_on_user_error(const char *val, + const struct kernel_param *kp) +{ + int ret; + + ret = param_set_int(val, kp); + if (binder_stop_on_user_error < 2) + wake_up(&binder_user_error_wait); + return ret; +} +module_param_call(stop_on_user_error, binder_set_stop_on_user_error, + param_get_int, &binder_stop_on_user_error, 0644); + +static __printf(2, 3) void binder_debug(int mask, const char *format, ...) +{ + struct va_format vaf; + va_list args; + + if (binder_debug_mask & mask) { + va_start(args, format); + vaf.va = &args; + vaf.fmt = format; + pr_info_ratelimited("%pV", &vaf); + va_end(args); + } +} + +#define binder_txn_error(x...) \ + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, x) + +static __printf(1, 2) void binder_user_error(const char *format, ...) +{ + struct va_format vaf; + va_list args; + + if (binder_debug_mask & BINDER_DEBUG_USER_ERROR) { + va_start(args, format); + vaf.va = &args; + vaf.fmt = format; + pr_info_ratelimited("%pV", &vaf); + va_end(args); + } + + if (binder_stop_on_user_error) + binder_stop_on_user_error = 2; +} + +#define binder_set_extended_error(ee, _id, _command, _param) \ + do { \ + (ee)->id = _id; \ + (ee)->command = _command; \ + (ee)->param = _param; \ + } while (0) + +#define to_flat_binder_object(hdr) \ + container_of(hdr, struct flat_binder_object, hdr) + +#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr) + +#define to_binder_buffer_object(hdr) \ + container_of(hdr, struct binder_buffer_object, hdr) + +#define to_binder_fd_array_object(hdr) \ + container_of(hdr, struct binder_fd_array_object, hdr) + +static struct binder_stats binder_stats; + +static inline void binder_stats_deleted(enum binder_stat_types type) +{ + atomic_inc(&binder_stats.obj_deleted[type]); +} + +static inline void binder_stats_created(enum binder_stat_types type) +{ + atomic_inc(&binder_stats.obj_created[type]); +} + +struct binder_transaction_log_entry { + int debug_id; + int debug_id_done; + int call_type; + int from_proc; + int from_thread; + int target_handle; + int to_proc; + int to_thread; + int to_node; + int data_size; + int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; + char context_name[BINDERFS_MAX_NAME + 1]; +}; + +struct binder_transaction_log { + atomic_t cur; + bool full; + struct binder_transaction_log_entry entry[32]; +}; + +static struct binder_transaction_log binder_transaction_log; +static struct binder_transaction_log binder_transaction_log_failed; + +static struct binder_transaction_log_entry *binder_transaction_log_add( + struct binder_transaction_log *log) +{ + struct binder_transaction_log_entry *e; + unsigned int cur = atomic_inc_return(&log->cur); + + if (cur >= ARRAY_SIZE(log->entry)) + log->full = true; + e = &log->entry[cur % ARRAY_SIZE(log->entry)]; + WRITE_ONCE(e->debug_id_done, 0); + /* + * write-barrier to synchronize access to e->debug_id_done. + * We make sure the initialized 0 value is seen before + * memset() other fields are zeroed by memset. + */ + smp_wmb(); + memset(e, 0, sizeof(*e)); + return e; +} + +enum binder_deferred_state { + BINDER_DEFERRED_FLUSH = 0x01, + BINDER_DEFERRED_RELEASE = 0x02, +}; + +enum { + BINDER_LOOPER_STATE_REGISTERED = 0x01, + BINDER_LOOPER_STATE_ENTERED = 0x02, + BINDER_LOOPER_STATE_EXITED = 0x04, + BINDER_LOOPER_STATE_INVALID = 0x08, + BINDER_LOOPER_STATE_WAITING = 0x10, + BINDER_LOOPER_STATE_POLL = 0x20, +}; + +/** + * binder_proc_lock() - Acquire outer lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->outer_lock. Used to protect binder_ref + * structures associated with the given proc. + */ +#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) +static void +_binder_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->outer_lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->outer_lock); +} + +/** + * binder_proc_unlock() - Release outer lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_proc_lock() + */ +#define binder_proc_unlock(proc) _binder_proc_unlock(proc, __LINE__) +static void +_binder_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->outer_lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->outer_lock); +} + +/** + * binder_inner_proc_lock() - Acquire inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Acquires proc->inner_lock. Used to protect todo lists + */ +#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) +static void +_binder_inner_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->inner_lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&proc->inner_lock); +} + +/** + * binder_inner_proc_unlock() - Release inner lock for given binder_proc + * @proc: struct binder_proc to acquire + * + * Release lock acquired via binder_inner_proc_lock() + */ +#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) +static void +_binder_inner_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->inner_lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&proc->inner_lock); +} + +/** + * binder_node_lock() - Acquire spinlock for given binder_node + * @node: struct binder_node to acquire + * + * Acquires node->lock. Used to protect binder_node fields + */ +#define binder_node_lock(node) _binder_node_lock(node, __LINE__) +static void +_binder_node_lock(struct binder_node *node, int line) + __acquires(&node->lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); +} + +/** + * binder_node_unlock() - Release spinlock for given binder_proc + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) +static void +_binder_node_unlock(struct binder_node *node, int line) + __releases(&node->lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_unlock(&node->lock); +} + +/** + * binder_node_inner_lock() - Acquire node and inner locks + * @node: struct binder_node to acquire + * + * Acquires node->lock. If node->proc also acquires + * proc->inner_lock. Used to protect binder_node fields + */ +#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) +static void +_binder_node_inner_lock(struct binder_node *node, int line) + __acquires(&node->lock) __acquires(&node->proc->inner_lock) +{ + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + spin_lock(&node->lock); + if (node->proc) + binder_inner_proc_lock(node->proc); + else + /* annotation for sparse */ + __acquire(&node->proc->inner_lock); +} + +/** + * binder_node_inner_unlock() - Release node and inner locks + * @node: struct binder_node to acquire + * + * Release lock acquired via binder_node_lock() + */ +#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) +static void +_binder_node_inner_unlock(struct binder_node *node, int line) + __releases(&node->lock) __releases(&node->proc->inner_lock) +{ + struct binder_proc *proc = node->proc; + + binder_debug(BINDER_DEBUG_SPINLOCKS, + "%s: line=%d\n", __func__, line); + if (proc) + binder_inner_proc_unlock(proc); + else + /* annotation for sparse */ + __release(&node->proc->inner_lock); + spin_unlock(&node->lock); +} + +static bool binder_worklist_empty_ilocked(struct list_head *list) +{ + return list_empty(list); +} + +/** + * binder_worklist_empty() - Check if no items on the work list + * @proc: binder_proc associated with list + * @list: list to check + * + * Return: true if there are no items on list, else false + */ +static bool binder_worklist_empty(struct binder_proc *proc, + struct list_head *list) +{ + bool ret; + + binder_inner_proc_lock(proc); + ret = binder_worklist_empty_ilocked(list); + binder_inner_proc_unlock(proc); + return ret; +} + +/** + * binder_enqueue_work_ilocked() - Add an item to the work list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_work_ilocked(struct binder_work *work, + struct list_head *target_list) +{ + BUG_ON(target_list == NULL); + BUG_ON(work->entry.next && !list_empty(&work->entry)); + list_add_tail(&work->entry, target_list); +} + +/** + * binder_enqueue_deferred_thread_work_ilocked() - Add deferred thread work + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread. Doesn't set the process_todo + * flag, which means that (if it wasn't already set) the thread will go to + * sleep without handling this work when it calls read. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + WARN_ON(!list_empty(&thread->waiting_thread_node)); + binder_enqueue_work_ilocked(work, &thread->todo); +} + +/** + * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + WARN_ON(!list_empty(&thread->waiting_thread_node)); + binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + + thread->process_todo = true; +} + +/** + * binder_enqueue_thread_work() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + */ +static void +binder_enqueue_thread_work(struct binder_thread *thread, + struct binder_work *work) +{ + binder_inner_proc_lock(thread->proc); + binder_enqueue_thread_work_ilocked(thread, work); + binder_inner_proc_unlock(thread->proc); +} + +static void +binder_dequeue_work_ilocked(struct binder_work *work) +{ + list_del_init(&work->entry); +} + +/** + * binder_dequeue_work() - Removes an item from the work list + * @proc: binder_proc associated with list + * @work: struct binder_work to remove from list + * + * Removes the specified work item from whatever list it is on. + * Can safely be called if work is not on any list. + */ +static void +binder_dequeue_work(struct binder_proc *proc, struct binder_work *work) +{ + binder_inner_proc_lock(proc); + binder_dequeue_work_ilocked(work); + binder_inner_proc_unlock(proc); +} + +static struct binder_work *binder_dequeue_work_head_ilocked( + struct list_head *list) +{ + struct binder_work *w; + + w = list_first_entry_or_null(list, struct binder_work, entry); + if (w) + list_del_init(&w->entry); + return w; +} + +static void +binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); +static void binder_free_thread(struct binder_thread *thread); +static void binder_free_proc(struct binder_proc *proc); +static void binder_inc_node_tmpref_ilocked(struct binder_node *node); + +static bool binder_has_work_ilocked(struct binder_thread *thread, + bool do_proc_work) +{ + return thread->process_todo || + thread->looper_need_return || + (do_proc_work && + !binder_worklist_empty_ilocked(&thread->proc->todo)); +} + +static bool binder_has_work(struct binder_thread *thread, bool do_proc_work) +{ + bool has_work; + + binder_inner_proc_lock(thread->proc); + has_work = binder_has_work_ilocked(thread, do_proc_work); + binder_inner_proc_unlock(thread->proc); + + return has_work; +} + +static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread) +{ + return !thread->transaction_stack && + binder_worklist_empty_ilocked(&thread->todo); +} + +static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc, + bool sync) +{ + struct rb_node *n; + struct binder_thread *thread; + + for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->looper & BINDER_LOOPER_STATE_POLL && + binder_available_for_proc_work_ilocked(thread)) { + if (sync) + wake_up_interruptible_sync(&thread->wait); + else + wake_up_interruptible(&thread->wait); + } + } +} + +/** + * binder_select_thread_ilocked() - selects a thread for doing proc work. + * @proc: process to select a thread from + * + * Note that calling this function moves the thread off the waiting_threads + * list, so it can only be woken up by the caller of this function, or a + * signal. Therefore, callers *should* always wake up the thread this function + * returns. + * + * Return: If there's a thread currently waiting for process work, + * returns that thread. Otherwise returns NULL. + */ +static struct binder_thread * +binder_select_thread_ilocked(struct binder_proc *proc) +{ + struct binder_thread *thread; + + assert_spin_locked(&proc->inner_lock); + thread = list_first_entry_or_null(&proc->waiting_threads, + struct binder_thread, + waiting_thread_node); + + if (thread) + list_del_init(&thread->waiting_thread_node); + + return thread; +} + +/** + * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work. + * @proc: process to wake up a thread in + * @thread: specific thread to wake-up (may be NULL) + * @sync: whether to do a synchronous wake-up + * + * This function wakes up a thread in the @proc process. + * The caller may provide a specific thread to wake-up in + * the @thread parameter. If @thread is NULL, this function + * will wake up threads that have called poll(). + * + * Note that for this function to work as expected, callers + * should first call binder_select_thread() to find a thread + * to handle the work (if they don't have a thread already), + * and pass the result into the @thread parameter. + */ +static void binder_wakeup_thread_ilocked(struct binder_proc *proc, + struct binder_thread *thread, + bool sync) +{ + assert_spin_locked(&proc->inner_lock); + + if (thread) { + if (sync) + wake_up_interruptible_sync(&thread->wait); + else + wake_up_interruptible(&thread->wait); + return; + } + + /* Didn't find a thread waiting for proc work; this can happen + * in two scenarios: + * 1. All threads are busy handling transactions + * In that case, one of those threads should call back into + * the kernel driver soon and pick up this work. + * 2. Threads are using the (e)poll interface, in which case + * they may be blocked on the waitqueue without having been + * added to waiting_threads. For this case, we just iterate + * over all threads not handling transaction work, and + * wake them all up. We wake all because we don't know whether + * a thread that called into (e)poll is handling non-binder + * work currently. + */ + binder_wakeup_poll_threads_ilocked(proc, sync); +} + +static void binder_wakeup_proc_ilocked(struct binder_proc *proc) +{ + struct binder_thread *thread = binder_select_thread_ilocked(proc); + + binder_wakeup_thread_ilocked(proc, thread, /* sync = */false); +} + +static void binder_set_nice(long nice) +{ + long min_nice; + + if (can_nice(current, nice)) { + set_user_nice(current, nice); + return; + } + min_nice = rlimit_to_nice(rlimit(RLIMIT_NICE)); + binder_debug(BINDER_DEBUG_PRIORITY_CAP, + "%d: nice value %ld not allowed use %ld instead\n", + current->pid, nice, min_nice); + set_user_nice(current, min_nice); + if (min_nice <= MAX_NICE) + return; + binder_user_error("%d RLIMIT_NICE not set\n", current->pid); +} + +static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc, + binder_uintptr_t ptr) +{ + struct rb_node *n = proc->nodes.rb_node; + struct binder_node *node; + + assert_spin_locked(&proc->inner_lock); + + while (n) { + node = rb_entry(n, struct binder_node, rb_node); + + if (ptr < node->ptr) + n = n->rb_left; + else if (ptr > node->ptr) + n = n->rb_right; + else { + /* + * take an implicit weak reference + * to ensure node stays alive until + * call to binder_put_node() + */ + binder_inc_node_tmpref_ilocked(node); + return node; + } + } + return NULL; +} + +static struct binder_node *binder_get_node(struct binder_proc *proc, + binder_uintptr_t ptr) +{ + struct binder_node *node; + + binder_inner_proc_lock(proc); + node = binder_get_node_ilocked(proc, ptr); + binder_inner_proc_unlock(proc); + return node; +} + +static struct binder_node *binder_init_node_ilocked( + struct binder_proc *proc, + struct binder_node *new_node, + struct flat_binder_object *fp) +{ + struct rb_node **p = &proc->nodes.rb_node; + struct rb_node *parent = NULL; + struct binder_node *node; + binder_uintptr_t ptr = fp ? fp->binder : 0; + binder_uintptr_t cookie = fp ? fp->cookie : 0; + __u32 flags = fp ? fp->flags : 0; + + assert_spin_locked(&proc->inner_lock); + + while (*p) { + + parent = *p; + node = rb_entry(parent, struct binder_node, rb_node); + + if (ptr < node->ptr) + p = &(*p)->rb_left; + else if (ptr > node->ptr) + p = &(*p)->rb_right; + else { + /* + * A matching node is already in + * the rb tree. Abandon the init + * and return it. + */ + binder_inc_node_tmpref_ilocked(node); + return node; + } + } + node = new_node; + binder_stats_created(BINDER_STAT_NODE); + node->tmp_refs++; + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, &proc->nodes); + node->debug_id = atomic_inc_return(&binder_last_id); + node->proc = proc; + node->ptr = ptr; + node->cookie = cookie; + node->work.type = BINDER_WORK_NODE; + node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX); + spin_lock_init(&node->lock); + INIT_LIST_HEAD(&node->work.entry); + INIT_LIST_HEAD(&node->async_todo); + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx created\n", + proc->pid, current->pid, node->debug_id, + (u64)node->ptr, (u64)node->cookie); + + return node; +} + +static struct binder_node *binder_new_node(struct binder_proc *proc, + struct flat_binder_object *fp) +{ + struct binder_node *node; + struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!new_node) + return NULL; + binder_inner_proc_lock(proc); + node = binder_init_node_ilocked(proc, new_node, fp); + binder_inner_proc_unlock(proc); + if (node != new_node) + /* + * The node was already added by another thread + */ + kfree(new_node); + + return node; +} + +static void binder_free_node(struct binder_node *node) +{ + kfree(node); + binder_stats_deleted(BINDER_STAT_NODE); +} + +static int binder_inc_node_nilocked(struct binder_node *node, int strong, + int internal, + struct list_head *target_list) +{ + struct binder_proc *proc = node->proc; + + assert_spin_locked(&node->lock); + if (proc) + assert_spin_locked(&proc->inner_lock); + if (strong) { + if (internal) { + if (target_list == NULL && + node->internal_strong_refs == 0 && + !(node->proc && + node == node->proc->context->binder_context_mgr_node && + node->has_strong_ref)) { + pr_err("invalid inc strong node for %d\n", + node->debug_id); + return -EINVAL; + } + node->internal_strong_refs++; + } else + node->local_strong_refs++; + if (!node->has_strong_ref && target_list) { + struct binder_thread *thread = container_of(target_list, + struct binder_thread, todo); + binder_dequeue_work_ilocked(&node->work); + BUG_ON(&thread->todo != target_list); + binder_enqueue_deferred_thread_work_ilocked(thread, + &node->work); + } + } else { + if (!internal) + node->local_weak_refs++; + if (!node->has_weak_ref && target_list && list_empty(&node->work.entry)) + binder_enqueue_work_ilocked(&node->work, target_list); + } + return 0; +} + +static int binder_inc_node(struct binder_node *node, int strong, int internal, + struct list_head *target_list) +{ + int ret; + + binder_node_inner_lock(node); + ret = binder_inc_node_nilocked(node, strong, internal, target_list); + binder_node_inner_unlock(node); + + return ret; +} + +static bool binder_dec_node_nilocked(struct binder_node *node, + int strong, int internal) +{ + struct binder_proc *proc = node->proc; + + assert_spin_locked(&node->lock); + if (proc) + assert_spin_locked(&proc->inner_lock); + if (strong) { + if (internal) + node->internal_strong_refs--; + else + node->local_strong_refs--; + if (node->local_strong_refs || node->internal_strong_refs) + return false; + } else { + if (!internal) + node->local_weak_refs--; + if (node->local_weak_refs || node->tmp_refs || + !hlist_empty(&node->refs)) + return false; + } + + if (proc && (node->has_strong_ref || node->has_weak_ref)) { + if (list_empty(&node->work.entry)) { + binder_enqueue_work_ilocked(&node->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + } + } else { + if (hlist_empty(&node->refs) && !node->local_strong_refs && + !node->local_weak_refs && !node->tmp_refs) { + if (proc) { + binder_dequeue_work_ilocked(&node->work); + rb_erase(&node->rb_node, &proc->nodes); + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "refless node %d deleted\n", + node->debug_id); + } else { + BUG_ON(!list_empty(&node->work.entry)); + spin_lock(&binder_dead_nodes_lock); + /* + * tmp_refs could have changed so + * check it again + */ + if (node->tmp_refs) { + spin_unlock(&binder_dead_nodes_lock); + return false; + } + hlist_del(&node->dead_node); + spin_unlock(&binder_dead_nodes_lock); + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "dead node %d deleted\n", + node->debug_id); + } + return true; + } + } + return false; +} + +static void binder_dec_node(struct binder_node *node, int strong, int internal) +{ + bool free_node; + + binder_node_inner_lock(node); + free_node = binder_dec_node_nilocked(node, strong, internal); + binder_node_inner_unlock(node); + if (free_node) + binder_free_node(node); +} + +static void binder_inc_node_tmpref_ilocked(struct binder_node *node) +{ + /* + * No call to binder_inc_node() is needed since we + * don't need to inform userspace of any changes to + * tmp_refs + */ + node->tmp_refs++; +} + +/** + * binder_inc_node_tmpref() - take a temporary reference on node + * @node: node to reference + * + * Take reference on node to prevent the node from being freed + * while referenced only by a local variable. The inner lock is + * needed to serialize with the node work on the queue (which + * isn't needed after the node is dead). If the node is dead + * (node->proc is NULL), use binder_dead_nodes_lock to protect + * node->tmp_refs against dead-node-only cases where the node + * lock cannot be acquired (eg traversing the dead node list to + * print nodes) + */ +static void binder_inc_node_tmpref(struct binder_node *node) +{ + binder_node_lock(node); + if (node->proc) + binder_inner_proc_lock(node->proc); + else + spin_lock(&binder_dead_nodes_lock); + binder_inc_node_tmpref_ilocked(node); + if (node->proc) + binder_inner_proc_unlock(node->proc); + else + spin_unlock(&binder_dead_nodes_lock); + binder_node_unlock(node); +} + +/** + * binder_dec_node_tmpref() - remove a temporary reference on node + * @node: node to reference + * + * Release temporary reference on node taken via binder_inc_node_tmpref() + */ +static void binder_dec_node_tmpref(struct binder_node *node) +{ + bool free_node; + + binder_node_inner_lock(node); + if (!node->proc) + spin_lock(&binder_dead_nodes_lock); + else + __acquire(&binder_dead_nodes_lock); + node->tmp_refs--; + BUG_ON(node->tmp_refs < 0); + if (!node->proc) + spin_unlock(&binder_dead_nodes_lock); + else + __release(&binder_dead_nodes_lock); + /* + * Call binder_dec_node() to check if all refcounts are 0 + * and cleanup is needed. Calling with strong=0 and internal=1 + * causes no actual reference to be released in binder_dec_node(). + * If that changes, a change is needed here too. + */ + free_node = binder_dec_node_nilocked(node, 0, 1); + binder_node_inner_unlock(node); + if (free_node) + binder_free_node(node); +} + +static void binder_put_node(struct binder_node *node) +{ + binder_dec_node_tmpref(node); +} + +static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, + u32 desc, bool need_strong_ref) +{ + struct rb_node *n = proc->refs_by_desc.rb_node; + struct binder_ref *ref; + + while (n) { + ref = rb_entry(n, struct binder_ref, rb_node_desc); + + if (desc < ref->data.desc) { + n = n->rb_left; + } else if (desc > ref->data.desc) { + n = n->rb_right; + } else if (need_strong_ref && !ref->data.strong) { + binder_user_error("tried to use weak ref as strong ref\n"); + return NULL; + } else { + return ref; + } + } + return NULL; +} + +/* Find the smallest unused descriptor the "slow way" */ +static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset) +{ + struct binder_ref *ref; + struct rb_node *n; + u32 desc; + + desc = offset; + for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { + ref = rb_entry(n, struct binder_ref, rb_node_desc); + if (ref->data.desc > desc) + break; + desc = ref->data.desc + 1; + } + + return desc; +} + +/* + * Find an available reference descriptor ID. The proc->outer_lock might + * be released in the process, in which case -EAGAIN is returned and the + * @desc should be considered invalid. + */ +static int get_ref_desc_olocked(struct binder_proc *proc, + struct binder_node *node, + u32 *desc) +{ + struct dbitmap *dmap = &proc->dmap; + unsigned int nbits, offset; + unsigned long *new, bit; + + /* 0 is reserved for the context manager */ + offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1; + + if (!dbitmap_enabled(dmap)) { + *desc = slow_desc_lookup_olocked(proc, offset); + return 0; + } + + if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) { + *desc = bit; + return 0; + } + + /* + * The dbitmap is full and needs to grow. The proc->outer_lock + * is briefly released to allocate the new bitmap safely. + */ + nbits = dbitmap_grow_nbits(dmap); + binder_proc_unlock(proc); + new = bitmap_zalloc(nbits, GFP_KERNEL); + binder_proc_lock(proc); + dbitmap_grow(dmap, new, nbits); + + return -EAGAIN; +} + +/** + * binder_get_ref_for_node_olocked() - get the ref associated with given node + * @proc: binder_proc that owns the ref + * @node: binder_node of target + * @new_ref: newly allocated binder_ref to be initialized or %NULL + * + * Look up the ref for the given node and return it if it exists + * + * If it doesn't exist and the caller provides a newly allocated + * ref, initialize the fields of the newly allocated ref and insert + * into the given proc rb_trees and node refs list. + * + * Return: the ref for node. It is possible that another thread + * allocated/initialized the ref first in which case the + * returned ref would be different than the passed-in + * new_ref. new_ref must be kfree'd by the caller in + * this case. + */ +static struct binder_ref *binder_get_ref_for_node_olocked( + struct binder_proc *proc, + struct binder_node *node, + struct binder_ref *new_ref) +{ + struct binder_ref *ref; + struct rb_node *parent; + struct rb_node **p; + u32 desc; + +retry: + p = &proc->refs_by_node.rb_node; + parent = NULL; + while (*p) { + parent = *p; + ref = rb_entry(parent, struct binder_ref, rb_node_node); + + if (node < ref->node) + p = &(*p)->rb_left; + else if (node > ref->node) + p = &(*p)->rb_right; + else + return ref; + } + if (!new_ref) + return NULL; + + /* might release the proc->outer_lock */ + if (get_ref_desc_olocked(proc, node, &desc) == -EAGAIN) + goto retry; + + binder_stats_created(BINDER_STAT_REF); + new_ref->data.debug_id = atomic_inc_return(&binder_last_id); + new_ref->proc = proc; + new_ref->node = node; + rb_link_node(&new_ref->rb_node_node, parent, p); + rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node); + + new_ref->data.desc = desc; + p = &proc->refs_by_desc.rb_node; + while (*p) { + parent = *p; + ref = rb_entry(parent, struct binder_ref, rb_node_desc); + + if (new_ref->data.desc < ref->data.desc) + p = &(*p)->rb_left; + else if (new_ref->data.desc > ref->data.desc) + p = &(*p)->rb_right; + else + BUG(); + } + rb_link_node(&new_ref->rb_node_desc, parent, p); + rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc); + + binder_node_lock(node); + hlist_add_head(&new_ref->node_entry, &node->refs); + + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d new ref %d desc %d for node %d\n", + proc->pid, new_ref->data.debug_id, new_ref->data.desc, + node->debug_id); + binder_node_unlock(node); + return new_ref; +} + +static void binder_cleanup_ref_olocked(struct binder_ref *ref) +{ + struct dbitmap *dmap = &ref->proc->dmap; + bool delete_node = false; + + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d delete ref %d desc %d for node %d\n", + ref->proc->pid, ref->data.debug_id, ref->data.desc, + ref->node->debug_id); + + if (dbitmap_enabled(dmap)) + dbitmap_clear_bit(dmap, ref->data.desc); + rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc); + rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node); + + binder_node_inner_lock(ref->node); + if (ref->data.strong) + binder_dec_node_nilocked(ref->node, 1, 1); + + hlist_del(&ref->node_entry); + delete_node = binder_dec_node_nilocked(ref->node, 0, 1); + binder_node_inner_unlock(ref->node); + /* + * Clear ref->node unless we want the caller to free the node + */ + if (!delete_node) { + /* + * The caller uses ref->node to determine + * whether the node needs to be freed. Clear + * it since the node is still alive. + */ + ref->node = NULL; + } + + if (ref->death) { + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "%d delete ref %d desc %d has death notification\n", + ref->proc->pid, ref->data.debug_id, + ref->data.desc); + binder_dequeue_work(ref->proc, &ref->death->work); + binder_stats_deleted(BINDER_STAT_DEATH); + } + + if (ref->freeze) { + binder_dequeue_work(ref->proc, &ref->freeze->work); + binder_stats_deleted(BINDER_STAT_FREEZE); + } + + binder_stats_deleted(BINDER_STAT_REF); +} + +/** + * binder_inc_ref_olocked() - increment the ref for given handle + * @ref: ref to be incremented + * @strong: if true, strong increment, else weak + * @target_list: list to queue node work on + * + * Increment the ref. @ref->proc->outer_lock must be held on entry + * + * Return: 0, if successful, else errno + */ +static int binder_inc_ref_olocked(struct binder_ref *ref, int strong, + struct list_head *target_list) +{ + int ret; + + if (strong) { + if (ref->data.strong == 0) { + ret = binder_inc_node(ref->node, 1, 1, target_list); + if (ret) + return ret; + } + ref->data.strong++; + } else { + if (ref->data.weak == 0) { + ret = binder_inc_node(ref->node, 0, 1, target_list); + if (ret) + return ret; + } + ref->data.weak++; + } + return 0; +} + +/** + * binder_dec_ref_olocked() - dec the ref for given handle + * @ref: ref to be decremented + * @strong: if true, strong decrement, else weak + * + * Decrement the ref. + * + * Return: %true if ref is cleaned up and ready to be freed. + */ +static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong) +{ + if (strong) { + if (ref->data.strong == 0) { + binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n", + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; + } + ref->data.strong--; + if (ref->data.strong == 0) + binder_dec_node(ref->node, strong, 1); + } else { + if (ref->data.weak == 0) { + binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n", + ref->proc->pid, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak); + return false; + } + ref->data.weak--; + } + if (ref->data.strong == 0 && ref->data.weak == 0) { + binder_cleanup_ref_olocked(ref); + return true; + } + return false; +} + +/** + * binder_get_node_from_ref() - get the node from the given proc/desc + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @need_strong_ref: if true, only return node if ref is strong + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, return the associated binder_node + * + * Return: a binder_node or NULL if not found or not strong when strong required + */ +static struct binder_node *binder_get_node_from_ref( + struct binder_proc *proc, + u32 desc, bool need_strong_ref, + struct binder_ref_data *rdata) +{ + struct binder_node *node; + struct binder_ref *ref; + + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, need_strong_ref); + if (!ref) + goto err_no_ref; + node = ref->node; + /* + * Take an implicit reference on the node to ensure + * it stays alive until the call to binder_put_node() + */ + binder_inc_node_tmpref(node); + if (rdata) + *rdata = ref->data; + binder_proc_unlock(proc); + + return node; + +err_no_ref: + binder_proc_unlock(proc); + return NULL; +} + +/** + * binder_free_ref() - free the binder_ref + * @ref: ref to free + * + * Free the binder_ref. Free the binder_node indicated by ref->node + * (if non-NULL) and the binder_ref_death indicated by ref->death. + */ +static void binder_free_ref(struct binder_ref *ref) +{ + if (ref->node) + binder_free_node(ref->node); + kfree(ref->death); + kfree(ref->freeze); + kfree(ref); +} + +/* shrink descriptor bitmap if needed */ +static void try_shrink_dmap(struct binder_proc *proc) +{ + unsigned long *new; + int nbits; + + binder_proc_lock(proc); + nbits = dbitmap_shrink_nbits(&proc->dmap); + binder_proc_unlock(proc); + + if (!nbits) + return; + + new = bitmap_zalloc(nbits, GFP_KERNEL); + binder_proc_lock(proc); + dbitmap_shrink(&proc->dmap, new, nbits); + binder_proc_unlock(proc); +} + +/** + * binder_update_ref_for_handle() - inc/dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @increment: true=inc reference, false=dec reference + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Given a proc and ref handle, increment or decrement the ref + * according to "increment" arg. + * + * Return: 0 if successful, else errno + */ +static int binder_update_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool increment, bool strong, + struct binder_ref_data *rdata) +{ + int ret = 0; + struct binder_ref *ref; + bool delete_ref = false; + + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, desc, strong); + if (!ref) { + ret = -EINVAL; + goto err_no_ref; + } + if (increment) + ret = binder_inc_ref_olocked(ref, strong, NULL); + else + delete_ref = binder_dec_ref_olocked(ref, strong); + + if (rdata) + *rdata = ref->data; + binder_proc_unlock(proc); + + if (delete_ref) { + binder_free_ref(ref); + try_shrink_dmap(proc); + } + return ret; + +err_no_ref: + binder_proc_unlock(proc); + return ret; +} + +/** + * binder_dec_ref_for_handle() - dec the ref for given handle + * @proc: proc containing the ref + * @desc: the handle associated with the ref + * @strong: true=strong reference, false=weak reference + * @rdata: the id/refcount data for the ref + * + * Just calls binder_update_ref_for_handle() to decrement the ref. + * + * Return: 0 if successful, else errno + */ +static int binder_dec_ref_for_handle(struct binder_proc *proc, + uint32_t desc, bool strong, struct binder_ref_data *rdata) +{ + return binder_update_ref_for_handle(proc, desc, false, strong, rdata); +} + + +/** + * binder_inc_ref_for_node() - increment the ref for given proc/node + * @proc: proc containing the ref + * @node: target node + * @strong: true=strong reference, false=weak reference + * @target_list: worklist to use if node is incremented + * @rdata: the id/refcount data for the ref + * + * Given a proc and node, increment the ref. Create the ref if it + * doesn't already exist + * + * Return: 0 if successful, else errno + */ +static int binder_inc_ref_for_node(struct binder_proc *proc, + struct binder_node *node, + bool strong, + struct list_head *target_list, + struct binder_ref_data *rdata) +{ + struct binder_ref *ref; + struct binder_ref *new_ref = NULL; + int ret = 0; + + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, NULL); + if (!ref) { + binder_proc_unlock(proc); + new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!new_ref) + return -ENOMEM; + binder_proc_lock(proc); + ref = binder_get_ref_for_node_olocked(proc, node, new_ref); + } + ret = binder_inc_ref_olocked(ref, strong, target_list); + *rdata = ref->data; + if (ret && ref == new_ref) { + /* + * Cleanup the failed reference here as the target + * could now be dead and have already released its + * references by now. Calling on the new reference + * with strong=0 and a tmp_refs will not decrement + * the node. The new_ref gets kfree'd below. + */ + binder_cleanup_ref_olocked(new_ref); + ref = NULL; + } + + binder_proc_unlock(proc); + if (new_ref && ref != new_ref) + /* + * Another thread created the ref first so + * free the one we allocated + */ + kfree(new_ref); + return ret; +} + +static void binder_pop_transaction_ilocked(struct binder_thread *target_thread, + struct binder_transaction *t) +{ + BUG_ON(!target_thread); + assert_spin_locked(&target_thread->proc->inner_lock); + BUG_ON(target_thread->transaction_stack != t); + BUG_ON(target_thread->transaction_stack->from != target_thread); + target_thread->transaction_stack = + target_thread->transaction_stack->from_parent; + t->from = NULL; +} + +/** + * binder_thread_dec_tmpref() - decrement thread->tmp_ref + * @thread: thread to decrement + * + * A thread needs to be kept alive while being used to create or + * handle a transaction. binder_get_txn_from() is used to safely + * extract t->from from a binder_transaction and keep the thread + * indicated by t->from from being freed. When done with that + * binder_thread, this function is called to decrement the + * tmp_ref and free if appropriate (thread has been released + * and no transaction being processed by the driver) + */ +static void binder_thread_dec_tmpref(struct binder_thread *thread) +{ + /* + * atomic is used to protect the counter value while + * it cannot reach zero or thread->is_dead is false + */ + binder_inner_proc_lock(thread->proc); + atomic_dec(&thread->tmp_ref); + if (thread->is_dead && !atomic_read(&thread->tmp_ref)) { + binder_inner_proc_unlock(thread->proc); + binder_free_thread(thread); + return; + } + binder_inner_proc_unlock(thread->proc); +} + +/** + * binder_proc_dec_tmpref() - decrement proc->tmp_ref + * @proc: proc to decrement + * + * A binder_proc needs to be kept alive while being used to create or + * handle a transaction. proc->tmp_ref is incremented when + * creating a new transaction or the binder_proc is currently in-use + * by threads that are being released. When done with the binder_proc, + * this function is called to decrement the counter and free the + * proc if appropriate (proc has been released, all threads have + * been released and not currently in-use to process a transaction). + */ +static void binder_proc_dec_tmpref(struct binder_proc *proc) +{ + binder_inner_proc_lock(proc); + proc->tmp_ref--; + if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) && + !proc->tmp_ref) { + binder_inner_proc_unlock(proc); + binder_free_proc(proc); + return; + } + binder_inner_proc_unlock(proc); +} + +/** + * binder_get_txn_from() - safely extract the "from" thread in transaction + * @t: binder transaction for t->from + * + * Atomically return the "from" thread and increment the tmp_ref + * count for the thread to ensure it stays alive until + * binder_thread_dec_tmpref() is called. + * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from( + struct binder_transaction *t) +{ + struct binder_thread *from; + + guard(spinlock)(&t->lock); + from = t->from; + if (from) + atomic_inc(&from->tmp_ref); + return from; +} + +/** + * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock + * @t: binder transaction for t->from + * + * Same as binder_get_txn_from() except it also acquires the proc->inner_lock + * to guarantee that the thread cannot be released while operating on it. + * The caller must call binder_inner_proc_unlock() to release the inner lock + * as well as call binder_dec_thread_txn() to release the reference. + * + * Return: the value of t->from + */ +static struct binder_thread *binder_get_txn_from_and_acq_inner( + struct binder_transaction *t) + __acquires(&t->from->proc->inner_lock) +{ + struct binder_thread *from; + + from = binder_get_txn_from(t); + if (!from) { + __acquire(&from->proc->inner_lock); + return NULL; + } + binder_inner_proc_lock(from->proc); + if (t->from) { + BUG_ON(from != t->from); + return from; + } + binder_inner_proc_unlock(from->proc); + __acquire(&from->proc->inner_lock); + binder_thread_dec_tmpref(from); + return NULL; +} + +/** + * binder_free_txn_fixups() - free unprocessed fd fixups + * @t: binder transaction for t->from + * + * If the transaction is being torn down prior to being + * processed by the target process, free all of the + * fd fixups and fput the file structs. It is safe to + * call this function after the fixups have been + * processed -- in that case, the list will be empty. + */ +static void binder_free_txn_fixups(struct binder_transaction *t) +{ + struct binder_txn_fd_fixup *fixup, *tmp; + + list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) { + fput(fixup->file); + if (fixup->target_fd >= 0) + put_unused_fd(fixup->target_fd); + list_del(&fixup->fixup_entry); + kfree(fixup); + } +} + +static void binder_txn_latency_free(struct binder_transaction *t) +{ + int from_proc, from_thread, to_proc, to_thread; + + spin_lock(&t->lock); + from_proc = t->from ? t->from->proc->pid : 0; + from_thread = t->from ? t->from->pid : 0; + to_proc = t->to_proc ? t->to_proc->pid : 0; + to_thread = t->to_thread ? t->to_thread->pid : 0; + spin_unlock(&t->lock); + + trace_binder_txn_latency_free(t, from_proc, from_thread, to_proc, to_thread); +} + +static void binder_free_transaction(struct binder_transaction *t) +{ + struct binder_proc *target_proc = t->to_proc; + + if (target_proc) { + binder_inner_proc_lock(target_proc); + target_proc->outstanding_txns--; + if (target_proc->outstanding_txns < 0) + pr_warn("%s: Unexpected outstanding_txns %d\n", + __func__, target_proc->outstanding_txns); + if (!target_proc->outstanding_txns && target_proc->is_frozen) + wake_up_interruptible_all(&target_proc->freeze_wait); + if (t->buffer) + t->buffer->transaction = NULL; + binder_inner_proc_unlock(target_proc); + } + if (trace_binder_txn_latency_free_enabled()) + binder_txn_latency_free(t); + /* + * If the transaction has no target_proc, then + * t->buffer->transaction has already been cleared. + */ + binder_free_txn_fixups(t); + kfree(t); + binder_stats_deleted(BINDER_STAT_TRANSACTION); +} + +static void binder_send_failed_reply(struct binder_transaction *t, + uint32_t error_code) +{ + struct binder_thread *target_thread; + struct binder_transaction *next; + + BUG_ON(t->flags & TF_ONE_WAY); + while (1) { + target_thread = binder_get_txn_from_and_acq_inner(t); + if (target_thread) { + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "send failed reply for transaction %d to %d:%d\n", + t->debug_id, + target_thread->proc->pid, + target_thread->pid); + + binder_pop_transaction_ilocked(target_thread, t); + if (target_thread->reply_error.cmd == BR_OK) { + target_thread->reply_error.cmd = error_code; + binder_enqueue_thread_work_ilocked( + target_thread, + &target_thread->reply_error.work); + wake_up_interruptible(&target_thread->wait); + } else { + /* + * Cannot get here for normal operation, but + * we can if multiple synchronous transactions + * are sent without blocking for responses. + * Just ignore the 2nd error in this case. + */ + pr_warn("Unexpected reply error: %u\n", + target_thread->reply_error.cmd); + } + binder_inner_proc_unlock(target_thread->proc); + binder_thread_dec_tmpref(target_thread); + binder_free_transaction(t); + return; + } + __release(&target_thread->proc->inner_lock); + next = t->from_parent; + + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "send failed reply for transaction %d, target dead\n", + t->debug_id); + + binder_free_transaction(t); + if (next == NULL) { + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "reply failed, no target thread at root\n"); + return; + } + t = next; + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "reply failed, no target thread -- retry %d\n", + t->debug_id); + } +} + +/** + * binder_cleanup_transaction() - cleans up undelivered transaction + * @t: transaction that needs to be cleaned up + * @reason: reason the transaction wasn't delivered + * @error_code: error to return to caller (if synchronous call) + */ +static void binder_cleanup_transaction(struct binder_transaction *t, + const char *reason, + uint32_t error_code) +{ + if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) { + binder_send_failed_reply(t, error_code); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered transaction %d, %s\n", + t->debug_id, reason); + binder_free_transaction(t); + } +} + +/** + * binder_get_object() - gets object and checks for valid metadata + * @proc: binder_proc owning the buffer + * @u: sender's user pointer to base of buffer + * @buffer: binder_buffer that we're parsing. + * @offset: offset in the @buffer at which to validate an object. + * @object: struct binder_object to read into + * + * Copy the binder object at the given offset into @object. If @u is + * provided then the copy is from the sender's buffer. If not, then + * it is copied from the target's @buffer. + * + * Return: If there's a valid metadata object at @offset, the + * size of that object. Otherwise, it returns zero. The object + * is read into the struct binder_object pointed to by @object. + */ +static size_t binder_get_object(struct binder_proc *proc, + const void __user *u, + struct binder_buffer *buffer, + unsigned long offset, + struct binder_object *object) +{ + size_t read_size; + struct binder_object_header *hdr; + size_t object_size = 0; + + read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) + return 0; + + if (u) { + if (copy_from_user(object, u + offset, read_size)) + return 0; + } else { + if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, + offset, read_size)) + return 0; + } + + /* Ok, now see if we read a complete object. */ + hdr = &object->hdr; + switch (hdr->type) { + case BINDER_TYPE_BINDER: + case BINDER_TYPE_WEAK_BINDER: + case BINDER_TYPE_HANDLE: + case BINDER_TYPE_WEAK_HANDLE: + object_size = sizeof(struct flat_binder_object); + break; + case BINDER_TYPE_FD: + object_size = sizeof(struct binder_fd_object); + break; + case BINDER_TYPE_PTR: + object_size = sizeof(struct binder_buffer_object); + break; + case BINDER_TYPE_FDA: + object_size = sizeof(struct binder_fd_array_object); + break; + default: + return 0; + } + if (offset <= buffer->data_size - object_size && + buffer->data_size >= object_size) + return object_size; + else + return 0; +} + +/** + * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer. + * @proc: binder_proc owning the buffer + * @b: binder_buffer containing the object + * @object: struct binder_object to read into + * @index: index in offset array at which the binder_buffer_object is + * located + * @start_offset: points to the start of the offset array + * @object_offsetp: offset of @object read from @b + * @num_valid: the number of valid offsets in the offset array + * + * Return: If @index is within the valid range of the offset array + * described by @start and @num_valid, and if there's a valid + * binder_buffer_object at the offset found in index @index + * of the offset array, that object is returned. Otherwise, + * %NULL is returned. + * Note that the offset found in index @index itself is not + * verified; this function assumes that @num_valid elements + * from @start were previously verified to have valid offsets. + * If @object_offsetp is non-NULL, then the offset within + * @b is written to it. + */ +static struct binder_buffer_object *binder_validate_ptr( + struct binder_proc *proc, + struct binder_buffer *b, + struct binder_object *object, + binder_size_t index, + binder_size_t start_offset, + binder_size_t *object_offsetp, + binder_size_t num_valid) +{ + size_t object_size; + binder_size_t object_offset; + unsigned long buffer_offset; + + if (index >= num_valid) + return NULL; + + buffer_offset = start_offset + sizeof(binder_size_t) * index; + if (binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, + b, buffer_offset, + sizeof(object_offset))) + return NULL; + object_size = binder_get_object(proc, NULL, b, object_offset, object); + if (!object_size || object->hdr.type != BINDER_TYPE_PTR) + return NULL; + if (object_offsetp) + *object_offsetp = object_offset; + + return &object->bbo; +} + +/** + * binder_validate_fixup() - validates pointer/fd fixups happen in order. + * @proc: binder_proc owning the buffer + * @b: transaction buffer + * @objects_start_offset: offset to start of objects buffer + * @buffer_obj_offset: offset to binder_buffer_object in which to fix up + * @fixup_offset: start offset in @buffer to fix up + * @last_obj_offset: offset to last binder_buffer_object that we fixed + * @last_min_offset: minimum fixup offset in object at @last_obj_offset + * + * Return: %true if a fixup in buffer @buffer at offset @offset is + * allowed. + * + * For safety reasons, we only allow fixups inside a buffer to happen + * at increasing offsets; additionally, we only allow fixup on the last + * buffer object that was verified, or one of its parents. + * + * Example of what is allowed: + * + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = C, offset = 0) + * E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) + * + * Examples of what is not allowed: + * + * Decreasing offsets within the same parent: + * A + * C (parent = A, offset = 16) + * B (parent = A, offset = 0) // decreasing offset within A + * + * Referring to a parent that wasn't the last object or any of its parents: + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = B, offset = 0) // B is not A or any of A's parents + */ +static bool binder_validate_fixup(struct binder_proc *proc, + struct binder_buffer *b, + binder_size_t objects_start_offset, + binder_size_t buffer_obj_offset, + binder_size_t fixup_offset, + binder_size_t last_obj_offset, + binder_size_t last_min_offset) +{ + if (!last_obj_offset) { + /* Nothing to fix up in */ + return false; + } + + while (last_obj_offset != buffer_obj_offset) { + unsigned long buffer_offset; + struct binder_object last_object; + struct binder_buffer_object *last_bbo; + size_t object_size = binder_get_object(proc, NULL, b, + last_obj_offset, + &last_object); + if (object_size != sizeof(*last_bbo)) + return false; + + last_bbo = &last_object.bbo; + /* + * Safe to retrieve the parent of last_obj, since it + * was already previously verified by the driver. + */ + if ((last_bbo->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0) + return false; + last_min_offset = last_bbo->parent_offset + sizeof(uintptr_t); + buffer_offset = objects_start_offset + + sizeof(binder_size_t) * last_bbo->parent; + if (binder_alloc_copy_from_buffer(&proc->alloc, + &last_obj_offset, + b, buffer_offset, + sizeof(last_obj_offset))) + return false; + } + return (fixup_offset >= last_min_offset); +} + +/** + * struct binder_task_work_cb - for deferred close + * + * @twork: callback_head for task work + * @file: file to close + * + * Structure to pass task work to be handled after + * returning from binder_ioctl() via task_work_add(). + */ +struct binder_task_work_cb { + struct callback_head twork; + struct file *file; +}; + +/** + * binder_do_fd_close() - close list of file descriptors + * @twork: callback head for task work + * + * It is not safe to call ksys_close() during the binder_ioctl() + * function if there is a chance that binder's own file descriptor + * might be closed. This is to meet the requirements for using + * fdget() (see comments for __fget_light()). Therefore use + * task_work_add() to schedule the close operation once we have + * returned from binder_ioctl(). This function is a callback + * for that mechanism and does the actual ksys_close() on the + * given file descriptor. + */ +static void binder_do_fd_close(struct callback_head *twork) +{ + struct binder_task_work_cb *twcb = container_of(twork, + struct binder_task_work_cb, twork); + + fput(twcb->file); + kfree(twcb); +} + +/** + * binder_deferred_fd_close() - schedule a close for the given file-descriptor + * @fd: file-descriptor to close + * + * See comments in binder_do_fd_close(). This function is used to schedule + * a file-descriptor to be closed after returning from binder_ioctl(). + */ +static void binder_deferred_fd_close(int fd) +{ + struct binder_task_work_cb *twcb; + + twcb = kzalloc(sizeof(*twcb), GFP_KERNEL); + if (!twcb) + return; + init_task_work(&twcb->twork, binder_do_fd_close); + twcb->file = file_close_fd(fd); + if (twcb->file) { + // pin it until binder_do_fd_close(); see comments there + get_file(twcb->file); + filp_close(twcb->file, current->files); + task_work_add(current, &twcb->twork, TWA_RESUME); + } else { + kfree(twcb); + } +} + +static void binder_transaction_buffer_release(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, + binder_size_t off_end_offset, + bool is_failure) +{ + int debug_id = buffer->debug_id; + binder_size_t off_start_offset, buffer_offset; + + binder_debug(BINDER_DEBUG_TRANSACTION, + "%d buffer release %d, size %zd-%zd, failed at %llx\n", + proc->pid, buffer->debug_id, + buffer->data_size, buffer->offsets_size, + (unsigned long long)off_end_offset); + + if (buffer->target_node) + binder_dec_node(buffer->target_node, 1, 0); + + off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); + + for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; + buffer_offset += sizeof(binder_size_t)) { + struct binder_object_header *hdr; + size_t object_size = 0; + struct binder_object object; + binder_size_t object_offset; + + if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, + buffer, buffer_offset, + sizeof(object_offset))) + object_size = binder_get_object(proc, NULL, buffer, + object_offset, &object); + if (object_size == 0) { + pr_err("transaction release %d bad object at offset %lld, size %zd\n", + debug_id, (u64)object_offset, buffer->data_size); + continue; + } + hdr = &object.hdr; + switch (hdr->type) { + case BINDER_TYPE_BINDER: + case BINDER_TYPE_WEAK_BINDER: { + struct flat_binder_object *fp; + struct binder_node *node; + + fp = to_flat_binder_object(hdr); + node = binder_get_node(proc, fp->binder); + if (node == NULL) { + pr_err("transaction release %d bad node %016llx\n", + debug_id, (u64)fp->binder); + break; + } + binder_debug(BINDER_DEBUG_TRANSACTION, + " node %d u%016llx\n", + node->debug_id, (u64)node->ptr); + binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER, + 0); + binder_put_node(node); + } break; + case BINDER_TYPE_HANDLE: + case BINDER_TYPE_WEAK_HANDLE: { + struct flat_binder_object *fp; + struct binder_ref_data rdata; + int ret; + + fp = to_flat_binder_object(hdr); + ret = binder_dec_ref_for_handle(proc, fp->handle, + hdr->type == BINDER_TYPE_HANDLE, &rdata); + + if (ret) { + pr_err("transaction release %d bad handle %d, ret = %d\n", + debug_id, fp->handle, ret); + break; + } + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d\n", + rdata.debug_id, rdata.desc); + } break; + + case BINDER_TYPE_FD: { + /* + * No need to close the file here since user-space + * closes it for successfully delivered + * transactions. For transactions that weren't + * delivered, the new fd was never allocated so + * there is no need to close and the fput on the + * file is done when the transaction is torn + * down. + */ + } break; + case BINDER_TYPE_PTR: + /* + * Nothing to do here, this will get cleaned up when the + * transaction buffer gets freed + */ + break; + case BINDER_TYPE_FDA: { + struct binder_fd_array_object *fda; + struct binder_buffer_object *parent; + struct binder_object ptr_object; + binder_size_t fda_offset; + size_t fd_index; + binder_size_t fd_buf_size; + binder_size_t num_valid; + + if (is_failure) { + /* + * The fd fixups have not been applied so no + * fds need to be closed. + */ + continue; + } + + num_valid = (buffer_offset - off_start_offset) / + sizeof(binder_size_t); + fda = to_binder_fd_array_object(hdr); + parent = binder_validate_ptr(proc, buffer, &ptr_object, + fda->parent, + off_start_offset, + NULL, + num_valid); + if (!parent) { + pr_err("transaction release %d bad parent offset\n", + debug_id); + continue; + } + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + pr_err("transaction release %d invalid number of fds (%lld)\n", + debug_id, (u64)fda->num_fds); + continue; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. */ + pr_err("transaction release %d not enough space for %lld fds in buffer\n", + debug_id, (u64)fda->num_fds); + continue; + } + /* + * the source data for binder_buffer_object is visible + * to user-space and the @buffer element is the user + * pointer to the buffer_object containing the fd_array. + * Convert the address to an offset relative to + * the base of the transaction buffer. + */ + fda_offset = parent->buffer - buffer->user_data + + fda->parent_offset; + for (fd_index = 0; fd_index < fda->num_fds; + fd_index++) { + u32 fd; + int err; + binder_size_t offset = fda_offset + + fd_index * sizeof(fd); + + err = binder_alloc_copy_from_buffer( + &proc->alloc, &fd, buffer, + offset, sizeof(fd)); + WARN_ON(err); + if (!err) { + binder_deferred_fd_close(fd); + /* + * Need to make sure the thread goes + * back to userspace to complete the + * deferred close + */ + if (thread) + thread->looper_need_return = true; + } + } + } break; + default: + pr_err("transaction release %d bad object type %x\n", + debug_id, hdr->type); + break; + } + } +} + +/* Clean up all the objects in the buffer */ +static inline void binder_release_entire_buffer(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, + bool is_failure) +{ + binder_size_t off_end_offset; + + off_end_offset = ALIGN(buffer->data_size, sizeof(void *)); + off_end_offset += buffer->offsets_size; + + binder_transaction_buffer_release(proc, thread, buffer, + off_end_offset, is_failure); +} + +static int binder_translate_binder(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_node *node; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + struct binder_ref_data rdata; + int ret = 0; + + node = binder_get_node(proc, fp->binder); + if (!node) { + node = binder_new_node(proc, fp); + if (!node) + return -ENOMEM; + } + if (fp->cookie != node->cookie) { + binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, (u64)fp->binder, + node->debug_id, (u64)fp->cookie, + (u64)node->cookie); + ret = -EINVAL; + goto done; + } + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { + ret = -EPERM; + goto done; + } + + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_BINDER, + &thread->todo, &rdata); + if (ret) + goto done; + + if (fp->hdr.type == BINDER_TYPE_BINDER) + fp->hdr.type = BINDER_TYPE_HANDLE; + else + fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; + fp->binder = 0; + fp->handle = rdata.desc; + fp->cookie = 0; + + trace_binder_transaction_node_to_ref(t, node, &rdata); + binder_debug(BINDER_DEBUG_TRANSACTION, + " node %d u%016llx -> ref %d desc %d\n", + node->debug_id, (u64)node->ptr, + rdata.debug_id, rdata.desc); +done: + binder_put_node(node); + return ret; +} + +static int binder_translate_handle(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + struct binder_node *node; + struct binder_ref_data src_rdata; + int ret = 0; + + node = binder_get_node_from_ref(proc, fp->handle, + fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata); + if (!node) { + binder_user_error("%d:%d got transaction with invalid handle, %d\n", + proc->pid, thread->pid, fp->handle); + return -EINVAL; + } + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { + ret = -EPERM; + goto done; + } + + binder_node_lock(node); + if (node->proc == target_proc) { + if (fp->hdr.type == BINDER_TYPE_HANDLE) + fp->hdr.type = BINDER_TYPE_BINDER; + else + fp->hdr.type = BINDER_TYPE_WEAK_BINDER; + fp->binder = node->ptr; + fp->cookie = node->cookie; + if (node->proc) + binder_inner_proc_lock(node->proc); + else + __acquire(&node->proc->inner_lock); + binder_inc_node_nilocked(node, + fp->hdr.type == BINDER_TYPE_BINDER, + 0, NULL); + if (node->proc) + binder_inner_proc_unlock(node->proc); + else + __release(&node->proc->inner_lock); + trace_binder_transaction_ref_to_node(t, node, &src_rdata); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> node %d u%016llx\n", + src_rdata.debug_id, src_rdata.desc, node->debug_id, + (u64)node->ptr); + binder_node_unlock(node); + } else { + struct binder_ref_data dest_rdata; + + binder_node_unlock(node); + ret = binder_inc_ref_for_node(target_proc, node, + fp->hdr.type == BINDER_TYPE_HANDLE, + NULL, &dest_rdata); + if (ret) + goto done; + + fp->binder = 0; + fp->handle = dest_rdata.desc; + fp->cookie = 0; + trace_binder_transaction_ref_to_ref(t, node, &src_rdata, + &dest_rdata); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> ref %d desc %d (node %d)\n", + src_rdata.debug_id, src_rdata.desc, + dest_rdata.debug_id, dest_rdata.desc, + node->debug_id); + } +done: + binder_put_node(node); + return ret; +} + +static int binder_translate_fd(u32 fd, binder_size_t fd_offset, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + struct binder_txn_fd_fixup *fixup; + struct file *file; + int ret = 0; + bool target_allows_fd; + + if (in_reply_to) + target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS); + else + target_allows_fd = t->buffer->target_node->accept_fds; + if (!target_allows_fd) { + binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n", + proc->pid, thread->pid, + in_reply_to ? "reply" : "transaction", + fd); + ret = -EPERM; + goto err_fd_not_accepted; + } + + file = fget(fd); + if (!file) { + binder_user_error("%d:%d got transaction with invalid fd, %d\n", + proc->pid, thread->pid, fd); + ret = -EBADF; + goto err_fget; + } + ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); + if (ret < 0) { + ret = -EPERM; + goto err_security; + } + + /* + * Add fixup record for this transaction. The allocation + * of the fd in the target needs to be done from a + * target thread. + */ + fixup = kzalloc(sizeof(*fixup), GFP_KERNEL); + if (!fixup) { + ret = -ENOMEM; + goto err_alloc; + } + fixup->file = file; + fixup->offset = fd_offset; + fixup->target_fd = -1; + trace_binder_transaction_fd_send(t, fd, fixup->offset); + list_add_tail(&fixup->fixup_entry, &t->fd_fixups); + + return ret; + +err_alloc: +err_security: + fput(file); +err_fget: +err_fd_not_accepted: + return ret; +} + +/** + * struct binder_ptr_fixup - data to be fixed-up in target buffer + * @offset: offset in target buffer to fixup + * @skip_size: bytes to skip in copy (fixup will be written later) + * @fixup_data: data to write at fixup offset + * @node: list node + * + * This is used for the pointer fixup list (pf) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_ptr_fixup { + binder_size_t offset; + size_t skip_size; + binder_uintptr_t fixup_data; + struct list_head node; +}; + +/** + * struct binder_sg_copy - scatter-gather data to be copied + * @offset: offset in target buffer + * @sender_uaddr: user address in source buffer + * @length: bytes to copy + * @node: list node + * + * This is used for the sg copy list (sgc) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_sg_copy { + binder_size_t offset; + const void __user *sender_uaddr; + size_t length; + struct list_head node; +}; + +/** + * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data + * @alloc: binder_alloc associated with @buffer + * @buffer: binder buffer in target process + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Processes all elements of @sgc_head, applying fixups from @pf_head + * and copying the scatter-gather data from the source process' user + * buffer to the target's buffer. It is expected that the list creation + * and processing all occurs during binder_transaction() so these lists + * are only accessed in local context. + * + * Return: 0=success, else -errno + */ +static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, + struct binder_buffer *buffer, + struct list_head *sgc_head, + struct list_head *pf_head) +{ + int ret = 0; + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; + struct binder_ptr_fixup *pf = + list_first_entry_or_null(pf_head, struct binder_ptr_fixup, + node); + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + size_t bytes_copied = 0; + + while (bytes_copied < sgc->length) { + size_t copy_size; + size_t bytes_left = sgc->length - bytes_copied; + size_t offset = sgc->offset + bytes_copied; + + /* + * We copy up to the fixup (pointed to by pf) + */ + copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset) + : bytes_left; + if (!ret && copy_size) + ret = binder_alloc_copy_user_to_buffer( + alloc, buffer, + offset, + sgc->sender_uaddr + bytes_copied, + copy_size); + bytes_copied += copy_size; + if (copy_size != bytes_left) { + BUG_ON(!pf); + /* we stopped at a fixup offset */ + if (pf->skip_size) { + /* + * we are just skipping. This is for + * BINDER_TYPE_FDA where the translated + * fds will be fixed up when we get + * to target context. + */ + bytes_copied += pf->skip_size; + } else { + /* apply the fixup indicated by pf */ + if (!ret) + ret = binder_alloc_copy_to_buffer( + alloc, buffer, + pf->offset, + &pf->fixup_data, + sizeof(pf->fixup_data)); + bytes_copied += sizeof(pf->fixup_data); + } + list_del(&pf->node); + kfree(pf); + pf = list_first_entry_or_null(pf_head, + struct binder_ptr_fixup, node); + } + } + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } + BUG_ON(!list_empty(sgc_head)); + + return ret > 0 ? -EINVAL : ret; +} + +/** + * binder_cleanup_deferred_txn_lists() - free specified lists + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Called to clean up @sgc_head and @pf_head if there is an + * error. + */ +static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, + struct list_head *pf_head) +{ + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf, *tmppf; + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + list_del(&pf->node); + kfree(pf); + } +} + +/** + * binder_defer_copy() - queue a scatter-gather buffer for copy + * @sgc_head: list_head of scatter-gather copy list + * @offset: binder buffer offset in target process + * @sender_uaddr: user address in source process + * @length: bytes to copy + * + * Specify a scatter-gather block to be copied. The actual copy must + * be deferred until all the needed fixups are identified and queued. + * Then the copy and fixups are done together so un-translated values + * from the source are never visible in the target buffer. + * + * We are guaranteed that repeated calls to this function will have + * monotonically increasing @offset values so the list will naturally + * be ordered. + * + * Return: 0=success, else -errno + */ +static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, + const void __user *sender_uaddr, size_t length) +{ + struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); + + if (!bc) + return -ENOMEM; + + bc->offset = offset; + bc->sender_uaddr = sender_uaddr; + bc->length = length; + INIT_LIST_HEAD(&bc->node); + + /* + * We are guaranteed that the deferred copies are in-order + * so just add to the tail. + */ + list_add_tail(&bc->node, sgc_head); + + return 0; +} + +/** + * binder_add_fixup() - queue a fixup to be applied to sg copy + * @pf_head: list_head of binder ptr fixup list + * @offset: binder buffer offset in target process + * @fixup: bytes to be copied for fixup + * @skip_size: bytes to skip when copying (fixup will be applied later) + * + * Add the specified fixup to a list ordered by @offset. When copying + * the scatter-gather buffers, the fixup will be copied instead of + * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup + * will be applied later (in target process context), so we just skip + * the bytes specified by @skip_size. If @skip_size is 0, we copy the + * value in @fixup. + * + * This function is called *mostly* in @offset order, but there are + * exceptions. Since out-of-order inserts are relatively uncommon, + * we insert the new element by searching backward from the tail of + * the list. + * + * Return: 0=success, else -errno + */ +static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, + binder_uintptr_t fixup, size_t skip_size) +{ + struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); + struct binder_ptr_fixup *tmppf; + + if (!pf) + return -ENOMEM; + + pf->offset = offset; + pf->fixup_data = fixup; + pf->skip_size = skip_size; + INIT_LIST_HEAD(&pf->node); + + /* Fixups are *mostly* added in-order, but there are some + * exceptions. Look backwards through list for insertion point. + */ + list_for_each_entry_reverse(tmppf, pf_head, node) { + if (tmppf->offset < pf->offset) { + list_add(&pf->node, &tmppf->node); + return 0; + } + } + /* + * if we get here, then the new offset is the lowest so + * insert at the head + */ + list_add(&pf->node, pf_head); + return 0; +} + +static int binder_translate_fd_array(struct list_head *pf_head, + struct binder_fd_array_object *fda, + const void __user *sender_ubuffer, + struct binder_buffer_object *parent, + struct binder_buffer_object *sender_uparent, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + binder_size_t fdi, fd_buf_size; + binder_size_t fda_offset; + const void __user *sender_ufda_base; + struct binder_proc *proc = thread->proc; + int ret; + + if (fda->num_fds == 0) + return 0; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. */ + binder_user_error("%d:%d not enough space to store %lld fds in buffer\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + /* + * the source data for binder_buffer_object is visible + * to user-space and the @buffer element is the user + * pointer to the buffer_object containing the fd_array. + * Convert the address to an offset relative to + * the base of the transaction buffer. + */ + fda_offset = parent->buffer - t->buffer->user_data + + fda->parent_offset; + sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + + fda->parent_offset; + + if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || + !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { + binder_user_error("%d:%d parent offset not aligned correctly.\n", + proc->pid, thread->pid); + return -EINVAL; + } + ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); + if (ret) + return ret; + + for (fdi = 0; fdi < fda->num_fds; fdi++) { + u32 fd; + binder_size_t offset = fda_offset + fdi * sizeof(fd); + binder_size_t sender_uoffset = fdi * sizeof(fd); + + ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); + if (!ret) + ret = binder_translate_fd(fd, offset, t, thread, + in_reply_to); + if (ret) + return ret > 0 ? -EINVAL : ret; + } + return 0; +} + +static int binder_fixup_parent(struct list_head *pf_head, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_buffer_object *bp, + binder_size_t off_start_offset, + binder_size_t num_valid, + binder_size_t last_fixup_obj_off, + binder_size_t last_fixup_min_off) +{ + struct binder_buffer_object *parent; + struct binder_buffer *b = t->buffer; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + struct binder_object object; + binder_size_t buffer_offset; + binder_size_t parent_offset; + + if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT)) + return 0; + + parent = binder_validate_ptr(target_proc, b, &object, bp->parent, + off_start_offset, &parent_offset, + num_valid); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (!binder_validate_fixup(target_proc, b, off_start_offset, + parent_offset, bp->parent_offset, + last_fixup_obj_off, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (parent->length < sizeof(binder_uintptr_t) || + bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) { + /* No space for a pointer here! */ + binder_user_error("%d:%d got transaction with invalid parent offset\n", + proc->pid, thread->pid); + return -EINVAL; + } + + buffer_offset = bp->parent_offset + parent->buffer - b->user_data; + + return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); +} + +/** + * binder_can_update_transaction() - Can a txn be superseded by an updated one? + * @t1: the pending async txn in the frozen process + * @t2: the new async txn to supersede the outdated pending one + * + * Return: true if t2 can supersede t1 + * false if t2 can not supersede t1 + */ +static bool binder_can_update_transaction(struct binder_transaction *t1, + struct binder_transaction *t2) +{ + if ((t1->flags & t2->flags & (TF_ONE_WAY | TF_UPDATE_TXN)) != + (TF_ONE_WAY | TF_UPDATE_TXN) || !t1->to_proc || !t2->to_proc) + return false; + if (t1->to_proc->tsk == t2->to_proc->tsk && t1->code == t2->code && + t1->flags == t2->flags && t1->buffer->pid == t2->buffer->pid && + t1->buffer->target_node->ptr == t2->buffer->target_node->ptr && + t1->buffer->target_node->cookie == t2->buffer->target_node->cookie) + return true; + return false; +} + +/** + * binder_find_outdated_transaction_ilocked() - Find the outdated transaction + * @t: new async transaction + * @target_list: list to find outdated transaction + * + * Return: the outdated transaction if found + * NULL if no outdated transacton can be found + * + * Requires the proc->inner_lock to be held. + */ +static struct binder_transaction * +binder_find_outdated_transaction_ilocked(struct binder_transaction *t, + struct list_head *target_list) +{ + struct binder_work *w; + + list_for_each_entry(w, target_list, entry) { + struct binder_transaction *t_queued; + + if (w->type != BINDER_WORK_TRANSACTION) + continue; + t_queued = container_of(w, struct binder_transaction, work); + if (binder_can_update_transaction(t_queued, t)) + return t_queued; + } + return NULL; +} + +/** + * binder_proc_transaction() - sends a transaction to a process and wakes it up + * @t: transaction to send + * @proc: process to send the transaction to + * @thread: thread in @proc to send the transaction to (may be NULL) + * + * This function queues a transaction to the specified process. It will try + * to find a thread in the target process to handle the transaction and + * wake it up. If no thread is found, the work is queued to the proc + * waitqueue. + * + * If the @thread parameter is not NULL, the transaction is always queued + * to the waitlist of that specific thread. + * + * Return: 0 if the transaction was successfully queued + * BR_DEAD_REPLY if the target process or thread is dead + * BR_FROZEN_REPLY if the target process or thread is frozen and + * the sync transaction was rejected + * BR_TRANSACTION_PENDING_FROZEN if the target process is frozen + * and the async transaction was successfully queued + */ +static int binder_proc_transaction(struct binder_transaction *t, + struct binder_proc *proc, + struct binder_thread *thread) +{ + struct binder_node *node = t->buffer->target_node; + bool oneway = !!(t->flags & TF_ONE_WAY); + bool pending_async = false; + struct binder_transaction *t_outdated = NULL; + bool frozen = false; + + BUG_ON(!node); + binder_node_lock(node); + if (oneway) { + BUG_ON(thread); + if (node->has_async_transaction) + pending_async = true; + else + node->has_async_transaction = true; + } + + binder_inner_proc_lock(proc); + if (proc->is_frozen) { + frozen = true; + proc->sync_recv |= !oneway; + proc->async_recv |= oneway; + } + + if ((frozen && !oneway) || proc->is_dead || + (thread && thread->is_dead)) { + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + return frozen ? BR_FROZEN_REPLY : BR_DEAD_REPLY; + } + + if (!thread && !pending_async) + thread = binder_select_thread_ilocked(proc); + + if (thread) { + binder_enqueue_thread_work_ilocked(thread, &t->work); + } else if (!pending_async) { + binder_enqueue_work_ilocked(&t->work, &proc->todo); + } else { + if ((t->flags & TF_UPDATE_TXN) && frozen) { + t_outdated = binder_find_outdated_transaction_ilocked(t, + &node->async_todo); + if (t_outdated) { + binder_debug(BINDER_DEBUG_TRANSACTION, + "txn %d supersedes %d\n", + t->debug_id, t_outdated->debug_id); + list_del_init(&t_outdated->work.entry); + proc->outstanding_txns--; + } + } + binder_enqueue_work_ilocked(&t->work, &node->async_todo); + } + + if (!pending_async) + binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); + + proc->outstanding_txns++; + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + + /* + * To reduce potential contention, free the outdated transaction and + * buffer after releasing the locks. + */ + if (t_outdated) { + struct binder_buffer *buffer = t_outdated->buffer; + + t_outdated->buffer = NULL; + buffer->transaction = NULL; + trace_binder_transaction_update_buffer_release(buffer); + binder_release_entire_buffer(proc, NULL, buffer, false); + binder_alloc_free_buf(&proc->alloc, buffer); + kfree(t_outdated); + binder_stats_deleted(BINDER_STAT_TRANSACTION); + } + + if (oneway && frozen) + return BR_TRANSACTION_PENDING_FROZEN; + + return 0; +} + +/** + * binder_get_node_refs_for_txn() - Get required refs on node for txn + * @node: struct binder_node for which to get refs + * @procp: returns @node->proc if valid + * @error: if no @procp then returns BR_DEAD_REPLY + * + * User-space normally keeps the node alive when creating a transaction + * since it has a reference to the target. The local strong ref keeps it + * alive if the sending process dies before the target process processes + * the transaction. If the source process is malicious or has a reference + * counting bug, relying on the local strong ref can fail. + * + * Since user-space can cause the local strong ref to go away, we also take + * a tmpref on the node to ensure it survives while we are constructing + * the transaction. We also need a tmpref on the proc while we are + * constructing the transaction, so we take that here as well. + * + * Return: The target_node with refs taken or NULL if no @node->proc is NULL. + * Also sets @procp if valid. If the @node->proc is NULL indicating that the + * target proc has died, @error is set to BR_DEAD_REPLY. + */ +static struct binder_node *binder_get_node_refs_for_txn( + struct binder_node *node, + struct binder_proc **procp, + uint32_t *error) +{ + struct binder_node *target_node = NULL; + + binder_node_inner_lock(node); + if (node->proc) { + target_node = node; + binder_inc_node_nilocked(node, 1, 0, NULL); + binder_inc_node_tmpref_ilocked(node); + node->proc->tmp_ref++; + *procp = node->proc; + } else + *error = BR_DEAD_REPLY; + binder_node_inner_unlock(node); + + return target_node; +} + +static void binder_set_txn_from_error(struct binder_transaction *t, int id, + uint32_t command, int32_t param) +{ + struct binder_thread *from = binder_get_txn_from_and_acq_inner(t); + + if (!from) { + /* annotation for sparse */ + __release(&from->proc->inner_lock); + return; + } + + /* don't override existing errors */ + if (from->ee.command == BR_OK) + binder_set_extended_error(&from->ee, id, command, param); + binder_inner_proc_unlock(from->proc); + binder_thread_dec_tmpref(from); +} + +/** + * binder_netlink_report() - report a transaction failure via netlink + * @proc: the binder proc sending the transaction + * @t: the binder transaction that failed + * @data_size: the user provided data size for the transaction + * @error: enum binder_driver_return_protocol returned to sender + */ +static void binder_netlink_report(struct binder_proc *proc, + struct binder_transaction *t, + u32 data_size, + u32 error) +{ + const char *context = proc->context->name; + struct sk_buff *skb; + void *hdr; + + if (!genl_has_listeners(&binder_nl_family, &init_net, + BINDER_NLGRP_REPORT)) + return; + + trace_binder_netlink_report(context, t, data_size, error); + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return; + + hdr = genlmsg_put(skb, 0, 0, &binder_nl_family, 0, BINDER_CMD_REPORT); + if (!hdr) + goto free_skb; + + if (nla_put_u32(skb, BINDER_A_REPORT_ERROR, error) || + nla_put_string(skb, BINDER_A_REPORT_CONTEXT, context) || + nla_put_u32(skb, BINDER_A_REPORT_FROM_PID, t->from_pid) || + nla_put_u32(skb, BINDER_A_REPORT_FROM_TID, t->from_tid)) + goto cancel_skb; + + if (t->to_proc && + nla_put_u32(skb, BINDER_A_REPORT_TO_PID, t->to_proc->pid)) + goto cancel_skb; + + if (t->to_thread && + nla_put_u32(skb, BINDER_A_REPORT_TO_TID, t->to_thread->pid)) + goto cancel_skb; + + if (t->is_reply && nla_put_flag(skb, BINDER_A_REPORT_IS_REPLY)) + goto cancel_skb; + + if (nla_put_u32(skb, BINDER_A_REPORT_FLAGS, t->flags) || + nla_put_u32(skb, BINDER_A_REPORT_CODE, t->code) || + nla_put_u32(skb, BINDER_A_REPORT_DATA_SIZE, data_size)) + goto cancel_skb; + + genlmsg_end(skb, hdr); + genlmsg_multicast(&binder_nl_family, skb, 0, BINDER_NLGRP_REPORT, + GFP_KERNEL); + return; + +cancel_skb: + genlmsg_cancel(skb, hdr); +free_skb: + nlmsg_free(skb); +} + +static void binder_transaction(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_transaction_data *tr, int reply, + binder_size_t extra_buffers_size) +{ + int ret; + struct binder_transaction *t; + struct binder_work *w; + struct binder_work *tcomplete; + binder_size_t buffer_offset = 0; + binder_size_t off_start_offset, off_end_offset; + binder_size_t off_min; + binder_size_t sg_buf_offset, sg_buf_end_offset; + binder_size_t user_offset = 0; + struct binder_proc *target_proc = NULL; + struct binder_thread *target_thread = NULL; + struct binder_node *target_node = NULL; + struct binder_transaction *in_reply_to = NULL; + struct binder_transaction_log_entry *e; + uint32_t return_error = 0; + uint32_t return_error_param = 0; + uint32_t return_error_line = 0; + binder_size_t last_fixup_obj_off = 0; + binder_size_t last_fixup_min_off = 0; + struct binder_context *context = proc->context; + int t_debug_id = atomic_inc_return(&binder_last_id); + ktime_t t_start_time = ktime_get(); + struct lsm_context lsmctx = { }; + struct list_head sgc_head; + struct list_head pf_head; + const void __user *user_buffer = (const void __user *) + (uintptr_t)tr->data.ptr.buffer; + INIT_LIST_HEAD(&sgc_head); + INIT_LIST_HEAD(&pf_head); + + e = binder_transaction_log_add(&binder_transaction_log); + e->debug_id = t_debug_id; + e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY); + e->from_proc = proc->pid; + e->from_thread = thread->pid; + e->target_handle = tr->target.handle; + e->data_size = tr->data_size; + e->offsets_size = tr->offsets_size; + strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME); + + binder_inner_proc_lock(proc); + binder_set_extended_error(&thread->ee, t_debug_id, BR_OK, 0); + binder_inner_proc_unlock(proc); + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) { + binder_txn_error("%d:%d cannot allocate transaction\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; + goto err_alloc_t_failed; + } + INIT_LIST_HEAD(&t->fd_fixups); + binder_stats_created(BINDER_STAT_TRANSACTION); + spin_lock_init(&t->lock); + t->debug_id = t_debug_id; + t->start_time = t_start_time; + t->from_pid = proc->pid; + t->from_tid = thread->pid; + t->sender_euid = task_euid(proc->tsk); + t->code = tr->code; + t->flags = tr->flags; + t->priority = task_nice(current); + t->work.type = BINDER_WORK_TRANSACTION; + t->is_async = !reply && (tr->flags & TF_ONE_WAY); + t->is_reply = reply; + if (!reply && !(tr->flags & TF_ONE_WAY)) + t->from = thread; + + if (reply) { + binder_inner_proc_lock(proc); + in_reply_to = thread->transaction_stack; + if (in_reply_to == NULL) { + binder_inner_proc_unlock(proc); + binder_user_error("%d:%d got reply transaction with no transaction stack\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; + goto err_empty_call_stack; + } + if (in_reply_to->to_thread != thread) { + spin_lock(&in_reply_to->lock); + binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n", + proc->pid, thread->pid, in_reply_to->debug_id, + in_reply_to->to_proc ? + in_reply_to->to_proc->pid : 0, + in_reply_to->to_thread ? + in_reply_to->to_thread->pid : 0); + spin_unlock(&in_reply_to->lock); + binder_inner_proc_unlock(proc); + return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; + in_reply_to = NULL; + goto err_bad_call_stack; + } + thread->transaction_stack = in_reply_to->to_parent; + binder_inner_proc_unlock(proc); + binder_set_nice(in_reply_to->saved_priority); + target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); + if (target_thread == NULL) { + /* annotation for sparse */ + __release(&target_thread->proc->inner_lock); + binder_txn_error("%d:%d reply target not found\n", + thread->pid, proc->pid); + return_error = BR_DEAD_REPLY; + return_error_line = __LINE__; + goto err_dead_binder; + } + if (target_thread->transaction_stack != in_reply_to) { + binder_user_error("%d:%d got reply transaction with bad target transaction stack %d, expected %d\n", + proc->pid, thread->pid, + target_thread->transaction_stack ? + target_thread->transaction_stack->debug_id : 0, + in_reply_to->debug_id); + binder_inner_proc_unlock(target_thread->proc); + return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; + in_reply_to = NULL; + target_thread = NULL; + goto err_dead_binder; + } + target_proc = target_thread->proc; + target_proc->tmp_ref++; + binder_inner_proc_unlock(target_thread->proc); + } else { + if (tr->target.handle) { + struct binder_ref *ref; + + /* + * There must already be a strong ref + * on this node. If so, do a strong + * increment on the node to ensure it + * stays alive until the transaction is + * done. + */ + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, tr->target.handle, + true); + if (ref) { + target_node = binder_get_node_refs_for_txn( + ref->node, &target_proc, + &return_error); + } else { + binder_user_error("%d:%d got transaction to invalid handle, %u\n", + proc->pid, thread->pid, tr->target.handle); + return_error = BR_FAILED_REPLY; + } + binder_proc_unlock(proc); + } else { + mutex_lock(&context->context_mgr_node_lock); + target_node = context->binder_context_mgr_node; + if (target_node) + target_node = binder_get_node_refs_for_txn( + target_node, &target_proc, + &return_error); + else + return_error = BR_DEAD_REPLY; + mutex_unlock(&context->context_mgr_node_lock); + if (target_node && target_proc->pid == proc->pid) { + binder_user_error("%d:%d got transaction to context manager from process owning it\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_invalid_target_handle; + } + } + if (!target_node) { + binder_txn_error("%d:%d cannot find target node\n", + proc->pid, thread->pid); + /* return_error is set above */ + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_dead_binder; + } + e->to_node = target_node->debug_id; + if (WARN_ON(proc == target_proc)) { + binder_txn_error("%d:%d self transactions not allowed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_invalid_target_handle; + } + if (security_binder_transaction(proc->cred, + target_proc->cred) < 0) { + binder_txn_error("%d:%d transaction credentials failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EPERM; + return_error_line = __LINE__; + goto err_invalid_target_handle; + } + binder_inner_proc_lock(proc); + + w = list_first_entry_or_null(&thread->todo, + struct binder_work, entry); + if (!(tr->flags & TF_ONE_WAY) && w && + w->type == BINDER_WORK_TRANSACTION) { + /* + * Do not allow new outgoing transaction from a + * thread that has a transaction at the head of + * its todo list. Only need to check the head + * because binder_select_thread_ilocked picks a + * thread from proc->waiting_threads to enqueue + * the transaction, and nothing is queued to the + * todo list while the thread is on waiting_threads. + */ + binder_user_error("%d:%d new transaction not allowed when there is a transaction on thread todo\n", + proc->pid, thread->pid); + binder_inner_proc_unlock(proc); + return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; + goto err_bad_todo_list; + } + + if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { + struct binder_transaction *tmp; + + tmp = thread->transaction_stack; + if (tmp->to_thread != thread) { + spin_lock(&tmp->lock); + binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n", + proc->pid, thread->pid, tmp->debug_id, + tmp->to_proc ? tmp->to_proc->pid : 0, + tmp->to_thread ? + tmp->to_thread->pid : 0); + spin_unlock(&tmp->lock); + binder_inner_proc_unlock(proc); + return_error = BR_FAILED_REPLY; + return_error_param = -EPROTO; + return_error_line = __LINE__; + goto err_bad_call_stack; + } + while (tmp) { + struct binder_thread *from; + + spin_lock(&tmp->lock); + from = tmp->from; + if (from && from->proc == target_proc) { + atomic_inc(&from->tmp_ref); + target_thread = from; + spin_unlock(&tmp->lock); + break; + } + spin_unlock(&tmp->lock); + tmp = tmp->from_parent; + } + } + binder_inner_proc_unlock(proc); + } + + t->to_proc = target_proc; + t->to_thread = target_thread; + if (target_thread) + e->to_thread = target_thread->pid; + e->to_proc = target_proc->pid; + + tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); + if (tcomplete == NULL) { + binder_txn_error("%d:%d cannot allocate work for transaction\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -ENOMEM; + return_error_line = __LINE__; + goto err_alloc_tcomplete_failed; + } + binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); + + if (reply) + binder_debug(BINDER_DEBUG_TRANSACTION, + "%d:%d BC_REPLY %d -> %d:%d, data size %lld-%lld-%lld\n", + proc->pid, thread->pid, t->debug_id, + target_proc->pid, target_thread->pid, + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); + else + binder_debug(BINDER_DEBUG_TRANSACTION, + "%d:%d BC_TRANSACTION %d -> %d - node %d, data size %lld-%lld-%lld\n", + proc->pid, thread->pid, t->debug_id, + target_proc->pid, target_node->debug_id, + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); + + if (target_node && target_node->txn_security_ctx) { + u32 secid; + size_t added_size; + + security_cred_getsecid(proc->cred, &secid); + ret = security_secid_to_secctx(secid, &lsmctx); + if (ret < 0) { + binder_txn_error("%d:%d failed to get security context\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_get_secctx_failed; + } + added_size = ALIGN(lsmctx.len, sizeof(u64)); + extra_buffers_size += added_size; + if (extra_buffers_size < added_size) { + binder_txn_error("%d:%d integer overflow of extra_buffers_size\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_extra_size; + } + } + + trace_binder_transaction(reply, t, target_node); + + t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, + tr->offsets_size, extra_buffers_size, + !reply && (t->flags & TF_ONE_WAY)); + if (IS_ERR(t->buffer)) { + char *s; + + ret = PTR_ERR(t->buffer); + s = (ret == -ESRCH) ? ": vma cleared, target dead or dying" + : (ret == -ENOSPC) ? ": no space left" + : (ret == -ENOMEM) ? ": memory allocation failed" + : ""; + binder_txn_error("cannot allocate buffer%s", s); + + return_error_param = PTR_ERR(t->buffer); + return_error = return_error_param == -ESRCH ? + BR_DEAD_REPLY : BR_FAILED_REPLY; + return_error_line = __LINE__; + t->buffer = NULL; + goto err_binder_alloc_buf_failed; + } + if (lsmctx.context) { + int err; + size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) + + ALIGN(tr->offsets_size, sizeof(void *)) + + ALIGN(extra_buffers_size, sizeof(void *)) - + ALIGN(lsmctx.len, sizeof(u64)); + + t->security_ctx = t->buffer->user_data + buf_offset; + err = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, buf_offset, + lsmctx.context, lsmctx.len); + if (err) { + t->security_ctx = 0; + WARN_ON(1); + } + security_release_secctx(&lsmctx); + lsmctx.context = NULL; + } + t->buffer->debug_id = t->debug_id; + t->buffer->transaction = t; + t->buffer->target_node = target_node; + t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); + trace_binder_transaction_alloc_buf(t->buffer); + + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, + ALIGN(tr->data_size, sizeof(void *)), + (const void __user *) + (uintptr_t)tr->data.ptr.offsets, + tr->offsets_size)) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) { + binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n", + proc->pid, thread->pid, (u64)tr->offsets_size); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_offset; + } + if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { + binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n", + proc->pid, thread->pid, + (u64)extra_buffers_size); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_offset; + } + off_start_offset = ALIGN(tr->data_size, sizeof(void *)); + buffer_offset = off_start_offset; + off_end_offset = off_start_offset + tr->offsets_size; + sg_buf_offset = ALIGN(off_end_offset, sizeof(void *)); + sg_buf_end_offset = sg_buf_offset + extra_buffers_size - + ALIGN(lsmctx.len, sizeof(u64)); + off_min = 0; + for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; + buffer_offset += sizeof(binder_size_t)) { + struct binder_object_header *hdr; + size_t object_size; + struct binder_object object; + binder_size_t object_offset; + binder_size_t copy_size; + + if (binder_alloc_copy_from_buffer(&target_proc->alloc, + &object_offset, + t->buffer, + buffer_offset, + sizeof(object_offset))) { + binder_txn_error("%d:%d copy offset from buffer failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_offset; + } + + /* + * Copy the source user buffer up to the next object + * that will be processed. + */ + copy_size = object_offset - user_offset; + if (copy_size && (user_offset > object_offset || + object_offset > tr->data_size || + binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + copy_size))) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + object_size = binder_get_object(target_proc, user_buffer, + t->buffer, object_offset, &object); + if (object_size == 0 || object_offset < off_min) { + binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", + proc->pid, thread->pid, + (u64)object_offset, + (u64)off_min, + (u64)t->buffer->data_size); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_offset; + } + /* + * Set offset to the next buffer fragment to be + * copied + */ + user_offset = object_offset + object_size; + + hdr = &object.hdr; + off_min = object_offset + object_size; + switch (hdr->type) { + case BINDER_TYPE_BINDER: + case BINDER_TYPE_WEAK_BINDER: { + struct flat_binder_object *fp; + + fp = to_flat_binder_object(hdr); + ret = binder_translate_binder(fp, t, thread); + + if (ret < 0 || + binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fp, sizeof(*fp))) { + binder_txn_error("%d:%d translate binder failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + } break; + case BINDER_TYPE_HANDLE: + case BINDER_TYPE_WEAK_HANDLE: { + struct flat_binder_object *fp; + + fp = to_flat_binder_object(hdr); + ret = binder_translate_handle(fp, t, thread); + if (ret < 0 || + binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fp, sizeof(*fp))) { + binder_txn_error("%d:%d translate handle failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + } break; + + case BINDER_TYPE_FD: { + struct binder_fd_object *fp = to_binder_fd_object(hdr); + binder_size_t fd_offset = object_offset + + (uintptr_t)&fp->fd - (uintptr_t)fp; + int ret = binder_translate_fd(fp->fd, fd_offset, t, + thread, in_reply_to); + + fp->pad_binder = 0; + if (ret < 0 || + binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fp, sizeof(*fp))) { + binder_txn_error("%d:%d translate fd failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + } break; + case BINDER_TYPE_FDA: { + struct binder_object ptr_object; + binder_size_t parent_offset; + struct binder_object user_object; + size_t user_parent_size; + struct binder_fd_array_object *fda = + to_binder_fd_array_object(hdr); + size_t num_valid = (buffer_offset - off_start_offset) / + sizeof(binder_size_t); + struct binder_buffer_object *parent = + binder_validate_ptr(target_proc, t->buffer, + &ptr_object, fda->parent, + off_start_offset, + &parent_offset, + num_valid); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + if (!binder_validate_fixup(target_proc, t->buffer, + off_start_offset, + parent_offset, + fda->parent_offset, + last_fixup_obj_off, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + /* + * We need to read the user version of the parent + * object to get the original user offset + */ + user_parent_size = + binder_get_object(proc, user_buffer, t->buffer, + parent_offset, &user_object); + if (user_parent_size != sizeof(user_object.bbo)) { + binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", + proc->pid, thread->pid, + user_parent_size, + sizeof(user_object.bbo)); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + ret = binder_translate_fd_array(&pf_head, fda, + user_buffer, parent, + &user_object.bbo, t, + thread, in_reply_to); + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fda, sizeof(*fda)); + if (ret) { + binder_txn_error("%d:%d translate fd array failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret > 0 ? -EINVAL : ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + last_fixup_obj_off = parent_offset; + last_fixup_min_off = + fda->parent_offset + sizeof(u32) * fda->num_fds; + } break; + case BINDER_TYPE_PTR: { + struct binder_buffer_object *bp = + to_binder_buffer_object(hdr); + size_t buf_left = sg_buf_end_offset - sg_buf_offset; + size_t num_valid; + + if (bp->length > buf_left) { + binder_user_error("%d:%d got transaction with too large buffer\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_offset; + } + ret = binder_defer_copy(&sgc_head, sg_buf_offset, + (const void __user *)(uintptr_t)bp->buffer, + bp->length); + if (ret) { + binder_txn_error("%d:%d deferred copy failed\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + /* Fixup buffer pointer to target proc address space */ + bp->buffer = t->buffer->user_data + sg_buf_offset; + sg_buf_offset += ALIGN(bp->length, sizeof(u64)); + + num_valid = (buffer_offset - off_start_offset) / + sizeof(binder_size_t); + ret = binder_fixup_parent(&pf_head, t, + thread, bp, + off_start_offset, + num_valid, + last_fixup_obj_off, + last_fixup_min_off); + if (ret < 0 || + binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + bp, sizeof(*bp))) { + binder_txn_error("%d:%d failed to fixup parent\n", + thread->pid, proc->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_translate_failed; + } + last_fixup_obj_off = object_offset; + last_fixup_min_off = 0; + } break; + default: + binder_user_error("%d:%d got transaction with invalid object type, %x\n", + proc->pid, thread->pid, hdr->type); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_object_type; + } + } + /* Done processing objects, copy the rest of the buffer */ + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + tr->data_size - user_offset)) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + + ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, + &sgc_head, &pf_head); + if (ret) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + if (t->buffer->oneway_spam_suspect) { + tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; + binder_netlink_report(proc, t, tr->data_size, + BR_ONEWAY_SPAM_SUSPECT); + } else { + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; + } + + if (reply) { + binder_enqueue_thread_work(thread, tcomplete); + binder_inner_proc_lock(target_proc); + if (target_thread->is_dead) { + return_error = BR_DEAD_REPLY; + binder_inner_proc_unlock(target_proc); + goto err_dead_proc_or_thread; + } + BUG_ON(t->buffer->async_transaction != 0); + binder_pop_transaction_ilocked(target_thread, in_reply_to); + binder_enqueue_thread_work_ilocked(target_thread, &t->work); + target_proc->outstanding_txns++; + binder_inner_proc_unlock(target_proc); + wake_up_interruptible_sync(&target_thread->wait); + binder_free_transaction(in_reply_to); + } else if (!(t->flags & TF_ONE_WAY)) { + BUG_ON(t->buffer->async_transaction != 0); + binder_inner_proc_lock(proc); + /* + * Defer the TRANSACTION_COMPLETE, so we don't return to + * userspace immediately; this allows the target process to + * immediately start processing this transaction, reducing + * latency. We will then return the TRANSACTION_COMPLETE when + * the target replies (or there is an error). + */ + binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete); + t->from_parent = thread->transaction_stack; + thread->transaction_stack = t; + binder_inner_proc_unlock(proc); + return_error = binder_proc_transaction(t, + target_proc, target_thread); + if (return_error) { + binder_inner_proc_lock(proc); + binder_pop_transaction_ilocked(thread, t); + binder_inner_proc_unlock(proc); + goto err_dead_proc_or_thread; + } + } else { + BUG_ON(target_node == NULL); + BUG_ON(t->buffer->async_transaction != 1); + return_error = binder_proc_transaction(t, target_proc, NULL); + /* + * Let the caller know when async transaction reaches a frozen + * process and is put in a pending queue, waiting for the target + * process to be unfrozen. + */ + if (return_error == BR_TRANSACTION_PENDING_FROZEN) { + tcomplete->type = BINDER_WORK_TRANSACTION_PENDING; + binder_netlink_report(proc, t, tr->data_size, + return_error); + } + binder_enqueue_thread_work(thread, tcomplete); + if (return_error && + return_error != BR_TRANSACTION_PENDING_FROZEN) + goto err_dead_proc_or_thread; + } + if (target_thread) + binder_thread_dec_tmpref(target_thread); + binder_proc_dec_tmpref(target_proc); + if (target_node) + binder_dec_node_tmpref(target_node); + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); + return; + +err_dead_proc_or_thread: + binder_txn_error("%d:%d dead process or thread\n", + thread->pid, proc->pid); + return_error_line = __LINE__; + binder_dequeue_work(proc, tcomplete); +err_translate_failed: +err_bad_object_type: +err_bad_offset: +err_bad_parent: +err_copy_data_failed: + binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); + binder_free_txn_fixups(t); + trace_binder_transaction_failed_buffer_release(t->buffer); + binder_transaction_buffer_release(target_proc, NULL, t->buffer, + buffer_offset, true); + if (target_node) + binder_dec_node_tmpref(target_node); + target_node = NULL; + t->buffer->transaction = NULL; + binder_alloc_free_buf(&target_proc->alloc, t->buffer); +err_binder_alloc_buf_failed: +err_bad_extra_size: + if (lsmctx.context) + security_release_secctx(&lsmctx); +err_get_secctx_failed: + kfree(tcomplete); + binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); +err_alloc_tcomplete_failed: + if (trace_binder_txn_latency_free_enabled()) + binder_txn_latency_free(t); +err_bad_todo_list: +err_bad_call_stack: +err_empty_call_stack: +err_dead_binder: +err_invalid_target_handle: + if (target_node) { + binder_dec_node(target_node, 1, 0); + binder_dec_node_tmpref(target_node); + } + + binder_netlink_report(proc, t, tr->data_size, return_error); + kfree(t); + binder_stats_deleted(BINDER_STAT_TRANSACTION); +err_alloc_t_failed: + + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "%d:%d transaction %s to %d:%d failed %d/%d/%d, code %u size %lld-%lld line %d\n", + proc->pid, thread->pid, reply ? "reply" : + (tr->flags & TF_ONE_WAY ? "async" : "call"), + target_proc ? target_proc->pid : 0, + target_thread ? target_thread->pid : 0, + t_debug_id, return_error, return_error_param, + tr->code, (u64)tr->data_size, (u64)tr->offsets_size, + return_error_line); + + if (target_thread) + binder_thread_dec_tmpref(target_thread); + if (target_proc) + binder_proc_dec_tmpref(target_proc); + + { + struct binder_transaction_log_entry *fe; + + e->return_error = return_error; + e->return_error_param = return_error_param; + e->return_error_line = return_error_line; + fe = binder_transaction_log_add(&binder_transaction_log_failed); + *fe = *e; + /* + * write barrier to synchronize with initialization + * of log entry + */ + smp_wmb(); + WRITE_ONCE(e->debug_id_done, t_debug_id); + WRITE_ONCE(fe->debug_id_done, t_debug_id); + } + + BUG_ON(thread->return_error.cmd != BR_OK); + if (in_reply_to) { + binder_set_txn_from_error(in_reply_to, t_debug_id, + return_error, return_error_param); + thread->return_error.cmd = BR_TRANSACTION_COMPLETE; + binder_enqueue_thread_work(thread, &thread->return_error.work); + binder_send_failed_reply(in_reply_to, return_error); + } else { + binder_inner_proc_lock(proc); + binder_set_extended_error(&thread->ee, t_debug_id, + return_error, return_error_param); + binder_inner_proc_unlock(proc); + thread->return_error.cmd = return_error; + binder_enqueue_thread_work(thread, &thread->return_error.work); + } +} + +static int +binder_request_freeze_notification(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_handle_cookie *handle_cookie) +{ + struct binder_ref_freeze *freeze; + struct binder_ref *ref; + + freeze = kzalloc(sizeof(*freeze), GFP_KERNEL); + if (!freeze) + return -ENOMEM; + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, handle_cookie->handle, false); + if (!ref) { + binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION invalid ref %d\n", + proc->pid, thread->pid, handle_cookie->handle); + binder_proc_unlock(proc); + kfree(freeze); + return -EINVAL; + } + + binder_node_lock(ref->node); + if (ref->freeze) { + binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION already set\n", + proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + kfree(freeze); + return -EINVAL; + } + + binder_stats_created(BINDER_STAT_FREEZE); + INIT_LIST_HEAD(&freeze->work.entry); + freeze->cookie = handle_cookie->cookie; + freeze->work.type = BINDER_WORK_FROZEN_BINDER; + ref->freeze = freeze; + + if (ref->node->proc) { + binder_inner_proc_lock(ref->node->proc); + freeze->is_frozen = ref->node->proc->is_frozen; + binder_inner_proc_unlock(ref->node->proc); + + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked(&freeze->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + binder_inner_proc_unlock(proc); + } + + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + return 0; +} + +static int +binder_clear_freeze_notification(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_handle_cookie *handle_cookie) +{ + struct binder_ref_freeze *freeze; + struct binder_ref *ref; + + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, handle_cookie->handle, false); + if (!ref) { + binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION invalid ref %d\n", + proc->pid, thread->pid, handle_cookie->handle); + binder_proc_unlock(proc); + return -EINVAL; + } + + binder_node_lock(ref->node); + + if (!ref->freeze) { + binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification not active\n", + proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + return -EINVAL; + } + freeze = ref->freeze; + binder_inner_proc_lock(proc); + if (freeze->cookie != handle_cookie->cookie) { + binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, (u64)freeze->cookie, + (u64)handle_cookie->cookie); + binder_inner_proc_unlock(proc); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + return -EINVAL; + } + ref->freeze = NULL; + /* + * Take the existing freeze object and overwrite its work type. There are three cases here: + * 1. No pending notification. In this case just add the work to the queue. + * 2. A notification was sent and is pending an ack from userspace. Once an ack arrives, we + * should resend with the new work type. + * 3. A notification is pending to be sent. Since the work is already in the queue, nothing + * needs to be done here. + */ + freeze->work.type = BINDER_WORK_CLEAR_FREEZE_NOTIFICATION; + if (list_empty(&freeze->work.entry)) { + binder_enqueue_work_ilocked(&freeze->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + } else if (freeze->sent) { + freeze->resend = true; + } + binder_inner_proc_unlock(proc); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + return 0; +} + +static int +binder_freeze_notification_done(struct binder_proc *proc, + struct binder_thread *thread, + binder_uintptr_t cookie) +{ + struct binder_ref_freeze *freeze = NULL; + struct binder_work *w; + + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->delivered_freeze, entry) { + struct binder_ref_freeze *tmp_freeze = + container_of(w, struct binder_ref_freeze, work); + + if (tmp_freeze->cookie == cookie) { + freeze = tmp_freeze; + break; + } + } + if (!freeze) { + binder_user_error("%d:%d BC_FREEZE_NOTIFICATION_DONE %016llx not found\n", + proc->pid, thread->pid, (u64)cookie); + binder_inner_proc_unlock(proc); + return -EINVAL; + } + binder_dequeue_work_ilocked(&freeze->work); + freeze->sent = false; + if (freeze->resend) { + freeze->resend = false; + binder_enqueue_work_ilocked(&freeze->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + } + binder_inner_proc_unlock(proc); + return 0; +} + +/** + * binder_free_buf() - free the specified buffer + * @proc: binder proc that owns buffer + * @thread: binder thread performing the buffer release + * @buffer: buffer to be freed + * @is_failure: failed to send transaction + * + * If the buffer is for an async transaction, enqueue the next async + * transaction from the node. + * + * Cleanup the buffer and free it. + */ +static void +binder_free_buf(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, bool is_failure) +{ + binder_inner_proc_lock(proc); + if (buffer->transaction) { + buffer->transaction->buffer = NULL; + buffer->transaction = NULL; + } + binder_inner_proc_unlock(proc); + if (buffer->async_transaction && buffer->target_node) { + struct binder_node *buf_node; + struct binder_work *w; + + buf_node = buffer->target_node; + binder_node_inner_lock(buf_node); + BUG_ON(!buf_node->has_async_transaction); + BUG_ON(buf_node->proc != proc); + w = binder_dequeue_work_head_ilocked( + &buf_node->async_todo); + if (!w) { + buf_node->has_async_transaction = false; + } else { + binder_enqueue_work_ilocked( + w, &proc->todo); + binder_wakeup_proc_ilocked(proc); + } + binder_node_inner_unlock(buf_node); + } + trace_binder_transaction_buffer_release(buffer); + binder_release_entire_buffer(proc, thread, buffer, is_failure); + binder_alloc_free_buf(&proc->alloc, buffer); +} + +static int binder_thread_write(struct binder_proc *proc, + struct binder_thread *thread, + binder_uintptr_t binder_buffer, size_t size, + binder_size_t *consumed) +{ + uint32_t cmd; + struct binder_context *context = proc->context; + void __user *buffer = (void __user *)(uintptr_t)binder_buffer; + void __user *ptr = buffer + *consumed; + void __user *end = buffer + size; + + while (ptr < end && thread->return_error.cmd == BR_OK) { + int ret; + + if (get_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + trace_binder_command(cmd); + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { + atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]); + } + switch (cmd) { + case BC_INCREFS: + case BC_ACQUIRE: + case BC_RELEASE: + case BC_DECREFS: { + uint32_t target; + const char *debug_string; + bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE; + bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE; + struct binder_ref_data rdata; + + if (get_user(target, (uint32_t __user *)ptr)) + return -EFAULT; + + ptr += sizeof(uint32_t); + ret = -1; + if (increment && !target) { + struct binder_node *ctx_mgr_node; + + mutex_lock(&context->context_mgr_node_lock); + ctx_mgr_node = context->binder_context_mgr_node; + if (ctx_mgr_node) { + if (ctx_mgr_node->proc == proc) { + binder_user_error("%d:%d context manager tried to acquire desc 0\n", + proc->pid, thread->pid); + mutex_unlock(&context->context_mgr_node_lock); + return -EINVAL; + } + ret = binder_inc_ref_for_node( + proc, ctx_mgr_node, + strong, NULL, &rdata); + } + mutex_unlock(&context->context_mgr_node_lock); + } + if (ret) + ret = binder_update_ref_for_handle( + proc, target, increment, strong, + &rdata); + if (!ret && rdata.desc != target) { + binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n", + proc->pid, thread->pid, + target, rdata.desc); + } + switch (cmd) { + case BC_INCREFS: + debug_string = "IncRefs"; + break; + case BC_ACQUIRE: + debug_string = "Acquire"; + break; + case BC_RELEASE: + debug_string = "Release"; + break; + case BC_DECREFS: + default: + debug_string = "DecRefs"; + break; + } + if (ret) { + binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n", + proc->pid, thread->pid, debug_string, + strong, target, ret); + break; + } + binder_debug(BINDER_DEBUG_USER_REFS, + "%d:%d %s ref %d desc %d s %d w %d\n", + proc->pid, thread->pid, debug_string, + rdata.debug_id, rdata.desc, rdata.strong, + rdata.weak); + break; + } + case BC_INCREFS_DONE: + case BC_ACQUIRE_DONE: { + binder_uintptr_t node_ptr; + binder_uintptr_t cookie; + struct binder_node *node; + bool free_node; + + if (get_user(node_ptr, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + node = binder_get_node(proc, node_ptr); + if (node == NULL) { + binder_user_error("%d:%d %s u%016llx no match\n", + proc->pid, thread->pid, + cmd == BC_INCREFS_DONE ? + "BC_INCREFS_DONE" : + "BC_ACQUIRE_DONE", + (u64)node_ptr); + break; + } + if (cookie != node->cookie) { + binder_user_error("%d:%d %s u%016llx node %d cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, + cmd == BC_INCREFS_DONE ? + "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", + (u64)node_ptr, node->debug_id, + (u64)cookie, (u64)node->cookie); + binder_put_node(node); + break; + } + binder_node_inner_lock(node); + if (cmd == BC_ACQUIRE_DONE) { + if (node->pending_strong_ref == 0) { + binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n", + proc->pid, thread->pid, + node->debug_id); + binder_node_inner_unlock(node); + binder_put_node(node); + break; + } + node->pending_strong_ref = 0; + } else { + if (node->pending_weak_ref == 0) { + binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n", + proc->pid, thread->pid, + node->debug_id); + binder_node_inner_unlock(node); + binder_put_node(node); + break; + } + node->pending_weak_ref = 0; + } + free_node = binder_dec_node_nilocked(node, + cmd == BC_ACQUIRE_DONE, 0); + WARN_ON(free_node); + binder_debug(BINDER_DEBUG_USER_REFS, + "%d:%d %s node %d ls %d lw %d tr %d\n", + proc->pid, thread->pid, + cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", + node->debug_id, node->local_strong_refs, + node->local_weak_refs, node->tmp_refs); + binder_node_inner_unlock(node); + binder_put_node(node); + break; + } + case BC_ATTEMPT_ACQUIRE: + pr_err("BC_ATTEMPT_ACQUIRE not supported\n"); + return -EINVAL; + case BC_ACQUIRE_RESULT: + pr_err("BC_ACQUIRE_RESULT not supported\n"); + return -EINVAL; + + case BC_FREE_BUFFER: { + binder_uintptr_t data_ptr; + struct binder_buffer *buffer; + + if (get_user(data_ptr, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + buffer = binder_alloc_prepare_to_free(&proc->alloc, + data_ptr); + if (IS_ERR_OR_NULL(buffer)) { + if (PTR_ERR(buffer) == -EPERM) { + binder_user_error( + "%d:%d BC_FREE_BUFFER matched unreturned or currently freeing buffer at offset %lx\n", + proc->pid, thread->pid, + (unsigned long)data_ptr - proc->alloc.vm_start); + } else { + binder_user_error( + "%d:%d BC_FREE_BUFFER no match for buffer at offset %lx\n", + proc->pid, thread->pid, + (unsigned long)data_ptr - proc->alloc.vm_start); + } + break; + } + binder_debug(BINDER_DEBUG_FREE_BUFFER, + "%d:%d BC_FREE_BUFFER at offset %lx found buffer %d for %s transaction\n", + proc->pid, thread->pid, + (unsigned long)data_ptr - proc->alloc.vm_start, + buffer->debug_id, + buffer->transaction ? "active" : "finished"); + binder_free_buf(proc, thread, buffer, false); + break; + } + + case BC_TRANSACTION_SG: + case BC_REPLY_SG: { + struct binder_transaction_data_sg tr; + + if (copy_from_user(&tr, ptr, sizeof(tr))) + return -EFAULT; + ptr += sizeof(tr); + binder_transaction(proc, thread, &tr.transaction_data, + cmd == BC_REPLY_SG, tr.buffers_size); + break; + } + case BC_TRANSACTION: + case BC_REPLY: { + struct binder_transaction_data tr; + + if (copy_from_user(&tr, ptr, sizeof(tr))) + return -EFAULT; + ptr += sizeof(tr); + binder_transaction(proc, thread, &tr, + cmd == BC_REPLY, 0); + break; + } + + case BC_REGISTER_LOOPER: + binder_debug(BINDER_DEBUG_THREADS, + "%d:%d BC_REGISTER_LOOPER\n", + proc->pid, thread->pid); + binder_inner_proc_lock(proc); + if (thread->looper & BINDER_LOOPER_STATE_ENTERED) { + thread->looper |= BINDER_LOOPER_STATE_INVALID; + binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n", + proc->pid, thread->pid); + } else if (proc->requested_threads == 0) { + thread->looper |= BINDER_LOOPER_STATE_INVALID; + binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called without request\n", + proc->pid, thread->pid); + } else { + proc->requested_threads--; + proc->requested_threads_started++; + } + thread->looper |= BINDER_LOOPER_STATE_REGISTERED; + binder_inner_proc_unlock(proc); + break; + case BC_ENTER_LOOPER: + binder_debug(BINDER_DEBUG_THREADS, + "%d:%d BC_ENTER_LOOPER\n", + proc->pid, thread->pid); + if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) { + thread->looper |= BINDER_LOOPER_STATE_INVALID; + binder_user_error("%d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n", + proc->pid, thread->pid); + } + thread->looper |= BINDER_LOOPER_STATE_ENTERED; + break; + case BC_EXIT_LOOPER: + binder_debug(BINDER_DEBUG_THREADS, + "%d:%d BC_EXIT_LOOPER\n", + proc->pid, thread->pid); + thread->looper |= BINDER_LOOPER_STATE_EXITED; + break; + + case BC_REQUEST_DEATH_NOTIFICATION: + case BC_CLEAR_DEATH_NOTIFICATION: { + uint32_t target; + binder_uintptr_t cookie; + struct binder_ref *ref; + struct binder_ref_death *death = NULL; + + if (get_user(target, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { + /* + * Allocate memory for death notification + * before taking lock + */ + death = kzalloc(sizeof(*death), GFP_KERNEL); + if (death == NULL) { + WARN_ON(thread->return_error.cmd != + BR_OK); + thread->return_error.cmd = BR_ERROR; + binder_enqueue_thread_work( + thread, + &thread->return_error.work); + binder_debug( + BINDER_DEBUG_FAILED_TRANSACTION, + "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", + proc->pid, thread->pid); + break; + } + } + binder_proc_lock(proc); + ref = binder_get_ref_olocked(proc, target, false); + if (ref == NULL) { + binder_user_error("%d:%d %s invalid ref %d\n", + proc->pid, thread->pid, + cmd == BC_REQUEST_DEATH_NOTIFICATION ? + "BC_REQUEST_DEATH_NOTIFICATION" : + "BC_CLEAR_DEATH_NOTIFICATION", + target); + binder_proc_unlock(proc); + kfree(death); + break; + } + + binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, + "%d:%d %s %016llx ref %d desc %d s %d w %d for node %d\n", + proc->pid, thread->pid, + cmd == BC_REQUEST_DEATH_NOTIFICATION ? + "BC_REQUEST_DEATH_NOTIFICATION" : + "BC_CLEAR_DEATH_NOTIFICATION", + (u64)cookie, ref->data.debug_id, + ref->data.desc, ref->data.strong, + ref->data.weak, ref->node->debug_id); + + binder_node_lock(ref->node); + if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { + if (ref->death) { + binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n", + proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + kfree(death); + break; + } + binder_stats_created(BINDER_STAT_DEATH); + INIT_LIST_HEAD(&death->work.entry); + death->cookie = cookie; + ref->death = death; + if (ref->node->proc == NULL) { + ref->death->work.type = BINDER_WORK_DEAD_BINDER; + + binder_inner_proc_lock(proc); + binder_enqueue_work_ilocked( + &ref->death->work, &proc->todo); + binder_wakeup_proc_ilocked(proc); + binder_inner_proc_unlock(proc); + } + } else { + if (ref->death == NULL) { + binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n", + proc->pid, thread->pid); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + break; + } + death = ref->death; + if (death->cookie != cookie) { + binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, + (u64)death->cookie, + (u64)cookie); + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + break; + } + ref->death = NULL; + binder_inner_proc_lock(proc); + if (list_empty(&death->work.entry)) { + death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_thread_work_ilocked( + thread, + &death->work); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); + binder_wakeup_proc_ilocked( + proc); + } + } else { + BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER); + death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR; + } + binder_inner_proc_unlock(proc); + } + binder_node_unlock(ref->node); + binder_proc_unlock(proc); + } break; + case BC_DEAD_BINDER_DONE: { + struct binder_work *w; + binder_uintptr_t cookie; + struct binder_ref_death *death = NULL; + + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + + ptr += sizeof(cookie); + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->delivered_death, + entry) { + struct binder_ref_death *tmp_death = + container_of(w, + struct binder_ref_death, + work); + + if (tmp_death->cookie == cookie) { + death = tmp_death; + break; + } + } + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n", + proc->pid, thread->pid, (u64)cookie, + death); + if (death == NULL) { + binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n", + proc->pid, thread->pid, (u64)cookie); + binder_inner_proc_unlock(proc); + break; + } + binder_dequeue_work_ilocked(&death->work); + if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) { + death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION; + if (thread->looper & + (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) + binder_enqueue_thread_work_ilocked( + thread, &death->work); + else { + binder_enqueue_work_ilocked( + &death->work, + &proc->todo); + binder_wakeup_proc_ilocked(proc); + } + } + binder_inner_proc_unlock(proc); + } break; + + case BC_REQUEST_FREEZE_NOTIFICATION: { + struct binder_handle_cookie handle_cookie; + int error; + + if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie))) + return -EFAULT; + ptr += sizeof(handle_cookie); + error = binder_request_freeze_notification(proc, thread, + &handle_cookie); + if (error) + return error; + } break; + + case BC_CLEAR_FREEZE_NOTIFICATION: { + struct binder_handle_cookie handle_cookie; + int error; + + if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie))) + return -EFAULT; + ptr += sizeof(handle_cookie); + error = binder_clear_freeze_notification(proc, thread, &handle_cookie); + if (error) + return error; + } break; + + case BC_FREEZE_NOTIFICATION_DONE: { + binder_uintptr_t cookie; + int error; + + if (get_user(cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + + ptr += sizeof(cookie); + error = binder_freeze_notification_done(proc, thread, cookie); + if (error) + return error; + } break; + + default: + pr_err("%d:%d unknown command %u\n", + proc->pid, thread->pid, cmd); + return -EINVAL; + } + *consumed = ptr - buffer; + } + return 0; +} + +static void binder_stat_br(struct binder_proc *proc, + struct binder_thread *thread, uint32_t cmd) +{ + trace_binder_return(cmd); + if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { + atomic_inc(&binder_stats.br[_IOC_NR(cmd)]); + atomic_inc(&proc->stats.br[_IOC_NR(cmd)]); + atomic_inc(&thread->stats.br[_IOC_NR(cmd)]); + } +} + +static int binder_put_node_cmd(struct binder_proc *proc, + struct binder_thread *thread, + void __user **ptrp, + binder_uintptr_t node_ptr, + binder_uintptr_t node_cookie, + int node_debug_id, + uint32_t cmd, const char *cmd_name) +{ + void __user *ptr = *ptrp; + + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + + if (put_user(node_ptr, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + if (put_user(node_cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + + binder_stat_br(proc, thread, cmd); + binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n", + proc->pid, thread->pid, cmd_name, node_debug_id, + (u64)node_ptr, (u64)node_cookie); + + *ptrp = ptr; + return 0; +} + +static int binder_wait_for_work(struct binder_thread *thread, + bool do_proc_work) +{ + DEFINE_WAIT(wait); + struct binder_proc *proc = thread->proc; + int ret = 0; + + binder_inner_proc_lock(proc); + for (;;) { + prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE); + if (binder_has_work_ilocked(thread, do_proc_work)) + break; + if (do_proc_work) + list_add(&thread->waiting_thread_node, + &proc->waiting_threads); + binder_inner_proc_unlock(proc); + schedule(); + binder_inner_proc_lock(proc); + list_del_init(&thread->waiting_thread_node); + if (signal_pending(current)) { + ret = -EINTR; + break; + } + } + finish_wait(&thread->wait, &wait); + binder_inner_proc_unlock(proc); + + return ret; +} + +/** + * binder_apply_fd_fixups() - finish fd translation + * @proc: binder_proc associated @t->buffer + * @t: binder transaction with list of fd fixups + * + * Now that we are in the context of the transaction target + * process, we can allocate and install fds. Process the + * list of fds to translate and fixup the buffer with the + * new fds first and only then install the files. + * + * If we fail to allocate an fd, skip the install and release + * any fds that have already been allocated. + * + * Return: 0 on success, a negative errno code on failure. + */ +static int binder_apply_fd_fixups(struct binder_proc *proc, + struct binder_transaction *t) +{ + struct binder_txn_fd_fixup *fixup, *tmp; + int ret = 0; + + list_for_each_entry(fixup, &t->fd_fixups, fixup_entry) { + int fd = get_unused_fd_flags(O_CLOEXEC); + + if (fd < 0) { + binder_debug(BINDER_DEBUG_TRANSACTION, + "failed fd fixup txn %d fd %d\n", + t->debug_id, fd); + ret = -ENOMEM; + goto err; + } + binder_debug(BINDER_DEBUG_TRANSACTION, + "fd fixup txn %d fd %d\n", + t->debug_id, fd); + trace_binder_transaction_fd_recv(t, fd, fixup->offset); + fixup->target_fd = fd; + if (binder_alloc_copy_to_buffer(&proc->alloc, t->buffer, + fixup->offset, &fd, + sizeof(u32))) { + ret = -EINVAL; + goto err; + } + } + list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) { + fd_install(fixup->target_fd, fixup->file); + list_del(&fixup->fixup_entry); + kfree(fixup); + } + + return ret; + +err: + binder_free_txn_fixups(t); + return ret; +} + +static int binder_thread_read(struct binder_proc *proc, + struct binder_thread *thread, + binder_uintptr_t binder_buffer, size_t size, + binder_size_t *consumed, int non_block) +{ + void __user *buffer = (void __user *)(uintptr_t)binder_buffer; + void __user *ptr = buffer + *consumed; + void __user *end = buffer + size; + + int ret = 0; + int wait_for_proc_work; + + if (*consumed == 0) { + if (put_user(BR_NOOP, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + } + +retry: + binder_inner_proc_lock(proc); + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); + binder_inner_proc_unlock(proc); + + thread->looper |= BINDER_LOOPER_STATE_WAITING; + + trace_binder_wait_for_work(wait_for_proc_work, + !!thread->transaction_stack, + !binder_worklist_empty(proc, &thread->todo)); + if (wait_for_proc_work) { + if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED))) { + binder_user_error("%d:%d ERROR: Thread waiting for process work before calling BC_REGISTER_LOOPER or BC_ENTER_LOOPER (state %x)\n", + proc->pid, thread->pid, thread->looper); + wait_event_interruptible(binder_user_error_wait, + binder_stop_on_user_error < 2); + } + binder_set_nice(proc->default_priority); + } + + if (non_block) { + if (!binder_has_work(thread, wait_for_proc_work)) + ret = -EAGAIN; + } else { + ret = binder_wait_for_work(thread, wait_for_proc_work); + } + + thread->looper &= ~BINDER_LOOPER_STATE_WAITING; + + if (ret) + return ret; + + while (1) { + uint32_t cmd; + struct binder_transaction_data_secctx tr; + struct binder_transaction_data *trd = &tr.transaction_data; + struct binder_work *w = NULL; + struct list_head *list = NULL; + struct binder_transaction *t = NULL; + struct binder_thread *t_from; + size_t trsize = sizeof(*trd); + + binder_inner_proc_lock(proc); + if (!binder_worklist_empty_ilocked(&thread->todo)) + list = &thread->todo; + else if (!binder_worklist_empty_ilocked(&proc->todo) && + wait_for_proc_work) + list = &proc->todo; + else { + binder_inner_proc_unlock(proc); + + /* no data added */ + if (ptr - buffer == 4 && !thread->looper_need_return) + goto retry; + break; + } + + if (end - ptr < sizeof(tr) + 4) { + binder_inner_proc_unlock(proc); + break; + } + w = binder_dequeue_work_head_ilocked(list); + if (binder_worklist_empty_ilocked(&thread->todo)) + thread->process_todo = false; + + switch (w->type) { + case BINDER_WORK_TRANSACTION: { + binder_inner_proc_unlock(proc); + t = container_of(w, struct binder_transaction, work); + } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + WARN_ON(e->cmd == BR_OK); + binder_inner_proc_unlock(proc); + if (put_user(e->cmd, (uint32_t __user *)ptr)) + return -EFAULT; + cmd = e->cmd; + e->cmd = BR_OK; + ptr += sizeof(uint32_t); + + binder_stat_br(proc, thread, cmd); + } break; + case BINDER_WORK_TRANSACTION_COMPLETE: + case BINDER_WORK_TRANSACTION_PENDING: + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: { + if (proc->oneway_spam_detection_enabled && + w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT) + cmd = BR_ONEWAY_SPAM_SUSPECT; + else if (w->type == BINDER_WORK_TRANSACTION_PENDING) + cmd = BR_TRANSACTION_PENDING_FROZEN; + else + cmd = BR_TRANSACTION_COMPLETE; + binder_inner_proc_unlock(proc); + kfree(w); + binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + + binder_stat_br(proc, thread, cmd); + binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE, + "%d:%d BR_TRANSACTION_COMPLETE\n", + proc->pid, thread->pid); + } break; + case BINDER_WORK_NODE: { + struct binder_node *node = container_of(w, struct binder_node, work); + int strong, weak; + binder_uintptr_t node_ptr = node->ptr; + binder_uintptr_t node_cookie = node->cookie; + int node_debug_id = node->debug_id; + int has_weak_ref; + int has_strong_ref; + void __user *orig_ptr = ptr; + + BUG_ON(proc != node->proc); + strong = node->internal_strong_refs || + node->local_strong_refs; + weak = !hlist_empty(&node->refs) || + node->local_weak_refs || + node->tmp_refs || strong; + has_strong_ref = node->has_strong_ref; + has_weak_ref = node->has_weak_ref; + + if (weak && !has_weak_ref) { + node->has_weak_ref = 1; + node->pending_weak_ref = 1; + node->local_weak_refs++; + } + if (strong && !has_strong_ref) { + node->has_strong_ref = 1; + node->pending_strong_ref = 1; + node->local_strong_refs++; + } + if (!strong && has_strong_ref) + node->has_strong_ref = 0; + if (!weak && has_weak_ref) + node->has_weak_ref = 0; + if (!weak && !strong) { + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx deleted\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + rb_erase(&node->rb_node, &proc->nodes); + binder_inner_proc_unlock(proc); + binder_node_lock(node); + /* + * Acquire the node lock before freeing the + * node to serialize with other threads that + * may have been holding the node lock while + * decrementing this node (avoids race where + * this thread frees while the other thread + * is unlocking the node after the final + * decrement) + */ + binder_node_unlock(node); + binder_free_node(node); + } else + binder_inner_proc_unlock(proc); + + if (weak && !has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_INCREFS, "BR_INCREFS"); + if (!ret && strong && !has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_ACQUIRE, "BR_ACQUIRE"); + if (!ret && !strong && has_strong_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_RELEASE, "BR_RELEASE"); + if (!ret && !weak && has_weak_ref) + ret = binder_put_node_cmd( + proc, thread, &ptr, node_ptr, + node_cookie, node_debug_id, + BR_DECREFS, "BR_DECREFS"); + if (orig_ptr == ptr) + binder_debug(BINDER_DEBUG_INTERNAL_REFS, + "%d:%d node %d u%016llx c%016llx state unchanged\n", + proc->pid, thread->pid, + node_debug_id, + (u64)node_ptr, + (u64)node_cookie); + if (ret) + return ret; + } break; + case BINDER_WORK_DEAD_BINDER: + case BINDER_WORK_DEAD_BINDER_AND_CLEAR: + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { + struct binder_ref_death *death; + uint32_t cmd; + binder_uintptr_t cookie; + + death = container_of(w, struct binder_ref_death, work); + if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) + cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE; + else + cmd = BR_DEAD_BINDER; + cookie = death->cookie; + + binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, + "%d:%d %s %016llx\n", + proc->pid, thread->pid, + cmd == BR_DEAD_BINDER ? + "BR_DEAD_BINDER" : + "BR_CLEAR_DEATH_NOTIFICATION_DONE", + (u64)cookie); + if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) { + binder_inner_proc_unlock(proc); + kfree(death); + binder_stats_deleted(BINDER_STAT_DEATH); + } else { + binder_enqueue_work_ilocked( + w, &proc->delivered_death); + binder_inner_proc_unlock(proc); + } + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (put_user(cookie, + (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + binder_stat_br(proc, thread, cmd); + if (cmd == BR_DEAD_BINDER) + goto done; /* DEAD_BINDER notifications can cause transactions */ + } break; + + case BINDER_WORK_FROZEN_BINDER: { + struct binder_ref_freeze *freeze; + struct binder_frozen_state_info info; + + memset(&info, 0, sizeof(info)); + freeze = container_of(w, struct binder_ref_freeze, work); + info.is_frozen = freeze->is_frozen; + info.cookie = freeze->cookie; + freeze->sent = true; + binder_enqueue_work_ilocked(w, &proc->delivered_freeze); + binder_inner_proc_unlock(proc); + + if (put_user(BR_FROZEN_BINDER, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (copy_to_user(ptr, &info, sizeof(info))) + return -EFAULT; + ptr += sizeof(info); + binder_stat_br(proc, thread, BR_FROZEN_BINDER); + goto done; /* BR_FROZEN_BINDER notifications can cause transactions */ + } break; + + case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: { + struct binder_ref_freeze *freeze = + container_of(w, struct binder_ref_freeze, work); + binder_uintptr_t cookie = freeze->cookie; + + binder_inner_proc_unlock(proc); + kfree(freeze); + binder_stats_deleted(BINDER_STAT_FREEZE); + if (put_user(BR_CLEAR_FREEZE_NOTIFICATION_DONE, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + if (put_user(cookie, (binder_uintptr_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(binder_uintptr_t); + binder_stat_br(proc, thread, BR_CLEAR_FREEZE_NOTIFICATION_DONE); + } break; + + default: + binder_inner_proc_unlock(proc); + pr_err("%d:%d: bad work type %d\n", + proc->pid, thread->pid, w->type); + break; + } + + if (!t) + continue; + + BUG_ON(t->buffer == NULL); + if (t->buffer->target_node) { + struct binder_node *target_node = t->buffer->target_node; + + trd->target.ptr = target_node->ptr; + trd->cookie = target_node->cookie; + t->saved_priority = task_nice(current); + if (t->priority < target_node->min_priority && + !(t->flags & TF_ONE_WAY)) + binder_set_nice(t->priority); + else if (!(t->flags & TF_ONE_WAY) || + t->saved_priority > target_node->min_priority) + binder_set_nice(target_node->min_priority); + cmd = BR_TRANSACTION; + } else { + trd->target.ptr = 0; + trd->cookie = 0; + cmd = BR_REPLY; + } + trd->code = t->code; + trd->flags = t->flags; + trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid); + + t_from = binder_get_txn_from(t); + if (t_from) { + struct task_struct *sender = t_from->proc->tsk; + + trd->sender_pid = + task_tgid_nr_ns(sender, + task_active_pid_ns(current)); + } else { + trd->sender_pid = 0; + } + + ret = binder_apply_fd_fixups(proc, t); + if (ret) { + struct binder_buffer *buffer = t->buffer; + bool oneway = !!(t->flags & TF_ONE_WAY); + int tid = t->debug_id; + + if (t_from) + binder_thread_dec_tmpref(t_from); + buffer->transaction = NULL; + binder_cleanup_transaction(t, "fd fixups failed", + BR_FAILED_REPLY); + binder_free_buf(proc, thread, buffer, true); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n", + proc->pid, thread->pid, + oneway ? "async " : + (cmd == BR_REPLY ? "reply " : ""), + tid, BR_FAILED_REPLY, ret, __LINE__); + if (cmd == BR_REPLY) { + cmd = BR_FAILED_REPLY; + if (put_user(cmd, (uint32_t __user *)ptr)) + return -EFAULT; + ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, cmd); + break; + } + continue; + } + trd->data_size = t->buffer->data_size; + trd->offsets_size = t->buffer->offsets_size; + trd->data.ptr.buffer = t->buffer->user_data; + trd->data.ptr.offsets = trd->data.ptr.buffer + + ALIGN(t->buffer->data_size, + sizeof(void *)); + + tr.secctx = t->security_ctx; + if (t->security_ctx) { + cmd = BR_TRANSACTION_SEC_CTX; + trsize = sizeof(tr); + } + if (put_user(cmd, (uint32_t __user *)ptr)) { + if (t_from) + binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "put_user failed", + BR_FAILED_REPLY); + + return -EFAULT; + } + ptr += sizeof(uint32_t); + if (copy_to_user(ptr, &tr, trsize)) { + if (t_from) + binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "copy_to_user failed", + BR_FAILED_REPLY); + + return -EFAULT; + } + ptr += trsize; + + trace_binder_transaction_received(t); + binder_stat_br(proc, thread, cmd); + binder_debug(BINDER_DEBUG_TRANSACTION, + "%d:%d %s %d %d:%d, cmd %u size %zd-%zd\n", + proc->pid, thread->pid, + (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" : + (cmd == BR_TRANSACTION_SEC_CTX) ? + "BR_TRANSACTION_SEC_CTX" : "BR_REPLY", + t->debug_id, t_from ? t_from->proc->pid : 0, + t_from ? t_from->pid : 0, cmd, + t->buffer->data_size, t->buffer->offsets_size); + + if (t_from) + binder_thread_dec_tmpref(t_from); + t->buffer->allow_user_free = 1; + if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) { + binder_inner_proc_lock(thread->proc); + t->to_parent = thread->transaction_stack; + t->to_thread = thread; + thread->transaction_stack = t; + binder_inner_proc_unlock(thread->proc); + } else { + binder_free_transaction(t); + } + break; + } + +done: + + *consumed = ptr - buffer; + binder_inner_proc_lock(proc); + if (proc->requested_threads == 0 && + list_empty(&thread->proc->waiting_threads) && + proc->requested_threads_started < proc->max_threads && + (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | + BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */ + /*spawn a new thread if we leave this out */) { + proc->requested_threads++; + binder_inner_proc_unlock(proc); + binder_debug(BINDER_DEBUG_THREADS, + "%d:%d BR_SPAWN_LOOPER\n", + proc->pid, thread->pid); + if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) + return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); + } else + binder_inner_proc_unlock(proc); + return 0; +} + +static void binder_release_work(struct binder_proc *proc, + struct list_head *list) +{ + struct binder_work *w; + enum binder_work_type wtype; + + while (1) { + binder_inner_proc_lock(proc); + w = binder_dequeue_work_head_ilocked(list); + wtype = w ? w->type : 0; + binder_inner_proc_unlock(proc); + if (!w) + return; + + switch (wtype) { + case BINDER_WORK_TRANSACTION: { + struct binder_transaction *t; + + t = container_of(w, struct binder_transaction, work); + + binder_cleanup_transaction(t, "process died.", + BR_DEAD_REPLY); + } break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered TRANSACTION_ERROR: %u\n", + e->cmd); + } break; + case BINDER_WORK_TRANSACTION_PENDING: + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: + case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered TRANSACTION_COMPLETE\n"); + kfree(w); + binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); + } break; + case BINDER_WORK_DEAD_BINDER_AND_CLEAR: + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { + struct binder_ref_death *death; + + death = container_of(w, struct binder_ref_death, work); + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered death notification, %016llx\n", + (u64)death->cookie); + kfree(death); + binder_stats_deleted(BINDER_STAT_DEATH); + } break; + case BINDER_WORK_NODE: + break; + case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: { + struct binder_ref_freeze *freeze; + + freeze = container_of(w, struct binder_ref_freeze, work); + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered freeze notification, %016llx\n", + (u64)freeze->cookie); + kfree(freeze); + binder_stats_deleted(BINDER_STAT_FREEZE); + } break; + default: + pr_err("unexpected work type, %d, not freed\n", + wtype); + break; + } + } + +} + +static struct binder_thread *binder_get_thread_ilocked( + struct binder_proc *proc, struct binder_thread *new_thread) +{ + struct binder_thread *thread = NULL; + struct rb_node *parent = NULL; + struct rb_node **p = &proc->threads.rb_node; + + while (*p) { + parent = *p; + thread = rb_entry(parent, struct binder_thread, rb_node); + + if (current->pid < thread->pid) + p = &(*p)->rb_left; + else if (current->pid > thread->pid) + p = &(*p)->rb_right; + else + return thread; + } + if (!new_thread) + return NULL; + thread = new_thread; + binder_stats_created(BINDER_STAT_THREAD); + thread->proc = proc; + thread->pid = current->pid; + atomic_set(&thread->tmp_ref, 0); + init_waitqueue_head(&thread->wait); + INIT_LIST_HEAD(&thread->todo); + rb_link_node(&thread->rb_node, parent, p); + rb_insert_color(&thread->rb_node, &proc->threads); + thread->looper_need_return = true; + thread->return_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->return_error.cmd = BR_OK; + thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR; + thread->reply_error.cmd = BR_OK; + thread->ee.command = BR_OK; + INIT_LIST_HEAD(&new_thread->waiting_thread_node); + return thread; +} + +static struct binder_thread *binder_get_thread(struct binder_proc *proc) +{ + struct binder_thread *thread; + struct binder_thread *new_thread; + + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, NULL); + binder_inner_proc_unlock(proc); + if (!thread) { + new_thread = kzalloc(sizeof(*thread), GFP_KERNEL); + if (new_thread == NULL) + return NULL; + binder_inner_proc_lock(proc); + thread = binder_get_thread_ilocked(proc, new_thread); + binder_inner_proc_unlock(proc); + if (thread != new_thread) + kfree(new_thread); + } + return thread; +} + +static void binder_free_proc(struct binder_proc *proc) +{ + struct binder_device *device; + + BUG_ON(!list_empty(&proc->todo)); + BUG_ON(!list_empty(&proc->delivered_death)); + if (proc->outstanding_txns) + pr_warn("%s: Unexpected outstanding_txns %d\n", + __func__, proc->outstanding_txns); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + binder_remove_device(device); + kfree(proc->context->name); + kfree(device); + } + binder_alloc_deferred_release(&proc->alloc); + put_task_struct(proc->tsk); + put_cred(proc->cred); + binder_stats_deleted(BINDER_STAT_PROC); + dbitmap_free(&proc->dmap); + kfree(proc); +} + +static void binder_free_thread(struct binder_thread *thread) +{ + BUG_ON(!list_empty(&thread->todo)); + binder_stats_deleted(BINDER_STAT_THREAD); + binder_proc_dec_tmpref(thread->proc); + kfree(thread); +} + +static int binder_thread_release(struct binder_proc *proc, + struct binder_thread *thread) +{ + struct binder_transaction *t; + struct binder_transaction *send_reply = NULL; + int active_transactions = 0; + struct binder_transaction *last_t = NULL; + + binder_inner_proc_lock(thread->proc); + /* + * take a ref on the proc so it survives + * after we remove this thread from proc->threads. + * The corresponding dec is when we actually + * free the thread in binder_free_thread() + */ + proc->tmp_ref++; + /* + * take a ref on this thread to ensure it + * survives while we are releasing it + */ + atomic_inc(&thread->tmp_ref); + rb_erase(&thread->rb_node, &proc->threads); + t = thread->transaction_stack; + if (t) { + spin_lock(&t->lock); + if (t->to_thread == thread) + send_reply = t; + } else { + __acquire(&t->lock); + } + thread->is_dead = true; + + while (t) { + last_t = t; + active_transactions++; + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "release %d:%d transaction %d %s, still active\n", + proc->pid, thread->pid, + t->debug_id, + (t->to_thread == thread) ? "in" : "out"); + + if (t->to_thread == thread) { + thread->proc->outstanding_txns--; + t->to_proc = NULL; + t->to_thread = NULL; + if (t->buffer) { + t->buffer->transaction = NULL; + t->buffer = NULL; + } + t = t->to_parent; + } else if (t->from == thread) { + t->from = NULL; + t = t->from_parent; + } else + BUG(); + spin_unlock(&last_t->lock); + if (t) + spin_lock(&t->lock); + else + __acquire(&t->lock); + } + /* annotation for sparse, lock not acquired in last iteration above */ + __release(&t->lock); + + /* + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); + + binder_inner_proc_unlock(thread->proc); + + /* + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL) + synchronize_rcu(); + + if (send_reply) + binder_send_failed_reply(send_reply, BR_DEAD_REPLY); + binder_release_work(proc, &thread->todo); + binder_thread_dec_tmpref(thread); + return active_transactions; +} + +static __poll_t binder_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct binder_proc *proc = filp->private_data; + struct binder_thread *thread = NULL; + bool wait_for_proc_work; + + thread = binder_get_thread(proc); + if (!thread) + return EPOLLERR; + + binder_inner_proc_lock(thread->proc); + thread->looper |= BINDER_LOOPER_STATE_POLL; + wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); + + binder_inner_proc_unlock(thread->proc); + + poll_wait(filp, &thread->wait, wait); + + if (binder_has_work(thread, wait_for_proc_work)) + return EPOLLIN; + + return 0; +} + +static int binder_ioctl_write_read(struct file *filp, unsigned long arg, + struct binder_thread *thread) +{ + int ret = 0; + struct binder_proc *proc = filp->private_data; + void __user *ubuf = (void __user *)arg; + struct binder_write_read bwr; + + if (copy_from_user(&bwr, ubuf, sizeof(bwr))) + return -EFAULT; + + binder_debug(BINDER_DEBUG_READ_WRITE, + "%d:%d write %lld at %016llx, read %lld at %016llx\n", + proc->pid, thread->pid, + (u64)bwr.write_size, (u64)bwr.write_buffer, + (u64)bwr.read_size, (u64)bwr.read_buffer); + + if (bwr.write_size > 0) { + ret = binder_thread_write(proc, thread, + bwr.write_buffer, + bwr.write_size, + &bwr.write_consumed); + trace_binder_write_done(ret); + if (ret < 0) { + bwr.read_consumed = 0; + goto out; + } + } + if (bwr.read_size > 0) { + ret = binder_thread_read(proc, thread, bwr.read_buffer, + bwr.read_size, + &bwr.read_consumed, + filp->f_flags & O_NONBLOCK); + trace_binder_read_done(ret); + binder_inner_proc_lock(proc); + if (!binder_worklist_empty_ilocked(&proc->todo)) + binder_wakeup_proc_ilocked(proc); + binder_inner_proc_unlock(proc); + if (ret < 0) + goto out; + } + binder_debug(BINDER_DEBUG_READ_WRITE, + "%d:%d wrote %lld of %lld, read return %lld of %lld\n", + proc->pid, thread->pid, + (u64)bwr.write_consumed, (u64)bwr.write_size, + (u64)bwr.read_consumed, (u64)bwr.read_size); +out: + if (copy_to_user(ubuf, &bwr, sizeof(bwr))) + ret = -EFAULT; + return ret; +} + +static int binder_ioctl_set_ctx_mgr(struct file *filp, + struct flat_binder_object *fbo) +{ + int ret = 0; + struct binder_proc *proc = filp->private_data; + struct binder_context *context = proc->context; + struct binder_node *new_node; + kuid_t curr_euid = current_euid(); + + guard(mutex)(&context->context_mgr_node_lock); + if (context->binder_context_mgr_node) { + pr_err("BINDER_SET_CONTEXT_MGR already set\n"); + return -EBUSY; + } + ret = security_binder_set_context_mgr(proc->cred); + if (ret < 0) + return ret; + if (uid_valid(context->binder_context_mgr_uid)) { + if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) { + pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n", + from_kuid(&init_user_ns, curr_euid), + from_kuid(&init_user_ns, + context->binder_context_mgr_uid)); + return -EPERM; + } + } else { + context->binder_context_mgr_uid = curr_euid; + } + new_node = binder_new_node(proc, fbo); + if (!new_node) + return -ENOMEM; + binder_node_lock(new_node); + new_node->local_weak_refs++; + new_node->local_strong_refs++; + new_node->has_strong_ref = 1; + new_node->has_weak_ref = 1; + context->binder_context_mgr_node = new_node; + binder_node_unlock(new_node); + binder_put_node(new_node); + return ret; +} + +static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc, + struct binder_node_info_for_ref *info) +{ + struct binder_node *node; + struct binder_context *context = proc->context; + __u32 handle = info->handle; + + if (info->strong_count || info->weak_count || info->reserved1 || + info->reserved2 || info->reserved3) { + binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.", + proc->pid); + return -EINVAL; + } + + /* This ioctl may only be used by the context manager */ + mutex_lock(&context->context_mgr_node_lock); + if (!context->binder_context_mgr_node || + context->binder_context_mgr_node->proc != proc) { + mutex_unlock(&context->context_mgr_node_lock); + return -EPERM; + } + mutex_unlock(&context->context_mgr_node_lock); + + node = binder_get_node_from_ref(proc, handle, true, NULL); + if (!node) + return -EINVAL; + + info->strong_count = node->local_strong_refs + + node->internal_strong_refs; + info->weak_count = node->local_weak_refs; + + binder_put_node(node); + + return 0; +} + +static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, + struct binder_node_debug_info *info) +{ + struct rb_node *n; + binder_uintptr_t ptr = info->ptr; + + memset(info, 0, sizeof(*info)); + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { + struct binder_node *node = rb_entry(n, struct binder_node, + rb_node); + if (node->ptr > ptr) { + info->ptr = node->ptr; + info->cookie = node->cookie; + info->has_strong_ref = node->has_strong_ref; + info->has_weak_ref = node->has_weak_ref; + break; + } + } + binder_inner_proc_unlock(proc); + + return 0; +} + +static bool binder_txns_pending_ilocked(struct binder_proc *proc) +{ + struct rb_node *n; + struct binder_thread *thread; + + if (proc->outstanding_txns > 0) + return true; + + for (n = rb_first(&proc->threads); n; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->transaction_stack) + return true; + } + return false; +} + +static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen) +{ + struct binder_node *prev = NULL; + struct rb_node *n; + struct binder_ref *ref; + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->nodes); n; n = rb_next(n)) { + struct binder_node *node; + + node = rb_entry(n, struct binder_node, rb_node); + binder_inc_node_tmpref_ilocked(node); + binder_inner_proc_unlock(proc); + if (prev) + binder_put_node(prev); + binder_node_lock(node); + hlist_for_each_entry(ref, &node->refs, node_entry) { + /* + * Need the node lock to synchronize + * with new notification requests and the + * inner lock to synchronize with queued + * freeze notifications. + */ + binder_inner_proc_lock(ref->proc); + if (!ref->freeze) { + binder_inner_proc_unlock(ref->proc); + continue; + } + ref->freeze->work.type = BINDER_WORK_FROZEN_BINDER; + if (list_empty(&ref->freeze->work.entry)) { + ref->freeze->is_frozen = is_frozen; + binder_enqueue_work_ilocked(&ref->freeze->work, &ref->proc->todo); + binder_wakeup_proc_ilocked(ref->proc); + } else { + if (ref->freeze->sent && ref->freeze->is_frozen != is_frozen) + ref->freeze->resend = true; + ref->freeze->is_frozen = is_frozen; + } + binder_inner_proc_unlock(ref->proc); + } + prev = node; + binder_node_unlock(node); + binder_inner_proc_lock(proc); + if (proc->is_dead) + break; + } + binder_inner_proc_unlock(proc); + if (prev) + binder_put_node(prev); +} + +static int binder_ioctl_freeze(struct binder_freeze_info *info, + struct binder_proc *target_proc) +{ + int ret = 0; + + if (!info->enable) { + binder_inner_proc_lock(target_proc); + target_proc->sync_recv = false; + target_proc->async_recv = false; + target_proc->is_frozen = false; + binder_inner_proc_unlock(target_proc); + binder_add_freeze_work(target_proc, false); + return 0; + } + + /* + * Freezing the target. Prevent new transactions by + * setting frozen state. If timeout specified, wait + * for transactions to drain. + */ + binder_inner_proc_lock(target_proc); + target_proc->sync_recv = false; + target_proc->async_recv = false; + target_proc->is_frozen = true; + binder_inner_proc_unlock(target_proc); + + if (info->timeout_ms > 0) + ret = wait_event_interruptible_timeout( + target_proc->freeze_wait, + (!target_proc->outstanding_txns), + msecs_to_jiffies(info->timeout_ms)); + + /* Check pending transactions that wait for reply */ + if (ret >= 0) { + binder_inner_proc_lock(target_proc); + if (binder_txns_pending_ilocked(target_proc)) + ret = -EAGAIN; + binder_inner_proc_unlock(target_proc); + } + + if (ret < 0) { + binder_inner_proc_lock(target_proc); + target_proc->is_frozen = false; + binder_inner_proc_unlock(target_proc); + } else { + binder_add_freeze_work(target_proc, true); + } + + return ret; +} + +static int binder_ioctl_get_freezer_info( + struct binder_frozen_status_info *info) +{ + struct binder_proc *target_proc; + bool found = false; + __u32 txns_pending; + + info->sync_recv = 0; + info->async_recv = 0; + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(target_proc, &binder_procs, proc_node) { + if (target_proc->pid == info->pid) { + found = true; + binder_inner_proc_lock(target_proc); + txns_pending = binder_txns_pending_ilocked(target_proc); + info->sync_recv |= target_proc->sync_recv | + (txns_pending << 1); + info->async_recv |= target_proc->async_recv; + binder_inner_proc_unlock(target_proc); + } + } + mutex_unlock(&binder_procs_lock); + + if (!found) + return -EINVAL; + + return 0; +} + +static int binder_ioctl_get_extended_error(struct binder_thread *thread, + void __user *ubuf) +{ + struct binder_extended_error ee; + + binder_inner_proc_lock(thread->proc); + ee = thread->ee; + binder_set_extended_error(&thread->ee, 0, BR_OK, 0); + binder_inner_proc_unlock(thread->proc); + + if (copy_to_user(ubuf, &ee, sizeof(ee))) + return -EFAULT; + + return 0; +} + +static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int ret; + struct binder_proc *proc = filp->private_data; + struct binder_thread *thread; + void __user *ubuf = (void __user *)arg; + + trace_binder_ioctl(cmd, arg); + + ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); + if (ret) + goto err_unlocked; + + thread = binder_get_thread(proc); + if (thread == NULL) { + ret = -ENOMEM; + goto err; + } + + switch (cmd) { + case BINDER_WRITE_READ: + ret = binder_ioctl_write_read(filp, arg, thread); + if (ret) + goto err; + break; + case BINDER_SET_MAX_THREADS: { + u32 max_threads; + + if (copy_from_user(&max_threads, ubuf, + sizeof(max_threads))) { + ret = -EINVAL; + goto err; + } + binder_inner_proc_lock(proc); + proc->max_threads = max_threads; + binder_inner_proc_unlock(proc); + break; + } + case BINDER_SET_CONTEXT_MGR_EXT: { + struct flat_binder_object fbo; + + if (copy_from_user(&fbo, ubuf, sizeof(fbo))) { + ret = -EINVAL; + goto err; + } + ret = binder_ioctl_set_ctx_mgr(filp, &fbo); + if (ret) + goto err; + break; + } + case BINDER_SET_CONTEXT_MGR: + ret = binder_ioctl_set_ctx_mgr(filp, NULL); + if (ret) + goto err; + break; + case BINDER_THREAD_EXIT: + binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n", + proc->pid, thread->pid); + binder_thread_release(proc, thread); + thread = NULL; + break; + case BINDER_VERSION: { + struct binder_version __user *ver = ubuf; + + if (put_user(BINDER_CURRENT_PROTOCOL_VERSION, + &ver->protocol_version)) { + ret = -EINVAL; + goto err; + } + break; + } + case BINDER_GET_NODE_INFO_FOR_REF: { + struct binder_node_info_for_ref info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_info_for_ref(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + break; + } + case BINDER_GET_NODE_DEBUG_INFO: { + struct binder_node_debug_info info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_debug_info(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + break; + } + case BINDER_FREEZE: { + struct binder_freeze_info info; + struct binder_proc **target_procs = NULL, *target_proc; + int target_procs_count = 0, i = 0; + + ret = 0; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(target_proc, &binder_procs, proc_node) { + if (target_proc->pid == info.pid) + target_procs_count++; + } + + if (target_procs_count == 0) { + mutex_unlock(&binder_procs_lock); + ret = -EINVAL; + goto err; + } + + target_procs = kcalloc(target_procs_count, + sizeof(struct binder_proc *), + GFP_KERNEL); + + if (!target_procs) { + mutex_unlock(&binder_procs_lock); + ret = -ENOMEM; + goto err; + } + + hlist_for_each_entry(target_proc, &binder_procs, proc_node) { + if (target_proc->pid != info.pid) + continue; + + binder_inner_proc_lock(target_proc); + target_proc->tmp_ref++; + binder_inner_proc_unlock(target_proc); + + target_procs[i++] = target_proc; + } + mutex_unlock(&binder_procs_lock); + + for (i = 0; i < target_procs_count; i++) { + if (ret >= 0) + ret = binder_ioctl_freeze(&info, + target_procs[i]); + + binder_proc_dec_tmpref(target_procs[i]); + } + + kfree(target_procs); + + if (ret < 0) + goto err; + break; + } + case BINDER_GET_FROZEN_INFO: { + struct binder_frozen_status_info info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_freezer_info(&info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + break; + } + case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { + uint32_t enable; + + if (copy_from_user(&enable, ubuf, sizeof(enable))) { + ret = -EFAULT; + goto err; + } + binder_inner_proc_lock(proc); + proc->oneway_spam_detection_enabled = (bool)enable; + binder_inner_proc_unlock(proc); + break; + } + case BINDER_GET_EXTENDED_ERROR: + ret = binder_ioctl_get_extended_error(thread, ubuf); + if (ret < 0) + goto err; + break; + default: + ret = -EINVAL; + goto err; + } + ret = 0; +err: + if (thread) + thread->looper_need_return = false; + wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); + if (ret && ret != -EINTR) + pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); +err_unlocked: + trace_binder_ioctl_done(ret); + return ret; +} + +static void binder_vma_open(struct vm_area_struct *vma) +{ + struct binder_proc *proc = vma->vm_private_data; + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "%d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n", + proc->pid, vma->vm_start, vma->vm_end, + (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, + (unsigned long)pgprot_val(vma->vm_page_prot)); +} + +static void binder_vma_close(struct vm_area_struct *vma) +{ + struct binder_proc *proc = vma->vm_private_data; + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "%d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n", + proc->pid, vma->vm_start, vma->vm_end, + (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, + (unsigned long)pgprot_val(vma->vm_page_prot)); + binder_alloc_vma_close(&proc->alloc); +} + +VISIBLE_IF_KUNIT vm_fault_t binder_vm_fault(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} +EXPORT_SYMBOL_IF_KUNIT(binder_vm_fault); + +static const struct vm_operations_struct binder_vm_ops = { + .open = binder_vma_open, + .close = binder_vma_close, + .fault = binder_vm_fault, +}; + +static int binder_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct binder_proc *proc = filp->private_data; + + if (proc->tsk != current->group_leader) + return -EINVAL; + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", + __func__, proc->pid, vma->vm_start, vma->vm_end, + (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, + (unsigned long)pgprot_val(vma->vm_page_prot)); + + if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) { + pr_err("%s: %d %lx-%lx %s failed %d\n", __func__, + proc->pid, vma->vm_start, vma->vm_end, "bad vm_flags", -EPERM); + return -EPERM; + } + vm_flags_mod(vma, VM_DONTCOPY | VM_MIXEDMAP, VM_MAYWRITE); + + vma->vm_ops = &binder_vm_ops; + vma->vm_private_data = proc; + + return binder_alloc_mmap_handler(&proc->alloc, vma); +} + +static int binder_open(struct inode *nodp, struct file *filp) +{ + struct binder_proc *proc, *itr; + struct binder_device *binder_dev; + struct binderfs_info *info; + struct dentry *binder_binderfs_dir_entry_proc = NULL; + bool existing_pid = false; + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, + current->group_leader->pid, current->pid); + + proc = kzalloc(sizeof(*proc), GFP_KERNEL); + if (proc == NULL) + return -ENOMEM; + + dbitmap_init(&proc->dmap); + spin_lock_init(&proc->inner_lock); + spin_lock_init(&proc->outer_lock); + get_task_struct(current->group_leader); + proc->tsk = current->group_leader; + proc->cred = get_cred(filp->f_cred); + INIT_LIST_HEAD(&proc->todo); + init_waitqueue_head(&proc->freeze_wait); + proc->default_priority = task_nice(current); + /* binderfs stashes devices in i_private */ + if (is_binderfs_device(nodp)) { + binder_dev = nodp->i_private; + info = nodp->i_sb->s_fs_info; + binder_binderfs_dir_entry_proc = info->proc_log_dir; + } else { + binder_dev = container_of(filp->private_data, + struct binder_device, miscdev); + } + refcount_inc(&binder_dev->ref); + proc->context = &binder_dev->context; + binder_alloc_init(&proc->alloc); + + binder_stats_created(BINDER_STAT_PROC); + proc->pid = current->group_leader->pid; + INIT_LIST_HEAD(&proc->delivered_death); + INIT_LIST_HEAD(&proc->delivered_freeze); + INIT_LIST_HEAD(&proc->waiting_threads); + filp->private_data = proc; + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(itr, &binder_procs, proc_node) { + if (itr->pid == proc->pid) { + existing_pid = true; + break; + } + } + hlist_add_head(&proc->proc_node, &binder_procs); + mutex_unlock(&binder_procs_lock); + + if (binder_debugfs_dir_entry_proc && !existing_pid) { + char strbuf[11]; + + snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * proc debug entries are shared between contexts. + * Only create for the first PID to avoid debugfs log spamming + * The printing code will anyway print all contexts for a given + * PID so this is not a problem. + */ + proc->debugfs_entry = debugfs_create_file(strbuf, 0444, + binder_debugfs_dir_entry_proc, + (void *)(unsigned long)proc->pid, + &proc_fops); + } + + if (binder_binderfs_dir_entry_proc && !existing_pid) { + char strbuf[11]; + struct dentry *binderfs_entry; + + snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * Similar to debugfs, the process specific log file is shared + * between contexts. Only create for the first PID. + * This is ok since same as debugfs, the log file will contain + * information on all contexts of a given PID. + */ + binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc, + strbuf, &proc_fops, (void *)(unsigned long)proc->pid); + if (!IS_ERR(binderfs_entry)) { + proc->binderfs_entry = binderfs_entry; + } else { + int error; + + error = PTR_ERR(binderfs_entry); + pr_warn("Unable to create file %s in binderfs (error %d)\n", + strbuf, error); + } + } + + return 0; +} + +static int binder_flush(struct file *filp, fl_owner_t id) +{ + struct binder_proc *proc = filp->private_data; + + binder_defer_work(proc, BINDER_DEFERRED_FLUSH); + + return 0; +} + +static void binder_deferred_flush(struct binder_proc *proc) +{ + struct rb_node *n; + int wake_count = 0; + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { + struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node); + + thread->looper_need_return = true; + if (thread->looper & BINDER_LOOPER_STATE_WAITING) { + wake_up_interruptible(&thread->wait); + wake_count++; + } + } + binder_inner_proc_unlock(proc); + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "binder_flush: %d woke %d threads\n", proc->pid, + wake_count); +} + +static int binder_release(struct inode *nodp, struct file *filp) +{ + struct binder_proc *proc = filp->private_data; + + debugfs_remove(proc->debugfs_entry); + + if (proc->binderfs_entry) { + simple_recursive_removal(proc->binderfs_entry, NULL); + proc->binderfs_entry = NULL; + } + + binder_defer_work(proc, BINDER_DEFERRED_RELEASE); + + return 0; +} + +static int binder_node_release(struct binder_node *node, int refs) +{ + struct binder_ref *ref; + int death = 0; + struct binder_proc *proc = node->proc; + + binder_release_work(proc, &node->async_todo); + + binder_node_lock(node); + binder_inner_proc_lock(proc); + binder_dequeue_work_ilocked(&node->work); + /* + * The caller must have taken a temporary ref on the node, + */ + BUG_ON(!node->tmp_refs); + if (hlist_empty(&node->refs) && node->tmp_refs == 1) { + binder_inner_proc_unlock(proc); + binder_node_unlock(node); + binder_free_node(node); + + return refs; + } + + node->proc = NULL; + node->local_strong_refs = 0; + node->local_weak_refs = 0; + binder_inner_proc_unlock(proc); + + spin_lock(&binder_dead_nodes_lock); + hlist_add_head(&node->dead_node, &binder_dead_nodes); + spin_unlock(&binder_dead_nodes_lock); + + hlist_for_each_entry(ref, &node->refs, node_entry) { + refs++; + /* + * Need the node lock to synchronize + * with new notification requests and the + * inner lock to synchronize with queued + * death notifications. + */ + binder_inner_proc_lock(ref->proc); + if (!ref->death) { + binder_inner_proc_unlock(ref->proc); + continue; + } + + death++; + + BUG_ON(!list_empty(&ref->death->work.entry)); + ref->death->work.type = BINDER_WORK_DEAD_BINDER; + binder_enqueue_work_ilocked(&ref->death->work, + &ref->proc->todo); + binder_wakeup_proc_ilocked(ref->proc); + binder_inner_proc_unlock(ref->proc); + } + + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "node %d now dead, refs %d, death %d\n", + node->debug_id, refs, death); + binder_node_unlock(node); + binder_put_node(node); + + return refs; +} + +static void binder_deferred_release(struct binder_proc *proc) +{ + struct binder_context *context = proc->context; + struct rb_node *n; + int threads, nodes, incoming_refs, outgoing_refs, active_transactions; + + mutex_lock(&binder_procs_lock); + hlist_del(&proc->proc_node); + mutex_unlock(&binder_procs_lock); + + mutex_lock(&context->context_mgr_node_lock); + if (context->binder_context_mgr_node && + context->binder_context_mgr_node->proc == proc) { + binder_debug(BINDER_DEBUG_DEAD_BINDER, + "%s: %d context_mgr_node gone\n", + __func__, proc->pid); + context->binder_context_mgr_node = NULL; + } + mutex_unlock(&context->context_mgr_node_lock); + binder_inner_proc_lock(proc); + /* + * Make sure proc stays alive after we + * remove all the threads + */ + proc->tmp_ref++; + + proc->is_dead = true; + proc->is_frozen = false; + proc->sync_recv = false; + proc->async_recv = false; + threads = 0; + active_transactions = 0; + while ((n = rb_first(&proc->threads))) { + struct binder_thread *thread; + + thread = rb_entry(n, struct binder_thread, rb_node); + binder_inner_proc_unlock(proc); + threads++; + active_transactions += binder_thread_release(proc, thread); + binder_inner_proc_lock(proc); + } + + nodes = 0; + incoming_refs = 0; + while ((n = rb_first(&proc->nodes))) { + struct binder_node *node; + + node = rb_entry(n, struct binder_node, rb_node); + nodes++; + /* + * take a temporary ref on the node before + * calling binder_node_release() which will either + * kfree() the node or call binder_put_node() + */ + binder_inc_node_tmpref_ilocked(node); + rb_erase(&node->rb_node, &proc->nodes); + binder_inner_proc_unlock(proc); + incoming_refs = binder_node_release(node, incoming_refs); + binder_inner_proc_lock(proc); + } + binder_inner_proc_unlock(proc); + + outgoing_refs = 0; + binder_proc_lock(proc); + while ((n = rb_first(&proc->refs_by_desc))) { + struct binder_ref *ref; + + ref = rb_entry(n, struct binder_ref, rb_node_desc); + outgoing_refs++; + binder_cleanup_ref_olocked(ref); + binder_proc_unlock(proc); + binder_free_ref(ref); + binder_proc_lock(proc); + } + binder_proc_unlock(proc); + + binder_release_work(proc, &proc->todo); + binder_release_work(proc, &proc->delivered_death); + binder_release_work(proc, &proc->delivered_freeze); + + binder_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n", + __func__, proc->pid, threads, nodes, incoming_refs, + outgoing_refs, active_transactions); + + binder_proc_dec_tmpref(proc); +} + +static void binder_deferred_func(struct work_struct *work) +{ + struct binder_proc *proc; + + int defer; + + do { + mutex_lock(&binder_deferred_lock); + if (!hlist_empty(&binder_deferred_list)) { + proc = hlist_entry(binder_deferred_list.first, + struct binder_proc, deferred_work_node); + hlist_del_init(&proc->deferred_work_node); + defer = proc->deferred_work; + proc->deferred_work = 0; + } else { + proc = NULL; + defer = 0; + } + mutex_unlock(&binder_deferred_lock); + + if (defer & BINDER_DEFERRED_FLUSH) + binder_deferred_flush(proc); + + if (defer & BINDER_DEFERRED_RELEASE) + binder_deferred_release(proc); /* frees proc */ + } while (proc); +} +static DECLARE_WORK(binder_deferred_work, binder_deferred_func); + +static void +binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) +{ + guard(mutex)(&binder_deferred_lock); + proc->deferred_work |= defer; + if (hlist_unhashed(&proc->deferred_work_node)) { + hlist_add_head(&proc->deferred_work_node, + &binder_deferred_list); + schedule_work(&binder_deferred_work); + } +} + +static void print_binder_transaction_ilocked(struct seq_file *m, + struct binder_proc *proc, + const char *prefix, + struct binder_transaction *t) +{ + struct binder_proc *to_proc; + struct binder_buffer *buffer = t->buffer; + ktime_t current_time = ktime_get(); + + spin_lock(&t->lock); + to_proc = t->to_proc; + seq_printf(m, + "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld a%d r%d elapsed %lldms", + prefix, t->debug_id, t, + t->from_pid, + t->from_tid, + to_proc ? to_proc->pid : 0, + t->to_thread ? t->to_thread->pid : 0, + t->code, t->flags, t->priority, t->is_async, t->is_reply, + ktime_ms_delta(current_time, t->start_time)); + spin_unlock(&t->lock); + + if (proc != to_proc) { + /* + * Can only safely deref buffer if we are holding the + * correct proc inner lock for this node + */ + seq_puts(m, "\n"); + return; + } + + if (buffer == NULL) { + seq_puts(m, " buffer free\n"); + return; + } + if (buffer->target_node) + seq_printf(m, " node %d", buffer->target_node->debug_id); + seq_printf(m, " size %zd:%zd offset %lx\n", + buffer->data_size, buffer->offsets_size, + buffer->user_data - proc->alloc.vm_start); +} + +static void print_binder_work_ilocked(struct seq_file *m, + struct binder_proc *proc, + const char *prefix, + const char *transaction_prefix, + struct binder_work *w, bool hash_ptrs) +{ + struct binder_node *node; + struct binder_transaction *t; + + switch (w->type) { + case BINDER_WORK_TRANSACTION: + t = container_of(w, struct binder_transaction, work); + print_binder_transaction_ilocked( + m, proc, transaction_prefix, t); + break; + case BINDER_WORK_RETURN_ERROR: { + struct binder_error *e = container_of( + w, struct binder_error, work); + + seq_printf(m, "%stransaction error: %u\n", + prefix, e->cmd); + } break; + case BINDER_WORK_TRANSACTION_COMPLETE: + seq_printf(m, "%stransaction complete\n", prefix); + break; + case BINDER_WORK_NODE: + node = container_of(w, struct binder_node, work); + if (hash_ptrs) + seq_printf(m, "%snode work %d: u%p c%p\n", + prefix, node->debug_id, + (void *)(long)node->ptr, + (void *)(long)node->cookie); + else + seq_printf(m, "%snode work %d: u%016llx c%016llx\n", + prefix, node->debug_id, + (u64)node->ptr, (u64)node->cookie); + break; + case BINDER_WORK_DEAD_BINDER: + seq_printf(m, "%shas dead binder\n", prefix); + break; + case BINDER_WORK_DEAD_BINDER_AND_CLEAR: + seq_printf(m, "%shas cleared dead binder\n", prefix); + break; + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: + seq_printf(m, "%shas cleared death notification\n", prefix); + break; + case BINDER_WORK_FROZEN_BINDER: + seq_printf(m, "%shas frozen binder\n", prefix); + break; + case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: + seq_printf(m, "%shas cleared freeze notification\n", prefix); + break; + default: + seq_printf(m, "%sunknown work: type %d\n", prefix, w->type); + break; + } +} + +static void print_binder_thread_ilocked(struct seq_file *m, + struct binder_thread *thread, + bool print_always, bool hash_ptrs) +{ + struct binder_transaction *t; + struct binder_work *w; + size_t start_pos = m->count; + size_t header_pos; + + seq_printf(m, " thread %d: l %02x need_return %d tr %d\n", + thread->pid, thread->looper, + thread->looper_need_return, + atomic_read(&thread->tmp_ref)); + header_pos = m->count; + t = thread->transaction_stack; + while (t) { + if (t->from == thread) { + print_binder_transaction_ilocked(m, thread->proc, + " outgoing transaction", t); + t = t->from_parent; + } else if (t->to_thread == thread) { + print_binder_transaction_ilocked(m, thread->proc, + " incoming transaction", t); + t = t->to_parent; + } else { + print_binder_transaction_ilocked(m, thread->proc, + " bad transaction", t); + t = NULL; + } + } + list_for_each_entry(w, &thread->todo, entry) { + print_binder_work_ilocked(m, thread->proc, " ", + " pending transaction", + w, hash_ptrs); + } + if (!print_always && m->count == header_pos) + m->count = start_pos; +} + +static void print_binder_node_nilocked(struct seq_file *m, + struct binder_node *node, + bool hash_ptrs) +{ + struct binder_ref *ref; + struct binder_work *w; + int count; + + count = hlist_count_nodes(&node->refs); + + if (hash_ptrs) + seq_printf(m, " node %d: u%p c%p", node->debug_id, + (void *)(long)node->ptr, (void *)(long)node->cookie); + else + seq_printf(m, " node %d: u%016llx c%016llx", node->debug_id, + (u64)node->ptr, (u64)node->cookie); + seq_printf(m, " hs %d hw %d ls %d lw %d is %d iw %d tr %d", + node->has_strong_ref, node->has_weak_ref, + node->local_strong_refs, node->local_weak_refs, + node->internal_strong_refs, count, node->tmp_refs); + if (count) { + seq_puts(m, " proc"); + hlist_for_each_entry(ref, &node->refs, node_entry) + seq_printf(m, " %d", ref->proc->pid); + } + seq_puts(m, "\n"); + if (node->proc) { + list_for_each_entry(w, &node->async_todo, entry) + print_binder_work_ilocked(m, node->proc, " ", + " pending async transaction", + w, hash_ptrs); + } +} + +static void print_binder_ref_olocked(struct seq_file *m, + struct binder_ref *ref) +{ + binder_node_lock(ref->node); + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", + ref->data.debug_id, ref->data.desc, + ref->node->proc ? "" : "dead ", + ref->node->debug_id, ref->data.strong, + ref->data.weak, ref->death); + binder_node_unlock(ref->node); +} + +/** + * print_next_binder_node_ilocked() - Print binder_node from a locked list + * @m: struct seq_file for output via seq_printf() + * @proc: struct binder_proc we hold the inner_proc_lock to (if any) + * @node: struct binder_node to print fields of + * @prev_node: struct binder_node we hold a temporary reference to (if any) + * @hash_ptrs: whether to hash @node's binder_uintptr_t fields + * + * Helper function to handle synchronization around printing a struct + * binder_node while iterating through @proc->nodes or the dead nodes list. + * Caller must hold either @proc->inner_lock (for live nodes) or + * binder_dead_nodes_lock. This lock will be released during the body of this + * function, but it will be reacquired before returning to the caller. + * + * Return: pointer to the struct binder_node we hold a tmpref on + */ +static struct binder_node * +print_next_binder_node_ilocked(struct seq_file *m, struct binder_proc *proc, + struct binder_node *node, + struct binder_node *prev_node, bool hash_ptrs) +{ + /* + * Take a temporary reference on the node so that isn't freed while + * we print it. + */ + binder_inc_node_tmpref_ilocked(node); + /* + * Live nodes need to drop the inner proc lock and dead nodes need to + * drop the binder_dead_nodes_lock before trying to take the node lock. + */ + if (proc) + binder_inner_proc_unlock(proc); + else + spin_unlock(&binder_dead_nodes_lock); + if (prev_node) + binder_put_node(prev_node); + binder_node_inner_lock(node); + print_binder_node_nilocked(m, node, hash_ptrs); + binder_node_inner_unlock(node); + if (proc) + binder_inner_proc_lock(proc); + else + spin_lock(&binder_dead_nodes_lock); + return node; +} + +static void print_binder_proc(struct seq_file *m, struct binder_proc *proc, + bool print_all, bool hash_ptrs) +{ + struct binder_work *w; + struct rb_node *n; + size_t start_pos = m->count; + size_t header_pos; + struct binder_node *last_node = NULL; + + seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); + header_pos = m->count; + + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->threads); n; n = rb_next(n)) + print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread, + rb_node), print_all, hash_ptrs); + + for (n = rb_first(&proc->nodes); n; n = rb_next(n)) { + struct binder_node *node = rb_entry(n, struct binder_node, + rb_node); + if (!print_all && !node->has_async_transaction) + continue; + + last_node = print_next_binder_node_ilocked(m, proc, node, + last_node, + hash_ptrs); + } + binder_inner_proc_unlock(proc); + if (last_node) + binder_put_node(last_node); + + if (print_all) { + binder_proc_lock(proc); + for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) + print_binder_ref_olocked(m, rb_entry(n, + struct binder_ref, + rb_node_desc)); + binder_proc_unlock(proc); + } + binder_alloc_print_allocated(m, &proc->alloc); + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->todo, entry) + print_binder_work_ilocked(m, proc, " ", + " pending transaction", w, + hash_ptrs); + list_for_each_entry(w, &proc->delivered_death, entry) { + seq_puts(m, " has delivered dead binder\n"); + break; + } + list_for_each_entry(w, &proc->delivered_freeze, entry) { + seq_puts(m, " has delivered freeze binder\n"); + break; + } + binder_inner_proc_unlock(proc); + if (!print_all && m->count == header_pos) + m->count = start_pos; +} + +static const char * const binder_return_strings[] = { + "BR_ERROR", + "BR_OK", + "BR_TRANSACTION", + "BR_REPLY", + "BR_ACQUIRE_RESULT", + "BR_DEAD_REPLY", + "BR_TRANSACTION_COMPLETE", + "BR_INCREFS", + "BR_ACQUIRE", + "BR_RELEASE", + "BR_DECREFS", + "BR_ATTEMPT_ACQUIRE", + "BR_NOOP", + "BR_SPAWN_LOOPER", + "BR_FINISHED", + "BR_DEAD_BINDER", + "BR_CLEAR_DEATH_NOTIFICATION_DONE", + "BR_FAILED_REPLY", + "BR_FROZEN_REPLY", + "BR_ONEWAY_SPAM_SUSPECT", + "BR_TRANSACTION_PENDING_FROZEN", + "BR_FROZEN_BINDER", + "BR_CLEAR_FREEZE_NOTIFICATION_DONE", +}; + +static const char * const binder_command_strings[] = { + "BC_TRANSACTION", + "BC_REPLY", + "BC_ACQUIRE_RESULT", + "BC_FREE_BUFFER", + "BC_INCREFS", + "BC_ACQUIRE", + "BC_RELEASE", + "BC_DECREFS", + "BC_INCREFS_DONE", + "BC_ACQUIRE_DONE", + "BC_ATTEMPT_ACQUIRE", + "BC_REGISTER_LOOPER", + "BC_ENTER_LOOPER", + "BC_EXIT_LOOPER", + "BC_REQUEST_DEATH_NOTIFICATION", + "BC_CLEAR_DEATH_NOTIFICATION", + "BC_DEAD_BINDER_DONE", + "BC_TRANSACTION_SG", + "BC_REPLY_SG", + "BC_REQUEST_FREEZE_NOTIFICATION", + "BC_CLEAR_FREEZE_NOTIFICATION", + "BC_FREEZE_NOTIFICATION_DONE", +}; + +static const char * const binder_objstat_strings[] = { + "proc", + "thread", + "node", + "ref", + "death", + "transaction", + "transaction_complete", + "freeze", +}; + +static void print_binder_stats(struct seq_file *m, const char *prefix, + struct binder_stats *stats) +{ + int i; + + BUILD_BUG_ON(ARRAY_SIZE(stats->bc) != + ARRAY_SIZE(binder_command_strings)); + for (i = 0; i < ARRAY_SIZE(stats->bc); i++) { + int temp = atomic_read(&stats->bc[i]); + + if (temp) + seq_printf(m, "%s%s: %d\n", prefix, + binder_command_strings[i], temp); + } + + BUILD_BUG_ON(ARRAY_SIZE(stats->br) != + ARRAY_SIZE(binder_return_strings)); + for (i = 0; i < ARRAY_SIZE(stats->br); i++) { + int temp = atomic_read(&stats->br[i]); + + if (temp) + seq_printf(m, "%s%s: %d\n", prefix, + binder_return_strings[i], temp); + } + + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != + ARRAY_SIZE(binder_objstat_strings)); + BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != + ARRAY_SIZE(stats->obj_deleted)); + for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) { + int created = atomic_read(&stats->obj_created[i]); + int deleted = atomic_read(&stats->obj_deleted[i]); + + if (created || deleted) + seq_printf(m, "%s%s: active %d total %d\n", + prefix, + binder_objstat_strings[i], + created - deleted, + created); + } +} + +static void print_binder_proc_stats(struct seq_file *m, + struct binder_proc *proc) +{ + struct binder_work *w; + struct binder_thread *thread; + struct rb_node *n; + int count, strong, weak, ready_threads; + size_t free_async_space = + binder_alloc_get_free_async_space(&proc->alloc); + + seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); + count = 0; + ready_threads = 0; + binder_inner_proc_lock(proc); + for (n = rb_first(&proc->threads); n; n = rb_next(n)) + count++; + + list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node) + ready_threads++; + + seq_printf(m, " threads: %d\n", count); + seq_printf(m, " requested threads: %d+%d/%d\n" + " ready threads %d\n" + " free async space %zd\n", proc->requested_threads, + proc->requested_threads_started, proc->max_threads, + ready_threads, + free_async_space); + count = 0; + for (n = rb_first(&proc->nodes); n; n = rb_next(n)) + count++; + binder_inner_proc_unlock(proc); + seq_printf(m, " nodes: %d\n", count); + count = 0; + strong = 0; + weak = 0; + binder_proc_lock(proc); + for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { + struct binder_ref *ref = rb_entry(n, struct binder_ref, + rb_node_desc); + count++; + strong += ref->data.strong; + weak += ref->data.weak; + } + binder_proc_unlock(proc); + seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); + + count = binder_alloc_get_allocated_count(&proc->alloc); + seq_printf(m, " buffers: %d\n", count); + + binder_alloc_print_pages(m, &proc->alloc); + + count = 0; + binder_inner_proc_lock(proc); + list_for_each_entry(w, &proc->todo, entry) { + if (w->type == BINDER_WORK_TRANSACTION) + count++; + } + binder_inner_proc_unlock(proc); + seq_printf(m, " pending transactions: %d\n", count); + + print_binder_stats(m, " ", &proc->stats); +} + +static void print_binder_state(struct seq_file *m, bool hash_ptrs) +{ + struct binder_proc *proc; + struct binder_node *node; + struct binder_node *last_node = NULL; + + seq_puts(m, "binder state:\n"); + + spin_lock(&binder_dead_nodes_lock); + if (!hlist_empty(&binder_dead_nodes)) + seq_puts(m, "dead nodes:\n"); + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) + last_node = print_next_binder_node_ilocked(m, NULL, node, + last_node, + hash_ptrs); + spin_unlock(&binder_dead_nodes_lock); + if (last_node) + binder_put_node(last_node); + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, true, hash_ptrs); + mutex_unlock(&binder_procs_lock); +} + +static void print_binder_transactions(struct seq_file *m, bool hash_ptrs) +{ + struct binder_proc *proc; + + seq_puts(m, "binder transactions:\n"); + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc(m, proc, false, hash_ptrs); + mutex_unlock(&binder_procs_lock); +} + +static int state_show(struct seq_file *m, void *unused) +{ + print_binder_state(m, false); + return 0; +} + +static int state_hashed_show(struct seq_file *m, void *unused) +{ + print_binder_state(m, true); + return 0; +} + +static int stats_show(struct seq_file *m, void *unused) +{ + struct binder_proc *proc; + + seq_puts(m, "binder stats:\n"); + + print_binder_stats(m, "", &binder_stats); + + mutex_lock(&binder_procs_lock); + hlist_for_each_entry(proc, &binder_procs, proc_node) + print_binder_proc_stats(m, proc); + mutex_unlock(&binder_procs_lock); + + return 0; +} + +static int transactions_show(struct seq_file *m, void *unused) +{ + print_binder_transactions(m, false); + return 0; +} + +static int transactions_hashed_show(struct seq_file *m, void *unused) +{ + print_binder_transactions(m, true); + return 0; +} + +static int proc_show(struct seq_file *m, void *unused) +{ + struct binder_proc *itr; + int pid = (unsigned long)m->private; + + guard(mutex)(&binder_procs_lock); + hlist_for_each_entry(itr, &binder_procs, proc_node) { + if (itr->pid == pid) { + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, itr, true, false); + } + } + + return 0; +} + +static void print_binder_transaction_log_entry(struct seq_file *m, + struct binder_transaction_log_entry *e) +{ + int debug_id = READ_ONCE(e->debug_id_done); + /* + * read barrier to guarantee debug_id_done read before + * we print the log values + */ + smp_rmb(); + seq_printf(m, + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d", + e->debug_id, (e->call_type == 2) ? "reply" : + ((e->call_type == 1) ? "async" : "call "), e->from_proc, + e->from_thread, e->to_proc, e->to_thread, e->context_name, + e->to_node, e->target_handle, e->data_size, e->offsets_size, + e->return_error, e->return_error_param, + e->return_error_line); + /* + * read-barrier to guarantee read of debug_id_done after + * done printing the fields of the entry + */ + smp_rmb(); + seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ? + "\n" : " (incomplete)\n"); +} + +static int transaction_log_show(struct seq_file *m, void *unused) +{ + struct binder_transaction_log *log = m->private; + unsigned int log_cur = atomic_read(&log->cur); + unsigned int count; + unsigned int cur; + int i; + + count = log_cur + 1; + cur = count < ARRAY_SIZE(log->entry) && !log->full ? + 0 : count % ARRAY_SIZE(log->entry); + if (count > ARRAY_SIZE(log->entry) || log->full) + count = ARRAY_SIZE(log->entry); + for (i = 0; i < count; i++) { + unsigned int index = cur++ % ARRAY_SIZE(log->entry); + + print_binder_transaction_log_entry(m, &log->entry[index]); + } + return 0; +} + +const struct file_operations binder_fops = { + .owner = THIS_MODULE, + .poll = binder_poll, + .unlocked_ioctl = binder_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .mmap = binder_mmap, + .open = binder_open, + .flush = binder_flush, + .release = binder_release, +}; + +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(state_hashed); +DEFINE_SHOW_ATTRIBUTE(stats); +DEFINE_SHOW_ATTRIBUTE(transactions); +DEFINE_SHOW_ATTRIBUTE(transactions_hashed); +DEFINE_SHOW_ATTRIBUTE(transaction_log); + +const struct binder_debugfs_entry binder_debugfs_entries[] = { + { + .name = "state", + .mode = 0444, + .fops = &state_fops, + .data = NULL, + }, + { + .name = "state_hashed", + .mode = 0444, + .fops = &state_hashed_fops, + .data = NULL, + }, + { + .name = "stats", + .mode = 0444, + .fops = &stats_fops, + .data = NULL, + }, + { + .name = "transactions", + .mode = 0444, + .fops = &transactions_fops, + .data = NULL, + }, + { + .name = "transactions_hashed", + .mode = 0444, + .fops = &transactions_hashed_fops, + .data = NULL, + }, + { + .name = "transaction_log", + .mode = 0444, + .fops = &transaction_log_fops, + .data = &binder_transaction_log, + }, + { + .name = "failed_transaction_log", + .mode = 0444, + .fops = &transaction_log_fops, + .data = &binder_transaction_log_failed, + }, + {} /* terminator */ +}; + +void binder_add_device(struct binder_device *device) +{ + guard(spinlock)(&binder_devices_lock); + hlist_add_head(&device->hlist, &binder_devices); +} + +void binder_remove_device(struct binder_device *device) +{ + guard(spinlock)(&binder_devices_lock); + hlist_del_init(&device->hlist); +} + +static int __init init_binder_device(const char *name) +{ + int ret; + struct binder_device *binder_device; + + binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL); + if (!binder_device) + return -ENOMEM; + + binder_device->miscdev.fops = &binder_fops; + binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; + binder_device->miscdev.name = name; + + refcount_set(&binder_device->ref, 1); + binder_device->context.binder_context_mgr_uid = INVALID_UID; + binder_device->context.name = name; + mutex_init(&binder_device->context.context_mgr_node_lock); + + ret = misc_register(&binder_device->miscdev); + if (ret < 0) { + kfree(binder_device); + return ret; + } + + binder_add_device(binder_device); + + return ret; +} + +static int __init binder_init(void) +{ + int ret; + char *device_name, *device_tmp; + struct binder_device *device; + struct hlist_node *tmp; + char *device_names = NULL; + const struct binder_debugfs_entry *db_entry; + + ret = binder_alloc_shrinker_init(); + if (ret) + return ret; + + atomic_set(&binder_transaction_log.cur, ~0U); + atomic_set(&binder_transaction_log_failed.cur, ~0U); + + binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); + + binder_for_each_debugfs_entry(db_entry) + debugfs_create_file(db_entry->name, + db_entry->mode, + binder_debugfs_dir_entry_root, + db_entry->data, + db_entry->fops); + + binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", + binder_debugfs_dir_entry_root); + + if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && + strcmp(binder_devices_param, "") != 0) { + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kstrdup(binder_devices_param, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } + + device_tmp = device_names; + while ((device_name = strsep(&device_tmp, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } + } + + ret = genl_register_family(&binder_nl_family); + if (ret) + goto err_init_binder_device_failed; + + ret = init_binderfs(); + if (ret) + goto err_init_binderfs_failed; + + return ret; + +err_init_binderfs_failed: + genl_unregister_family(&binder_nl_family); + +err_init_binder_device_failed: + hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { + misc_deregister(&device->miscdev); + binder_remove_device(device); + kfree(device); + } + + kfree(device_names); + +err_alloc_device_names_failed: + debugfs_remove_recursive(binder_debugfs_dir_entry_root); + binder_alloc_shrinker_exit(); + + return ret; +} + +device_initcall(binder_init); + +#define CREATE_TRACE_POINTS +#include "binder_trace.h" diff --git a/drivers/android/binder/Makefile b/drivers/android/binder/Makefile new file mode 100644 index 000000000000..09eabb527fa0 --- /dev/null +++ b/drivers/android/binder/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y += -I$(src) # needed for trace events + +obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += rust_binder.o +rust_binder-y := \ + rust_binder_main.o \ + rust_binderfs.o \ + rust_binder_events.o \ + page_range_helper.o diff --git a/drivers/android/binder/allocation.rs b/drivers/android/binder/allocation.rs new file mode 100644 index 000000000000..7f65a9c3a0e5 --- /dev/null +++ b/drivers/android/binder/allocation.rs @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use core::mem::{size_of, size_of_val, MaybeUninit}; +use core::ops::Range; + +use kernel::{ + bindings, + fs::file::{File, FileDescriptorReservation}, + prelude::*, + sync::{aref::ARef, Arc}, + transmute::{AsBytes, FromBytes}, + uaccess::UserSliceReader, + uapi, +}; + +use crate::{ + deferred_close::DeferredFdCloser, + defs::*, + node::{Node, NodeRef}, + process::Process, + DArc, +}; + +#[derive(Default)] +pub(crate) struct AllocationInfo { + /// Range within the allocation where we can find the offsets to the object descriptors. + pub(crate) offsets: Option<Range<usize>>, + /// The target node of the transaction this allocation is associated to. + /// Not set for replies. + pub(crate) target_node: Option<NodeRef>, + /// When this allocation is dropped, call `pending_oneway_finished` on the node. + /// + /// This is used to serialize oneway transaction on the same node. Binder guarantees that + /// oneway transactions to the same node are delivered sequentially in the order they are sent. + pub(crate) oneway_node: Option<DArc<Node>>, + /// Zero the data in the buffer on free. + pub(crate) clear_on_free: bool, + /// List of files embedded in this transaction. + file_list: FileList, +} + +/// Represents an allocation that the kernel is currently using. +/// +/// When allocations are idle, the range allocator holds the data related to them. +/// +/// # Invariants +/// +/// This allocation corresponds to an allocation in the range allocator, so the relevant pages are +/// marked in use in the page range. +pub(crate) struct Allocation { + pub(crate) offset: usize, + size: usize, + pub(crate) ptr: usize, + pub(crate) process: Arc<Process>, + allocation_info: Option<AllocationInfo>, + free_on_drop: bool, + pub(crate) oneway_spam_detected: bool, + #[allow(dead_code)] + pub(crate) debug_id: usize, +} + +impl Allocation { + pub(crate) fn new( + process: Arc<Process>, + debug_id: usize, + offset: usize, + size: usize, + ptr: usize, + oneway_spam_detected: bool, + ) -> Self { + Self { + process, + offset, + size, + ptr, + debug_id, + oneway_spam_detected, + allocation_info: None, + free_on_drop: true, + } + } + + fn size_check(&self, offset: usize, size: usize) -> Result { + let overflow_fail = offset.checked_add(size).is_none(); + let cmp_size_fail = offset.wrapping_add(size) > self.size; + if overflow_fail || cmp_size_fail { + return Err(EFAULT); + } + Ok(()) + } + + pub(crate) fn copy_into( + &self, + reader: &mut UserSliceReader, + offset: usize, + size: usize, + ) -> Result { + self.size_check(offset, size)?; + + // SAFETY: While this object exists, the range allocator will keep the range allocated, and + // in turn, the pages will be marked as in use. + unsafe { + self.process + .pages + .copy_from_user_slice(reader, self.offset + offset, size) + } + } + + pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> { + self.size_check(offset, size_of::<T>())?; + + // SAFETY: While this object exists, the range allocator will keep the range allocated, and + // in turn, the pages will be marked as in use. + unsafe { self.process.pages.read(self.offset + offset) } + } + + pub(crate) fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result { + self.size_check(offset, size_of_val::<T>(obj))?; + + // SAFETY: While this object exists, the range allocator will keep the range allocated, and + // in turn, the pages will be marked as in use. + unsafe { self.process.pages.write(self.offset + offset, obj) } + } + + pub(crate) fn fill_zero(&self) -> Result { + // SAFETY: While this object exists, the range allocator will keep the range allocated, and + // in turn, the pages will be marked as in use. + unsafe { self.process.pages.fill_zero(self.offset, self.size) } + } + + pub(crate) fn keep_alive(mut self) { + self.process + .buffer_make_freeable(self.offset, self.allocation_info.take()); + self.free_on_drop = false; + } + + pub(crate) fn set_info(&mut self, info: AllocationInfo) { + self.allocation_info = Some(info); + } + + pub(crate) fn get_or_init_info(&mut self) -> &mut AllocationInfo { + self.allocation_info.get_or_insert_with(Default::default) + } + + pub(crate) fn set_info_offsets(&mut self, offsets: Range<usize>) { + self.get_or_init_info().offsets = Some(offsets); + } + + pub(crate) fn set_info_oneway_node(&mut self, oneway_node: DArc<Node>) { + self.get_or_init_info().oneway_node = Some(oneway_node); + } + + pub(crate) fn set_info_clear_on_drop(&mut self) { + self.get_or_init_info().clear_on_free = true; + } + + pub(crate) fn set_info_target_node(&mut self, target_node: NodeRef) { + self.get_or_init_info().target_node = Some(target_node); + } + + /// Reserve enough space to push at least `num_fds` fds. + pub(crate) fn info_add_fd_reserve(&mut self, num_fds: usize) -> Result { + self.get_or_init_info() + .file_list + .files_to_translate + .reserve(num_fds, GFP_KERNEL)?; + + Ok(()) + } + + pub(crate) fn info_add_fd( + &mut self, + file: ARef<File>, + buffer_offset: usize, + close_on_free: bool, + ) -> Result { + self.get_or_init_info().file_list.files_to_translate.push( + FileEntry { + file, + buffer_offset, + close_on_free, + }, + GFP_KERNEL, + )?; + + Ok(()) + } + + pub(crate) fn set_info_close_on_free(&mut self, cof: FdsCloseOnFree) { + self.get_or_init_info().file_list.close_on_free = cof.0; + } + + pub(crate) fn translate_fds(&mut self) -> Result<TranslatedFds> { + let file_list = match self.allocation_info.as_mut() { + Some(info) => &mut info.file_list, + None => return Ok(TranslatedFds::new()), + }; + + let files = core::mem::take(&mut file_list.files_to_translate); + + let num_close_on_free = files.iter().filter(|entry| entry.close_on_free).count(); + let mut close_on_free = KVec::with_capacity(num_close_on_free, GFP_KERNEL)?; + + let mut reservations = KVec::with_capacity(files.len(), GFP_KERNEL)?; + for file_info in files { + let res = FileDescriptorReservation::get_unused_fd_flags(bindings::O_CLOEXEC)?; + let fd = res.reserved_fd(); + self.write::<u32>(file_info.buffer_offset, &fd)?; + + reservations.push( + Reservation { + res, + file: file_info.file, + }, + GFP_KERNEL, + )?; + if file_info.close_on_free { + close_on_free.push(fd, GFP_KERNEL)?; + } + } + + Ok(TranslatedFds { + reservations, + close_on_free: FdsCloseOnFree(close_on_free), + }) + } + + /// Should the looper return to userspace when freeing this allocation? + pub(crate) fn looper_need_return_on_free(&self) -> bool { + // Closing fds involves pushing task_work for execution when we return to userspace. Hence, + // we should return to userspace asap if we are closing fds. + match self.allocation_info { + Some(ref info) => !info.file_list.close_on_free.is_empty(), + None => false, + } + } +} + +impl Drop for Allocation { + fn drop(&mut self) { + if !self.free_on_drop { + return; + } + + if let Some(mut info) = self.allocation_info.take() { + if let Some(oneway_node) = info.oneway_node.as_ref() { + oneway_node.pending_oneway_finished(); + } + + info.target_node = None; + + if let Some(offsets) = info.offsets.clone() { + let view = AllocationView::new(self, offsets.start); + for i in offsets.step_by(size_of::<usize>()) { + if view.cleanup_object(i).is_err() { + pr_warn!("Error cleaning up object at offset {}\n", i) + } + } + } + + for &fd in &info.file_list.close_on_free { + let closer = match DeferredFdCloser::new(GFP_KERNEL) { + Ok(closer) => closer, + Err(kernel::alloc::AllocError) => { + // Ignore allocation failures. + break; + } + }; + + // Here, we ignore errors. The operation can fail if the fd is not valid, or if the + // method is called from a kthread. However, this is always called from a syscall, + // so the latter case cannot happen, and we don't care about the first case. + let _ = closer.close_fd(fd); + } + + if info.clear_on_free { + if let Err(e) = self.fill_zero() { + pr_warn!("Failed to clear data on free: {:?}", e); + } + } + } + + self.process.buffer_raw_free(self.ptr); + } +} + +/// A wrapper around `Allocation` that is being created. +/// +/// If the allocation is destroyed while wrapped in this wrapper, then the allocation will be +/// considered to be part of a failed transaction. Successful transactions avoid that by calling +/// `success`, which skips the destructor. +#[repr(transparent)] +pub(crate) struct NewAllocation(pub(crate) Allocation); + +impl NewAllocation { + pub(crate) fn success(self) -> Allocation { + // This skips the destructor. + // + // SAFETY: This type is `#[repr(transparent)]`, so the layout matches. + unsafe { core::mem::transmute(self) } + } +} + +impl core::ops::Deref for NewAllocation { + type Target = Allocation; + fn deref(&self) -> &Allocation { + &self.0 + } +} + +impl core::ops::DerefMut for NewAllocation { + fn deref_mut(&mut self) -> &mut Allocation { + &mut self.0 + } +} + +/// A view into the beginning of an allocation. +/// +/// All attempts to read or write outside of the view will fail. To intentionally access outside of +/// this view, use the `alloc` field of this struct directly. +pub(crate) struct AllocationView<'a> { + pub(crate) alloc: &'a mut Allocation, + limit: usize, +} + +impl<'a> AllocationView<'a> { + pub(crate) fn new(alloc: &'a mut Allocation, limit: usize) -> Self { + AllocationView { alloc, limit } + } + + pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> { + if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit { + return Err(EINVAL); + } + self.alloc.read(offset) + } + + pub(crate) fn write<T: AsBytes>(&self, offset: usize, obj: &T) -> Result { + if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit { + return Err(EINVAL); + } + self.alloc.write(offset, obj) + } + + pub(crate) fn copy_into( + &self, + reader: &mut UserSliceReader, + offset: usize, + size: usize, + ) -> Result { + if offset.checked_add(size).ok_or(EINVAL)? > self.limit { + return Err(EINVAL); + } + self.alloc.copy_into(reader, offset, size) + } + + pub(crate) fn transfer_binder_object( + &self, + offset: usize, + obj: &uapi::flat_binder_object, + strong: bool, + node_ref: NodeRef, + ) -> Result { + let mut newobj = FlatBinderObject::default(); + let node = node_ref.node.clone(); + if Arc::ptr_eq(&node_ref.node.owner, &self.alloc.process) { + // The receiving process is the owner of the node, so send it a binder object (instead + // of a handle). + let (ptr, cookie) = node.get_id(); + newobj.hdr.type_ = if strong { + BINDER_TYPE_BINDER + } else { + BINDER_TYPE_WEAK_BINDER + }; + newobj.flags = obj.flags; + newobj.__bindgen_anon_1.binder = ptr as _; + newobj.cookie = cookie as _; + self.write(offset, &newobj)?; + // Increment the user ref count on the node. It will be decremented as part of the + // destruction of the buffer, when we see a binder or weak-binder object. + node.update_refcount(true, 1, strong); + } else { + // The receiving process is different from the owner, so we need to insert a handle to + // the binder object. + let handle = self + .alloc + .process + .as_arc_borrow() + .insert_or_update_handle(node_ref, false)?; + newobj.hdr.type_ = if strong { + BINDER_TYPE_HANDLE + } else { + BINDER_TYPE_WEAK_HANDLE + }; + newobj.flags = obj.flags; + newobj.__bindgen_anon_1.handle = handle; + if self.write(offset, &newobj).is_err() { + // Decrement ref count on the handle we just created. + let _ = self + .alloc + .process + .as_arc_borrow() + .update_ref(handle, false, strong); + return Err(EINVAL); + } + } + + Ok(()) + } + + fn cleanup_object(&self, index_offset: usize) -> Result { + let offset = self.alloc.read(index_offset)?; + let header = self.read::<BinderObjectHeader>(offset)?; + match header.type_ { + BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => { + let obj = self.read::<FlatBinderObject>(offset)?; + let strong = header.type_ == BINDER_TYPE_BINDER; + // SAFETY: The type is `BINDER_TYPE_{WEAK_}BINDER`, so the `binder` field is + // populated. + let ptr = unsafe { obj.__bindgen_anon_1.binder }; + let cookie = obj.cookie; + self.alloc.process.update_node(ptr, cookie, strong); + Ok(()) + } + BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => { + let obj = self.read::<FlatBinderObject>(offset)?; + let strong = header.type_ == BINDER_TYPE_HANDLE; + // SAFETY: The type is `BINDER_TYPE_{WEAK_}HANDLE`, so the `handle` field is + // populated. + let handle = unsafe { obj.__bindgen_anon_1.handle }; + self.alloc + .process + .as_arc_borrow() + .update_ref(handle, false, strong) + } + _ => Ok(()), + } + } +} + +/// A binder object as it is serialized. +/// +/// # Invariants +/// +/// All bytes must be initialized, and the value of `self.hdr.type_` must be one of the allowed +/// types. +#[repr(C)] +pub(crate) union BinderObject { + hdr: uapi::binder_object_header, + fbo: uapi::flat_binder_object, + fdo: uapi::binder_fd_object, + bbo: uapi::binder_buffer_object, + fdao: uapi::binder_fd_array_object, +} + +/// A view into a `BinderObject` that can be used in a match statement. +pub(crate) enum BinderObjectRef<'a> { + Binder(&'a mut uapi::flat_binder_object), + Handle(&'a mut uapi::flat_binder_object), + Fd(&'a mut uapi::binder_fd_object), + Ptr(&'a mut uapi::binder_buffer_object), + Fda(&'a mut uapi::binder_fd_array_object), +} + +impl BinderObject { + pub(crate) fn read_from(reader: &mut UserSliceReader) -> Result<BinderObject> { + let object = Self::read_from_inner(|slice| { + let read_len = usize::min(slice.len(), reader.len()); + reader.clone_reader().read_slice(&mut slice[..read_len])?; + Ok(()) + })?; + + // If we used a object type smaller than the largest object size, then we've read more + // bytes than we needed to. However, we used `.clone_reader()` to avoid advancing the + // original reader. Now, we call `skip` so that the caller's reader is advanced by the + // right amount. + // + // The `skip` call fails if the reader doesn't have `size` bytes available. This could + // happen if the type header corresponds to an object type that is larger than the rest of + // the reader. + // + // Any extra bytes beyond the size of the object are inaccessible after this call, so + // reading them again from the `reader` later does not result in TOCTOU bugs. + reader.skip(object.size())?; + + Ok(object) + } + + /// Use the provided reader closure to construct a `BinderObject`. + /// + /// The closure should write the bytes for the object into the provided slice. + pub(crate) fn read_from_inner<R>(reader: R) -> Result<BinderObject> + where + R: FnOnce(&mut [u8; size_of::<BinderObject>()]) -> Result<()>, + { + let mut obj = MaybeUninit::<BinderObject>::zeroed(); + + // SAFETY: The lengths of `BinderObject` and `[u8; size_of::<BinderObject>()]` are equal, + // and the byte array has an alignment requirement of one, so the pointer cast is okay. + // Additionally, `obj` was initialized to zeros, so the byte array will not be + // uninitialized. + (reader)(unsafe { &mut *obj.as_mut_ptr().cast() })?; + + // SAFETY: The entire object is initialized, so accessing this field is safe. + let type_ = unsafe { obj.assume_init_ref().hdr.type_ }; + if Self::type_to_size(type_).is_none() { + // The value of `obj.hdr_type_` was invalid. + return Err(EINVAL); + } + + // SAFETY: All bytes are initialized (since we zeroed them at the start) and we checked + // that `self.hdr.type_` is one of the allowed types, so the type invariants are satisfied. + unsafe { Ok(obj.assume_init()) } + } + + pub(crate) fn as_ref(&mut self) -> BinderObjectRef<'_> { + use BinderObjectRef::*; + // SAFETY: The constructor ensures that all bytes of `self` are initialized, and all + // variants of this union accept all initialized bit patterns. + unsafe { + match self.hdr.type_ { + BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => Binder(&mut self.fbo), + BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => Handle(&mut self.fbo), + BINDER_TYPE_FD => Fd(&mut self.fdo), + BINDER_TYPE_PTR => Ptr(&mut self.bbo), + BINDER_TYPE_FDA => Fda(&mut self.fdao), + // SAFETY: By the type invariant, the value of `self.hdr.type_` cannot have any + // other value than the ones checked above. + _ => core::hint::unreachable_unchecked(), + } + } + } + + pub(crate) fn size(&self) -> usize { + // SAFETY: The entire object is initialized, so accessing this field is safe. + let type_ = unsafe { self.hdr.type_ }; + + // SAFETY: The type invariants guarantee that the type field is correct. + unsafe { Self::type_to_size(type_).unwrap_unchecked() } + } + + fn type_to_size(type_: u32) -> Option<usize> { + match type_ { + BINDER_TYPE_WEAK_BINDER => Some(size_of::<uapi::flat_binder_object>()), + BINDER_TYPE_BINDER => Some(size_of::<uapi::flat_binder_object>()), + BINDER_TYPE_WEAK_HANDLE => Some(size_of::<uapi::flat_binder_object>()), + BINDER_TYPE_HANDLE => Some(size_of::<uapi::flat_binder_object>()), + BINDER_TYPE_FD => Some(size_of::<uapi::binder_fd_object>()), + BINDER_TYPE_PTR => Some(size_of::<uapi::binder_buffer_object>()), + BINDER_TYPE_FDA => Some(size_of::<uapi::binder_fd_array_object>()), + _ => None, + } + } +} + +#[derive(Default)] +struct FileList { + files_to_translate: KVec<FileEntry>, + close_on_free: KVec<u32>, +} + +struct FileEntry { + /// The file for which a descriptor will be created in the recipient process. + file: ARef<File>, + /// The offset in the buffer where the file descriptor is stored. + buffer_offset: usize, + /// Whether this fd should be closed when the allocation is freed. + close_on_free: bool, +} + +pub(crate) struct TranslatedFds { + reservations: KVec<Reservation>, + /// If commit is called, then these fds should be closed. (If commit is not called, then they + /// shouldn't be closed.) + close_on_free: FdsCloseOnFree, +} + +struct Reservation { + res: FileDescriptorReservation, + file: ARef<File>, +} + +impl TranslatedFds { + pub(crate) fn new() -> Self { + Self { + reservations: KVec::new(), + close_on_free: FdsCloseOnFree(KVec::new()), + } + } + + pub(crate) fn commit(self) -> FdsCloseOnFree { + for entry in self.reservations { + entry.res.fd_install(entry.file); + } + + self.close_on_free + } +} + +pub(crate) struct FdsCloseOnFree(KVec<u32>); diff --git a/drivers/android/binder/context.rs b/drivers/android/binder/context.rs new file mode 100644 index 000000000000..3d135ec03ca7 --- /dev/null +++ b/drivers/android/binder/context.rs @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{ + error::Error, + list::{List, ListArc, ListLinks}, + prelude::*, + security, + str::{CStr, CString}, + sync::{Arc, Mutex}, + task::Kuid, +}; + +use crate::{error::BinderError, node::NodeRef, process::Process}; + +kernel::sync::global_lock! { + // SAFETY: We call `init` in the module initializer, so it's initialized before first use. + pub(crate) unsafe(uninit) static CONTEXTS: Mutex<ContextList> = ContextList { + list: List::new(), + }; +} + +pub(crate) struct ContextList { + list: List<Context>, +} + +pub(crate) fn get_all_contexts() -> Result<KVec<Arc<Context>>> { + let lock = CONTEXTS.lock(); + + let count = lock.list.iter().count(); + + let mut ctxs = KVec::with_capacity(count, GFP_KERNEL)?; + for ctx in &lock.list { + ctxs.push(Arc::from(ctx), GFP_KERNEL)?; + } + Ok(ctxs) +} + +/// This struct keeps track of the processes using this context, and which process is the context +/// manager. +struct Manager { + node: Option<NodeRef>, + uid: Option<Kuid>, + all_procs: List<Process>, +} + +/// There is one context per binder file (/dev/binder, /dev/hwbinder, etc) +#[pin_data] +pub(crate) struct Context { + #[pin] + manager: Mutex<Manager>, + pub(crate) name: CString, + #[pin] + links: ListLinks, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for Context { untracked; } +} +kernel::list::impl_list_item! { + impl ListItem<0> for Context { + using ListLinks { self.links }; + } +} + +impl Context { + pub(crate) fn new(name: &CStr) -> Result<Arc<Self>> { + let name = CString::try_from(name)?; + let list_ctx = ListArc::pin_init::<Error>( + try_pin_init!(Context { + name, + links <- ListLinks::new(), + manager <- kernel::new_mutex!(Manager { + all_procs: List::new(), + node: None, + uid: None, + }, "Context::manager"), + }), + GFP_KERNEL, + )?; + + let ctx = list_ctx.clone_arc(); + CONTEXTS.lock().list.push_back(list_ctx); + + Ok(ctx) + } + + /// Called when the file for this context is unlinked. + /// + /// No-op if called twice. + pub(crate) fn deregister(&self) { + // SAFETY: We never add the context to any other linked list than this one, so it is either + // in this list, or not in any list. + unsafe { CONTEXTS.lock().list.remove(self) }; + } + + pub(crate) fn register_process(self: &Arc<Self>, proc: ListArc<Process>) { + if !Arc::ptr_eq(self, &proc.ctx) { + pr_err!("Context::register_process called on the wrong context."); + return; + } + self.manager.lock().all_procs.push_back(proc); + } + + pub(crate) fn deregister_process(self: &Arc<Self>, proc: &Process) { + if !Arc::ptr_eq(self, &proc.ctx) { + pr_err!("Context::deregister_process called on the wrong context."); + return; + } + // SAFETY: We just checked that this is the right list. + unsafe { self.manager.lock().all_procs.remove(proc) }; + } + + pub(crate) fn set_manager_node(&self, node_ref: NodeRef) -> Result { + let mut manager = self.manager.lock(); + if manager.node.is_some() { + pr_warn!("BINDER_SET_CONTEXT_MGR already set"); + return Err(EBUSY); + } + security::binder_set_context_mgr(&node_ref.node.owner.cred)?; + + // If the context manager has been set before, ensure that we use the same euid. + let caller_uid = Kuid::current_euid(); + if let Some(ref uid) = manager.uid { + if *uid != caller_uid { + return Err(EPERM); + } + } + + manager.node = Some(node_ref); + manager.uid = Some(caller_uid); + Ok(()) + } + + pub(crate) fn unset_manager_node(&self) { + let node_ref = self.manager.lock().node.take(); + drop(node_ref); + } + + pub(crate) fn get_manager_node(&self, strong: bool) -> Result<NodeRef, BinderError> { + self.manager + .lock() + .node + .as_ref() + .ok_or_else(BinderError::new_dead)? + .clone(strong) + .map_err(BinderError::from) + } + + pub(crate) fn for_each_proc<F>(&self, mut func: F) + where + F: FnMut(&Process), + { + let lock = self.manager.lock(); + for proc in &lock.all_procs { + func(&proc); + } + } + + pub(crate) fn get_all_procs(&self) -> Result<KVec<Arc<Process>>> { + let lock = self.manager.lock(); + let count = lock.all_procs.iter().count(); + + let mut procs = KVec::with_capacity(count, GFP_KERNEL)?; + for proc in &lock.all_procs { + procs.push(Arc::from(proc), GFP_KERNEL)?; + } + Ok(procs) + } + + pub(crate) fn get_procs_with_pid(&self, pid: i32) -> Result<KVec<Arc<Process>>> { + let orig = self.get_all_procs()?; + let mut backing = KVec::with_capacity(orig.len(), GFP_KERNEL)?; + for proc in orig.into_iter().filter(|proc| proc.task.pid() == pid) { + backing.push(proc, GFP_KERNEL)?; + } + Ok(backing) + } +} diff --git a/drivers/android/binder/deferred_close.rs b/drivers/android/binder/deferred_close.rs new file mode 100644 index 000000000000..ac895c04d0cb --- /dev/null +++ b/drivers/android/binder/deferred_close.rs @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! Logic for closing files in a deferred manner. +//! +//! This file could make sense to have in `kernel::fs`, but it was rejected for being too +//! Binder-specific. + +use core::mem::MaybeUninit; +use kernel::{ + alloc::{AllocError, Flags}, + bindings, + prelude::*, +}; + +/// Helper used for closing file descriptors in a way that is safe even if the file is currently +/// held using `fdget`. +/// +/// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to +/// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`. +pub(crate) struct DeferredFdCloser { + inner: KBox<DeferredFdCloserInner>, +} + +/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with +/// moving it across threads. +unsafe impl Send for DeferredFdCloser {} +/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with +/// moving it across threads. +unsafe impl Sync for DeferredFdCloser {} + +/// # Invariants +/// +/// If the `file` pointer is non-null, then it points at a `struct file` and owns a refcount to +/// that file. +#[repr(C)] +struct DeferredFdCloserInner { + twork: MaybeUninit<bindings::callback_head>, + file: *mut bindings::file, +} + +impl DeferredFdCloser { + /// Create a new [`DeferredFdCloser`]. + pub(crate) fn new(flags: Flags) -> Result<Self, AllocError> { + Ok(Self { + // INVARIANT: The `file` pointer is null, so the type invariant does not apply. + inner: KBox::new( + DeferredFdCloserInner { + twork: MaybeUninit::uninit(), + file: core::ptr::null_mut(), + }, + flags, + )?, + }) + } + + /// Schedule a task work that closes the file descriptor when this task returns to userspace. + /// + /// Fails if this is called from a context where we cannot run work when returning to + /// userspace. (E.g., from a kthread.) + pub(crate) fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> { + use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME; + + // In this method, we schedule the task work before closing the file. This is because + // scheduling a task work is fallible, and we need to know whether it will fail before we + // attempt to close the file. + + // Task works are not available on kthreads. + let current = kernel::current!(); + + // Check if this is a kthread. + // SAFETY: Reading `flags` from a task is always okay. + if unsafe { ((*current.as_ptr()).flags & bindings::PF_KTHREAD) != 0 } { + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // Transfer ownership of the box's allocation to a raw pointer. This disables the + // destructor, so we must manually convert it back to a KBox to drop it. + // + // Until we convert it back to a `KBox`, there are no aliasing requirements on this + // pointer. + let inner = KBox::into_raw(self.inner); + + // The `callback_head` field is first in the struct, so this cast correctly gives us a + // pointer to the field. + let callback_head = inner.cast::<bindings::callback_head>(); + // SAFETY: This pointer offset operation does not go out-of-bounds. + let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) }; + + let current = current.as_ptr(); + + // SAFETY: This function currently has exclusive access to the `DeferredFdCloserInner`, so + // it is okay for us to perform unsynchronized writes to its `callback_head` field. + unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) }; + + // SAFETY: This inserts the `DeferredFdCloserInner` into the task workqueue for the current + // task. If this operation is successful, then this transfers exclusive ownership of the + // `callback_head` field to the C side until it calls `do_close_fd`, and we don't touch or + // invalidate the field during that time. + // + // When the C side calls `do_close_fd`, the safety requirements of that method are + // satisfied because when a task work is executed, the callback is given ownership of the + // pointer. + // + // The file pointer is currently null. If it is changed to be non-null before `do_close_fd` + // is called, then that change happens due to the write at the end of this function, and + // that write has a safety comment that explains why the refcount can be dropped when + // `do_close_fd` runs. + let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) }; + + if res != 0 { + // SAFETY: Scheduling the task work failed, so we still have ownership of the box, so + // we may destroy it. + unsafe { drop(KBox::from_raw(inner)) }; + + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // This removes the fd from the fd table in `current`. The file is not fully closed until + // `filp_close` is called. We are given ownership of one refcount to the file. + // + // SAFETY: This is safe no matter what `fd` is. If the `fd` is valid (that is, if the + // pointer is non-null), then we call `filp_close` on the returned pointer as required by + // `file_close_fd`. + let file = unsafe { bindings::file_close_fd(fd) }; + if file.is_null() { + // We don't clean up the task work since that might be expensive if the task work queue + // is long. Just let it execute and let it clean up for itself. + return Err(DeferredFdCloseError::BadFd); + } + + // Acquire a second refcount to the file. + // + // SAFETY: The `file` pointer points at a file with a non-zero refcount. + unsafe { bindings::get_file(file) }; + + // This method closes the fd, consuming one of our two refcounts. There could be active + // light refcounts created from that fd, so we must ensure that the file has a positive + // refcount for the duration of those active light refcounts. We do that by holding on to + // the second refcount until the current task returns to userspace. + // + // SAFETY: The `file` pointer is valid. Passing `current->files` as the file table to close + // it in is correct, since we just got the `fd` from `file_close_fd` which also uses + // `current->files`. + // + // Note: fl_owner_t is currently a void pointer. + unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) }; + + // We update the file pointer that the task work is supposed to fput. This transfers + // ownership of our last refcount. + // + // INVARIANT: This changes the `file` field of a `DeferredFdCloserInner` from null to + // non-null. This doesn't break the type invariant for `DeferredFdCloserInner` because we + // still own a refcount to the file, so we can pass ownership of that refcount to the + // `DeferredFdCloserInner`. + // + // When `do_close_fd` runs, it must be safe for it to `fput` the refcount. However, this is + // the case because all light refcounts that are associated with the fd we closed + // previously must be dropped when `do_close_fd`, since light refcounts must be dropped + // before returning to userspace. + // + // SAFETY: Task works are executed on the current thread right before we return to + // userspace, so this write is guaranteed to happen before `do_close_fd` is called, which + // means that a race is not possible here. + unsafe { *file_field = file }; + + Ok(()) + } + + /// # Safety + /// + /// The provided pointer must point at the `twork` field of a `DeferredFdCloserInner` stored in + /// a `KBox`, and the caller must pass exclusive ownership of that `KBox`. Furthermore, if the + /// file pointer is non-null, then it must be okay to release the refcount by calling `fput`. + unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) { + // SAFETY: The caller just passed us ownership of this box. + let inner = unsafe { KBox::from_raw(inner.cast::<DeferredFdCloserInner>()) }; + if !inner.file.is_null() { + // SAFETY: By the type invariants, we own a refcount to this file, and the caller + // guarantees that dropping the refcount now is okay. + unsafe { bindings::fput(inner.file) }; + } + // The allocation is freed when `inner` goes out of scope. + } +} + +/// Represents a failure to close an fd in a deferred manner. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum DeferredFdCloseError { + /// Closing the fd failed because we were unable to schedule a task work. + TaskWorkUnavailable, + /// Closing the fd failed because the fd does not exist. + BadFd, +} + +impl From<DeferredFdCloseError> for Error { + fn from(err: DeferredFdCloseError) -> Error { + match err { + DeferredFdCloseError::TaskWorkUnavailable => ESRCH, + DeferredFdCloseError::BadFd => EBADF, + } + } +} diff --git a/drivers/android/binder/defs.rs b/drivers/android/binder/defs.rs new file mode 100644 index 000000000000..33f51b4139c7 --- /dev/null +++ b/drivers/android/binder/defs.rs @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use core::mem::MaybeUninit; +use core::ops::{Deref, DerefMut}; +use kernel::{ + transmute::{AsBytes, FromBytes}, + uapi::{self, *}, +}; + +macro_rules! pub_no_prefix { + ($prefix:ident, $($newname:ident),+ $(,)?) => { + $(pub(crate) const $newname: u32 = kernel::macros::concat_idents!($prefix, $newname);)+ + }; +} + +pub_no_prefix!( + binder_driver_return_protocol_, + BR_TRANSACTION, + BR_TRANSACTION_SEC_CTX, + BR_REPLY, + BR_DEAD_REPLY, + BR_FAILED_REPLY, + BR_FROZEN_REPLY, + BR_NOOP, + BR_SPAWN_LOOPER, + BR_TRANSACTION_COMPLETE, + BR_TRANSACTION_PENDING_FROZEN, + BR_ONEWAY_SPAM_SUSPECT, + BR_OK, + BR_ERROR, + BR_INCREFS, + BR_ACQUIRE, + BR_RELEASE, + BR_DECREFS, + BR_DEAD_BINDER, + BR_CLEAR_DEATH_NOTIFICATION_DONE, + BR_FROZEN_BINDER, + BR_CLEAR_FREEZE_NOTIFICATION_DONE, +); + +pub_no_prefix!( + binder_driver_command_protocol_, + BC_TRANSACTION, + BC_TRANSACTION_SG, + BC_REPLY, + BC_REPLY_SG, + BC_FREE_BUFFER, + BC_ENTER_LOOPER, + BC_EXIT_LOOPER, + BC_REGISTER_LOOPER, + BC_INCREFS, + BC_ACQUIRE, + BC_RELEASE, + BC_DECREFS, + BC_INCREFS_DONE, + BC_ACQUIRE_DONE, + BC_REQUEST_DEATH_NOTIFICATION, + BC_CLEAR_DEATH_NOTIFICATION, + BC_DEAD_BINDER_DONE, + BC_REQUEST_FREEZE_NOTIFICATION, + BC_CLEAR_FREEZE_NOTIFICATION, + BC_FREEZE_NOTIFICATION_DONE, +); + +pub_no_prefix!( + flat_binder_object_flags_, + FLAT_BINDER_FLAG_ACCEPTS_FDS, + FLAT_BINDER_FLAG_TXN_SECURITY_CTX +); + +pub_no_prefix!( + transaction_flags_, + TF_ONE_WAY, + TF_ACCEPT_FDS, + TF_CLEAR_BUF, + TF_UPDATE_TXN +); + +pub(crate) use uapi::{ + BINDER_TYPE_BINDER, BINDER_TYPE_FD, BINDER_TYPE_FDA, BINDER_TYPE_HANDLE, BINDER_TYPE_PTR, + BINDER_TYPE_WEAK_BINDER, BINDER_TYPE_WEAK_HANDLE, +}; + +macro_rules! decl_wrapper { + ($newname:ident, $wrapped:ty) => { + // Define a wrapper around the C type. Use `MaybeUninit` to enforce that the value of + // padding bytes must be preserved. + #[derive(Copy, Clone)] + #[repr(transparent)] + pub(crate) struct $newname(MaybeUninit<$wrapped>); + + // SAFETY: This macro is only used with types where this is ok. + unsafe impl FromBytes for $newname {} + // SAFETY: This macro is only used with types where this is ok. + unsafe impl AsBytes for $newname {} + + impl Deref for $newname { + type Target = $wrapped; + fn deref(&self) -> &Self::Target { + // SAFETY: We use `MaybeUninit` only to preserve padding. The value must still + // always be valid. + unsafe { self.0.assume_init_ref() } + } + } + + impl DerefMut for $newname { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: We use `MaybeUninit` only to preserve padding. The value must still + // always be valid. + unsafe { self.0.assume_init_mut() } + } + } + + impl Default for $newname { + fn default() -> Self { + // Create a new value of this type where all bytes (including padding) are zeroed. + Self(MaybeUninit::zeroed()) + } + } + }; +} + +decl_wrapper!(BinderNodeDebugInfo, uapi::binder_node_debug_info); +decl_wrapper!(BinderNodeInfoForRef, uapi::binder_node_info_for_ref); +decl_wrapper!(FlatBinderObject, uapi::flat_binder_object); +decl_wrapper!(BinderFdObject, uapi::binder_fd_object); +decl_wrapper!(BinderFdArrayObject, uapi::binder_fd_array_object); +decl_wrapper!(BinderObjectHeader, uapi::binder_object_header); +decl_wrapper!(BinderBufferObject, uapi::binder_buffer_object); +decl_wrapper!(BinderTransactionData, uapi::binder_transaction_data); +decl_wrapper!( + BinderTransactionDataSecctx, + uapi::binder_transaction_data_secctx +); +decl_wrapper!(BinderTransactionDataSg, uapi::binder_transaction_data_sg); +decl_wrapper!(BinderWriteRead, uapi::binder_write_read); +decl_wrapper!(BinderVersion, uapi::binder_version); +decl_wrapper!(BinderFrozenStatusInfo, uapi::binder_frozen_status_info); +decl_wrapper!(BinderFreezeInfo, uapi::binder_freeze_info); +decl_wrapper!(BinderFrozenStateInfo, uapi::binder_frozen_state_info); +decl_wrapper!(BinderHandleCookie, uapi::binder_handle_cookie); +decl_wrapper!(ExtendedError, uapi::binder_extended_error); + +impl BinderVersion { + pub(crate) fn current() -> Self { + Self(MaybeUninit::new(uapi::binder_version { + protocol_version: BINDER_CURRENT_PROTOCOL_VERSION as _, + })) + } +} + +impl BinderTransactionData { + pub(crate) fn with_buffers_size(self, buffers_size: u64) -> BinderTransactionDataSg { + BinderTransactionDataSg(MaybeUninit::new(uapi::binder_transaction_data_sg { + transaction_data: *self, + buffers_size, + })) + } +} + +impl BinderTransactionDataSecctx { + /// View the inner data as wrapped in `BinderTransactionData`. + pub(crate) fn tr_data(&mut self) -> &mut BinderTransactionData { + // SAFETY: Transparent wrapper is safe to transmute. + unsafe { + &mut *(&mut self.transaction_data as *mut uapi::binder_transaction_data + as *mut BinderTransactionData) + } + } +} + +impl ExtendedError { + pub(crate) fn new(id: u32, command: u32, param: i32) -> Self { + Self(MaybeUninit::new(uapi::binder_extended_error { + id, + command, + param, + })) + } +} diff --git a/drivers/android/binder/error.rs b/drivers/android/binder/error.rs new file mode 100644 index 000000000000..b24497cfa292 --- /dev/null +++ b/drivers/android/binder/error.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::fmt; +use kernel::prelude::*; + +use crate::defs::*; + +pub(crate) type BinderResult<T = ()> = core::result::Result<T, BinderError>; + +/// An error that will be returned to userspace via the `BINDER_WRITE_READ` ioctl rather than via +/// errno. +pub(crate) struct BinderError { + pub(crate) reply: u32, + source: Option<Error>, +} + +impl BinderError { + pub(crate) fn new_dead() -> Self { + Self { + reply: BR_DEAD_REPLY, + source: None, + } + } + + pub(crate) fn new_frozen() -> Self { + Self { + reply: BR_FROZEN_REPLY, + source: None, + } + } + + pub(crate) fn new_frozen_oneway() -> Self { + Self { + reply: BR_TRANSACTION_PENDING_FROZEN, + source: None, + } + } + + pub(crate) fn is_dead(&self) -> bool { + self.reply == BR_DEAD_REPLY + } + + pub(crate) fn as_errno(&self) -> kernel::ffi::c_int { + self.source.unwrap_or(EINVAL).to_errno() + } + + pub(crate) fn should_pr_warn(&self) -> bool { + self.source.is_some() + } +} + +/// Convert an errno into a `BinderError` and store the errno used to construct it. The errno +/// should be stored as the thread's extended error when given to userspace. +impl From<Error> for BinderError { + fn from(source: Error) -> Self { + Self { + reply: BR_FAILED_REPLY, + source: Some(source), + } + } +} + +impl From<kernel::fs::file::BadFdError> for BinderError { + fn from(source: kernel::fs::file::BadFdError) -> Self { + BinderError::from(Error::from(source)) + } +} + +impl From<kernel::alloc::AllocError> for BinderError { + fn from(_: kernel::alloc::AllocError) -> Self { + Self { + reply: BR_FAILED_REPLY, + source: Some(ENOMEM), + } + } +} + +impl fmt::Debug for BinderError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.reply { + BR_FAILED_REPLY => match self.source.as_ref() { + Some(source) => f + .debug_struct("BR_FAILED_REPLY") + .field("source", source) + .finish(), + None => f.pad("BR_FAILED_REPLY"), + }, + BR_DEAD_REPLY => f.pad("BR_DEAD_REPLY"), + BR_FROZEN_REPLY => f.pad("BR_FROZEN_REPLY"), + BR_TRANSACTION_PENDING_FROZEN => f.pad("BR_TRANSACTION_PENDING_FROZEN"), + BR_TRANSACTION_COMPLETE => f.pad("BR_TRANSACTION_COMPLETE"), + _ => f + .debug_struct("BinderError") + .field("reply", &self.reply) + .finish(), + } + } +} diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs new file mode 100644 index 000000000000..53b60035639a --- /dev/null +++ b/drivers/android/binder/freeze.rs @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{ + alloc::AllocError, + list::ListArc, + prelude::*, + rbtree::{self, RBTreeNodeReservation}, + seq_file::SeqFile, + seq_print, + sync::{Arc, UniqueArc}, + uaccess::UserSliceReader, +}; + +use crate::{ + defs::*, node::Node, process::Process, thread::Thread, BinderReturnWriter, DArc, DLArc, + DTRWrap, DeliverToRead, +}; + +#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] +pub(crate) struct FreezeCookie(u64); + +/// Represents a listener for changes to the frozen state of a process. +pub(crate) struct FreezeListener { + /// The node we are listening for. + pub(crate) node: DArc<Node>, + /// The cookie of this freeze listener. + cookie: FreezeCookie, + /// What value of `is_frozen` did we most recently tell userspace about? + last_is_frozen: Option<bool>, + /// We sent a `BR_FROZEN_BINDER` and we are waiting for `BC_FREEZE_NOTIFICATION_DONE` before + /// sending any other commands. + is_pending: bool, + /// Userspace sent `BC_CLEAR_FREEZE_NOTIFICATION` and we need to reply with + /// `BR_CLEAR_FREEZE_NOTIFICATION_DONE` as soon as possible. If `is_pending` is set, then we + /// must wait for it to be unset before we can reply. + is_clearing: bool, + /// Number of cleared duplicates that can't be deleted until userspace sends + /// `BC_FREEZE_NOTIFICATION_DONE`. + num_pending_duplicates: u64, + /// Number of cleared duplicates that can be deleted. + num_cleared_duplicates: u64, +} + +impl FreezeListener { + /// Is it okay to create a new listener with the same cookie as this one for the provided node? + /// + /// Under some scenarios, userspace may delete a freeze listener and immediately recreate it + /// with the same cookie. This results in duplicate listeners. To avoid issues with ambiguity, + /// we allow this only if the new listener is for the same node, and we also require that the + /// old listener has already been cleared. + fn allow_duplicate(&self, node: &DArc<Node>) -> bool { + Arc::ptr_eq(&self.node, node) && self.is_clearing + } +} + +type UninitFM = UniqueArc<core::mem::MaybeUninit<DTRWrap<FreezeMessage>>>; + +/// Represents a notification that the freeze state has changed. +pub(crate) struct FreezeMessage { + cookie: FreezeCookie, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for FreezeMessage { + untracked; + } +} + +impl FreezeMessage { + fn new(flags: kernel::alloc::Flags) -> Result<UninitFM, AllocError> { + UniqueArc::new_uninit(flags) + } + + fn init(ua: UninitFM, cookie: FreezeCookie) -> DLArc<FreezeMessage> { + match ua.pin_init_with(DTRWrap::new(FreezeMessage { cookie })) { + Ok(msg) => ListArc::from(msg), + Err(err) => match err {}, + } + } +} + +impl DeliverToRead for FreezeMessage { + fn do_work( + self: DArc<Self>, + thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let _removed_listener; + let mut node_refs = thread.process.node_refs.lock(); + let Some(mut freeze_entry) = node_refs.freeze_listeners.find_mut(&self.cookie) else { + return Ok(true); + }; + let freeze = freeze_entry.get_mut(); + + if freeze.num_cleared_duplicates > 0 { + freeze.num_cleared_duplicates -= 1; + drop(node_refs); + writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?; + writer.write_payload(&self.cookie.0)?; + return Ok(true); + } + + if freeze.is_pending { + return Ok(true); + } + if freeze.is_clearing { + kernel::warn_on!(freeze.num_cleared_duplicates != 0); + if freeze.num_pending_duplicates > 0 { + // The primary freeze listener was deleted, so convert a pending duplicate back + // into the primary one. + freeze.num_pending_duplicates -= 1; + freeze.is_pending = true; + freeze.is_clearing = true; + } else { + _removed_listener = freeze_entry.remove_node(); + } + drop(node_refs); + writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?; + writer.write_payload(&self.cookie.0)?; + Ok(true) + } else { + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); + if freeze.last_is_frozen == Some(is_frozen) { + return Ok(true); + } + + let mut state_info = BinderFrozenStateInfo::default(); + state_info.is_frozen = is_frozen as u32; + state_info.cookie = freeze.cookie.0; + freeze.is_pending = true; + freeze.last_is_frozen = Some(is_frozen); + drop(node_refs); + + writer.write_code(BR_FROZEN_BINDER)?; + writer.write_payload(&state_info)?; + // BR_FROZEN_BINDER notifications can cause transactions + Ok(false) + } + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + #[inline(never)] + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + seq_print!(m, "{}has frozen binder\n", prefix); + Ok(()) + } +} + +impl FreezeListener { + pub(crate) fn on_process_exit(&self, proc: &Arc<Process>) { + if !self.is_clearing { + self.node.remove_freeze_listener(proc); + } + } +} + +impl Process { + pub(crate) fn request_freeze_notif( + self: &Arc<Self>, + reader: &mut UserSliceReader, + ) -> Result<()> { + let hc = reader.read::<BinderHandleCookie>()?; + let handle = hc.handle; + let cookie = FreezeCookie(hc.cookie); + + let msg = FreezeMessage::new(GFP_KERNEL)?; + let alloc = RBTreeNodeReservation::new(GFP_KERNEL)?; + + let mut node_refs_guard = self.node_refs.lock(); + let node_refs = &mut *node_refs_guard; + let Some(info) = node_refs.by_handle.get_mut(&handle) else { + pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION invalid ref {}\n", handle); + return Err(EINVAL); + }; + if info.freeze().is_some() { + pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION already set\n"); + return Err(EINVAL); + } + let node_ref = info.node_ref(); + let freeze_entry = node_refs.freeze_listeners.entry(cookie); + + if let rbtree::Entry::Occupied(ref dupe) = freeze_entry { + if !dupe.get().allow_duplicate(&node_ref.node) { + pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION duplicate cookie\n"); + return Err(EINVAL); + } + } + + // All failure paths must come before this call, and all modifications must come after this + // call. + node_ref.node.add_freeze_listener(self, GFP_KERNEL)?; + + match freeze_entry { + rbtree::Entry::Vacant(entry) => { + entry.insert( + FreezeListener { + cookie, + node: node_ref.node.clone(), + last_is_frozen: None, + is_pending: false, + is_clearing: false, + num_pending_duplicates: 0, + num_cleared_duplicates: 0, + }, + alloc, + ); + } + rbtree::Entry::Occupied(mut dupe) => { + let dupe = dupe.get_mut(); + if dupe.is_pending { + dupe.num_pending_duplicates += 1; + } else { + dupe.num_cleared_duplicates += 1; + } + dupe.last_is_frozen = None; + dupe.is_pending = false; + dupe.is_clearing = false; + } + } + + *info.freeze() = Some(cookie); + let msg = FreezeMessage::init(msg, cookie); + drop(node_refs_guard); + let _ = self.push_work(msg); + Ok(()) + } + + pub(crate) fn freeze_notif_done(self: &Arc<Self>, reader: &mut UserSliceReader) -> Result<()> { + let cookie = FreezeCookie(reader.read()?); + let alloc = FreezeMessage::new(GFP_KERNEL)?; + let mut node_refs_guard = self.node_refs.lock(); + let node_refs = &mut *node_refs_guard; + let Some(freeze) = node_refs.freeze_listeners.get_mut(&cookie) else { + pr_warn!("BC_FREEZE_NOTIFICATION_DONE {:016x} not found\n", cookie.0); + return Err(EINVAL); + }; + let mut clear_msg = None; + if freeze.num_pending_duplicates > 0 { + clear_msg = Some(FreezeMessage::init(alloc, cookie)); + freeze.num_pending_duplicates -= 1; + freeze.num_cleared_duplicates += 1; + } else { + if !freeze.is_pending { + pr_warn!( + "BC_FREEZE_NOTIFICATION_DONE {:016x} not pending\n", + cookie.0 + ); + return Err(EINVAL); + } + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); + if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { + // Immediately send another FreezeMessage. + clear_msg = Some(FreezeMessage::init(alloc, cookie)); + } + freeze.is_pending = false; + } + drop(node_refs_guard); + if let Some(clear_msg) = clear_msg { + let _ = self.push_work(clear_msg); + } + Ok(()) + } + + pub(crate) fn clear_freeze_notif(self: &Arc<Self>, reader: &mut UserSliceReader) -> Result<()> { + let hc = reader.read::<BinderHandleCookie>()?; + let handle = hc.handle; + let cookie = FreezeCookie(hc.cookie); + + let alloc = FreezeMessage::new(GFP_KERNEL)?; + let mut node_refs_guard = self.node_refs.lock(); + let node_refs = &mut *node_refs_guard; + let Some(info) = node_refs.by_handle.get_mut(&handle) else { + pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION invalid ref {}\n", handle); + return Err(EINVAL); + }; + let Some(info_cookie) = info.freeze() else { + pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION freeze notification not active\n"); + return Err(EINVAL); + }; + if *info_cookie != cookie { + pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION freeze notification cookie mismatch\n"); + return Err(EINVAL); + } + let Some(listener) = node_refs.freeze_listeners.get_mut(&cookie) else { + pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION invalid cookie {}\n", handle); + return Err(EINVAL); + }; + listener.is_clearing = true; + listener.node.remove_freeze_listener(self); + *info.freeze() = None; + let mut msg = None; + if !listener.is_pending { + msg = Some(FreezeMessage::init(alloc, cookie)); + } + drop(node_refs_guard); + + if let Some(msg) = msg { + let _ = self.push_work(msg); + } + Ok(()) + } + + fn get_freeze_cookie(&self, node: &DArc<Node>) -> Option<FreezeCookie> { + let node_refs = &mut *self.node_refs.lock(); + let handle = node_refs.by_node.get(&node.global_id())?; + let node_ref = node_refs.by_handle.get_mut(handle)?; + *node_ref.freeze() + } + + /// Creates a vector of every freeze listener on this process. + /// + /// Returns pairs of the remote process listening for notifications and the local node it is + /// listening on. + #[expect(clippy::type_complexity)] + fn find_freeze_recipients(&self) -> Result<KVVec<(DArc<Node>, Arc<Process>)>, AllocError> { + // Defined before `inner` to drop after releasing spinlock if `push_within_capacity` fails. + let mut node_proc_pair; + + // We pre-allocate space for up to 8 recipients before we take the spinlock. However, if + // the allocation fails, use a vector with a capacity of zero instead of failing. After + // all, there might not be any freeze listeners, in which case this operation could still + // succeed. + let mut recipients = + KVVec::with_capacity(8, GFP_KERNEL).unwrap_or_else(|_err| KVVec::new()); + + let mut inner = self.lock_with_nodes(); + let mut curr = inner.nodes.cursor_front_mut(); + while let Some(cursor) = curr { + let (key, node) = cursor.current(); + let key = *key; + let list = node.freeze_list(&inner.inner); + let len = list.len(); + + if recipients.spare_capacity_mut().len() < len { + drop(inner); + recipients.reserve(len, GFP_KERNEL)?; + inner = self.lock_with_nodes(); + // Find the node we were looking at and try again. If the set of nodes was changed, + // then just proceed to the next node. This is ok because we don't guarantee the + // inclusion of nodes that are added or removed in parallel with this operation. + curr = inner.nodes.cursor_lower_bound_mut(&key); + continue; + } + + for proc in list { + node_proc_pair = (node.clone(), proc.clone()); + recipients + .push_within_capacity(node_proc_pair) + .map_err(|_| { + pr_err!( + "push_within_capacity failed even though we checked the capacity\n" + ); + AllocError + })?; + } + + curr = cursor.move_next(); + } + Ok(recipients) + } + + /// Prepare allocations for sending freeze messages. + pub(crate) fn prepare_freeze_messages(&self) -> Result<FreezeMessages, AllocError> { + let recipients = self.find_freeze_recipients()?; + let mut batch = KVVec::with_capacity(recipients.len(), GFP_KERNEL)?; + for (node, proc) in recipients { + let Some(cookie) = proc.get_freeze_cookie(&node) else { + // If the freeze listener was removed in the meantime, just discard the + // notification. + continue; + }; + let msg_alloc = FreezeMessage::new(GFP_KERNEL)?; + let msg = FreezeMessage::init(msg_alloc, cookie); + batch.push((proc, msg), GFP_KERNEL)?; + } + + Ok(FreezeMessages { batch }) + } +} + +pub(crate) struct FreezeMessages { + batch: KVVec<(Arc<Process>, DLArc<FreezeMessage>)>, +} + +impl FreezeMessages { + pub(crate) fn send_messages(self) { + for (proc, msg) in self.batch { + let _ = proc.push_work(msg); + } + } +} diff --git a/drivers/android/binder/node.rs b/drivers/android/binder/node.rs new file mode 100644 index 000000000000..c26d113ede96 --- /dev/null +++ b/drivers/android/binder/node.rs @@ -0,0 +1,1131 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{ + list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc}, + prelude::*, + seq_file::SeqFile, + seq_print, + sync::lock::{spinlock::SpinLockBackend, Guard}, + sync::{Arc, LockedBy, SpinLock}, +}; + +use crate::{ + defs::*, + error::BinderError, + process::{NodeRefInfo, Process, ProcessInner}, + thread::Thread, + transaction::Transaction, + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead, +}; + +use core::mem; + +mod wrapper; +pub(crate) use self::wrapper::CritIncrWrapper; + +#[derive(Debug)] +pub(crate) struct CouldNotDeliverCriticalIncrement; + +/// Keeps track of how this node is scheduled. +/// +/// There are two ways to schedule a node to a work list. Just schedule the node itself, or +/// allocate a wrapper that references the node and schedule the wrapper. These wrappers exists to +/// make it possible to "move" a node from one list to another - when `do_work` is called directly +/// on the `Node`, then it's a no-op if there's also a pending wrapper. +/// +/// Wrappers are generally only needed for zero-to-one refcount increments, and there are two cases +/// of this: weak increments and strong increments. We call such increments "critical" because it +/// is critical that they are delivered to the thread doing the increment. Some examples: +/// +/// * One thread makes a zero-to-one strong increment, and another thread makes a zero-to-one weak +/// increment. Delivering the node to the thread doing the weak increment is wrong, since the +/// thread doing the strong increment may have ended a long time ago when the command is actually +/// processed by userspace. +/// +/// * We have a weak reference and are about to drop it on one thread. But then another thread does +/// a zero-to-one strong increment. If the strong increment gets sent to the thread that was +/// about to drop the weak reference, then the strong increment could be processed after the +/// other thread has already exited, which would be too late. +/// +/// Note that trying to create a `ListArc` to the node can succeed even if `has_normal_push` is +/// set. This is because another thread might just have popped the node from a todo list, but not +/// yet called `do_work`. However, if `has_normal_push` is false, then creating a `ListArc` should +/// always succeed. +/// +/// Like the other fields in `NodeInner`, the delivery state is protected by the process lock. +struct DeliveryState { + /// Is the `Node` currently scheduled? + has_pushed_node: bool, + + /// Is a wrapper currently scheduled? + /// + /// The wrapper is used only for strong zero2one increments. + has_pushed_wrapper: bool, + + /// Is the currently scheduled `Node` scheduled due to a weak zero2one increment? + /// + /// Weak zero2one operations are always scheduled using the `Node`. + has_weak_zero2one: bool, + + /// Is the currently scheduled wrapper/`Node` scheduled due to a strong zero2one increment? + /// + /// If `has_pushed_wrapper` is set, then the strong zero2one increment was scheduled using the + /// wrapper. Otherwise, `has_pushed_node` must be set and it was scheduled using the `Node`. + has_strong_zero2one: bool, +} + +impl DeliveryState { + fn should_normal_push(&self) -> bool { + !self.has_pushed_node && !self.has_pushed_wrapper + } + + fn did_normal_push(&mut self) { + assert!(self.should_normal_push()); + self.has_pushed_node = true; + } + + fn should_push_weak_zero2one(&self) -> bool { + !self.has_weak_zero2one && !self.has_strong_zero2one + } + + fn can_push_weak_zero2one_normally(&self) -> bool { + !self.has_pushed_node + } + + fn did_push_weak_zero2one(&mut self) { + assert!(self.should_push_weak_zero2one()); + assert!(self.can_push_weak_zero2one_normally()); + self.has_pushed_node = true; + self.has_weak_zero2one = true; + } + + fn should_push_strong_zero2one(&self) -> bool { + !self.has_strong_zero2one + } + + fn can_push_strong_zero2one_normally(&self) -> bool { + !self.has_pushed_node + } + + fn did_push_strong_zero2one(&mut self) { + assert!(self.should_push_strong_zero2one()); + assert!(self.can_push_strong_zero2one_normally()); + self.has_pushed_node = true; + self.has_strong_zero2one = true; + } + + fn did_push_strong_zero2one_wrapper(&mut self) { + assert!(self.should_push_strong_zero2one()); + assert!(!self.can_push_strong_zero2one_normally()); + self.has_pushed_wrapper = true; + self.has_strong_zero2one = true; + } +} + +struct CountState { + /// The reference count. + count: usize, + /// Whether the process that owns this node thinks that we hold a refcount on it. (Note that + /// even if count is greater than one, we only increment it once in the owning process.) + has_count: bool, +} + +impl CountState { + fn new() -> Self { + Self { + count: 0, + has_count: false, + } + } +} + +struct NodeInner { + /// Strong refcounts held on this node by `NodeRef` objects. + strong: CountState, + /// Weak refcounts held on this node by `NodeRef` objects. + weak: CountState, + delivery_state: DeliveryState, + /// The binder driver guarantees that oneway transactions sent to the same node are serialized, + /// that is, userspace will not be given the next one until it has finished processing the + /// previous oneway transaction. This is done to avoid the case where two oneway transactions + /// arrive in opposite order from the order in which they were sent. (E.g., they could be + /// delivered to two different threads, which could appear as-if they were sent in opposite + /// order.) + /// + /// To fix that, we store pending oneway transactions in a separate list in the node, and don't + /// deliver the next oneway transaction until userspace signals that it has finished processing + /// the previous oneway transaction by calling the `BC_FREE_BUFFER` ioctl. + oneway_todo: List<DTRWrap<Transaction>>, + /// Keeps track of whether this node has a pending oneway transaction. + /// + /// When this is true, incoming oneway transactions are stored in `oneway_todo`, instead of + /// being delivered directly to the process. + has_oneway_transaction: bool, + /// List of processes to deliver a notification to when this node is destroyed (usually due to + /// the process dying). + death_list: List<DTRWrap<NodeDeath>, 1>, + /// List of processes to deliver freeze notifications to. + freeze_list: KVVec<Arc<Process>>, + /// The number of active BR_INCREFS or BR_ACQUIRE operations. (should be maximum two) + /// + /// If this is non-zero, then we postpone any BR_RELEASE or BR_DECREFS notifications until the + /// active operations have ended. This avoids the situation an increment and decrement get + /// reordered from userspace's perspective. + active_inc_refs: u8, + /// List of `NodeRefInfo` objects that reference this node. + refs: List<NodeRefInfo, { NodeRefInfo::LIST_NODE }>, +} + +#[pin_data] +pub(crate) struct Node { + pub(crate) debug_id: usize, + ptr: u64, + pub(crate) cookie: u64, + pub(crate) flags: u32, + pub(crate) owner: Arc<Process>, + inner: LockedBy<NodeInner, ProcessInner>, + #[pin] + links_track: AtomicTracker, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for Node { + tracked_by links_track: AtomicTracker; + } +} + +// Make `oneway_todo` work. +kernel::list::impl_list_item! { + impl ListItem<0> for DTRWrap<Transaction> { + using ListLinks { self.links.inner }; + } +} + +impl Node { + pub(crate) fn new( + ptr: u64, + cookie: u64, + flags: u32, + owner: Arc<Process>, + ) -> impl PinInit<Self> { + pin_init!(Self { + inner: LockedBy::new( + &owner.inner, + NodeInner { + strong: CountState::new(), + weak: CountState::new(), + delivery_state: DeliveryState { + has_pushed_node: false, + has_pushed_wrapper: false, + has_weak_zero2one: false, + has_strong_zero2one: false, + }, + death_list: List::new(), + oneway_todo: List::new(), + freeze_list: KVVec::new(), + has_oneway_transaction: false, + active_inc_refs: 0, + refs: List::new(), + }, + ), + debug_id: super::next_debug_id(), + ptr, + cookie, + flags, + owner, + links_track <- AtomicTracker::new(), + }) + } + + pub(crate) fn has_oneway_transaction(&self, owner_inner: &mut ProcessInner) -> bool { + let inner = self.inner.access_mut(owner_inner); + inner.has_oneway_transaction + } + + #[inline(never)] + pub(crate) fn full_debug_print( + &self, + m: &SeqFile, + owner_inner: &mut ProcessInner, + ) -> Result<()> { + let inner = self.inner.access_mut(owner_inner); + seq_print!( + m, + " node {}: u{:016x} c{:016x} hs {} hw {} cs {} cw {}", + self.debug_id, + self.ptr, + self.cookie, + inner.strong.has_count, + inner.weak.has_count, + inner.strong.count, + inner.weak.count, + ); + if !inner.refs.is_empty() { + seq_print!(m, " proc"); + for node_ref in &inner.refs { + seq_print!(m, " {}", node_ref.process.task.pid()); + } + } + seq_print!(m, "\n"); + for t in &inner.oneway_todo { + t.debug_print_inner(m, " pending async transaction "); + } + Ok(()) + } + + /// Insert the `NodeRef` into this `refs` list. + /// + /// # Safety + /// + /// It must be the case that `info.node_ref.node` is this node. + pub(crate) unsafe fn insert_node_info( + &self, + info: ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>, + ) { + self.inner + .access_mut(&mut self.owner.inner.lock()) + .refs + .push_front(info); + } + + /// Insert the `NodeRef` into this `refs` list. + /// + /// # Safety + /// + /// It must be the case that `info.node_ref.node` is this node. + pub(crate) unsafe fn remove_node_info( + &self, + info: &NodeRefInfo, + ) -> Option<ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>> { + // SAFETY: We always insert `NodeRefInfo` objects into the `refs` list of the node that it + // references in `info.node_ref.node`. That is this node, so `info` cannot possibly be in + // the `refs` list of another node. + unsafe { + self.inner + .access_mut(&mut self.owner.inner.lock()) + .refs + .remove(info) + } + } + + /// An id that is unique across all binder nodes on the system. Used as the key in the + /// `by_node` map. + pub(crate) fn global_id(&self) -> usize { + self as *const Node as usize + } + + pub(crate) fn get_id(&self) -> (u64, u64) { + (self.ptr, self.cookie) + } + + pub(crate) fn add_death( + &self, + death: ListArc<DTRWrap<NodeDeath>, 1>, + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, + ) { + self.inner.access_mut(guard).death_list.push_back(death); + } + + pub(crate) fn inc_ref_done_locked( + self: &DArc<Node>, + _strong: bool, + owner_inner: &mut ProcessInner, + ) -> Option<DLArc<Node>> { + let inner = self.inner.access_mut(owner_inner); + if inner.active_inc_refs == 0 { + pr_err!("inc_ref_done called when no active inc_refs"); + return None; + } + + inner.active_inc_refs -= 1; + if inner.active_inc_refs == 0 { + // Having active inc_refs can inhibit dropping of ref-counts. Calculate whether we + // would send a refcount decrement, and if so, tell the caller to schedule us. + let strong = inner.strong.count > 0; + let has_strong = inner.strong.has_count; + let weak = strong || inner.weak.count > 0; + let has_weak = inner.weak.has_count; + + let should_drop_weak = !weak && has_weak; + let should_drop_strong = !strong && has_strong; + + // If we want to drop the ref-count again, tell the caller to schedule a work node for + // that. + let need_push = should_drop_weak || should_drop_strong; + + if need_push && inner.delivery_state.should_normal_push() { + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); + inner.delivery_state.did_normal_push(); + Some(list_arc) + } else { + None + } + } else { + None + } + } + + pub(crate) fn update_refcount_locked( + self: &DArc<Node>, + inc: bool, + strong: bool, + count: usize, + owner_inner: &mut ProcessInner, + ) -> Option<DLArc<Node>> { + let is_dead = owner_inner.is_dead; + let inner = self.inner.access_mut(owner_inner); + + // Get a reference to the state we'll update. + let state = if strong { + &mut inner.strong + } else { + &mut inner.weak + }; + + // Update the count and determine whether we need to push work. + let need_push = if inc { + state.count += count; + // TODO: This method shouldn't be used for zero-to-one increments. + !is_dead && !state.has_count + } else { + if state.count < count { + pr_err!("Failure: refcount underflow!"); + return None; + } + state.count -= count; + !is_dead && state.count == 0 && state.has_count + }; + + if need_push && inner.delivery_state.should_normal_push() { + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); + inner.delivery_state.did_normal_push(); + Some(list_arc) + } else { + None + } + } + + pub(crate) fn incr_refcount_allow_zero2one( + self: &DArc<Self>, + strong: bool, + owner_inner: &mut ProcessInner, + ) -> Result<Option<DLArc<Node>>, CouldNotDeliverCriticalIncrement> { + let is_dead = owner_inner.is_dead; + let inner = self.inner.access_mut(owner_inner); + + // Get a reference to the state we'll update. + let state = if strong { + &mut inner.strong + } else { + &mut inner.weak + }; + + // Update the count and determine whether we need to push work. + state.count += 1; + if is_dead || state.has_count { + return Ok(None); + } + + // Userspace needs to be notified of this. + if !strong && inner.delivery_state.should_push_weak_zero2one() { + assert!(inner.delivery_state.can_push_weak_zero2one_normally()); + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); + inner.delivery_state.did_push_weak_zero2one(); + Ok(Some(list_arc)) + } else if strong && inner.delivery_state.should_push_strong_zero2one() { + if inner.delivery_state.can_push_strong_zero2one_normally() { + let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap(); + inner.delivery_state.did_push_strong_zero2one(); + Ok(Some(list_arc)) + } else { + state.count -= 1; + Err(CouldNotDeliverCriticalIncrement) + } + } else { + // Work is already pushed, and we don't need to push again. + Ok(None) + } + } + + pub(crate) fn incr_refcount_allow_zero2one_with_wrapper( + self: &DArc<Self>, + strong: bool, + wrapper: CritIncrWrapper, + owner_inner: &mut ProcessInner, + ) -> Option<DLArc<dyn DeliverToRead>> { + match self.incr_refcount_allow_zero2one(strong, owner_inner) { + Ok(Some(node)) => Some(node as _), + Ok(None) => None, + Err(CouldNotDeliverCriticalIncrement) => { + assert!(strong); + let inner = self.inner.access_mut(owner_inner); + inner.strong.count += 1; + inner.delivery_state.did_push_strong_zero2one_wrapper(); + Some(wrapper.init(self.clone())) + } + } + } + + pub(crate) fn update_refcount(self: &DArc<Self>, inc: bool, count: usize, strong: bool) { + self.owner + .inner + .lock() + .update_node_refcount(self, inc, strong, count, None); + } + + pub(crate) fn populate_counts( + &self, + out: &mut BinderNodeInfoForRef, + guard: &Guard<'_, ProcessInner, SpinLockBackend>, + ) { + let inner = self.inner.access(guard); + out.strong_count = inner.strong.count as _; + out.weak_count = inner.weak.count as _; + } + + pub(crate) fn populate_debug_info( + &self, + out: &mut BinderNodeDebugInfo, + guard: &Guard<'_, ProcessInner, SpinLockBackend>, + ) { + out.ptr = self.ptr as _; + out.cookie = self.cookie as _; + let inner = self.inner.access(guard); + if inner.strong.has_count { + out.has_strong_ref = 1; + } + if inner.weak.has_count { + out.has_weak_ref = 1; + } + } + + pub(crate) fn force_has_count(&self, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) { + let inner = self.inner.access_mut(guard); + inner.strong.has_count = true; + inner.weak.has_count = true; + } + + fn write(&self, writer: &mut BinderReturnWriter<'_>, code: u32) -> Result { + writer.write_code(code)?; + writer.write_payload(&self.ptr)?; + writer.write_payload(&self.cookie)?; + Ok(()) + } + + pub(crate) fn submit_oneway( + &self, + transaction: DLArc<Transaction>, + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, + ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> { + if guard.is_dead { + return Err((BinderError::new_dead(), transaction)); + } + + let inner = self.inner.access_mut(guard); + if inner.has_oneway_transaction { + inner.oneway_todo.push_back(transaction); + } else { + inner.has_oneway_transaction = true; + guard.push_work(transaction)?; + } + Ok(()) + } + + pub(crate) fn release(&self) { + let mut guard = self.owner.inner.lock(); + while let Some(work) = self.inner.access_mut(&mut guard).oneway_todo.pop_front() { + drop(guard); + work.into_arc().cancel(); + guard = self.owner.inner.lock(); + } + + while let Some(death) = self.inner.access_mut(&mut guard).death_list.pop_front() { + drop(guard); + death.into_arc().set_dead(); + guard = self.owner.inner.lock(); + } + } + + pub(crate) fn pending_oneway_finished(&self) { + let mut guard = self.owner.inner.lock(); + if guard.is_dead { + // Cleanup will happen in `Process::deferred_release`. + return; + } + + let inner = self.inner.access_mut(&mut guard); + + let transaction = inner.oneway_todo.pop_front(); + inner.has_oneway_transaction = transaction.is_some(); + if let Some(transaction) = transaction { + match guard.push_work(transaction) { + Ok(()) => {} + Err((_err, work)) => { + // Process is dead. + // This shouldn't happen due to the `is_dead` check, but if it does, just drop + // the transaction and return. + drop(guard); + drop(work); + } + } + } + } + + /// Finds an outdated transaction that the given transaction can replace. + /// + /// If one is found, it is removed from the list and returned. + pub(crate) fn take_outdated_transaction( + &self, + new: &Transaction, + guard: &mut Guard<'_, ProcessInner, SpinLockBackend>, + ) -> Option<DLArc<Transaction>> { + let inner = self.inner.access_mut(guard); + let mut cursor = inner.oneway_todo.cursor_front(); + while let Some(next) = cursor.peek_next() { + if new.can_replace(&next) { + return Some(next.remove()); + } + cursor.move_next(); + } + None + } + + /// This is split into a separate function since it's called by both `Node::do_work` and + /// `NodeWrapper::do_work`. + fn do_work_locked( + &self, + writer: &mut BinderReturnWriter<'_>, + mut guard: Guard<'_, ProcessInner, SpinLockBackend>, + ) -> Result<bool> { + let inner = self.inner.access_mut(&mut guard); + let strong = inner.strong.count > 0; + let has_strong = inner.strong.has_count; + let weak = strong || inner.weak.count > 0; + let has_weak = inner.weak.has_count; + + if weak && !has_weak { + inner.weak.has_count = true; + inner.active_inc_refs += 1; + } + + if strong && !has_strong { + inner.strong.has_count = true; + inner.active_inc_refs += 1; + } + + let no_active_inc_refs = inner.active_inc_refs == 0; + let should_drop_weak = no_active_inc_refs && (!weak && has_weak); + let should_drop_strong = no_active_inc_refs && (!strong && has_strong); + if should_drop_weak { + inner.weak.has_count = false; + } + if should_drop_strong { + inner.strong.has_count = false; + } + if no_active_inc_refs && !weak { + // Remove the node if there are no references to it. + guard.remove_node(self.ptr); + } + drop(guard); + + if weak && !has_weak { + self.write(writer, BR_INCREFS)?; + } + if strong && !has_strong { + self.write(writer, BR_ACQUIRE)?; + } + if should_drop_strong { + self.write(writer, BR_RELEASE)?; + } + if should_drop_weak { + self.write(writer, BR_DECREFS)?; + } + + Ok(true) + } + + pub(crate) fn add_freeze_listener( + &self, + process: &Arc<Process>, + flags: kernel::alloc::Flags, + ) -> Result { + let mut vec_alloc = KVVec::<Arc<Process>>::new(); + loop { + let mut guard = self.owner.inner.lock(); + // Do not check for `guard.dead`. The `dead` flag that matters here is the owner of the + // listener, no the target. + let inner = self.inner.access_mut(&mut guard); + let len = inner.freeze_list.len(); + if len >= inner.freeze_list.capacity() { + if len >= vec_alloc.capacity() { + drop(guard); + vec_alloc = KVVec::with_capacity((1 + len).next_power_of_two(), flags)?; + continue; + } + mem::swap(&mut inner.freeze_list, &mut vec_alloc); + for elem in vec_alloc.drain_all() { + inner.freeze_list.push_within_capacity(elem)?; + } + } + inner.freeze_list.push_within_capacity(process.clone())?; + return Ok(()); + } + } + + pub(crate) fn remove_freeze_listener(&self, p: &Arc<Process>) { + let _unused_capacity; + let mut guard = self.owner.inner.lock(); + let inner = self.inner.access_mut(&mut guard); + let len = inner.freeze_list.len(); + inner.freeze_list.retain(|proc| !Arc::ptr_eq(proc, p)); + if len == inner.freeze_list.len() { + pr_warn!( + "Could not remove freeze listener for {}\n", + p.pid_in_current_ns() + ); + } + if inner.freeze_list.is_empty() { + _unused_capacity = mem::take(&mut inner.freeze_list); + } + } + + pub(crate) fn freeze_list<'a>(&'a self, guard: &'a ProcessInner) -> &'a [Arc<Process>] { + &self.inner.access(guard).freeze_list + } +} + +impl DeliverToRead for Node { + fn do_work( + self: DArc<Self>, + _thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let mut owner_inner = self.owner.inner.lock(); + let inner = self.inner.access_mut(&mut owner_inner); + + assert!(inner.delivery_state.has_pushed_node); + if inner.delivery_state.has_pushed_wrapper { + // If the wrapper is scheduled, then we are either a normal push or weak zero2one + // increment, and the wrapper is a strong zero2one increment, so the wrapper always + // takes precedence over us. + assert!(inner.delivery_state.has_strong_zero2one); + inner.delivery_state.has_pushed_node = false; + inner.delivery_state.has_weak_zero2one = false; + return Ok(true); + } + + inner.delivery_state.has_pushed_node = false; + inner.delivery_state.has_weak_zero2one = false; + inner.delivery_state.has_strong_zero2one = false; + + self.do_work_locked(writer, owner_inner) + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + #[inline(never)] + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + seq_print!( + m, + "{}node work {}: u{:016x} c{:016x}\n", + prefix, + self.debug_id, + self.ptr, + self.cookie, + ); + Ok(()) + } +} + +/// Represents something that holds one or more ref-counts to a `Node`. +/// +/// Whenever process A holds a refcount to a node owned by a different process B, then process A +/// will store a `NodeRef` that refers to the `Node` in process B. When process A releases the +/// refcount, we destroy the NodeRef, which decrements the ref-count in process A. +/// +/// This type is also used for some other cases. For example, a transaction allocation holds a +/// refcount on the target node, and this is implemented by storing a `NodeRef` in the allocation +/// so that the destructor of the allocation will drop a refcount of the `Node`. +pub(crate) struct NodeRef { + pub(crate) node: DArc<Node>, + /// How many times does this NodeRef hold a refcount on the Node? + strong_node_count: usize, + weak_node_count: usize, + /// How many times does userspace hold a refcount on this NodeRef? + strong_count: usize, + weak_count: usize, +} + +impl NodeRef { + pub(crate) fn new(node: DArc<Node>, strong_count: usize, weak_count: usize) -> Self { + Self { + node, + strong_node_count: strong_count, + weak_node_count: weak_count, + strong_count, + weak_count, + } + } + + pub(crate) fn absorb(&mut self, mut other: Self) { + assert!( + Arc::ptr_eq(&self.node, &other.node), + "absorb called with differing nodes" + ); + self.strong_node_count += other.strong_node_count; + self.weak_node_count += other.weak_node_count; + self.strong_count += other.strong_count; + self.weak_count += other.weak_count; + other.strong_count = 0; + other.weak_count = 0; + other.strong_node_count = 0; + other.weak_node_count = 0; + + if self.strong_node_count >= 2 || self.weak_node_count >= 2 { + let mut guard = self.node.owner.inner.lock(); + let inner = self.node.inner.access_mut(&mut guard); + + if self.strong_node_count >= 2 { + inner.strong.count -= self.strong_node_count - 1; + self.strong_node_count = 1; + assert_ne!(inner.strong.count, 0); + } + if self.weak_node_count >= 2 { + inner.weak.count -= self.weak_node_count - 1; + self.weak_node_count = 1; + assert_ne!(inner.weak.count, 0); + } + } + } + + pub(crate) fn get_count(&self) -> (usize, usize) { + (self.strong_count, self.weak_count) + } + + pub(crate) fn clone(&self, strong: bool) -> Result<NodeRef> { + if strong && self.strong_count == 0 { + return Err(EINVAL); + } + Ok(self + .node + .owner + .inner + .lock() + .new_node_ref(self.node.clone(), strong, None)) + } + + /// Updates (increments or decrements) the number of references held against the node. If the + /// count being updated transitions from 0 to 1 or from 1 to 0, the node is notified by having + /// its `update_refcount` function called. + /// + /// Returns whether `self` should be removed (when both counts are zero). + pub(crate) fn update(&mut self, inc: bool, strong: bool) -> bool { + if strong && self.strong_count == 0 { + return false; + } + let (count, node_count, other_count) = if strong { + ( + &mut self.strong_count, + &mut self.strong_node_count, + self.weak_count, + ) + } else { + ( + &mut self.weak_count, + &mut self.weak_node_count, + self.strong_count, + ) + }; + if inc { + if *count == 0 { + *node_count = 1; + self.node.update_refcount(true, 1, strong); + } + *count += 1; + } else { + if *count == 0 { + pr_warn!( + "pid {} performed invalid decrement on ref\n", + kernel::current!().pid() + ); + return false; + } + *count -= 1; + if *count == 0 { + self.node.update_refcount(false, *node_count, strong); + *node_count = 0; + return other_count == 0; + } + } + false + } +} + +impl Drop for NodeRef { + // This destructor is called conditionally from `Allocation::drop`. That branch is often + // mispredicted. Inlining this method call reduces the cost of those branch mispredictions. + #[inline(always)] + fn drop(&mut self) { + if self.strong_node_count > 0 { + self.node + .update_refcount(false, self.strong_node_count, true); + } + if self.weak_node_count > 0 { + self.node + .update_refcount(false, self.weak_node_count, false); + } + } +} + +struct NodeDeathInner { + dead: bool, + cleared: bool, + notification_done: bool, + /// Indicates whether the normal flow was interrupted by removing the handle. In this case, we + /// need behave as if the death notification didn't exist (i.e., we don't deliver anything to + /// the user. + aborted: bool, +} + +/// Used to deliver notifications when a process dies. +/// +/// A process can request to be notified when a process dies using `BC_REQUEST_DEATH_NOTIFICATION`. +/// This will make the driver send a `BR_DEAD_BINDER` to userspace when the process dies (or +/// immediately if it is already dead). Userspace is supposed to respond with `BC_DEAD_BINDER_DONE` +/// once it has processed the notification. +/// +/// Userspace can unregister from death notifications using the `BC_CLEAR_DEATH_NOTIFICATION` +/// command. In this case, the kernel will respond with `BR_CLEAR_DEATH_NOTIFICATION_DONE` once the +/// notification has been removed. Note that if the remote process dies before the kernel has +/// responded with `BR_CLEAR_DEATH_NOTIFICATION_DONE`, then the kernel will still send a +/// `BR_DEAD_BINDER`, which userspace must be able to process. In this case, the kernel will wait +/// for the `BC_DEAD_BINDER_DONE` command before it sends `BR_CLEAR_DEATH_NOTIFICATION_DONE`. +/// +/// Note that even if the kernel sends a `BR_DEAD_BINDER`, this does not remove the death +/// notification. Userspace must still remove it manually using `BC_CLEAR_DEATH_NOTIFICATION`. +/// +/// If a process uses `BC_RELEASE` to destroy its last refcount on a node that has an active death +/// registration, then the death registration is immediately deleted (we implement this using the +/// `aborted` field). However, userspace is not supposed to delete a `NodeRef` without first +/// deregistering death notifications, so this codepath is not executed under normal circumstances. +#[pin_data] +pub(crate) struct NodeDeath { + node: DArc<Node>, + process: Arc<Process>, + pub(crate) cookie: u64, + #[pin] + links_track: AtomicTracker<0>, + /// Used by the owner `Node` to store a list of registered death notifications. + /// + /// # Invariants + /// + /// Only ever used with the `death_list` list of `self.node`. + #[pin] + death_links: ListLinks<1>, + /// Used by the process to keep track of the death notifications for which we have sent a + /// `BR_DEAD_BINDER` but not yet received a `BC_DEAD_BINDER_DONE`. + /// + /// # Invariants + /// + /// Only ever used with the `delivered_deaths` list of `self.process`. + #[pin] + delivered_links: ListLinks<2>, + #[pin] + delivered_links_track: AtomicTracker<2>, + #[pin] + inner: SpinLock<NodeDeathInner>, +} + +impl NodeDeath { + /// Constructs a new node death notification object. + pub(crate) fn new( + node: DArc<Node>, + process: Arc<Process>, + cookie: u64, + ) -> impl PinInit<DTRWrap<Self>> { + DTRWrap::new(pin_init!( + Self { + node, + process, + cookie, + links_track <- AtomicTracker::new(), + death_links <- ListLinks::new(), + delivered_links <- ListLinks::new(), + delivered_links_track <- AtomicTracker::new(), + inner <- kernel::new_spinlock!(NodeDeathInner { + dead: false, + cleared: false, + notification_done: false, + aborted: false, + }, "NodeDeath::inner"), + } + )) + } + + /// Sets the cleared flag to `true`. + /// + /// It removes `self` from the node's death notification list if needed. + /// + /// Returns whether it needs to be queued. + pub(crate) fn set_cleared(self: &DArc<Self>, abort: bool) -> bool { + let (needs_removal, needs_queueing) = { + // Update state and determine if we need to queue a work item. We only need to do it + // when the node is not dead or if the user already completed the death notification. + let mut inner = self.inner.lock(); + if abort { + inner.aborted = true; + } + if inner.cleared { + // Already cleared. + return false; + } + inner.cleared = true; + (!inner.dead, !inner.dead || inner.notification_done) + }; + + // Remove death notification from node. + if needs_removal { + let mut owner_inner = self.node.owner.inner.lock(); + let node_inner = self.node.inner.access_mut(&mut owner_inner); + // SAFETY: A `NodeDeath` is never inserted into the death list of any node other than + // its owner, so it is either in this death list or in no death list. + unsafe { node_inner.death_list.remove(self) }; + } + needs_queueing + } + + /// Sets the 'notification done' flag to `true`. + pub(crate) fn set_notification_done(self: DArc<Self>, thread: &Thread) { + let needs_queueing = { + let mut inner = self.inner.lock(); + inner.notification_done = true; + inner.cleared + }; + if needs_queueing { + if let Some(death) = ListArc::try_from_arc_or_drop(self) { + let _ = thread.push_work_if_looper(death); + } + } + } + + /// Sets the 'dead' flag to `true` and queues work item if needed. + pub(crate) fn set_dead(self: DArc<Self>) { + let needs_queueing = { + let mut inner = self.inner.lock(); + if inner.cleared { + false + } else { + inner.dead = true; + true + } + }; + if needs_queueing { + // Push the death notification to the target process. There is nothing else to do if + // it's already dead. + if let Some(death) = ListArc::try_from_arc_or_drop(self) { + let process = death.process.clone(); + let _ = process.push_work(death); + } + } + } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for NodeDeath { + tracked_by links_track: AtomicTracker; + } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<1> for DTRWrap<NodeDeath> { untracked; } +} +kernel::list::impl_list_item! { + impl ListItem<1> for DTRWrap<NodeDeath> { + using ListLinks { self.wrapped.death_links }; + } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<2> for DTRWrap<NodeDeath> { + tracked_by wrapped: NodeDeath; + } +} +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<2> for NodeDeath { + tracked_by delivered_links_track: AtomicTracker<2>; + } +} +kernel::list::impl_list_item! { + impl ListItem<2> for DTRWrap<NodeDeath> { + using ListLinks { self.wrapped.delivered_links }; + } +} + +impl DeliverToRead for NodeDeath { + fn do_work( + self: DArc<Self>, + _thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let done = { + let inner = self.inner.lock(); + if inner.aborted { + return Ok(true); + } + inner.cleared && (!inner.dead || inner.notification_done) + }; + + let cookie = self.cookie; + let cmd = if done { + BR_CLEAR_DEATH_NOTIFICATION_DONE + } else { + let process = self.process.clone(); + let mut process_inner = process.inner.lock(); + let inner = self.inner.lock(); + if inner.aborted { + return Ok(true); + } + // We're still holding the inner lock, so it cannot be aborted while we insert it into + // the delivered list. + process_inner.death_delivered(self.clone()); + BR_DEAD_BINDER + }; + + writer.write_code(cmd)?; + writer.write_payload(&cookie)?; + // DEAD_BINDER notifications can cause transactions, so stop processing work items when we + // get to a death notification. + Ok(cmd != BR_DEAD_BINDER) + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + #[inline(never)] + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + let inner = self.inner.lock(); + + let dead_binder = inner.dead && !inner.notification_done; + + if dead_binder { + if inner.cleared { + seq_print!(m, "{}has cleared dead binder\n", prefix); + } else { + seq_print!(m, "{}has dead binder\n", prefix); + } + } else { + seq_print!(m, "{}has cleared death notification\n", prefix); + } + + Ok(()) + } +} diff --git a/drivers/android/binder/node/wrapper.rs b/drivers/android/binder/node/wrapper.rs new file mode 100644 index 000000000000..43294c050502 --- /dev/null +++ b/drivers/android/binder/node/wrapper.rs @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{list::ListArc, prelude::*, seq_file::SeqFile, seq_print, sync::UniqueArc}; + +use crate::{node::Node, thread::Thread, BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead}; + +use core::mem::MaybeUninit; + +pub(crate) struct CritIncrWrapper { + inner: UniqueArc<MaybeUninit<DTRWrap<NodeWrapper>>>, +} + +impl CritIncrWrapper { + pub(crate) fn new() -> Result<Self> { + Ok(CritIncrWrapper { + inner: UniqueArc::new_uninit(GFP_KERNEL)?, + }) + } + + pub(super) fn init(self, node: DArc<Node>) -> DLArc<dyn DeliverToRead> { + match self.inner.pin_init_with(DTRWrap::new(NodeWrapper { node })) { + Ok(initialized) => ListArc::from(initialized) as _, + Err(err) => match err {}, + } + } +} + +struct NodeWrapper { + node: DArc<Node>, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for NodeWrapper { + untracked; + } +} + +impl DeliverToRead for NodeWrapper { + fn do_work( + self: DArc<Self>, + _thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let node = &self.node; + let mut owner_inner = node.owner.inner.lock(); + let inner = node.inner.access_mut(&mut owner_inner); + + let ds = &mut inner.delivery_state; + + assert!(ds.has_pushed_wrapper); + assert!(ds.has_strong_zero2one); + ds.has_pushed_wrapper = false; + ds.has_strong_zero2one = false; + + node.do_work_locked(writer, owner_inner) + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + #[inline(never)] + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + seq_print!( + m, + "{}node work {}: u{:016x} c{:016x}\n", + prefix, + self.node.debug_id, + self.node.ptr, + self.node.cookie, + ); + Ok(()) + } +} diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs new file mode 100644 index 000000000000..9379038f61f5 --- /dev/null +++ b/drivers/android/binder/page_range.rs @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! This module has utilities for managing a page range where unused pages may be reclaimed by a +//! vma shrinker. + +// To avoid deadlocks, locks are taken in the order: +// +// 1. mmap lock +// 2. spinlock +// 3. lru spinlock +// +// The shrinker will use trylock methods because it locks them in a different order. + +use core::{ + marker::PhantomPinned, + mem::{size_of, size_of_val, MaybeUninit}, + ptr, +}; + +use kernel::{ + bindings, + error::Result, + ffi::{c_ulong, c_void}, + mm::{virt, Mm, MmWithUser}, + new_mutex, new_spinlock, + page::{Page, PAGE_SHIFT, PAGE_SIZE}, + prelude::*, + str::CStr, + sync::{aref::ARef, Mutex, SpinLock}, + task::Pid, + transmute::FromBytes, + types::Opaque, + uaccess::UserSliceReader, +}; + +/// Represents a shrinker that can be registered with the kernel. +/// +/// Each shrinker can be used by many `ShrinkablePageRange` objects. +#[repr(C)] +pub(crate) struct Shrinker { + inner: Opaque<*mut bindings::shrinker>, + list_lru: Opaque<bindings::list_lru>, +} + +// SAFETY: The shrinker and list_lru are thread safe. +unsafe impl Send for Shrinker {} +// SAFETY: The shrinker and list_lru are thread safe. +unsafe impl Sync for Shrinker {} + +impl Shrinker { + /// Create a new shrinker. + /// + /// # Safety + /// + /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have + /// been called exactly once, and it must not have returned an error. + pub(crate) const unsafe fn new() -> Self { + Self { + inner: Opaque::uninit(), + list_lru: Opaque::uninit(), + } + } + + /// Register this shrinker with the kernel. + pub(crate) fn register(&'static self, name: &CStr) -> Result<()> { + // SAFETY: These fields are not yet used, so it's okay to zero them. + unsafe { + self.inner.get().write(ptr::null_mut()); + self.list_lru.get().write_bytes(0, 1); + } + + // SAFETY: The field is not yet used, so we can initialize it. + let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) }; + if ret != 0 { + return Err(Error::from_errno(ret)); + } + + // SAFETY: The `name` points at a valid c string. + let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) }; + if shrinker.is_null() { + // SAFETY: We initialized it, so its okay to destroy it. + unsafe { bindings::list_lru_destroy(self.list_lru.get()) }; + return Err(Error::from_errno(ret)); + } + + // SAFETY: We're about to register the shrinker, and these are the fields we need to + // initialize. (All other fields are already zeroed.) + unsafe { + (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count)); + (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan)); + (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast()); + } + + // SAFETY: The new shrinker has been fully initialized, so we can register it. + unsafe { bindings::shrinker_register(shrinker) }; + + // SAFETY: This initializes the pointer to the shrinker so that we can use it. + unsafe { self.inner.get().write(shrinker) }; + + Ok(()) + } +} + +/// A container that manages a page range in a vma. +/// +/// The pages can be thought of as an array of booleans of whether the pages are usable. The +/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false +/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed +/// immediately. Instead, it is made available to the memory shrinker to free it if the device is +/// under memory pressure. +/// +/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no +/// way to know whether an index ends up with true or false if a call to `use_range` races with +/// another call to `stop_using_range` on a given index. +/// +/// It's also okay for the two methods to race with themselves, e.g. if two threads call +/// `use_range` on the same index, then that's fine and neither call will return until the page is +/// allocated and mapped. +/// +/// The methods that read or write to a range require that the page is marked as in use. So it is +/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or +/// write to the page. +#[pin_data(PinnedDrop)] +pub(crate) struct ShrinkablePageRange { + /// Shrinker object registered with the kernel. + shrinker: &'static Shrinker, + /// Pid using this page range. Only used as debugging information. + pid: Pid, + /// The mm for the relevant process. + mm: ARef<Mm>, + /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`. + #[pin] + mm_lock: Mutex<()>, + /// Spinlock protecting changes to pages. + #[pin] + lock: SpinLock<Inner>, + + /// Must not move, since page info has pointers back. + #[pin] + _pin: PhantomPinned, +} + +struct Inner { + /// Array of pages. + /// + /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive + /// ownership. To deal with that, we manage it using raw pointers. + pages: *mut PageInfo, + /// Length of the `pages` array. + size: usize, + /// The address of the vma to insert the pages into. + vma_addr: usize, +} + +// SAFETY: proper locking is in place for `Inner` +unsafe impl Send for Inner {} + +type StableMmGuard = + kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>; + +/// An array element that describes the current state of a page. +/// +/// There are three states: +/// +/// * Free. The page is None. The `lru` element is not queued. +/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru. +/// * Used. The page is Some. The `lru` element is not queued. +/// +/// When an element is available, the shrinker is able to free the page. +#[repr(C)] +struct PageInfo { + lru: bindings::list_head, + page: Option<Page>, + range: *const ShrinkablePageRange, +} + +impl PageInfo { + /// # Safety + /// + /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set. + unsafe fn set_page(me: *mut PageInfo, page: Page) { + // SAFETY: This pointer offset is in bounds. + let ptr = unsafe { &raw mut (*me).page }; + + // SAFETY: The pointer is valid for writing, so also valid for reading. + if unsafe { (*ptr).is_some() } { + pr_err!("set_page called when there is already a page"); + // SAFETY: We will initialize the page again below. + unsafe { ptr::drop_in_place(ptr) }; + } + + // SAFETY: The pointer is valid for writing. + unsafe { ptr::write(ptr, Some(page)) }; + } + + /// # Safety + /// + /// The caller ensures that reading from `me.page` is ok for the duration of 'a. + unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> { + // SAFETY: This pointer offset is in bounds. + let ptr = unsafe { &raw const (*me).page }; + + // SAFETY: The pointer is valid for reading. + unsafe { (*ptr).as_ref() } + } + + /// # Safety + /// + /// The caller ensures that writing to `me.page` is ok for the duration of 'a. + unsafe fn take_page(me: *mut PageInfo) -> Option<Page> { + // SAFETY: This pointer offset is in bounds. + let ptr = unsafe { &raw mut (*me).page }; + + // SAFETY: The pointer is valid for reading. + unsafe { (*ptr).take() } + } + + /// Add this page to the lru list, if not already in the list. + /// + /// # Safety + /// + /// The pointer must be valid, and it must be the right shrinker and nid. + unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) { + // SAFETY: This pointer offset is in bounds. + let lru_ptr = unsafe { &raw mut (*me).lru }; + // SAFETY: The lru pointer is valid, and we're not using it with any other lru list. + unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) }; + } + + /// Remove this page from the lru list, if it is in the list. + /// + /// # Safety + /// + /// The pointer must be valid, and it must be the right shrinker and nid. + unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) { + // SAFETY: This pointer offset is in bounds. + let lru_ptr = unsafe { &raw mut (*me).lru }; + // SAFETY: The lru pointer is valid, and we're not using it with any other lru list. + unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) }; + } +} + +impl ShrinkablePageRange { + /// Create a new `ShrinkablePageRange` using the given shrinker. + pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> { + try_pin_init!(Self { + shrinker, + pid: kernel::current!().pid(), + mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?), + mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"), + lock <- new_spinlock!(Inner { + pages: ptr::null_mut(), + size: 0, + vma_addr: 0, + }, "ShrinkablePageRange"), + _pin: PhantomPinned, + }) + } + + pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> { + // SAFETY: This extends the duration of the reference. Since this call happens before + // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block + // until the returned guard is dropped. This ensures that the guard is valid until dropped. + let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) }; + + mm_lock.try_lock() + } + + /// Register a vma with this page range. Returns the size of the region. + pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> { + let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize); + let num_pages = num_bytes >> PAGE_SHIFT; + + if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) { + pr_debug!("Failed to register with vma: invalid vma->vm_mm"); + return Err(EINVAL); + } + if num_pages == 0 { + pr_debug!("Failed to register with vma: size zero"); + return Err(EINVAL); + } + + let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?; + + // SAFETY: This just initializes the pages array. + unsafe { + let self_ptr = self as *const ShrinkablePageRange; + for i in 0..num_pages { + let info = pages.as_mut_ptr().add(i); + (&raw mut (*info).range).write(self_ptr); + (&raw mut (*info).page).write(None); + let lru = &raw mut (*info).lru; + (&raw mut (*lru).next).write(lru); + (&raw mut (*lru).prev).write(lru); + } + } + + let mut inner = self.lock.lock(); + if inner.size > 0 { + pr_debug!("Failed to register with vma: already registered"); + drop(inner); + return Err(EBUSY); + } + + inner.pages = pages.into_raw_parts().0; + inner.size = num_pages; + inner.vma_addr = vma.start(); + + Ok(num_pages) + } + + /// Make sure that the given pages are allocated and mapped. + /// + /// Must not be called from an atomic context. + pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> { + if start >= end { + return Ok(()); + } + let mut inner = self.lock.lock(); + assert!(end <= inner.size); + + for i in start..end { + // SAFETY: This pointer offset is in bounds. + let page_info = unsafe { inner.pages.add(i) }; + + // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay. + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { + // Since we're going to use the page, we should remove it from the lru list so that + // the shrinker will not free it. + // + // SAFETY: The pointer is valid, and this is the right shrinker. + // + // The shrinker can't free the page between the check and this call to + // `list_lru_del` because we hold the lock. + unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) }; + } else { + // We have to allocate a new page. Use the slow path. + drop(inner); + // SAFETY: `i < end <= inner.size` so `i` is in bounds. + match unsafe { self.use_page_slow(i) } { + Ok(()) => {} + Err(err) => { + pr_warn!("Error in use_page_slow: {:?}", err); + return Err(err); + } + } + inner = self.lock.lock(); + } + } + Ok(()) + } + + /// Mark the given page as in use, slow path. + /// + /// Must not be called from an atomic context. + /// + /// # Safety + /// + /// Assumes that `i` is in bounds. + #[cold] + unsafe fn use_page_slow(&self, i: usize) -> Result<()> { + let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?; + + let mm_mutex = self.mm_lock.lock(); + let inner = self.lock.lock(); + + // SAFETY: This pointer offset is in bounds. + let page_info = unsafe { inner.pages.add(i) }; + + // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay. + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { + // The page was already there, or someone else added the page while we didn't hold the + // spinlock. + // + // SAFETY: The pointer is valid, and this is the right shrinker. + // + // The shrinker can't free the page between the check and this call to + // `list_lru_del` because we hold the lock. + unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) }; + return Ok(()); + } + + let vma_addr = inner.vma_addr; + // Release the spinlock while we insert the page into the vma. + drop(inner); + + // No overflow since we stay in bounds of the vma. + let user_page_addr = vma_addr + (i << PAGE_SHIFT); + + // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from + // a remote process. If the call to `mmput` races with the process shutting down, then the + // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't + // happen until it returns to userspace. However, the caller might instead go to sleep and + // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the + // middle of a shutdown process that won't complete until the `mm` is dropped. This can + // amount to a deadlock. + // + // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a + // workqueue. + MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?) + .mmap_read_lock() + .vma_lookup(vma_addr) + .ok_or(ESRCH)? + .as_mixedmap_vma() + .ok_or(ESRCH)? + .vm_insert_page(user_page_addr, &new_page) + .inspect_err(|err| { + pr_warn!( + "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}", + user_page_addr, + vma_addr, + i, + err + ) + })?; + + let inner = self.lock.lock(); + + // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page + // can be written to since we hold the lock. + // + // We released and reacquired the spinlock since we checked that the page is null, but we + // always hold the mm_lock mutex when setting the page to a non-null value, so it's not + // possible for someone else to have changed it since our check. + unsafe { PageInfo::set_page(page_info, new_page) }; + + drop(inner); + drop(mm_mutex); + + Ok(()) + } + + /// If the given page is in use, then mark it as available so that the shrinker can free it. + /// + /// May be called from an atomic context. + pub(crate) fn stop_using_range(&self, start: usize, end: usize) { + if start >= end { + return; + } + let inner = self.lock.lock(); + assert!(end <= inner.size); + + for i in (start..end).rev() { + // SAFETY: The pointer is in bounds. + let page_info = unsafe { inner.pages.add(i) }; + + // SAFETY: Okay for reading since we have the lock. + if let Some(page) = unsafe { PageInfo::get_page(page_info) } { + // SAFETY: The pointer is valid, and it's the right shrinker. + unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) }; + } + } + } + + /// Helper for reading or writing to a range of bytes that may overlap with several pages. + /// + /// # Safety + /// + /// All pages touched by this operation must be in use for the duration of this call. + unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result + where + T: FnMut(&Page, usize, usize) -> Result, + { + if size == 0 { + return Ok(()); + } + + let (pages, num_pages) = { + let inner = self.lock.lock(); + (inner.pages, inner.size) + }; + let num_bytes = num_pages << PAGE_SHIFT; + + // Check that the request is within the buffer. + if offset.checked_add(size).ok_or(EFAULT)? > num_bytes { + return Err(EFAULT); + } + + let mut page_index = offset >> PAGE_SHIFT; + offset &= PAGE_SIZE - 1; + while size > 0 { + let available = usize::min(size, PAGE_SIZE - offset); + // SAFETY: The pointer is in bounds. + let page_info = unsafe { pages.add(page_index) }; + // SAFETY: The caller guarantees that this page is in the "in use" state for the + // duration of this call to `iterate`, so nobody will change the page. + let page = unsafe { PageInfo::get_page(page_info) }; + if page.is_none() { + pr_warn!("Page is null!"); + } + let page = page.ok_or(EFAULT)?; + cb(page, offset, available)?; + size -= available; + page_index += 1; + offset = 0; + } + Ok(()) + } + + /// Copy from userspace into this page range. + /// + /// # Safety + /// + /// All pages touched by this operation must be in use for the duration of this call. + pub(crate) unsafe fn copy_from_user_slice( + &self, + reader: &mut UserSliceReader, + offset: usize, + size: usize, + ) -> Result { + // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`. + unsafe { + self.iterate(offset, size, |page, offset, to_copy| { + page.copy_from_user_slice_raw(reader, offset, to_copy) + }) + } + } + + /// Copy from this page range into kernel space. + /// + /// # Safety + /// + /// All pages touched by this operation must be in use for the duration of this call. + pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> { + let mut out = MaybeUninit::<T>::uninit(); + let mut out_offset = 0; + // SAFETY: `self.iterate` has the same safety requirements as `read`. + unsafe { + self.iterate(offset, size_of::<T>(), |page, offset, to_copy| { + // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T. + let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset); + // SAFETY: The pointer points is in-bounds of the `out` variable, so it is valid. + page.read_raw(obj_ptr, offset, to_copy)?; + out_offset += to_copy; + Ok(()) + })?; + } + // SAFETY: We just initialised the data. + Ok(unsafe { out.assume_init() }) + } + + /// Copy from kernel space into this page range. + /// + /// # Safety + /// + /// All pages touched by this operation must be in use for the duration of this call. + pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result { + let mut obj_offset = 0; + // SAFETY: `self.iterate` has the same safety requirements as `write`. + unsafe { + self.iterate(offset, size_of_val(obj), |page, offset, to_copy| { + // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T. + let obj_ptr = (obj as *const T as *const u8).add(obj_offset); + // SAFETY: We have a reference to the object, so the pointer is valid. + page.write_raw(obj_ptr, offset, to_copy)?; + obj_offset += to_copy; + Ok(()) + }) + } + } + + /// Write zeroes to the given range. + /// + /// # Safety + /// + /// All pages touched by this operation must be in use for the duration of this call. + pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result { + // SAFETY: `self.iterate` has the same safety requirements as `copy_into`. + unsafe { + self.iterate(offset, size, |page, offset, len| { + page.fill_zero_raw(offset, len) + }) + } + } +} + +#[pinned_drop] +impl PinnedDrop for ShrinkablePageRange { + fn drop(self: Pin<&mut Self>) { + let (pages, size) = { + let lock = self.lock.lock(); + (lock.pages, lock.size) + }; + + if size == 0 { + return; + } + + // Note: This call is also necessary for the safety of `stable_trylock_mm`. + let mm_lock = self.mm_lock.lock(); + + // This is the destructor, so unlike the other methods, we only need to worry about races + // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the + // shrinker, and after this loop, the shrinker will not access any of our pages since we + // removed them from the lru list. + for i in 0..size { + // SAFETY: Loop is in-bounds of the size. + let p_ptr = unsafe { pages.add(i) }; + // SAFETY: No other readers, so we can read. + if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } { + // SAFETY: The pointer is valid and it's the right shrinker. + unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) }; + } + } + + drop(mm_lock); + + // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore, + // all `size` elements are initialized. Also, the array is no longer shared with the + // shrinker due to the above loop. + drop(unsafe { KVVec::from_raw_parts(pages, size, size) }); + } +} + +/// # Safety +/// Called by the shrinker. +#[no_mangle] +unsafe extern "C" fn rust_shrink_count( + shrink: *mut bindings::shrinker, + _sc: *mut bindings::shrink_control, +) -> c_ulong { + // SAFETY: We can access our own private data. + let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() }; + // SAFETY: Accessing the lru list is okay. Just an FFI call. + unsafe { bindings::list_lru_count(list_lru) } +} + +/// # Safety +/// Called by the shrinker. +#[no_mangle] +unsafe extern "C" fn rust_shrink_scan( + shrink: *mut bindings::shrinker, + sc: *mut bindings::shrink_control, +) -> c_ulong { + // SAFETY: We can access our own private data. + let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() }; + // SAFETY: Caller guarantees that it is safe to read this field. + let nr_to_scan = unsafe { (*sc).nr_to_scan }; + // SAFETY: Accessing the lru list is okay. Just an FFI call. + unsafe { + bindings::list_lru_walk( + list_lru, + Some(bindings::rust_shrink_free_page_wrap), + ptr::null_mut(), + nr_to_scan, + ) + } +} + +const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP; +const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY; + +/// # Safety +/// Called by the shrinker. +#[no_mangle] +unsafe extern "C" fn rust_shrink_free_page( + item: *mut bindings::list_head, + lru: *mut bindings::list_lru_one, + _cb_arg: *mut c_void, +) -> bindings::lru_status { + // Fields that should survive after unlocking the lru lock. + let page; + let page_index; + let mm; + let mmap_read; + let mm_mutex; + let vma_addr; + + { + // CAST: The `list_head` field is first in `PageInfo`. + let info = item as *mut PageInfo; + // SAFETY: The `range` field of `PageInfo` is immutable. + let range = unsafe { &*((*info).range) }; + + mm = match range.mm.mmget_not_zero() { + Some(mm) => MmWithUser::into_mmput_async(mm), + None => return LRU_SKIP, + }; + + mm_mutex = match range.stable_trylock_mm() { + Some(guard) => guard, + None => return LRU_SKIP, + }; + + mmap_read = match mm.mmap_read_trylock() { + Some(guard) => guard, + None => return LRU_SKIP, + }; + + // We can't lock it normally here, since we hold the lru lock. + let inner = match range.lock.try_lock() { + Some(inner) => inner, + None => return LRU_SKIP, + }; + + // SAFETY: The item is in this lru list, so it's okay to remove it. + unsafe { bindings::list_lru_isolate(lru, item) }; + + // SAFETY: Both pointers are in bounds of the same allocation. + page_index = unsafe { info.offset_from(inner.pages) } as usize; + + // SAFETY: We hold the spinlock, so we can take the page. + // + // This sets the page pointer to zero before we unmap it from the vma. However, we call + // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to + // insert a new page until after our call to `zap_page_range`. + page = unsafe { PageInfo::take_page(info) }; + vma_addr = inner.vma_addr; + + // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because + // they can be freed at any point after we unlock `lru_lock`. This is with the exception of + // `mm_mutex` which is kept alive by holding the lock. + } + + // SAFETY: The lru lock is locked when this method is called. + unsafe { bindings::spin_unlock(&raw mut (*lru).lock) }; + + if let Some(vma) = mmap_read.vma_lookup(vma_addr) { + let user_page_addr = vma_addr + (page_index << PAGE_SHIFT); + vma.zap_page_range_single(user_page_addr, PAGE_SIZE); + } + + drop(mmap_read); + drop(mm_mutex); + drop(mm); + drop(page); + + // SAFETY: We just unlocked the lru lock, but it should be locked when we return. + unsafe { bindings::spin_lock(&raw mut (*lru).lock) }; + + LRU_REMOVED_ENTRY +} diff --git a/drivers/android/binder/page_range_helper.c b/drivers/android/binder/page_range_helper.c new file mode 100644 index 000000000000..496887723ee0 --- /dev/null +++ b/drivers/android/binder/page_range_helper.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* C helper for page_range.rs to work around a CFI violation. + * + * Bindgen currently pretends that `enum lru_status` is the same as an integer. + * This assumption is fine ABI-wise, but once you add CFI to the mix, it + * triggers a CFI violation because `enum lru_status` gets a different CFI tag. + * + * This file contains a workaround until bindgen can be fixed. + * + * Copyright (C) 2025 Google LLC. + */ +#include "page_range_helper.h" + +unsigned int rust_shrink_free_page(struct list_head *item, + struct list_lru_one *list, + void *cb_arg); + +enum lru_status +rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list, + void *cb_arg) +{ + return rust_shrink_free_page(item, list, cb_arg); +} diff --git a/drivers/android/binder/page_range_helper.h b/drivers/android/binder/page_range_helper.h new file mode 100644 index 000000000000..18dd2dd117b2 --- /dev/null +++ b/drivers/android/binder/page_range_helper.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Google, Inc. + */ + +#ifndef _LINUX_PAGE_RANGE_HELPER_H +#define _LINUX_PAGE_RANGE_HELPER_H + +#include <linux/list_lru.h> + +enum lru_status +rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list, + void *cb_arg); + +#endif /* _LINUX_PAGE_RANGE_HELPER_H */ diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs new file mode 100644 index 000000000000..132055b4790f --- /dev/null +++ b/drivers/android/binder/process.rs @@ -0,0 +1,1745 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! This module defines the `Process` type, which represents a process using a particular binder +//! context. +//! +//! The `Process` object keeps track of all of the resources that this process owns in the binder +//! context. +//! +//! There is one `Process` object for each binder fd that a process has opened, so processes using +//! several binder contexts have several `Process` objects. This ensures that the contexts are +//! fully separated. + +use core::mem::take; + +use kernel::{ + bindings, + cred::Credential, + error::Error, + fs::file::{self, File}, + id_pool::IdPool, + list::{List, ListArc, ListArcField, ListLinks}, + mm, + prelude::*, + rbtree::{self, RBTree, RBTreeNode, RBTreeNodeReservation}, + seq_file::SeqFile, + seq_print, + sync::poll::PollTable, + sync::{ + lock::{spinlock::SpinLockBackend, Guard}, + Arc, ArcBorrow, CondVar, CondVarTimeoutResult, Mutex, SpinLock, UniqueArc, + }, + task::Task, + types::ARef, + uaccess::{UserSlice, UserSliceReader}, + uapi, + workqueue::{self, Work}, +}; + +use crate::{ + allocation::{Allocation, AllocationInfo, NewAllocation}, + context::Context, + defs::*, + error::{BinderError, BinderResult}, + node::{CouldNotDeliverCriticalIncrement, CritIncrWrapper, Node, NodeDeath, NodeRef}, + page_range::ShrinkablePageRange, + range_alloc::{RangeAllocator, ReserveNew, ReserveNewArgs}, + stats::BinderStats, + thread::{PushWorkRes, Thread}, + BinderfsProcFile, DArc, DLArc, DTRWrap, DeliverToRead, +}; + +#[path = "freeze.rs"] +mod freeze; +use self::freeze::{FreezeCookie, FreezeListener}; + +struct Mapping { + address: usize, + alloc: RangeAllocator<AllocationInfo>, +} + +impl Mapping { + fn new(address: usize, size: usize) -> Self { + Self { + address, + alloc: RangeAllocator::new(size), + } + } +} + +// bitflags for defer_work. +const PROC_DEFER_FLUSH: u8 = 1; +const PROC_DEFER_RELEASE: u8 = 2; + +#[derive(Copy, Clone)] +pub(crate) enum IsFrozen { + Yes, + No, + InProgress, +} + +impl IsFrozen { + /// Whether incoming transactions should be rejected due to freeze. + pub(crate) fn is_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => true, + } + } + + /// Whether freeze notifications consider this process frozen. + pub(crate) fn is_fully_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => false, + } + } +} + +/// The fields of `Process` protected by the spinlock. +pub(crate) struct ProcessInner { + is_manager: bool, + pub(crate) is_dead: bool, + threads: RBTree<i32, Arc<Thread>>, + /// INVARIANT: Threads pushed to this list must be owned by this process. + ready_threads: List<Thread>, + nodes: RBTree<u64, DArc<Node>>, + mapping: Option<Mapping>, + work: List<DTRWrap<dyn DeliverToRead>>, + delivered_deaths: List<DTRWrap<NodeDeath>, 2>, + + /// The number of requested threads that haven't registered yet. + requested_thread_count: u32, + /// The maximum number of threads used by the process thread pool. + max_threads: u32, + /// The number of threads the started and registered with the thread pool. + started_thread_count: u32, + + /// Bitmap of deferred work to do. + defer_work: u8, + + /// Number of transactions to be transmitted before processes in freeze_wait + /// are woken up. + outstanding_txns: u32, + /// Process is frozen and unable to service binder transactions. + pub(crate) is_frozen: IsFrozen, + /// Process received sync transactions since last frozen. + pub(crate) sync_recv: bool, + /// Process received async transactions since last frozen. + pub(crate) async_recv: bool, + pub(crate) binderfs_file: Option<BinderfsProcFile>, + /// Check for oneway spam + oneway_spam_detection_enabled: bool, +} + +impl ProcessInner { + fn new() -> Self { + Self { + is_manager: false, + is_dead: false, + threads: RBTree::new(), + ready_threads: List::new(), + mapping: None, + nodes: RBTree::new(), + work: List::new(), + delivered_deaths: List::new(), + requested_thread_count: 0, + max_threads: 0, + started_thread_count: 0, + defer_work: 0, + outstanding_txns: 0, + is_frozen: IsFrozen::No, + sync_recv: false, + async_recv: false, + binderfs_file: None, + oneway_spam_detection_enabled: false, + } + } + + /// Schedule the work item for execution on this process. + /// + /// If any threads are ready for work, then the work item is given directly to that thread and + /// it is woken up. Otherwise, it is pushed to the process work list. + /// + /// This call can fail only if the process is dead. In this case, the work item is returned to + /// the caller so that the caller can drop it after releasing the inner process lock. This is + /// necessary since the destructor of `Transaction` will take locks that can't necessarily be + /// taken while holding the inner process lock. + pub(crate) fn push_work( + &mut self, + work: DLArc<dyn DeliverToRead>, + ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> { + // Try to find a ready thread to which to push the work. + if let Some(thread) = self.ready_threads.pop_front() { + // Push to thread while holding state lock. This prevents the thread from giving up + // (for example, because of a signal) when we're about to deliver work. + match thread.push_work(work) { + PushWorkRes::Ok => Ok(()), + PushWorkRes::FailedDead(work) => Err((BinderError::new_dead(), work)), + } + } else if self.is_dead { + Err((BinderError::new_dead(), work)) + } else { + let sync = work.should_sync_wakeup(); + + // Didn't find a thread waiting for proc work; this can happen + // in two scenarios: + // 1. All threads are busy handling transactions + // In that case, one of those threads should call back into + // the kernel driver soon and pick up this work. + // 2. Threads are using the (e)poll interface, in which case + // they may be blocked on the waitqueue without having been + // added to waiting_threads. For this case, we just iterate + // over all threads not handling transaction work, and + // wake them all up. We wake all because we don't know whether + // a thread that called into (e)poll is handling non-binder + // work currently. + self.work.push_back(work); + + // Wake up polling threads, if any. + for thread in self.threads.values() { + thread.notify_if_poll_ready(sync); + } + + Ok(()) + } + } + + pub(crate) fn remove_node(&mut self, ptr: u64) { + self.nodes.remove(&ptr); + } + + /// Updates the reference count on the given node. + pub(crate) fn update_node_refcount( + &mut self, + node: &DArc<Node>, + inc: bool, + strong: bool, + count: usize, + othread: Option<&Thread>, + ) { + let push = node.update_refcount_locked(inc, strong, count, self); + + // If we decided that we need to push work, push either to the process or to a thread if + // one is specified. + if let Some(node) = push { + if let Some(thread) = othread { + thread.push_work_deferred(node); + } else { + let _ = self.push_work(node); + // Nothing to do: `push_work` may fail if the process is dead, but that's ok as in + // that case, it doesn't care about the notification. + } + } + } + + pub(crate) fn new_node_ref( + &mut self, + node: DArc<Node>, + strong: bool, + thread: Option<&Thread>, + ) -> NodeRef { + self.update_node_refcount(&node, true, strong, 1, thread); + let strong_count = if strong { 1 } else { 0 }; + NodeRef::new(node, strong_count, 1 - strong_count) + } + + pub(crate) fn new_node_ref_with_thread( + &mut self, + node: DArc<Node>, + strong: bool, + thread: &Thread, + wrapper: Option<CritIncrWrapper>, + ) -> Result<NodeRef, CouldNotDeliverCriticalIncrement> { + let push = match wrapper { + None => node + .incr_refcount_allow_zero2one(strong, self)? + .map(|node| node as _), + Some(wrapper) => node.incr_refcount_allow_zero2one_with_wrapper(strong, wrapper, self), + }; + if let Some(node) = push { + thread.push_work_deferred(node); + } + let strong_count = if strong { 1 } else { 0 }; + Ok(NodeRef::new(node, strong_count, 1 - strong_count)) + } + + /// Returns an existing node with the given pointer and cookie, if one exists. + /// + /// Returns an error if a node with the given pointer but a different cookie exists. + fn get_existing_node(&self, ptr: u64, cookie: u64) -> Result<Option<DArc<Node>>> { + match self.nodes.get(&ptr) { + None => Ok(None), + Some(node) => { + let (_, node_cookie) = node.get_id(); + if node_cookie == cookie { + Ok(Some(node.clone())) + } else { + Err(EINVAL) + } + } + } + } + + fn register_thread(&mut self) -> bool { + if self.requested_thread_count == 0 { + return false; + } + + self.requested_thread_count -= 1; + self.started_thread_count += 1; + true + } + + /// Finds a delivered death notification with the given cookie, removes it from the thread's + /// delivered list, and returns it. + fn pull_delivered_death(&mut self, cookie: u64) -> Option<DArc<NodeDeath>> { + let mut cursor = self.delivered_deaths.cursor_front(); + while let Some(next) = cursor.peek_next() { + if next.cookie == cookie { + return Some(next.remove().into_arc()); + } + cursor.move_next(); + } + None + } + + pub(crate) fn death_delivered(&mut self, death: DArc<NodeDeath>) { + if let Some(death) = ListArc::try_from_arc_or_drop(death) { + self.delivered_deaths.push_back(death); + } else { + pr_warn!("Notification added to `delivered_deaths` twice."); + } + } + + pub(crate) fn add_outstanding_txn(&mut self) { + self.outstanding_txns += 1; + } + + fn txns_pending_locked(&self) -> bool { + if self.outstanding_txns > 0 { + return true; + } + for thread in self.threads.values() { + if thread.has_current_transaction() { + return true; + } + } + false + } +} + +/// Used to keep track of a node that this process has a handle to. +#[pin_data] +pub(crate) struct NodeRefInfo { + debug_id: usize, + /// The refcount that this process owns to the node. + node_ref: ListArcField<NodeRef, { Self::LIST_PROC }>, + death: ListArcField<Option<DArc<NodeDeath>>, { Self::LIST_PROC }>, + /// Cookie of the active freeze listener for this node. + freeze: ListArcField<Option<FreezeCookie>, { Self::LIST_PROC }>, + /// Used to store this `NodeRefInfo` in the node's `refs` list. + #[pin] + links: ListLinks<{ Self::LIST_NODE }>, + /// The handle for this `NodeRefInfo`. + handle: u32, + /// The process that has a handle to the node. + pub(crate) process: Arc<Process>, +} + +impl NodeRefInfo { + /// The id used for the `Node::refs` list. + pub(crate) const LIST_NODE: u64 = 0x2da16350fb724a10; + /// The id used for the `ListArc` in `ProcessNodeRefs`. + const LIST_PROC: u64 = 0xd703a5263dcc8650; + + fn new(node_ref: NodeRef, handle: u32, process: Arc<Process>) -> impl PinInit<Self> { + pin_init!(Self { + debug_id: super::next_debug_id(), + node_ref: ListArcField::new(node_ref), + death: ListArcField::new(None), + freeze: ListArcField::new(None), + links <- ListLinks::new(), + handle, + process, + }) + } + + kernel::list::define_list_arc_field_getter! { + pub(crate) fn death(&mut self<{Self::LIST_PROC}>) -> &mut Option<DArc<NodeDeath>> { death } + pub(crate) fn freeze(&mut self<{Self::LIST_PROC}>) -> &mut Option<FreezeCookie> { freeze } + pub(crate) fn node_ref(&mut self<{Self::LIST_PROC}>) -> &mut NodeRef { node_ref } + pub(crate) fn node_ref2(&self<{Self::LIST_PROC}>) -> &NodeRef { node_ref } + } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<{Self::LIST_NODE}> for NodeRefInfo { untracked; } + impl ListArcSafe<{Self::LIST_PROC}> for NodeRefInfo { untracked; } +} +kernel::list::impl_list_item! { + impl ListItem<{Self::LIST_NODE}> for NodeRefInfo { + using ListLinks { self.links }; + } +} + +/// Keeps track of references this process has to nodes owned by other processes. +/// +/// TODO: Currently, the rbtree requires two allocations per node reference, and two tree +/// traversals to look up a node by `Node::global_id`. Once the rbtree is more powerful, these +/// extra costs should be eliminated. +struct ProcessNodeRefs { + /// Used to look up nodes using the 32-bit id that this process knows it by. + by_handle: RBTree<u32, ListArc<NodeRefInfo, { NodeRefInfo::LIST_PROC }>>, + /// Used to quickly find unused ids in `by_handle`. + handle_is_present: IdPool, + /// Used to look up nodes without knowing their local 32-bit id. The usize is the address of + /// the underlying `Node` struct as returned by `Node::global_id`. + by_node: RBTree<usize, u32>, + /// Used to look up a `FreezeListener` by cookie. + /// + /// There might be multiple freeze listeners for the same node, but at most one of them is + /// active. + freeze_listeners: RBTree<FreezeCookie, FreezeListener>, +} + +impl ProcessNodeRefs { + fn new() -> Self { + Self { + by_handle: RBTree::new(), + handle_is_present: IdPool::new(), + by_node: RBTree::new(), + freeze_listeners: RBTree::new(), + } + } +} + +/// A process using binder. +/// +/// Strictly speaking, there can be multiple of these per process. There is one for each binder fd +/// that a process has opened, so processes using several binder contexts have several `Process` +/// objects. This ensures that the contexts are fully separated. +#[pin_data] +pub(crate) struct Process { + pub(crate) ctx: Arc<Context>, + + // The task leader (process). + pub(crate) task: ARef<Task>, + + // Credential associated with file when `Process` is created. + pub(crate) cred: ARef<Credential>, + + #[pin] + pub(crate) inner: SpinLock<ProcessInner>, + + #[pin] + pub(crate) pages: ShrinkablePageRange, + + // Waitqueue of processes waiting for all outstanding transactions to be + // processed. + #[pin] + freeze_wait: CondVar, + + // Node references are in a different lock to avoid recursive acquisition when + // incrementing/decrementing a node in another process. + #[pin] + node_refs: Mutex<ProcessNodeRefs>, + + // Work node for deferred work item. + #[pin] + defer_work: Work<Process>, + + // Links for process list in Context. + #[pin] + links: ListLinks, + + pub(crate) stats: BinderStats, +} + +kernel::impl_has_work! { + impl HasWork<Process> for Process { self.defer_work } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for Process { untracked; } +} +kernel::list::impl_list_item! { + impl ListItem<0> for Process { + using ListLinks { self.links }; + } +} + +impl workqueue::WorkItem for Process { + type Pointer = Arc<Process>; + + fn run(me: Arc<Self>) { + let defer; + { + let mut inner = me.inner.lock(); + defer = inner.defer_work; + inner.defer_work = 0; + } + + if defer & PROC_DEFER_FLUSH != 0 { + me.deferred_flush(); + } + if defer & PROC_DEFER_RELEASE != 0 { + me.deferred_release(); + } + } +} + +impl Process { + fn new(ctx: Arc<Context>, cred: ARef<Credential>) -> Result<Arc<Self>> { + let current = kernel::current!(); + let list_process = ListArc::pin_init::<Error>( + try_pin_init!(Process { + ctx, + cred, + inner <- kernel::new_spinlock!(ProcessInner::new(), "Process::inner"), + pages <- ShrinkablePageRange::new(&super::BINDER_SHRINKER), + node_refs <- kernel::new_mutex!(ProcessNodeRefs::new(), "Process::node_refs"), + freeze_wait <- kernel::new_condvar!("Process::freeze_wait"), + task: current.group_leader().into(), + defer_work <- kernel::new_work!("Process::defer_work"), + links <- ListLinks::new(), + stats: BinderStats::new(), + }), + GFP_KERNEL, + )?; + + let process = list_process.clone_arc(); + process.ctx.register_process(list_process); + + Ok(process) + } + + pub(crate) fn pid_in_current_ns(&self) -> kernel::task::Pid { + self.task.tgid_nr_ns(None) + } + + #[inline(never)] + pub(crate) fn debug_print_stats(&self, m: &SeqFile, ctx: &Context) -> Result<()> { + seq_print!(m, "proc {}\n", self.pid_in_current_ns()); + seq_print!(m, "context {}\n", &*ctx.name); + + let inner = self.inner.lock(); + seq_print!(m, " threads: {}\n", inner.threads.iter().count()); + seq_print!( + m, + " requested threads: {}+{}/{}\n", + inner.requested_thread_count, + inner.started_thread_count, + inner.max_threads, + ); + if let Some(mapping) = &inner.mapping { + seq_print!( + m, + " free oneway space: {}\n", + mapping.alloc.free_oneway_space() + ); + seq_print!(m, " buffers: {}\n", mapping.alloc.count_buffers()); + } + seq_print!( + m, + " outstanding transactions: {}\n", + inner.outstanding_txns + ); + seq_print!(m, " nodes: {}\n", inner.nodes.iter().count()); + drop(inner); + + { + let mut refs = self.node_refs.lock(); + let (mut count, mut weak, mut strong) = (0, 0, 0); + for r in refs.by_handle.values_mut() { + let node_ref = r.node_ref(); + let (nstrong, nweak) = node_ref.get_count(); + count += 1; + weak += nweak; + strong += nstrong; + } + seq_print!(m, " refs: {count} s {strong} w {weak}\n"); + } + + self.stats.debug_print(" ", m); + + Ok(()) + } + + #[inline(never)] + pub(crate) fn debug_print(&self, m: &SeqFile, ctx: &Context, print_all: bool) -> Result<()> { + seq_print!(m, "proc {}\n", self.pid_in_current_ns()); + seq_print!(m, "context {}\n", &*ctx.name); + + let mut all_threads = KVec::new(); + let mut all_nodes = KVec::new(); + loop { + let inner = self.inner.lock(); + let num_threads = inner.threads.iter().count(); + let num_nodes = inner.nodes.iter().count(); + + if all_threads.capacity() < num_threads || all_nodes.capacity() < num_nodes { + drop(inner); + all_threads.reserve(num_threads, GFP_KERNEL)?; + all_nodes.reserve(num_nodes, GFP_KERNEL)?; + continue; + } + + for thread in inner.threads.values() { + assert!(all_threads.len() < all_threads.capacity()); + let _ = all_threads.push(thread.clone(), GFP_ATOMIC); + } + + for node in inner.nodes.values() { + assert!(all_nodes.len() < all_nodes.capacity()); + let _ = all_nodes.push(node.clone(), GFP_ATOMIC); + } + + break; + } + + for thread in all_threads { + thread.debug_print(m, print_all)?; + } + + let mut inner = self.inner.lock(); + for node in all_nodes { + if print_all || node.has_oneway_transaction(&mut inner) { + node.full_debug_print(m, &mut inner)?; + } + } + drop(inner); + + if print_all { + let mut refs = self.node_refs.lock(); + for r in refs.by_handle.values_mut() { + let node_ref = r.node_ref(); + let dead = node_ref.node.owner.inner.lock().is_dead; + let (strong, weak) = node_ref.get_count(); + let debug_id = node_ref.node.debug_id; + + seq_print!( + m, + " ref {}: desc {} {}node {debug_id} s {strong} w {weak}", + r.debug_id, + r.handle, + if dead { "dead " } else { "" } + ); + } + } + + let inner = self.inner.lock(); + for work in &inner.work { + work.debug_print(m, " ", " pending transaction ")?; + } + for _death in &inner.delivered_deaths { + seq_print!(m, " has delivered dead binder\n"); + } + if let Some(mapping) = &inner.mapping { + mapping.alloc.debug_print(m)?; + } + drop(inner); + + Ok(()) + } + + /// Attempts to fetch a work item from the process queue. + pub(crate) fn get_work(&self) -> Option<DLArc<dyn DeliverToRead>> { + self.inner.lock().work.pop_front() + } + + /// Attempts to fetch a work item from the process queue. If none is available, it registers the + /// given thread as ready to receive work directly. + /// + /// This must only be called when the thread is not participating in a transaction chain; when + /// it is, work will always be delivered directly to the thread (and not through the process + /// queue). + pub(crate) fn get_work_or_register<'a>( + &'a self, + thread: &'a Arc<Thread>, + ) -> GetWorkOrRegister<'a> { + let mut inner = self.inner.lock(); + // Try to get work from the process queue. + if let Some(work) = inner.work.pop_front() { + return GetWorkOrRegister::Work(work); + } + + // Register the thread as ready. + GetWorkOrRegister::Register(Registration::new(thread, &mut inner)) + } + + fn get_current_thread(self: ArcBorrow<'_, Self>) -> Result<Arc<Thread>> { + let id = { + let current = kernel::current!(); + if !core::ptr::eq(current.group_leader(), &*self.task) { + pr_err!("get_current_thread was called from the wrong process."); + return Err(EINVAL); + } + current.pid() + }; + + { + let inner = self.inner.lock(); + if let Some(thread) = inner.threads.get(&id) { + return Ok(thread.clone()); + } + } + + // Allocate a new `Thread` without holding any locks. + let reservation = RBTreeNodeReservation::new(GFP_KERNEL)?; + let ta: Arc<Thread> = Thread::new(id, self.into())?; + + let mut inner = self.inner.lock(); + match inner.threads.entry(id) { + rbtree::Entry::Vacant(entry) => { + entry.insert(ta.clone(), reservation); + Ok(ta) + } + rbtree::Entry::Occupied(_entry) => { + pr_err!("Cannot create two threads with the same id."); + Err(EINVAL) + } + } + } + + pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult { + // If push_work fails, drop the work item outside the lock. + let res = self.inner.lock().push_work(work); + match res { + Ok(()) => Ok(()), + Err((err, work)) => { + drop(work); + Err(err) + } + } + } + + fn set_as_manager( + self: ArcBorrow<'_, Self>, + info: Option<FlatBinderObject>, + thread: &Thread, + ) -> Result { + let (ptr, cookie, flags) = if let Some(obj) = info { + ( + // SAFETY: The object type for this ioctl is implicitly `BINDER_TYPE_BINDER`, so it + // is safe to access the `binder` field. + unsafe { obj.__bindgen_anon_1.binder }, + obj.cookie, + obj.flags, + ) + } else { + (0, 0, 0) + }; + let node_ref = self.get_node(ptr, cookie, flags as _, true, thread)?; + let node = node_ref.node.clone(); + self.ctx.set_manager_node(node_ref)?; + self.inner.lock().is_manager = true; + + // Force the state of the node to prevent the delivery of acquire/increfs. + let mut owner_inner = node.owner.inner.lock(); + node.force_has_count(&mut owner_inner); + Ok(()) + } + + fn get_node_inner( + self: ArcBorrow<'_, Self>, + ptr: u64, + cookie: u64, + flags: u32, + strong: bool, + thread: &Thread, + wrapper: Option<CritIncrWrapper>, + ) -> Result<Result<NodeRef, CouldNotDeliverCriticalIncrement>> { + // Try to find an existing node. + { + let mut inner = self.inner.lock(); + if let Some(node) = inner.get_existing_node(ptr, cookie)? { + return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper)); + } + } + + // Allocate the node before reacquiring the lock. + let node = DTRWrap::arc_pin_init(Node::new(ptr, cookie, flags, self.into()))?.into_arc(); + let rbnode = RBTreeNode::new(ptr, node.clone(), GFP_KERNEL)?; + let mut inner = self.inner.lock(); + if let Some(node) = inner.get_existing_node(ptr, cookie)? { + return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper)); + } + + inner.nodes.insert(rbnode); + // This can only fail if someone has already pushed the node to a list, but we just created + // it and still hold the lock, so it can't fail right now. + let node_ref = inner + .new_node_ref_with_thread(node, strong, thread, wrapper) + .unwrap(); + + Ok(Ok(node_ref)) + } + + pub(crate) fn get_node( + self: ArcBorrow<'_, Self>, + ptr: u64, + cookie: u64, + flags: u32, + strong: bool, + thread: &Thread, + ) -> Result<NodeRef> { + let mut wrapper = None; + for _ in 0..2 { + match self.get_node_inner(ptr, cookie, flags, strong, thread, wrapper) { + Err(err) => return Err(err), + Ok(Ok(node_ref)) => return Ok(node_ref), + Ok(Err(CouldNotDeliverCriticalIncrement)) => { + wrapper = Some(CritIncrWrapper::new()?); + } + } + } + // We only get a `CouldNotDeliverCriticalIncrement` error if `wrapper` is `None`, so the + // loop should run at most twice. + unreachable!() + } + + pub(crate) fn insert_or_update_handle( + self: ArcBorrow<'_, Process>, + node_ref: NodeRef, + is_manager: bool, + ) -> Result<u32> { + { + let mut refs = self.node_refs.lock(); + + // Do a lookup before inserting. + if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) { + let handle = *handle_ref; + let info = refs.by_handle.get_mut(&handle).unwrap(); + info.node_ref().absorb(node_ref); + return Ok(handle); + } + } + + // Reserve memory for tree nodes. + let reserve1 = RBTreeNodeReservation::new(GFP_KERNEL)?; + let reserve2 = RBTreeNodeReservation::new(GFP_KERNEL)?; + let info = UniqueArc::new_uninit(GFP_KERNEL)?; + + let mut refs_lock = self.node_refs.lock(); + let mut refs = &mut *refs_lock; + + let (unused_id, by_handle_slot) = loop { + // ID 0 may only be used by the manager. + let start = if is_manager { 0 } else { 1 }; + + if let Some(res) = refs.handle_is_present.find_unused_id(start) { + match refs.by_handle.entry(res.as_u32()) { + rbtree::Entry::Vacant(entry) => break (res, entry), + rbtree::Entry::Occupied(_) => { + pr_err!("Detected mismatch between handle_is_present and by_handle"); + res.acquire(); + kernel::warn_on!(true); + return Err(EINVAL); + } + } + } + + let grow_request = refs.handle_is_present.grow_request().ok_or(ENOMEM)?; + drop(refs_lock); + let resizer = grow_request.realloc(GFP_KERNEL)?; + refs_lock = self.node_refs.lock(); + refs = &mut *refs_lock; + refs.handle_is_present.grow(resizer); + }; + let handle = unused_id.as_u32(); + + // Do a lookup again as node may have been inserted before the lock was reacquired. + if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) { + let handle = *handle_ref; + let info = refs.by_handle.get_mut(&handle).unwrap(); + info.node_ref().absorb(node_ref); + return Ok(handle); + } + + let gid = node_ref.node.global_id(); + let (info_proc, info_node) = { + let info_init = NodeRefInfo::new(node_ref, handle, self.into()); + match info.pin_init_with(info_init) { + Ok(info) => ListArc::pair_from_pin_unique(info), + // error is infallible + Err(err) => match err {}, + } + }; + + // Ensure the process is still alive while we insert a new reference. + // + // This releases the lock before inserting the nodes, but since `is_dead` is set as the + // first thing in `deferred_release`, process cleanup will not miss the items inserted into + // `refs` below. + if self.inner.lock().is_dead { + return Err(ESRCH); + } + + // SAFETY: `info_proc` and `info_node` reference the same node, so we are inserting + // `info_node` into the right node's `refs` list. + unsafe { info_proc.node_ref2().node.insert_node_info(info_node) }; + + refs.by_node.insert(reserve1.into_node(gid, handle)); + by_handle_slot.insert(info_proc, reserve2); + unused_id.acquire(); + Ok(handle) + } + + pub(crate) fn get_transaction_node(&self, handle: u32) -> BinderResult<NodeRef> { + // When handle is zero, try to get the context manager. + if handle == 0 { + Ok(self.ctx.get_manager_node(true)?) + } else { + Ok(self.get_node_from_handle(handle, true)?) + } + } + + pub(crate) fn get_node_from_handle(&self, handle: u32, strong: bool) -> Result<NodeRef> { + self.node_refs + .lock() + .by_handle + .get_mut(&handle) + .ok_or(ENOENT)? + .node_ref() + .clone(strong) + } + + pub(crate) fn remove_from_delivered_deaths(&self, death: &DArc<NodeDeath>) { + let mut inner = self.inner.lock(); + // SAFETY: By the invariant on the `delivered_links` field, this is the right linked list. + let removed = unsafe { inner.delivered_deaths.remove(death) }; + drop(inner); + drop(removed); + } + + pub(crate) fn update_ref( + self: ArcBorrow<'_, Process>, + handle: u32, + inc: bool, + strong: bool, + ) -> Result { + if inc && handle == 0 { + if let Ok(node_ref) = self.ctx.get_manager_node(strong) { + if core::ptr::eq(&*self, &*node_ref.node.owner) { + return Err(EINVAL); + } + let _ = self.insert_or_update_handle(node_ref, true); + return Ok(()); + } + } + + // To preserve original binder behaviour, we only fail requests where the manager tries to + // increment references on itself. + let mut refs = self.node_refs.lock(); + if let Some(info) = refs.by_handle.get_mut(&handle) { + if info.node_ref().update(inc, strong) { + // Clean up death if there is one attached to this node reference. + if let Some(death) = info.death().take() { + death.set_cleared(true); + self.remove_from_delivered_deaths(&death); + } + + // Remove reference from process tables, and from the node's `refs` list. + + // SAFETY: We are removing the `NodeRefInfo` from the right node. + unsafe { info.node_ref2().node.remove_node_info(info) }; + + let id = info.node_ref().node.global_id(); + refs.by_handle.remove(&handle); + refs.by_node.remove(&id); + refs.handle_is_present.release_id(handle as usize); + + if let Some(shrink) = refs.handle_is_present.shrink_request() { + drop(refs); + // This intentionally ignores allocation failures. + if let Ok(new_bitmap) = shrink.realloc(GFP_KERNEL) { + refs = self.node_refs.lock(); + refs.handle_is_present.shrink(new_bitmap); + } + } + } + } else { + // All refs are cleared in process exit, so this warning is expected in that case. + if !self.inner.lock().is_dead { + pr_warn!("{}: no such ref {handle}\n", self.pid_in_current_ns()); + } + } + Ok(()) + } + + /// Decrements the refcount of the given node, if one exists. + pub(crate) fn update_node(&self, ptr: u64, cookie: u64, strong: bool) { + let mut inner = self.inner.lock(); + if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) { + inner.update_node_refcount(&node, false, strong, 1, None); + } + } + + pub(crate) fn inc_ref_done(&self, reader: &mut UserSliceReader, strong: bool) -> Result { + let ptr = reader.read::<u64>()?; + let cookie = reader.read::<u64>()?; + let mut inner = self.inner.lock(); + if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) { + if let Some(node) = node.inc_ref_done_locked(strong, &mut inner) { + // This only fails if the process is dead. + let _ = inner.push_work(node); + } + } + Ok(()) + } + + pub(crate) fn buffer_alloc( + self: &Arc<Self>, + debug_id: usize, + size: usize, + is_oneway: bool, + from_pid: i32, + ) -> BinderResult<NewAllocation> { + use kernel::page::PAGE_SIZE; + + let mut reserve_new_args = ReserveNewArgs { + debug_id, + size, + is_oneway, + pid: from_pid, + ..ReserveNewArgs::default() + }; + + let (new_alloc, addr) = loop { + let mut inner = self.inner.lock(); + let mapping = inner.mapping.as_mut().ok_or_else(BinderError::new_dead)?; + let alloc_request = match mapping.alloc.reserve_new(reserve_new_args)? { + ReserveNew::Success(new_alloc) => break (new_alloc, mapping.address), + ReserveNew::NeedAlloc(request) => request, + }; + drop(inner); + // We need to allocate memory and then call `reserve_new` again. + reserve_new_args = alloc_request.make_alloc()?; + }; + + let res = Allocation::new( + self.clone(), + debug_id, + new_alloc.offset, + size, + addr + new_alloc.offset, + new_alloc.oneway_spam_detected, + ); + + // This allocation will be marked as in use until the `Allocation` is used to free it. + // + // This method can't be called while holding a lock, so we release the lock first. It's + // okay for several threads to use the method on the same index at the same time. In that + // case, one of the calls will allocate the given page (if missing), and the other call + // will wait for the other call to finish allocating the page. + // + // We will not call `stop_using_range` in parallel with this on the same page, because the + // allocation can only be removed via the destructor of the `Allocation` object that we + // currently own. + match self.pages.use_range( + new_alloc.offset / PAGE_SIZE, + (new_alloc.offset + size).div_ceil(PAGE_SIZE), + ) { + Ok(()) => {} + Err(err) => { + pr_warn!("use_range failure {:?}", err); + return Err(err.into()); + } + } + + Ok(NewAllocation(res)) + } + + pub(crate) fn buffer_get(self: &Arc<Self>, ptr: usize) -> Option<Allocation> { + let mut inner = self.inner.lock(); + let mapping = inner.mapping.as_mut()?; + let offset = ptr.checked_sub(mapping.address)?; + let (size, debug_id, odata) = mapping.alloc.reserve_existing(offset).ok()?; + let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false); + if let Some(data) = odata { + alloc.set_info(data); + } + Some(alloc) + } + + pub(crate) fn buffer_raw_free(&self, ptr: usize) { + let mut inner = self.inner.lock(); + if let Some(ref mut mapping) = &mut inner.mapping { + let offset = match ptr.checked_sub(mapping.address) { + Some(offset) => offset, + None => return, + }; + + let freed_range = match mapping.alloc.reservation_abort(offset) { + Ok(freed_range) => freed_range, + Err(_) => { + pr_warn!( + "Pointer {:x} failed to free, base = {:x}\n", + ptr, + mapping.address + ); + return; + } + }; + + // No more allocations in this range. Mark them as not in use. + // + // Must be done before we release the lock so that `use_range` is not used on these + // indices until `stop_using_range` returns. + self.pages + .stop_using_range(freed_range.start_page_idx, freed_range.end_page_idx); + } + } + + pub(crate) fn buffer_make_freeable(&self, offset: usize, mut data: Option<AllocationInfo>) { + let mut inner = self.inner.lock(); + if let Some(ref mut mapping) = &mut inner.mapping { + if mapping.alloc.reservation_commit(offset, &mut data).is_err() { + pr_warn!("Offset {} failed to be marked freeable\n", offset); + } + } + } + + fn create_mapping(&self, vma: &mm::virt::VmaNew) -> Result { + use kernel::page::PAGE_SIZE; + let size = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize); + let mapping = Mapping::new(vma.start(), size); + let page_count = self.pages.register_with_vma(vma)?; + if page_count * PAGE_SIZE != size { + return Err(EINVAL); + } + + // Save range allocator for later. + self.inner.lock().mapping = Some(mapping); + + Ok(()) + } + + fn version(&self, data: UserSlice) -> Result { + data.writer().write(&BinderVersion::current()) + } + + pub(crate) fn register_thread(&self) -> bool { + self.inner.lock().register_thread() + } + + fn remove_thread(&self, thread: Arc<Thread>) { + self.inner.lock().threads.remove(&thread.id); + thread.release(); + } + + fn set_max_threads(&self, max: u32) { + self.inner.lock().max_threads = max; + } + + fn set_oneway_spam_detection_enabled(&self, enabled: u32) { + self.inner.lock().oneway_spam_detection_enabled = enabled != 0; + } + + pub(crate) fn is_oneway_spam_detection_enabled(&self) -> bool { + self.inner.lock().oneway_spam_detection_enabled + } + + fn get_node_debug_info(&self, data: UserSlice) -> Result { + let (mut reader, mut writer) = data.reader_writer(); + + // Read the starting point. + let ptr = reader.read::<BinderNodeDebugInfo>()?.ptr; + let mut out = BinderNodeDebugInfo::default(); + + { + let inner = self.inner.lock(); + for (node_ptr, node) in &inner.nodes { + if *node_ptr > ptr { + node.populate_debug_info(&mut out, &inner); + break; + } + } + } + + writer.write(&out) + } + + fn get_node_info_from_ref(&self, data: UserSlice) -> Result { + let (mut reader, mut writer) = data.reader_writer(); + let mut out = reader.read::<BinderNodeInfoForRef>()?; + + if out.strong_count != 0 + || out.weak_count != 0 + || out.reserved1 != 0 + || out.reserved2 != 0 + || out.reserved3 != 0 + { + return Err(EINVAL); + } + + // Only the context manager is allowed to use this ioctl. + if !self.inner.lock().is_manager { + return Err(EPERM); + } + + { + let mut node_refs = self.node_refs.lock(); + let node_info = node_refs.by_handle.get_mut(&out.handle).ok_or(ENOENT)?; + let node_ref = node_info.node_ref(); + let owner_inner = node_ref.node.owner.inner.lock(); + node_ref.node.populate_counts(&mut out, &owner_inner); + } + + // Write the result back. + writer.write(&out) + } + + pub(crate) fn needs_thread(&self) -> bool { + let mut inner = self.inner.lock(); + let ret = inner.requested_thread_count == 0 + && inner.ready_threads.is_empty() + && inner.started_thread_count < inner.max_threads; + if ret { + inner.requested_thread_count += 1 + } + ret + } + + pub(crate) fn request_death( + self: &Arc<Self>, + reader: &mut UserSliceReader, + thread: &Thread, + ) -> Result { + let handle: u32 = reader.read()?; + let cookie: u64 = reader.read()?; + + // Queue BR_ERROR if we can't allocate memory for the death notification. + let death = UniqueArc::new_uninit(GFP_KERNEL).inspect_err(|_| { + thread.push_return_work(BR_ERROR); + })?; + let mut refs = self.node_refs.lock(); + let Some(info) = refs.by_handle.get_mut(&handle) else { + pr_warn!("BC_REQUEST_DEATH_NOTIFICATION invalid ref {handle}\n"); + return Ok(()); + }; + + // Nothing to do if there is already a death notification request for this handle. + if info.death().is_some() { + pr_warn!("BC_REQUEST_DEATH_NOTIFICATION death notification already set\n"); + return Ok(()); + } + + let death = { + let death_init = NodeDeath::new(info.node_ref().node.clone(), self.clone(), cookie); + match death.pin_init_with(death_init) { + Ok(death) => death, + // error is infallible + Err(err) => match err {}, + } + }; + + // Register the death notification. + { + let owner = info.node_ref2().node.owner.clone(); + let mut owner_inner = owner.inner.lock(); + if owner_inner.is_dead { + let death = Arc::from(death); + *info.death() = Some(death.clone()); + drop(owner_inner); + death.set_dead(); + } else { + let death = ListArc::from(death); + *info.death() = Some(death.clone_arc()); + info.node_ref().node.add_death(death, &mut owner_inner); + } + } + Ok(()) + } + + pub(crate) fn clear_death(&self, reader: &mut UserSliceReader, thread: &Thread) -> Result { + let handle: u32 = reader.read()?; + let cookie: u64 = reader.read()?; + + let mut refs = self.node_refs.lock(); + let Some(info) = refs.by_handle.get_mut(&handle) else { + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION invalid ref {handle}\n"); + return Ok(()); + }; + + let Some(death) = info.death().take() else { + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification not active\n"); + return Ok(()); + }; + if death.cookie != cookie { + *info.death() = Some(death); + pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch\n"); + return Ok(()); + } + + // Update state and determine if we need to queue a work item. We only need to do it when + // the node is not dead or if the user already completed the death notification. + if death.set_cleared(false) { + if let Some(death) = ListArc::try_from_arc_or_drop(death) { + let _ = thread.push_work_if_looper(death); + } + } + + Ok(()) + } + + pub(crate) fn dead_binder_done(&self, cookie: u64, thread: &Thread) { + if let Some(death) = self.inner.lock().pull_delivered_death(cookie) { + death.set_notification_done(thread); + } + } + + /// Locks the spinlock and move the `nodes` rbtree out. + /// + /// This allows you to iterate through `nodes` while also allowing you to give other parts of + /// the codebase exclusive access to `ProcessInner`. + pub(crate) fn lock_with_nodes(&self) -> WithNodes<'_> { + let mut inner = self.inner.lock(); + WithNodes { + nodes: take(&mut inner.nodes), + inner, + } + } + + fn deferred_flush(&self) { + let inner = self.inner.lock(); + for thread in inner.threads.values() { + thread.exit_looper(); + } + } + + fn deferred_release(self: Arc<Self>) { + let is_manager = { + let mut inner = self.inner.lock(); + inner.is_dead = true; + inner.is_frozen = IsFrozen::No; + inner.sync_recv = false; + inner.async_recv = false; + inner.is_manager + }; + + if is_manager { + self.ctx.unset_manager_node(); + } + + self.ctx.deregister_process(&self); + + let binderfs_file = self.inner.lock().binderfs_file.take(); + drop(binderfs_file); + + // Release threads. + let threads = { + let mut inner = self.inner.lock(); + let threads = take(&mut inner.threads); + let ready = take(&mut inner.ready_threads); + drop(inner); + drop(ready); + + for thread in threads.values() { + thread.release(); + } + threads + }; + + // Release nodes. + { + while let Some(node) = { + let mut lock = self.inner.lock(); + lock.nodes.cursor_front_mut().map(|c| c.remove_current().1) + } { + node.to_key_value().1.release(); + } + } + + // Clean up death listeners and remove nodes from external node info lists. + for info in self.node_refs.lock().by_handle.values_mut() { + // SAFETY: We are removing the `NodeRefInfo` from the right node. + unsafe { info.node_ref2().node.remove_node_info(info) }; + + // Remove all death notifications from the nodes (that belong to a different process). + let death = if let Some(existing) = info.death().take() { + existing + } else { + continue; + }; + death.set_cleared(false); + } + + // Clean up freeze listeners. + let freeze_listeners = take(&mut self.node_refs.lock().freeze_listeners); + for listener in freeze_listeners.values() { + listener.on_process_exit(&self); + } + drop(freeze_listeners); + + // Release refs on foreign nodes. + { + let mut refs = self.node_refs.lock(); + let by_handle = take(&mut refs.by_handle); + let by_node = take(&mut refs.by_node); + drop(refs); + drop(by_node); + drop(by_handle); + } + + // Cancel all pending work items. + while let Some(work) = self.get_work() { + work.into_arc().cancel(); + } + + // Clear delivered_deaths list. + // + // Scope ensures that MutexGuard is dropped while executing the body. + while let Some(delivered_death) = { self.inner.lock().delivered_deaths.pop_front() } { + drop(delivered_death); + } + + // Free any resources kept alive by allocated buffers. + let omapping = self.inner.lock().mapping.take(); + if let Some(mut mapping) = omapping { + let address = mapping.address; + mapping + .alloc + .take_for_each(|offset, size, debug_id, odata| { + let ptr = offset + address; + let mut alloc = + Allocation::new(self.clone(), debug_id, offset, size, ptr, false); + if let Some(data) = odata { + alloc.set_info(data); + } + drop(alloc) + }); + } + + // calls to synchronize_rcu() in thread drop will happen here + drop(threads); + } + + pub(crate) fn drop_outstanding_txn(&self) { + let wake = { + let mut inner = self.inner.lock(); + if inner.outstanding_txns == 0 { + pr_err!("outstanding_txns underflow"); + return; + } + inner.outstanding_txns -= 1; + inner.is_frozen.is_frozen() && inner.outstanding_txns == 0 + }; + + if wake { + self.freeze_wait.notify_all(); + } + } + + pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { + if info.enable == 0 { + let msgs = self.prepare_freeze_messages()?; + let mut inner = self.inner.lock(); + inner.sync_recv = false; + inner.async_recv = false; + inner.is_frozen = IsFrozen::No; + drop(inner); + msgs.send_messages(); + return Ok(()); + } + + let mut inner = self.inner.lock(); + inner.sync_recv = false; + inner.async_recv = false; + inner.is_frozen = IsFrozen::InProgress; + + if info.timeout_ms > 0 { + let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms); + while jiffies > 0 { + if inner.outstanding_txns == 0 { + break; + } + + match self + .freeze_wait + .wait_interruptible_timeout(&mut inner, jiffies) + { + CondVarTimeoutResult::Signal { .. } => { + inner.is_frozen = IsFrozen::No; + return Err(ERESTARTSYS); + } + CondVarTimeoutResult::Woken { jiffies: remaining } => { + jiffies = remaining; + } + CondVarTimeoutResult::Timeout => { + jiffies = 0; + } + } + } + } + + if inner.txns_pending_locked() { + inner.is_frozen = IsFrozen::No; + Err(EAGAIN) + } else { + drop(inner); + match self.prepare_freeze_messages() { + Ok(batch) => { + self.inner.lock().is_frozen = IsFrozen::Yes; + batch.send_messages(); + Ok(()) + } + Err(kernel::alloc::AllocError) => { + self.inner.lock().is_frozen = IsFrozen::No; + Err(ENOMEM) + } + } + } + } +} + +fn get_frozen_status(data: UserSlice) -> Result { + let (mut reader, mut writer) = data.reader_writer(); + + let mut info = reader.read::<BinderFrozenStatusInfo>()?; + info.sync_recv = 0; + info.async_recv = 0; + let mut found = false; + + for ctx in crate::context::get_all_contexts()? { + ctx.for_each_proc(|proc| { + if proc.task.pid() == info.pid as _ { + found = true; + let inner = proc.inner.lock(); + let txns_pending = inner.txns_pending_locked(); + info.async_recv |= inner.async_recv as u32; + info.sync_recv |= inner.sync_recv as u32; + info.sync_recv |= (txns_pending as u32) << 1; + } + }); + } + + if found { + writer.write(&info)?; + Ok(()) + } else { + Err(EINVAL) + } +} + +fn ioctl_freeze(reader: &mut UserSliceReader) -> Result { + let info = reader.read::<BinderFreezeInfo>()?; + + // Very unlikely for there to be more than 3, since a process normally uses at most binder and + // hwbinder. + let mut procs = KVec::with_capacity(3, GFP_KERNEL)?; + + let ctxs = crate::context::get_all_contexts()?; + for ctx in ctxs { + for proc in ctx.get_procs_with_pid(info.pid as i32)? { + procs.push(proc, GFP_KERNEL)?; + } + } + + for proc in procs { + proc.ioctl_freeze(&info)?; + } + Ok(()) +} + +/// The ioctl handler. +impl Process { + /// Ioctls that are write-only from the perspective of userspace. + /// + /// The kernel will only read from the pointer that userspace provided to us. + fn ioctl_write_only( + this: ArcBorrow<'_, Process>, + _file: &File, + cmd: u32, + reader: &mut UserSliceReader, + ) -> Result { + let thread = this.get_current_thread()?; + match cmd { + uapi::BINDER_SET_MAX_THREADS => this.set_max_threads(reader.read()?), + uapi::BINDER_THREAD_EXIT => this.remove_thread(thread), + uapi::BINDER_SET_CONTEXT_MGR => this.set_as_manager(None, &thread)?, + uapi::BINDER_SET_CONTEXT_MGR_EXT => { + this.set_as_manager(Some(reader.read()?), &thread)? + } + uapi::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => { + this.set_oneway_spam_detection_enabled(reader.read()?) + } + uapi::BINDER_FREEZE => ioctl_freeze(reader)?, + _ => return Err(EINVAL), + } + Ok(()) + } + + /// Ioctls that are read/write from the perspective of userspace. + /// + /// The kernel will both read from and write to the pointer that userspace provided to us. + fn ioctl_write_read( + this: ArcBorrow<'_, Process>, + file: &File, + cmd: u32, + data: UserSlice, + ) -> Result { + let thread = this.get_current_thread()?; + let blocking = (file.flags() & file::flags::O_NONBLOCK) == 0; + match cmd { + uapi::BINDER_WRITE_READ => thread.write_read(data, blocking)?, + uapi::BINDER_GET_NODE_DEBUG_INFO => this.get_node_debug_info(data)?, + uapi::BINDER_GET_NODE_INFO_FOR_REF => this.get_node_info_from_ref(data)?, + uapi::BINDER_VERSION => this.version(data)?, + uapi::BINDER_GET_FROZEN_INFO => get_frozen_status(data)?, + uapi::BINDER_GET_EXTENDED_ERROR => thread.get_extended_error(data)?, + _ => return Err(EINVAL), + } + Ok(()) + } +} + +/// The file operations supported by `Process`. +impl Process { + pub(crate) fn open(ctx: ArcBorrow<'_, Context>, file: &File) -> Result<Arc<Process>> { + Self::new(ctx.into(), ARef::from(file.cred())) + } + + pub(crate) fn release(this: Arc<Process>, _file: &File) { + let binderfs_file; + let should_schedule; + { + let mut inner = this.inner.lock(); + should_schedule = inner.defer_work == 0; + inner.defer_work |= PROC_DEFER_RELEASE; + binderfs_file = inner.binderfs_file.take(); + } + + if should_schedule { + // Ignore failures to schedule to the workqueue. Those just mean that we're already + // scheduled for execution. + let _ = workqueue::system().enqueue(this); + } + + drop(binderfs_file); + } + + pub(crate) fn flush(this: ArcBorrow<'_, Process>) -> Result { + let should_schedule; + { + let mut inner = this.inner.lock(); + should_schedule = inner.defer_work == 0; + inner.defer_work |= PROC_DEFER_FLUSH; + } + + if should_schedule { + // Ignore failures to schedule to the workqueue. Those just mean that we're already + // scheduled for execution. + let _ = workqueue::system().enqueue(Arc::from(this)); + } + Ok(()) + } + + pub(crate) fn ioctl(this: ArcBorrow<'_, Process>, file: &File, cmd: u32, arg: usize) -> Result { + use kernel::ioctl::{_IOC_DIR, _IOC_SIZE}; + use kernel::uapi::{_IOC_READ, _IOC_WRITE}; + + crate::trace::trace_ioctl(cmd, arg); + + let user_slice = UserSlice::new(UserPtr::from_addr(arg), _IOC_SIZE(cmd)); + + const _IOC_READ_WRITE: u32 = _IOC_READ | _IOC_WRITE; + + match _IOC_DIR(cmd) { + _IOC_WRITE => Self::ioctl_write_only(this, file, cmd, &mut user_slice.reader()), + _IOC_READ_WRITE => Self::ioctl_write_read(this, file, cmd, user_slice), + _ => Err(EINVAL), + } + } + + pub(crate) fn mmap( + this: ArcBorrow<'_, Process>, + _file: &File, + vma: &mm::virt::VmaNew, + ) -> Result { + // We don't allow mmap to be used in a different process. + if !core::ptr::eq(kernel::current!().group_leader(), &*this.task) { + return Err(EINVAL); + } + if vma.start() == 0 { + return Err(EINVAL); + } + + vma.try_clear_maywrite().map_err(|_| EPERM)?; + vma.set_dontcopy(); + vma.set_mixedmap(); + + // TODO: Set ops. We need to learn when the user unmaps so that we can stop using it. + this.create_mapping(vma) + } + + pub(crate) fn poll( + this: ArcBorrow<'_, Process>, + file: &File, + table: PollTable<'_>, + ) -> Result<u32> { + let thread = this.get_current_thread()?; + let (from_proc, mut mask) = thread.poll(file, table); + if mask == 0 && from_proc && !this.inner.lock().work.is_empty() { + mask |= bindings::POLLIN; + } + Ok(mask) + } +} + +/// Represents that a thread has registered with the `ready_threads` list of its process. +/// +/// The destructor of this type will unregister the thread from the list of ready threads. +pub(crate) struct Registration<'a> { + thread: &'a Arc<Thread>, +} + +impl<'a> Registration<'a> { + fn new(thread: &'a Arc<Thread>, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) -> Self { + assert!(core::ptr::eq(&thread.process.inner, guard.lock_ref())); + // INVARIANT: We are pushing this thread to the right `ready_threads` list. + if let Ok(list_arc) = ListArc::try_from_arc(thread.clone()) { + guard.ready_threads.push_front(list_arc); + } else { + // It is an error to hit this branch, and it should not be reachable. We try to do + // something reasonable when the failure path happens. Most likely, the thread in + // question will sleep forever. + pr_err!("Same thread registered with `ready_threads` twice."); + } + Self { thread } + } +} + +impl Drop for Registration<'_> { + fn drop(&mut self) { + let mut inner = self.thread.process.inner.lock(); + // SAFETY: The thread has the invariant that we never push it to any other linked list than + // the `ready_threads` list of its parent process. Therefore, the thread is either in that + // list, or in no list. + unsafe { inner.ready_threads.remove(self.thread) }; + } +} + +pub(crate) struct WithNodes<'a> { + pub(crate) inner: Guard<'a, ProcessInner, SpinLockBackend>, + pub(crate) nodes: RBTree<u64, DArc<Node>>, +} + +impl Drop for WithNodes<'_> { + fn drop(&mut self) { + core::mem::swap(&mut self.nodes, &mut self.inner.nodes); + if self.nodes.iter().next().is_some() { + pr_err!("nodes array was modified while using lock_with_nodes\n"); + } + } +} + +pub(crate) enum GetWorkOrRegister<'a> { + Work(DLArc<dyn DeliverToRead>), + Register(Registration<'a>), +} diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs new file mode 100644 index 000000000000..07e1dec2ce63 --- /dev/null +++ b/drivers/android/binder/range_alloc/array.rs @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{ + page::{PAGE_MASK, PAGE_SIZE}, + prelude::*, + seq_file::SeqFile, + seq_print, + task::Pid, +}; + +use crate::range_alloc::{DescriptorState, FreedRange, Range}; + +/// Keeps track of allocations in a process' mmap. +/// +/// Each process has an mmap where the data for incoming transactions will be placed. This struct +/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that +/// has metadata related to the allocation. We also keep track of available free space. +pub(super) struct ArrayRangeAllocator<T> { + /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not* + /// store the free ranges. + /// + /// Sorted by offset. + pub(super) ranges: KVec<Range<T>>, + size: usize, + free_oneway_space: usize, +} + +struct FindEmptyRes { + /// Which index in `ranges` should we insert the new range at? + /// + /// Inserting the new range at this index keeps `ranges` sorted. + insert_at_idx: usize, + /// Which offset should we insert the new range at? + insert_at_offset: usize, +} + +impl<T> ArrayRangeAllocator<T> { + pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self { + Self { + ranges: alloc.ranges, + size, + free_oneway_space: size / 2, + } + } + + pub(crate) fn free_oneway_space(&self) -> usize { + self.free_oneway_space + } + + pub(crate) fn count_buffers(&self) -> usize { + self.ranges.len() + } + + pub(crate) fn total_size(&self) -> usize { + self.size + } + + pub(crate) fn is_full(&self) -> bool { + self.ranges.len() == self.ranges.capacity() + } + + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { + for range in &self.ranges { + seq_print!( + m, + " buffer {}: {} size {} pid {} oneway {}", + 0, + range.offset, + range.size, + range.state.pid(), + range.state.is_oneway(), + ); + if let DescriptorState::Reserved(_) = range.state { + seq_print!(m, " reserved\n"); + } else { + seq_print!(m, " allocated\n"); + } + } + Ok(()) + } + + /// Find somewhere to put a new range. + /// + /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that + /// fragmentation isn't a big issue when we don't have many ranges. + /// + /// Returns the index that the new range should have in `self.ranges` after insertion. + fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> { + let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0); + + if size <= self.total_size() - after_last_range { + // We can put the range at the end, so just do that. + Some(FindEmptyRes { + insert_at_idx: self.ranges.len(), + insert_at_offset: after_last_range, + }) + } else { + let mut end_of_prev = 0; + for (i, range) in self.ranges.iter().enumerate() { + // Does it fit before the i'th range? + if size <= range.offset - end_of_prev { + return Some(FindEmptyRes { + insert_at_idx: i, + insert_at_offset: end_of_prev, + }); + } + end_of_prev = range.endpoint(); + } + None + } + } + + pub(crate) fn reserve_new( + &mut self, + debug_id: usize, + size: usize, + is_oneway: bool, + pid: Pid, + ) -> Result<usize> { + // Compute new value of free_oneway_space, which is set only on success. + let new_oneway_space = if is_oneway { + match self.free_oneway_space.checked_sub(size) { + Some(new_oneway_space) => new_oneway_space, + None => return Err(ENOSPC), + } + } else { + self.free_oneway_space + }; + + let FindEmptyRes { + insert_at_idx, + insert_at_offset, + } = self.find_empty_range(size).ok_or(ENOSPC)?; + self.free_oneway_space = new_oneway_space; + + let new_range = Range { + offset: insert_at_offset, + size, + state: DescriptorState::new(is_oneway, debug_id, pid), + }; + // Insert the value at the given index to keep the array sorted. + self.ranges + .insert_within_capacity(insert_at_idx, new_range) + .ok() + .unwrap(); + + Ok(insert_at_offset) + } + + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { + // This could use a binary search, but linear scans are usually faster for small arrays. + let i = self + .ranges + .iter() + .position(|range| range.offset == offset) + .ok_or(EINVAL)?; + let range = &self.ranges[i]; + + if let DescriptorState::Allocated(_) = range.state { + return Err(EPERM); + } + + let size = range.size; + let offset = range.offset; + + if range.state.is_oneway() { + self.free_oneway_space += size; + } + + // This computes the range of pages that are no longer used by *any* allocated range. The + // caller will mark them as unused, which means that they can be freed if the system comes + // under memory pressure. + let mut freed_range = FreedRange::interior_pages(offset, size); + #[expect(clippy::collapsible_if)] // reads better like this + if offset % PAGE_SIZE != 0 { + if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { + freed_range.start_page_idx -= 1; + } + } + if range.endpoint() % PAGE_SIZE != 0 { + let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE; + if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset { + freed_range.end_page_idx += 1; + } + } + + self.ranges.remove(i)?; + Ok(freed_range) + } + + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { + // This could use a binary search, but linear scans are usually faster for small arrays. + let range = self + .ranges + .iter_mut() + .find(|range| range.offset == offset) + .ok_or(ENOENT)?; + + let DescriptorState::Reserved(reservation) = &range.state else { + return Err(ENOENT); + }; + + range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take())); + Ok(()) + } + + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { + // This could use a binary search, but linear scans are usually faster for small arrays. + let range = self + .ranges + .iter_mut() + .find(|range| range.offset == offset) + .ok_or(ENOENT)?; + + let DescriptorState::Allocated(allocation) = &mut range.state else { + return Err(ENOENT); + }; + + let data = allocation.take(); + let debug_id = allocation.reservation.debug_id; + range.state = DescriptorState::Reserved(allocation.reservation.clone()); + Ok((range.size, debug_id, data)) + } + + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { + for range in self.ranges.iter_mut() { + if let DescriptorState::Allocated(allocation) = &mut range.state { + callback( + range.offset, + range.size, + allocation.reservation.debug_id, + allocation.data.take(), + ); + } + } + } +} + +pub(crate) struct EmptyArrayAlloc<T> { + ranges: KVec<Range<T>>, +} + +impl<T> EmptyArrayAlloc<T> { + pub(crate) fn try_new(capacity: usize) -> Result<Self> { + Ok(Self { + ranges: KVec::with_capacity(capacity, GFP_KERNEL)?, + }) + } +} diff --git a/drivers/android/binder/range_alloc/mod.rs b/drivers/android/binder/range_alloc/mod.rs new file mode 100644 index 000000000000..2301e2bc1a1f --- /dev/null +++ b/drivers/android/binder/range_alloc/mod.rs @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{page::PAGE_SIZE, prelude::*, seq_file::SeqFile, task::Pid}; + +mod tree; +use self::tree::{FromArrayAllocs, ReserveNewTreeAlloc, TreeRangeAllocator}; + +mod array; +use self::array::{ArrayRangeAllocator, EmptyArrayAlloc}; + +enum DescriptorState<T> { + Reserved(Reservation), + Allocated(Allocation<T>), +} + +impl<T> DescriptorState<T> { + fn new(is_oneway: bool, debug_id: usize, pid: Pid) -> Self { + DescriptorState::Reserved(Reservation { + debug_id, + is_oneway, + pid, + }) + } + + fn pid(&self) -> Pid { + match self { + DescriptorState::Reserved(inner) => inner.pid, + DescriptorState::Allocated(inner) => inner.reservation.pid, + } + } + + fn is_oneway(&self) -> bool { + match self { + DescriptorState::Reserved(inner) => inner.is_oneway, + DescriptorState::Allocated(inner) => inner.reservation.is_oneway, + } + } +} + +#[derive(Clone)] +struct Reservation { + debug_id: usize, + is_oneway: bool, + pid: Pid, +} + +impl Reservation { + fn allocate<T>(self, data: Option<T>) -> Allocation<T> { + Allocation { + data, + reservation: self, + } + } +} + +struct Allocation<T> { + reservation: Reservation, + data: Option<T>, +} + +impl<T> Allocation<T> { + fn deallocate(self) -> (Reservation, Option<T>) { + (self.reservation, self.data) + } + + fn debug_id(&self) -> usize { + self.reservation.debug_id + } + + fn take(&mut self) -> Option<T> { + self.data.take() + } +} + +/// The array implementation must switch to the tree if it wants to go beyond this number of +/// ranges. +const TREE_THRESHOLD: usize = 8; + +/// Represents a range of pages that have just become completely free. +#[derive(Copy, Clone)] +pub(crate) struct FreedRange { + pub(crate) start_page_idx: usize, + pub(crate) end_page_idx: usize, +} + +impl FreedRange { + fn interior_pages(offset: usize, size: usize) -> FreedRange { + FreedRange { + // Divide round up + start_page_idx: offset.div_ceil(PAGE_SIZE), + // Divide round down + end_page_idx: (offset + size) / PAGE_SIZE, + } + } +} + +struct Range<T> { + offset: usize, + size: usize, + state: DescriptorState<T>, +} + +impl<T> Range<T> { + fn endpoint(&self) -> usize { + self.offset + self.size + } +} + +pub(crate) struct RangeAllocator<T> { + inner: Impl<T>, +} + +enum Impl<T> { + Empty(usize), + Array(ArrayRangeAllocator<T>), + Tree(TreeRangeAllocator<T>), +} + +impl<T> RangeAllocator<T> { + pub(crate) fn new(size: usize) -> Self { + Self { + inner: Impl::Empty(size), + } + } + + pub(crate) fn free_oneway_space(&self) -> usize { + match &self.inner { + Impl::Empty(size) => size / 2, + Impl::Array(array) => array.free_oneway_space(), + Impl::Tree(tree) => tree.free_oneway_space(), + } + } + + pub(crate) fn count_buffers(&self) -> usize { + match &self.inner { + Impl::Empty(_size) => 0, + Impl::Array(array) => array.count_buffers(), + Impl::Tree(tree) => tree.count_buffers(), + } + } + + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { + match &self.inner { + Impl::Empty(_size) => Ok(()), + Impl::Array(array) => array.debug_print(m), + Impl::Tree(tree) => tree.debug_print(m), + } + } + + /// Try to reserve a new buffer, using the provided allocation if necessary. + pub(crate) fn reserve_new(&mut self, mut args: ReserveNewArgs<T>) -> Result<ReserveNew<T>> { + match &mut self.inner { + Impl::Empty(size) => { + let empty_array = match args.empty_array_alloc.take() { + Some(empty_array) => ArrayRangeAllocator::new(*size, empty_array), + None => { + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { + args, + need_empty_array_alloc: true, + need_new_tree_alloc: false, + need_tree_alloc: false, + })) + } + }; + + self.inner = Impl::Array(empty_array); + self.reserve_new(args) + } + Impl::Array(array) if array.is_full() => { + let allocs = match args.new_tree_alloc { + Some(ref mut allocs) => allocs, + None => { + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { + args, + need_empty_array_alloc: false, + need_new_tree_alloc: true, + need_tree_alloc: true, + })) + } + }; + + let new_tree = + TreeRangeAllocator::from_array(array.total_size(), &mut array.ranges, allocs); + + self.inner = Impl::Tree(new_tree); + self.reserve_new(args) + } + Impl::Array(array) => { + let offset = + array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?; + Ok(ReserveNew::Success(ReserveNewSuccess { + offset, + oneway_spam_detected: false, + _empty_array_alloc: args.empty_array_alloc, + _new_tree_alloc: args.new_tree_alloc, + _tree_alloc: args.tree_alloc, + })) + } + Impl::Tree(tree) => { + let alloc = match args.tree_alloc { + Some(alloc) => alloc, + None => { + return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc { + args, + need_empty_array_alloc: false, + need_new_tree_alloc: false, + need_tree_alloc: true, + })); + } + }; + let (offset, oneway_spam_detected) = + tree.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid, alloc)?; + Ok(ReserveNew::Success(ReserveNewSuccess { + offset, + oneway_spam_detected, + _empty_array_alloc: args.empty_array_alloc, + _new_tree_alloc: args.new_tree_alloc, + _tree_alloc: None, + })) + } + } + } + + /// Deletes the allocations at `offset`. + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { + match &mut self.inner { + Impl::Empty(_size) => Err(EINVAL), + Impl::Array(array) => array.reservation_abort(offset), + Impl::Tree(tree) => { + let freed_range = tree.reservation_abort(offset)?; + if tree.is_empty() { + self.inner = Impl::Empty(tree.total_size()); + } + Ok(freed_range) + } + } + } + + /// Called when an allocation is no longer in use by the kernel. + /// + /// The value in `data` will be stored, if any. A mutable reference is used to avoid dropping + /// the `T` when an error is returned. + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { + match &mut self.inner { + Impl::Empty(_size) => Err(EINVAL), + Impl::Array(array) => array.reservation_commit(offset, data), + Impl::Tree(tree) => tree.reservation_commit(offset, data), + } + } + + /// Called when the kernel starts using an allocation. + /// + /// Returns the size of the existing entry and the data associated with it. + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { + match &mut self.inner { + Impl::Empty(_size) => Err(EINVAL), + Impl::Array(array) => array.reserve_existing(offset), + Impl::Tree(tree) => tree.reserve_existing(offset), + } + } + + /// Call the provided callback at every allocated region. + /// + /// This destroys the range allocator. Used only during shutdown. + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { + match &mut self.inner { + Impl::Empty(_size) => {} + Impl::Array(array) => array.take_for_each(callback), + Impl::Tree(tree) => tree.take_for_each(callback), + } + } +} + +/// The arguments for `reserve_new`. +#[derive(Default)] +pub(crate) struct ReserveNewArgs<T> { + pub(crate) size: usize, + pub(crate) is_oneway: bool, + pub(crate) debug_id: usize, + pub(crate) pid: Pid, + pub(crate) empty_array_alloc: Option<EmptyArrayAlloc<T>>, + pub(crate) new_tree_alloc: Option<FromArrayAllocs<T>>, + pub(crate) tree_alloc: Option<ReserveNewTreeAlloc<T>>, +} + +/// The return type of `ReserveNew`. +pub(crate) enum ReserveNew<T> { + Success(ReserveNewSuccess<T>), + NeedAlloc(ReserveNewNeedAlloc<T>), +} + +/// Returned by `reserve_new` when the reservation was successul. +pub(crate) struct ReserveNewSuccess<T> { + pub(crate) offset: usize, + pub(crate) oneway_spam_detected: bool, + + // If the user supplied an allocation that we did not end up using, then we return it here. + // The caller will kfree it outside of the lock. + _empty_array_alloc: Option<EmptyArrayAlloc<T>>, + _new_tree_alloc: Option<FromArrayAllocs<T>>, + _tree_alloc: Option<ReserveNewTreeAlloc<T>>, +} + +/// Returned by `reserve_new` to request the caller to make an allocation before calling the method +/// again. +pub(crate) struct ReserveNewNeedAlloc<T> { + args: ReserveNewArgs<T>, + need_empty_array_alloc: bool, + need_new_tree_alloc: bool, + need_tree_alloc: bool, +} + +impl<T> ReserveNewNeedAlloc<T> { + /// Make the necessary allocations for another call to `reserve_new`. + pub(crate) fn make_alloc(mut self) -> Result<ReserveNewArgs<T>> { + if self.need_empty_array_alloc && self.args.empty_array_alloc.is_none() { + self.args.empty_array_alloc = Some(EmptyArrayAlloc::try_new(TREE_THRESHOLD)?); + } + if self.need_new_tree_alloc && self.args.new_tree_alloc.is_none() { + self.args.new_tree_alloc = Some(FromArrayAllocs::try_new(TREE_THRESHOLD)?); + } + if self.need_tree_alloc && self.args.tree_alloc.is_none() { + self.args.tree_alloc = Some(ReserveNewTreeAlloc::try_new()?); + } + Ok(self.args) + } +} diff --git a/drivers/android/binder/range_alloc/tree.rs b/drivers/android/binder/range_alloc/tree.rs new file mode 100644 index 000000000000..838fdd2b47ea --- /dev/null +++ b/drivers/android/binder/range_alloc/tree.rs @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::{ + page::PAGE_SIZE, + prelude::*, + rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation}, + seq_file::SeqFile, + seq_print, + task::Pid, +}; + +use crate::range_alloc::{DescriptorState, FreedRange, Range}; + +/// Keeps track of allocations in a process' mmap. +/// +/// Each process has an mmap where the data for incoming transactions will be placed. This struct +/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that +/// has metadata related to the allocation. We also keep track of available free space. +pub(super) struct TreeRangeAllocator<T> { + /// This collection contains descriptors for *both* ranges containing an allocation, *and* free + /// ranges between allocations. The free ranges get merged, so there are never two free ranges + /// next to each other. + tree: RBTree<usize, Descriptor<T>>, + /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size, + /// letting us look up the smallest range whose size is at least some lower bound. + free_tree: RBTree<FreeKey, ()>, + size: usize, + free_oneway_space: usize, +} + +impl<T> TreeRangeAllocator<T> { + pub(crate) fn from_array( + size: usize, + ranges: &mut KVec<Range<T>>, + alloc: &mut FromArrayAllocs<T>, + ) -> Self { + let mut tree = TreeRangeAllocator { + tree: RBTree::new(), + free_tree: RBTree::new(), + size, + free_oneway_space: size / 2, + }; + + let mut free_offset = 0; + for range in ranges.drain_all() { + let free_size = range.offset - free_offset; + if free_size > 0 { + let free_node = alloc.free_tree.pop().unwrap(); + tree.free_tree + .insert(free_node.into_node((free_size, free_offset), ())); + let tree_node = alloc.tree.pop().unwrap(); + tree.tree.insert( + tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)), + ); + } + free_offset = range.endpoint(); + + if range.state.is_oneway() { + tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size); + } + + let free_res = alloc.free_tree.pop().unwrap(); + let tree_node = alloc.tree.pop().unwrap(); + let mut desc = Descriptor::new(range.offset, range.size); + desc.state = Some((range.state, free_res)); + tree.tree.insert(tree_node.into_node(range.offset, desc)); + } + + // After the last range, we may need a free range. + if free_offset < size { + let free_size = size - free_offset; + let free_node = alloc.free_tree.pop().unwrap(); + tree.free_tree + .insert(free_node.into_node((free_size, free_offset), ())); + let tree_node = alloc.tree.pop().unwrap(); + tree.tree + .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size))); + } + + tree + } + + pub(crate) fn is_empty(&self) -> bool { + let mut tree_iter = self.tree.values(); + // There's always at least one range, because index zero is either the start of a free or + // allocated range. + let first_value = tree_iter.next().unwrap(); + if tree_iter.next().is_some() { + // There are never two free ranges next to each other, so if there is more than one + // descriptor, then at least one of them must hold an allocated range. + return false; + } + // There is only one descriptor. Return true if it is for a free range. + first_value.state.is_none() + } + + pub(crate) fn total_size(&self) -> usize { + self.size + } + + pub(crate) fn free_oneway_space(&self) -> usize { + self.free_oneway_space + } + + pub(crate) fn count_buffers(&self) -> usize { + self.tree + .values() + .filter(|desc| desc.state.is_some()) + .count() + } + + pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> { + for desc in self.tree.values() { + let state = match &desc.state { + Some(state) => &state.0, + None => continue, + }; + seq_print!( + m, + " buffer: {} size {} pid {}", + desc.offset, + desc.size, + state.pid(), + ); + if state.is_oneway() { + seq_print!(m, " oneway"); + } + match state { + DescriptorState::Reserved(_res) => { + seq_print!(m, " reserved\n"); + } + DescriptorState::Allocated(_alloc) => { + seq_print!(m, " allocated\n"); + } + } + } + Ok(()) + } + + fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> { + let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?; + let ((_, offset), ()) = free_cursor.current(); + self.tree.get_mut(offset) + } + + /// Try to reserve a new buffer, using the provided allocation if necessary. + pub(crate) fn reserve_new( + &mut self, + debug_id: usize, + size: usize, + is_oneway: bool, + pid: Pid, + alloc: ReserveNewTreeAlloc<T>, + ) -> Result<(usize, bool)> { + // Compute new value of free_oneway_space, which is set only on success. + let new_oneway_space = if is_oneway { + match self.free_oneway_space.checked_sub(size) { + Some(new_oneway_space) => new_oneway_space, + None => return Err(ENOSPC), + } + } else { + self.free_oneway_space + }; + + // Start detecting spammers once we have less than 20% + // of async space left (which is less than 10% of total + // buffer size). + // + // (This will short-circut, so `low_oneway_space` is + // only called when necessary.) + let oneway_spam_detected = + is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid); + + let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) { + None => { + pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size); + return Err(ENOSPC); + } + Some(desc) => { + let found_size = desc.size; + let found_offset = desc.offset; + + // In case we need to break up the descriptor + let new_desc = Descriptor::new(found_offset + size, found_size - size); + let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc); + + desc.state = Some(( + DescriptorState::new(is_oneway, debug_id, pid), + desc_node_res, + )); + desc.size = size; + + (found_size, found_offset, tree_node, free_tree_node) + } + }; + self.free_oneway_space = new_oneway_space; + self.free_tree.remove(&(found_size, found_off)); + + if found_size != size { + self.tree.insert(tree_node); + self.free_tree.insert(free_tree_node); + } + + Ok((found_off, oneway_spam_detected)) + } + + pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> { + let mut cursor = self.tree.cursor_lower_bound_mut(&offset).ok_or_else(|| { + pr_warn!( + "EINVAL from range_alloc.reservation_abort - offset: {}", + offset + ); + EINVAL + })?; + + let (_, desc) = cursor.current_mut(); + + if desc.offset != offset { + pr_warn!( + "EINVAL from range_alloc.reservation_abort - offset: {}", + offset + ); + return Err(EINVAL); + } + + let (reservation, free_node_res) = desc.try_change_state(|state| match state { + Some((DescriptorState::Reserved(reservation), free_node_res)) => { + (None, Ok((reservation, free_node_res))) + } + None => { + pr_warn!( + "EINVAL from range_alloc.reservation_abort - offset: {}", + offset + ); + (None, Err(EINVAL)) + } + allocated => { + pr_warn!( + "EPERM from range_alloc.reservation_abort - offset: {}", + offset + ); + (allocated, Err(EPERM)) + } + })?; + + let mut size = desc.size; + let mut offset = desc.offset; + let free_oneway_space_add = if reservation.is_oneway { size } else { 0 }; + + self.free_oneway_space += free_oneway_space_add; + + let mut freed_range = FreedRange::interior_pages(offset, size); + // Compute how large the next free region needs to be to include one more page in + // the newly freed range. + let add_next_page_needed = match (offset + size) % PAGE_SIZE { + 0 => usize::MAX, + unalign => PAGE_SIZE - unalign, + }; + // Compute how large the previous free region needs to be to include one more page + // in the newly freed range. + let add_prev_page_needed = match offset % PAGE_SIZE { + 0 => usize::MAX, + unalign => unalign, + }; + + // Merge next into current if next is free + let remove_next = match cursor.peek_next() { + Some((_, next)) if next.state.is_none() => { + if next.size >= add_next_page_needed { + freed_range.end_page_idx += 1; + } + self.free_tree.remove(&(next.size, next.offset)); + size += next.size; + true + } + _ => false, + }; + + if remove_next { + let (_, desc) = cursor.current_mut(); + desc.size = size; + cursor.remove_next(); + } + + // Merge current into prev if prev is free + match cursor.peek_prev_mut() { + Some((_, prev)) if prev.state.is_none() => { + if prev.size >= add_prev_page_needed { + freed_range.start_page_idx -= 1; + } + // merge previous with current, remove current + self.free_tree.remove(&(prev.size, prev.offset)); + offset = prev.offset; + size += prev.size; + prev.size = size; + cursor.remove_current(); + } + _ => {} + }; + + self.free_tree + .insert(free_node_res.into_node((size, offset), ())); + + Ok(freed_range) + } + + pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result { + let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?; + + desc.try_change_state(|state| match state { + Some((DescriptorState::Reserved(reservation), free_node_res)) => ( + Some(( + DescriptorState::Allocated(reservation.allocate(data.take())), + free_node_res, + )), + Ok(()), + ), + other => (other, Err(ENOENT)), + }) + } + + /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to + /// [`DescriptorState::Reserved`]. + /// + /// Returns the size of the existing entry and the data associated with it. + pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> { + let desc = self.tree.get_mut(&offset).ok_or_else(|| { + pr_warn!( + "ENOENT from range_alloc.reserve_existing - offset: {}", + offset + ); + ENOENT + })?; + + let (debug_id, data) = desc.try_change_state(|state| match state { + Some((DescriptorState::Allocated(allocation), free_node_res)) => { + let (reservation, data) = allocation.deallocate(); + let debug_id = reservation.debug_id; + ( + Some((DescriptorState::Reserved(reservation), free_node_res)), + Ok((debug_id, data)), + ) + } + other => { + pr_warn!( + "ENOENT from range_alloc.reserve_existing - offset: {}", + offset + ); + (other, Err(ENOENT)) + } + })?; + + Ok((desc.size, debug_id, data)) + } + + /// Call the provided callback at every allocated region. + /// + /// This destroys the range allocator. Used only during shutdown. + pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) { + for (_, desc) in self.tree.iter_mut() { + if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state { + callback( + desc.offset, + desc.size, + allocation.debug_id(), + allocation.take(), + ); + } + } + } + + /// Find the amount and size of buffers allocated by the current caller. + /// + /// The idea is that once we cross the threshold, whoever is responsible + /// for the low async space is likely to try to send another async transaction, + /// and at some point we'll catch them in the act. This is more efficient + /// than keeping a map per pid. + fn low_oneway_space(&self, calling_pid: Pid) -> bool { + let mut total_alloc_size = 0; + let mut num_buffers = 0; + for (_, desc) in self.tree.iter() { + if let Some((state, _)) = &desc.state { + if state.is_oneway() && state.pid() == calling_pid { + total_alloc_size += desc.size; + num_buffers += 1; + } + } + } + + // Warn if this pid has more than 50 transactions, or more than 50% of + // async space (which is 25% of total buffer size). Oneway spam is only + // detected when the threshold is exceeded. + num_buffers > 50 || total_alloc_size > self.size / 4 + } +} + +type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes); +struct Descriptor<T> { + size: usize, + offset: usize, + state: Option<TreeDescriptorState<T>>, +} + +impl<T> Descriptor<T> { + fn new(offset: usize, size: usize) -> Self { + Self { + size, + offset, + state: None, + } + } + + fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data> + where + F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>), + { + let (new_state, result) = f(self.state.take()); + self.state = new_state; + result + } +} + +// (Descriptor.size, Descriptor.offset) +type FreeKey = (usize, usize); +type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>; + +/// An allocation for use by `reserve_new`. +pub(crate) struct ReserveNewTreeAlloc<T> { + tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>, + free_tree_node_res: FreeNodeRes, + desc_node_res: FreeNodeRes, +} + +impl<T> ReserveNewTreeAlloc<T> { + pub(crate) fn try_new() -> Result<Self> { + let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; + let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; + let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?; + Ok(Self { + tree_node_res, + free_tree_node_res, + desc_node_res, + }) + } + + fn initialize( + self, + desc: Descriptor<T>, + ) -> ( + RBTreeNode<usize, Descriptor<T>>, + RBTreeNode<FreeKey, ()>, + FreeNodeRes, + ) { + let size = desc.size; + let offset = desc.offset; + ( + self.tree_node_res.into_node(offset, desc), + self.free_tree_node_res.into_node((size, offset), ()), + self.desc_node_res, + ) + } +} + +/// An allocation for creating a tree from an `ArrayRangeAllocator`. +pub(crate) struct FromArrayAllocs<T> { + tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>, + free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>, +} + +impl<T> FromArrayAllocs<T> { + pub(crate) fn try_new(len: usize) -> Result<Self> { + let num_descriptors = 2 * len + 1; + + let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?; + for _ in 0..num_descriptors { + tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?; + } + + let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?; + for _ in 0..num_descriptors { + free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?; + } + + Ok(Self { tree, free_tree }) + } +} diff --git a/drivers/android/binder/rust_binder.h b/drivers/android/binder/rust_binder.h new file mode 100644 index 000000000000..31806890ed1a --- /dev/null +++ b/drivers/android/binder/rust_binder.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Google, Inc. + */ + +#ifndef _LINUX_RUST_BINDER_H +#define _LINUX_RUST_BINDER_H + +#include <uapi/linux/android/binder.h> +#include <uapi/linux/android/binderfs.h> + +/* + * These symbols are exposed by `rust_binderfs.c` and exist here so that Rust + * Binder can call them. + */ +int init_rust_binderfs(void); + +struct dentry; +struct inode; +struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid); +void rust_binderfs_remove_file(struct dentry *dentry); + +#endif diff --git a/drivers/android/binder/rust_binder_events.c b/drivers/android/binder/rust_binder_events.c new file mode 100644 index 000000000000..488b1470060c --- /dev/null +++ b/drivers/android/binder/rust_binder_events.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* rust_binder_events.c + * + * Rust Binder tracepoints. + * + * Copyright 2025 Google LLC + */ + +#include "rust_binder.h" + +const char * const binder_command_strings[] = { + "BC_TRANSACTION", + "BC_REPLY", + "BC_ACQUIRE_RESULT", + "BC_FREE_BUFFER", + "BC_INCREFS", + "BC_ACQUIRE", + "BC_RELEASE", + "BC_DECREFS", + "BC_INCREFS_DONE", + "BC_ACQUIRE_DONE", + "BC_ATTEMPT_ACQUIRE", + "BC_REGISTER_LOOPER", + "BC_ENTER_LOOPER", + "BC_EXIT_LOOPER", + "BC_REQUEST_DEATH_NOTIFICATION", + "BC_CLEAR_DEATH_NOTIFICATION", + "BC_DEAD_BINDER_DONE", + "BC_TRANSACTION_SG", + "BC_REPLY_SG", +}; + +const char * const binder_return_strings[] = { + "BR_ERROR", + "BR_OK", + "BR_TRANSACTION", + "BR_REPLY", + "BR_ACQUIRE_RESULT", + "BR_DEAD_REPLY", + "BR_TRANSACTION_COMPLETE", + "BR_INCREFS", + "BR_ACQUIRE", + "BR_RELEASE", + "BR_DECREFS", + "BR_ATTEMPT_ACQUIRE", + "BR_NOOP", + "BR_SPAWN_LOOPER", + "BR_FINISHED", + "BR_DEAD_BINDER", + "BR_CLEAR_DEATH_NOTIFICATION_DONE", + "BR_FAILED_REPLY", + "BR_FROZEN_REPLY", + "BR_ONEWAY_SPAM_SUSPECT", + "BR_TRANSACTION_PENDING_FROZEN" +}; + +#define CREATE_TRACE_POINTS +#define CREATE_RUST_TRACE_POINTS +#include "rust_binder_events.h" diff --git a/drivers/android/binder/rust_binder_events.h b/drivers/android/binder/rust_binder_events.h new file mode 100644 index 000000000000..2f3efbf9dba6 --- /dev/null +++ b/drivers/android/binder/rust_binder_events.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Google, Inc. + */ + +#undef TRACE_SYSTEM +#undef TRACE_INCLUDE_FILE +#undef TRACE_INCLUDE_PATH +#define TRACE_SYSTEM rust_binder +#define TRACE_INCLUDE_FILE rust_binder_events +#define TRACE_INCLUDE_PATH ../drivers/android/binder + +#if !defined(_RUST_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _RUST_BINDER_TRACE_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(rust_binder_ioctl, + TP_PROTO(unsigned int cmd, unsigned long arg), + TP_ARGS(cmd, arg), + + TP_STRUCT__entry( + __field(unsigned int, cmd) + __field(unsigned long, arg) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->arg = arg; + ), + TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg) +); + +#endif /* _RUST_BINDER_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/android/binder/rust_binder_internal.h b/drivers/android/binder/rust_binder_internal.h new file mode 100644 index 000000000000..78288fe7964d --- /dev/null +++ b/drivers/android/binder/rust_binder_internal.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* rust_binder_internal.h + * + * This file contains internal data structures used by Rust Binder. Mostly, + * these are type definitions used only by binderfs or things that Rust Binder + * define and export to binderfs. + * + * It does not include things exported by binderfs to Rust Binder since this + * file is not included as input to bindgen. + * + * Copyright (C) 2025 Google LLC. + */ + +#ifndef _LINUX_RUST_BINDER_INTERNAL_H +#define _LINUX_RUST_BINDER_INTERNAL_H + +#define RUST_BINDERFS_SUPER_MAGIC 0x6c6f6f71 + +#include <linux/seq_file.h> +#include <uapi/linux/android/binder.h> +#include <uapi/linux/android/binderfs.h> + +/* + * The internal data types in the Rust Binder driver are opaque to C, so we use + * void pointer typedefs for these types. + */ +typedef void *rust_binder_context; + +/** + * struct binder_device - information about a binder device node + * @minor: the minor number used by this device + * @ctx: the Rust Context used by this device, or null for binder-control + * + * This is used as the private data for files directly in binderfs, but not + * files in the binder_logs subdirectory. This struct owns a refcount on `ctx` + * and the entry for `minor` in `binderfs_minors`. For binder-control `ctx` is + * null. + */ +struct binder_device { + int minor; + rust_binder_context ctx; +}; + +int rust_binder_stats_show(struct seq_file *m, void *unused); +int rust_binder_state_show(struct seq_file *m, void *unused); +int rust_binder_transactions_show(struct seq_file *m, void *unused); +int rust_binder_proc_show(struct seq_file *m, void *pid); + +extern const struct file_operations rust_binder_fops; +rust_binder_context rust_binder_new_context(char *name); +void rust_binder_remove_context(rust_binder_context device); + +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + * @stats_mode: enable binder stats in binderfs. + */ +struct binderfs_mount_opts { + int max; + int stats_mode; +}; + +/** + * binderfs_info - information about a binderfs mount + * @ipc_ns: The ipc namespace the binderfs mount belongs to. + * @control_dentry: This records the dentry of this binderfs mount + * binder-control device. + * @root_uid: uid that needs to be used when a new binder device is + * created. + * @root_gid: gid that needs to be used when a new binder device is + * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. + * @proc_log_dir: Pointer to the directory dentry containing process-specific + * logs. + */ +struct binderfs_info { + struct ipc_namespace *ipc_ns; + struct dentry *control_dentry; + kuid_t root_uid; + kgid_t root_gid; + struct binderfs_mount_opts mount_opts; + int device_count; + struct dentry *proc_log_dir; +}; + +#endif /* _LINUX_RUST_BINDER_INTERNAL_H */ diff --git a/drivers/android/binder/rust_binder_main.rs b/drivers/android/binder/rust_binder_main.rs new file mode 100644 index 000000000000..c79a9e742240 --- /dev/null +++ b/drivers/android/binder/rust_binder_main.rs @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! Binder -- the Android IPC mechanism. +#![recursion_limit = "256"] +#![allow( + clippy::as_underscore, + clippy::ref_as_ptr, + clippy::ptr_as_ptr, + clippy::cast_lossless +)] + +use kernel::{ + bindings::{self, seq_file}, + fs::File, + list::{ListArc, ListArcSafe, ListLinksSelfPtr, TryNewListArc}, + prelude::*, + seq_file::SeqFile, + seq_print, + sync::poll::PollTable, + sync::Arc, + task::Pid, + transmute::AsBytes, + types::ForeignOwnable, + uaccess::UserSliceWriter, +}; + +use crate::{context::Context, page_range::Shrinker, process::Process, thread::Thread}; + +use core::{ + ptr::NonNull, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, +}; + +mod allocation; +mod context; +mod deferred_close; +mod defs; +mod error; +mod node; +mod page_range; +mod process; +mod range_alloc; +mod stats; +mod thread; +mod trace; +mod transaction; + +#[allow(warnings)] // generated bindgen code +mod binderfs { + use kernel::bindings::{dentry, inode}; + + extern "C" { + pub fn init_rust_binderfs() -> kernel::ffi::c_int; + } + extern "C" { + pub fn rust_binderfs_create_proc_file( + nodp: *mut inode, + pid: kernel::ffi::c_int, + ) -> *mut dentry; + } + extern "C" { + pub fn rust_binderfs_remove_file(dentry: *mut dentry); + } + pub type rust_binder_context = *mut kernel::ffi::c_void; + #[repr(C)] + #[derive(Copy, Clone)] + pub struct binder_device { + pub minor: kernel::ffi::c_int, + pub ctx: rust_binder_context, + } + impl Default for binder_device { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } + } +} + +module! { + type: BinderModule, + name: "rust_binder", + authors: ["Wedson Almeida Filho", "Alice Ryhl"], + description: "Android Binder", + license: "GPL", +} + +fn next_debug_id() -> usize { + static NEXT_DEBUG_ID: AtomicUsize = AtomicUsize::new(0); + + NEXT_DEBUG_ID.fetch_add(1, Ordering::Relaxed) +} + +/// Provides a single place to write Binder return values via the +/// supplied `UserSliceWriter`. +pub(crate) struct BinderReturnWriter<'a> { + writer: UserSliceWriter, + thread: &'a Thread, +} + +impl<'a> BinderReturnWriter<'a> { + fn new(writer: UserSliceWriter, thread: &'a Thread) -> Self { + BinderReturnWriter { writer, thread } + } + + /// Write a return code back to user space. + /// Should be a `BR_` constant from [`defs`] e.g. [`defs::BR_TRANSACTION_COMPLETE`]. + fn write_code(&mut self, code: u32) -> Result { + stats::GLOBAL_STATS.inc_br(code); + self.thread.process.stats.inc_br(code); + self.writer.write(&code) + } + + /// Write something *other than* a return code to user space. + fn write_payload<T: AsBytes>(&mut self, payload: &T) -> Result { + self.writer.write(payload) + } + + fn len(&self) -> usize { + self.writer.len() + } +} + +/// Specifies how a type should be delivered to the read part of a BINDER_WRITE_READ ioctl. +/// +/// When a value is pushed to the todo list for a process or thread, it is stored as a trait object +/// with the type `Arc<dyn DeliverToRead>`. Trait objects are a Rust feature that lets you +/// implement dynamic dispatch over many different types. This lets us store many different types +/// in the todo list. +trait DeliverToRead: ListArcSafe + Send + Sync { + /// Performs work. Returns true if remaining work items in the queue should be processed + /// immediately, or false if it should return to caller before processing additional work + /// items. + fn do_work( + self: DArc<Self>, + thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool>; + + /// Cancels the given work item. This is called instead of [`DeliverToRead::do_work`] when work + /// won't be delivered. + fn cancel(self: DArc<Self>); + + /// Should we use `wake_up_interruptible_sync` or `wake_up_interruptible` when scheduling this + /// work item? + /// + /// Generally only set to true for non-oneway transactions. + fn should_sync_wakeup(&self) -> bool; + + fn debug_print(&self, m: &SeqFile, prefix: &str, transaction_prefix: &str) -> Result<()>; +} + +// Wrapper around a `DeliverToRead` with linked list links. +#[pin_data] +struct DTRWrap<T: ?Sized> { + #[pin] + links: ListLinksSelfPtr<DTRWrap<dyn DeliverToRead>>, + #[pin] + wrapped: T, +} +kernel::list::impl_list_arc_safe! { + impl{T: ListArcSafe + ?Sized} ListArcSafe<0> for DTRWrap<T> { + tracked_by wrapped: T; + } +} +kernel::list::impl_list_item! { + impl ListItem<0> for DTRWrap<dyn DeliverToRead> { + using ListLinksSelfPtr { self.links }; + } +} + +impl<T: ?Sized> core::ops::Deref for DTRWrap<T> { + type Target = T; + fn deref(&self) -> &T { + &self.wrapped + } +} + +type DArc<T> = kernel::sync::Arc<DTRWrap<T>>; +type DLArc<T> = kernel::list::ListArc<DTRWrap<T>>; + +impl<T: ListArcSafe> DTRWrap<T> { + fn new(val: impl PinInit<T>) -> impl PinInit<Self> { + pin_init!(Self { + links <- ListLinksSelfPtr::new(), + wrapped <- val, + }) + } + + fn arc_try_new(val: T) -> Result<DLArc<T>, kernel::alloc::AllocError> { + ListArc::pin_init( + try_pin_init!(Self { + links <- ListLinksSelfPtr::new(), + wrapped: val, + }), + GFP_KERNEL, + ) + .map_err(|_| kernel::alloc::AllocError) + } + + fn arc_pin_init(init: impl PinInit<T>) -> Result<DLArc<T>, kernel::error::Error> { + ListArc::pin_init( + try_pin_init!(Self { + links <- ListLinksSelfPtr::new(), + wrapped <- init, + }), + GFP_KERNEL, + ) + } +} + +struct DeliverCode { + code: u32, + skip: AtomicBool, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for DeliverCode { untracked; } +} + +impl DeliverCode { + fn new(code: u32) -> Self { + Self { + code, + skip: AtomicBool::new(false), + } + } + + /// Disable this DeliverCode and make it do nothing. + /// + /// This is used instead of removing it from the work list, since `LinkedList::remove` is + /// unsafe, whereas this method is not. + fn skip(&self) { + self.skip.store(true, Ordering::Relaxed); + } +} + +impl DeliverToRead for DeliverCode { + fn do_work( + self: DArc<Self>, + _thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + if !self.skip.load(Ordering::Relaxed) { + writer.write_code(self.code)?; + } + Ok(true) + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + seq_print!(m, "{}", prefix); + if self.skip.load(Ordering::Relaxed) { + seq_print!(m, "(skipped) "); + } + if self.code == defs::BR_TRANSACTION_COMPLETE { + seq_print!(m, "transaction complete\n"); + } else { + seq_print!(m, "transaction error: {}\n", self.code); + } + Ok(()) + } +} + +fn ptr_align(value: usize) -> Option<usize> { + let size = core::mem::size_of::<usize>() - 1; + Some(value.checked_add(size)? & !size) +} + +// SAFETY: We call register in `init`. +static BINDER_SHRINKER: Shrinker = unsafe { Shrinker::new() }; + +struct BinderModule {} + +impl kernel::Module for BinderModule { + fn init(_module: &'static kernel::ThisModule) -> Result<Self> { + // SAFETY: The module initializer never runs twice, so we only call this once. + unsafe { crate::context::CONTEXTS.init() }; + + pr_warn!("Loaded Rust Binder."); + + BINDER_SHRINKER.register(kernel::c_str!("android-binder"))?; + + // SAFETY: The module is being loaded, so we can initialize binderfs. + unsafe { kernel::error::to_result(binderfs::init_rust_binderfs())? }; + + Ok(Self {}) + } +} + +/// Makes the inner type Sync. +#[repr(transparent)] +pub struct AssertSync<T>(T); +// SAFETY: Used only to insert `file_operations` into a global, which is safe. +unsafe impl<T> Sync for AssertSync<T> {} + +/// File operations that rust_binderfs.c can use. +#[no_mangle] +#[used] +pub static rust_binder_fops: AssertSync<kernel::bindings::file_operations> = { + // SAFETY: All zeroes is safe for the `file_operations` type. + let zeroed_ops = unsafe { core::mem::MaybeUninit::zeroed().assume_init() }; + + let ops = kernel::bindings::file_operations { + owner: THIS_MODULE.as_ptr(), + poll: Some(rust_binder_poll), + unlocked_ioctl: Some(rust_binder_ioctl), + compat_ioctl: Some(bindings::compat_ptr_ioctl), + mmap: Some(rust_binder_mmap), + open: Some(rust_binder_open), + release: Some(rust_binder_release), + flush: Some(rust_binder_flush), + ..zeroed_ops + }; + AssertSync(ops) +}; + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_new_context( + name: *const kernel::ffi::c_char, +) -> *mut kernel::ffi::c_void { + // SAFETY: The caller will always provide a valid c string here. + let name = unsafe { kernel::str::CStr::from_char_ptr(name) }; + match Context::new(name) { + Ok(ctx) => Arc::into_foreign(ctx), + Err(_err) => core::ptr::null_mut(), + } +} + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_remove_context(device: *mut kernel::ffi::c_void) { + if !device.is_null() { + // SAFETY: The caller ensures that the `device` pointer came from a previous call to + // `rust_binder_new_device`. + let ctx = unsafe { Arc::<Context>::from_foreign(device) }; + ctx.deregister(); + drop(ctx); + } +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_open( + inode: *mut bindings::inode, + file_ptr: *mut bindings::file, +) -> kernel::ffi::c_int { + // SAFETY: The `rust_binderfs.c` file ensures that `i_private` is set to a + // `struct binder_device`. + let device = unsafe { (*inode).i_private } as *const binderfs::binder_device; + + assert!(!device.is_null()); + + // SAFETY: The `rust_binderfs.c` file ensures that `device->ctx` holds a binder context when + // using the rust binder fops. + let ctx = unsafe { Arc::<Context>::borrow((*device).ctx) }; + + // SAFETY: The caller provides a valid file pointer to a new `struct file`. + let file = unsafe { File::from_raw_file(file_ptr) }; + let process = match Process::open(ctx, file) { + Ok(process) => process, + Err(err) => return err.to_errno(), + }; + + // SAFETY: This is an `inode` for a newly created binder file. + match unsafe { BinderfsProcFile::new(inode, process.task.pid()) } { + Ok(Some(file)) => process.inner.lock().binderfs_file = Some(file), + Ok(None) => { /* pid already exists */ } + Err(err) => return err.to_errno(), + } + + // SAFETY: This file is associated with Rust binder, so we own the `private_data` field. + unsafe { (*file_ptr).private_data = process.into_foreign() }; + 0 +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_release( + _inode: *mut bindings::inode, + file: *mut bindings::file, +) -> kernel::ffi::c_int { + // SAFETY: We previously set `private_data` in `rust_binder_open`. + let process = unsafe { Arc::<Process>::from_foreign((*file).private_data) }; + // SAFETY: The caller ensures that the file is valid. + let file = unsafe { File::from_raw_file(file) }; + Process::release(process, file); + 0 +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_ioctl( + file: *mut bindings::file, + cmd: kernel::ffi::c_uint, + arg: kernel::ffi::c_ulong, +) -> kernel::ffi::c_long { + // SAFETY: We previously set `private_data` in `rust_binder_open`. + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; + // SAFETY: The caller ensures that the file is valid. + match Process::ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) { + Ok(()) => 0, + Err(err) => err.to_errno() as isize, + } +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_mmap( + file: *mut bindings::file, + vma: *mut bindings::vm_area_struct, +) -> kernel::ffi::c_int { + // SAFETY: We previously set `private_data` in `rust_binder_open`. + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; + // SAFETY: The caller ensures that the vma is valid. + let area = unsafe { kernel::mm::virt::VmaNew::from_raw(vma) }; + // SAFETY: The caller ensures that the file is valid. + match Process::mmap(f, unsafe { File::from_raw_file(file) }, area) { + Ok(()) => 0, + Err(err) => err.to_errno(), + } +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_poll( + file: *mut bindings::file, + wait: *mut bindings::poll_table_struct, +) -> bindings::__poll_t { + // SAFETY: We previously set `private_data` in `rust_binder_open`. + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; + // SAFETY: The caller ensures that the file is valid. + let fileref = unsafe { File::from_raw_file(file) }; + // SAFETY: The caller ensures that the `PollTable` is valid. + match Process::poll(f, fileref, unsafe { PollTable::from_raw(wait) }) { + Ok(v) => v, + Err(_) => bindings::POLLERR, + } +} + +/// # Safety +/// Only called by binderfs. +unsafe extern "C" fn rust_binder_flush( + file: *mut bindings::file, + _id: bindings::fl_owner_t, +) -> kernel::ffi::c_int { + // SAFETY: We previously set `private_data` in `rust_binder_open`. + let f = unsafe { Arc::<Process>::borrow((*file).private_data) }; + match Process::flush(f) { + Ok(()) => 0, + Err(err) => err.to_errno(), + } +} + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_stats_show( + ptr: *mut seq_file, + _: *mut kernel::ffi::c_void, +) -> kernel::ffi::c_int { + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which + // this method is called. + let m = unsafe { SeqFile::from_raw(ptr) }; + if let Err(err) = rust_binder_stats_show_impl(m) { + seq_print!(m, "failed to generate state: {:?}\n", err); + } + 0 +} + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_state_show( + ptr: *mut seq_file, + _: *mut kernel::ffi::c_void, +) -> kernel::ffi::c_int { + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which + // this method is called. + let m = unsafe { SeqFile::from_raw(ptr) }; + if let Err(err) = rust_binder_state_show_impl(m) { + seq_print!(m, "failed to generate state: {:?}\n", err); + } + 0 +} + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_proc_show( + ptr: *mut seq_file, + _: *mut kernel::ffi::c_void, +) -> kernel::ffi::c_int { + // SAFETY: Accessing the private field of `seq_file` is okay. + let pid = (unsafe { (*ptr).private }) as usize as Pid; + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which + // this method is called. + let m = unsafe { SeqFile::from_raw(ptr) }; + if let Err(err) = rust_binder_proc_show_impl(m, pid) { + seq_print!(m, "failed to generate state: {:?}\n", err); + } + 0 +} + +/// # Safety +/// Only called by binderfs. +#[no_mangle] +unsafe extern "C" fn rust_binder_transactions_show( + ptr: *mut seq_file, + _: *mut kernel::ffi::c_void, +) -> kernel::ffi::c_int { + // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which + // this method is called. + let m = unsafe { SeqFile::from_raw(ptr) }; + if let Err(err) = rust_binder_transactions_show_impl(m) { + seq_print!(m, "failed to generate state: {:?}\n", err); + } + 0 +} + +fn rust_binder_transactions_show_impl(m: &SeqFile) -> Result<()> { + seq_print!(m, "binder transactions:\n"); + let contexts = context::get_all_contexts()?; + for ctx in contexts { + let procs = ctx.get_all_procs()?; + for proc in procs { + proc.debug_print(m, &ctx, false)?; + seq_print!(m, "\n"); + } + } + Ok(()) +} + +fn rust_binder_stats_show_impl(m: &SeqFile) -> Result<()> { + seq_print!(m, "binder stats:\n"); + stats::GLOBAL_STATS.debug_print("", m); + let contexts = context::get_all_contexts()?; + for ctx in contexts { + let procs = ctx.get_all_procs()?; + for proc in procs { + proc.debug_print_stats(m, &ctx)?; + seq_print!(m, "\n"); + } + } + Ok(()) +} + +fn rust_binder_state_show_impl(m: &SeqFile) -> Result<()> { + seq_print!(m, "binder state:\n"); + let contexts = context::get_all_contexts()?; + for ctx in contexts { + let procs = ctx.get_all_procs()?; + for proc in procs { + proc.debug_print(m, &ctx, true)?; + seq_print!(m, "\n"); + } + } + Ok(()) +} + +fn rust_binder_proc_show_impl(m: &SeqFile, pid: Pid) -> Result<()> { + seq_print!(m, "binder proc state:\n"); + let contexts = context::get_all_contexts()?; + for ctx in contexts { + let procs = ctx.get_procs_with_pid(pid)?; + for proc in procs { + proc.debug_print(m, &ctx, true)?; + seq_print!(m, "\n"); + } + } + Ok(()) +} + +struct BinderfsProcFile(NonNull<bindings::dentry>); + +// SAFETY: Safe to drop any thread. +unsafe impl Send for BinderfsProcFile {} + +impl BinderfsProcFile { + /// # Safety + /// + /// Takes an inode from a newly created binder file. + unsafe fn new(nodp: *mut bindings::inode, pid: i32) -> Result<Option<Self>> { + // SAFETY: The caller passes an `inode` for a newly created binder file. + let dentry = unsafe { binderfs::rust_binderfs_create_proc_file(nodp, pid) }; + match kernel::error::from_err_ptr(dentry) { + Ok(dentry) => Ok(NonNull::new(dentry).map(Self)), + Err(err) if err == EEXIST => Ok(None), + Err(err) => Err(err), + } + } +} + +impl Drop for BinderfsProcFile { + fn drop(&mut self) { + // SAFETY: This is a dentry from `rust_binderfs_remove_file` that has not been deleted yet. + unsafe { binderfs::rust_binderfs_remove_file(self.0.as_ptr()) }; + } +} diff --git a/drivers/android/binder/rust_binderfs.c b/drivers/android/binder/rust_binderfs.c new file mode 100644 index 000000000000..c69026df775c --- /dev/null +++ b/drivers/android/binder/rust_binderfs.c @@ -0,0 +1,795 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compiler_types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/gfp.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/ipc_namespace.h> +#include <linux/kdev_t.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/namei.h> +#include <linux/magic.h> +#include <linux/major.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/mount.h> +#include <linux/fs_parser.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/spinlock_types.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/user_namespace.h> +#include <linux/xarray.h> +#include <uapi/asm-generic/errno-base.h> +#include <uapi/linux/android/binder.h> +#include <uapi/linux/android/binderfs.h> + +#include "rust_binder.h" +#include "rust_binder_internal.h" + +#define FIRST_INODE 1 +#define SECOND_INODE 2 +#define INODE_OFFSET 3 +#define BINDERFS_MAX_MINOR (1U << MINORBITS) +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) + +DEFINE_SHOW_ATTRIBUTE(rust_binder_stats); +DEFINE_SHOW_ATTRIBUTE(rust_binder_state); +DEFINE_SHOW_ATTRIBUTE(rust_binder_transactions); +DEFINE_SHOW_ATTRIBUTE(rust_binder_proc); + +char *rust_binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +module_param_named(rust_devices, rust_binder_devices_param, charp, 0444); + +static dev_t binderfs_dev; +static DEFINE_MUTEX(binderfs_minors_mutex); +static DEFINE_IDA(binderfs_minors); + +enum binderfs_param { + Opt_max, + Opt_stats_mode, +}; + +enum binderfs_stats_mode { + binderfs_stats_mode_unset, + binderfs_stats_mode_global, +}; + +struct binder_features { + bool oneway_spam_detection; + bool extended_error; + bool freeze_notification; +}; + +static const struct constant_table binderfs_param_stats[] = { + { "global", binderfs_stats_mode_global }, + {} +}; + +static const struct fs_parameter_spec binderfs_fs_parameters[] = { + fsparam_u32("max", Opt_max), + fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats), + {} +}; + +static struct binder_features binder_features = { + .oneway_spam_detection = true, + .extended_error = true, + .freeze_notification = true, +}; + +static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) +{ + return sb->s_fs_info; +} + +/** + * binderfs_binder_device_create - allocate inode from super block of a + * binderfs mount + * @ref_inode: inode from wich the super block will be taken + * @userp: buffer to copy information about new device for userspace to + * @req: struct binderfs_device as copied from userspace + * + * This function allocates a new binder_device and reserves a new minor + * number for it. + * Minor numbers are limited and tracked globally in binderfs_minors. The + * function will stash a struct binder_device for the specific binder + * device in i_private of the inode. + * It will go on to allocate a new inode from the super block of the + * filesystem mount, stash a struct binder_device in its i_private field + * and attach a dentry to that inode. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_device_create(struct inode *ref_inode, + struct binderfs_device __user *userp, + struct binderfs_device *req) +{ + int minor, ret; + struct dentry *dentry, *root; + struct binder_device *device = NULL; + rust_binder_context ctx = NULL; + struct inode *inode = NULL; + struct super_block *sb = ref_inode->i_sb; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + /* Reserve new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + if (++info->device_count <= info->mount_opts.max) + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); + return minor; + } + mutex_unlock(&binderfs_minors_mutex); + + ret = -ENOMEM; + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + goto err; + + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + + ctx = rust_binder_new_context(req->name); + if (!ctx) + goto err; + + inode = new_inode(sb); + if (!inode) + goto err; + + inode->i_ino = minor + INODE_OFFSET; + simple_inode_init_ts(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &rust_binder_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + req->major = MAJOR(binderfs_dev); + req->minor = minor; + device->ctx = ctx; + device->minor = minor; + + if (userp && copy_to_user(userp, req, sizeof(*req))) { + ret = -EFAULT; + goto err; + } + + root = sb->s_root; + dentry = simple_start_creating(root, req->name); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err; + } + + inode->i_private = device; + d_make_persistent(dentry, inode); + + fsnotify_create(root->d_inode, dentry); + simple_done_creating(dentry); + + return 0; + +err: + kfree(device); + rust_binder_remove_context(ctx); + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_free(&binderfs_minors, minor); + mutex_unlock(&binderfs_minors_mutex); + iput(inode); + + return ret; +} + +/** + * binder_ctl_ioctl - handle binder device node allocation requests + * + * The request handler for the binder-control device. All requests operate on + * the binderfs mount the binder-control device resides in: + * - BINDER_CTL_ADD + * Allocate a new binder device. + * + * Return: %0 on success, negative errno on failure. + */ +static long binder_ctl_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + struct inode *inode = file_inode(file); + struct binderfs_device __user *device = (struct binderfs_device __user *)arg; + struct binderfs_device device_req; + + switch (cmd) { + case BINDER_CTL_ADD: + ret = copy_from_user(&device_req, device, sizeof(device_req)); + if (ret) { + ret = -EFAULT; + break; + } + + ret = binderfs_binder_device_create(inode, device, &device_req); + break; + default: + break; + } + + return ret; +} + +static void binderfs_evict_inode(struct inode *inode) +{ + struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_SB(inode->i_sb); + + clear_inode(inode); + + if (!S_ISCHR(inode->i_mode) || !device) + return; + + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_free(&binderfs_minors, device->minor); + mutex_unlock(&binderfs_minors_mutex); + + /* ctx is null for binder-control, but this function ignores null pointers */ + rust_binder_remove_context(device->ctx); + + kfree(device); +} + +static int binderfs_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + int opt; + struct binderfs_mount_opts *ctx = fc->fs_private; + struct fs_parse_result result; + + opt = fs_parse(fc, binderfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_max: + if (result.uint_32 > BINDERFS_MAX_MINOR) + return invalfc(fc, "Bad value for '%s'", param->key); + + ctx->max = result.uint_32; + break; + case Opt_stats_mode: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ctx->stats_mode = result.uint_32; + break; + default: + return invalfc(fc, "Unsupported parameter '%s'", param->key); + } + + return 0; +} + +static int binderfs_fs_context_reconfigure(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx = fc->fs_private; + struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb); + + if (info->mount_opts.stats_mode != ctx->stats_mode) + return invalfc(fc, "Binderfs stats mode cannot be changed during a remount"); + + info->mount_opts.stats_mode = ctx->stats_mode; + info->mount_opts.max = ctx->max; + return 0; +} + +static int binderfs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info = BINDERFS_SB(root->d_sb); + + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + + switch (info->mount_opts.stats_mode) { + case binderfs_stats_mode_unset: + break; + case binderfs_stats_mode_global: + seq_puts(seq, ",stats=global"); + break; + } + + return 0; +} + +static const struct super_operations binderfs_super_ops = { + .evict_inode = binderfs_evict_inode, + .show_options = binderfs_show_options, + .statfs = simple_statfs, +}; + +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + + return info->control_dentry == dentry; +} + +static int binderfs_rename(struct mnt_idmap *idmap, + struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) + return -EPERM; + + return simple_rename(idmap, old_dir, old_dentry, new_dir, + new_dentry, flags); +} + +static int binderfs_unlink(struct inode *dir, struct dentry *dentry) +{ + if (is_binderfs_control_device(dentry)) + return -EPERM; + + return simple_unlink(dir, dentry); +} + +static const struct file_operations binder_ctl_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = binder_ctl_ioctl, + .compat_ioctl = binder_ctl_ioctl, + .llseek = noop_llseek, +}; + +/** + * binderfs_binder_ctl_create - create a new binder-control device + * @sb: super block of the binderfs mount + * + * This function creates a new binder-control device node in the binderfs mount + * referred to by @sb. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_ctl_create(struct super_block *sb) +{ + int minor, ret; + struct dentry *dentry; + struct binder_device *device; + struct inode *inode = NULL; + struct dentry *root = sb->s_root; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + /* If we have already created a binder-control node, return. */ + if (info->control_dentry) { + ret = 0; + goto out; + } + + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto out; + + /* Reserve a new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); + mutex_unlock(&binderfs_minors_mutex); + if (minor < 0) { + ret = minor; + goto out; + } + + inode->i_ino = SECOND_INODE; + simple_inode_init_ts(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_ctl_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + device->minor = minor; + device->ctx = NULL; + + dentry = d_alloc_name(root, "binder-control"); + if (!dentry) + goto out; + + inode->i_private = device; + info->control_dentry = dentry; + d_add(dentry, inode); + + return 0; + +out: + kfree(device); + iput(inode); + + return ret; +} + +static const struct inode_operations binderfs_dir_inode_operations = { + .lookup = simple_lookup, + .rename = binderfs_rename, + .unlink = binderfs_unlink, +}; + +static struct inode *binderfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret; + + ret = new_inode(sb); + if (ret) { + ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); + ret->i_mode = mode; + simple_inode_init_ts(ret); + } + return ret; +} + +void rust_binderfs_remove_file(struct dentry *dentry) +{ + simple_recursive_removal(dentry, NULL); +} + +static struct dentry *rust_binderfs_create_file(struct dentry *parent, const char *name, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *new_inode; + + new_inode = binderfs_make_inode(parent->d_sb, S_IFREG | 0444); + if (!new_inode) + return ERR_PTR(-ENOMEM); + new_inode->i_fop = fops; + new_inode->i_private = data; + + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + iput(new_inode); + return dentry; + } + + d_make_persistent(dentry, new_inode); + fsnotify_create(parent->d_inode, dentry); + simple_done_creating(dentry); + return dentry; +} + +struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid) +{ + struct binderfs_info *info = nodp->i_sb->s_fs_info; + struct dentry *dir = info->proc_log_dir; + char strbuf[20 + 1]; + void *data = (void *)(unsigned long) pid; + + if (!dir) + return NULL; + + snprintf(strbuf, sizeof(strbuf), "%u", pid); + return rust_binderfs_create_file(dir, strbuf, &rust_binder_proc_fops, data); +} + +static struct dentry *binderfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *new_inode; + + new_inode = binderfs_make_inode(parent->d_sb, S_IFDIR | 0755); + if (!new_inode) + return ERR_PTR(-ENOMEM); + + new_inode->i_fop = &simple_dir_operations; + new_inode->i_op = &simple_dir_inode_operations; + + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + iput(new_inode); + return dentry; + } + + inc_nlink(parent->d_inode); + set_nlink(new_inode, 2); + d_make_persistent(dentry, new_inode); + fsnotify_mkdir(parent->d_inode, dentry); + simple_done_creating(dentry); + return dentry; +} + +static int binder_features_show(struct seq_file *m, void *unused) +{ + bool *feature = m->private; + + seq_printf(m, "%d\n", *feature); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(binder_features); + +static int init_binder_features(struct super_block *sb) +{ + struct dentry *dentry, *dir; + + dir = binderfs_create_dir(sb->s_root, "features"); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + dentry = rust_binderfs_create_file(dir, "oneway_spam_detection", + &binder_features_fops, + &binder_features.oneway_spam_detection); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + dentry = rust_binderfs_create_file(dir, "extended_error", + &binder_features_fops, + &binder_features.extended_error); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + dentry = rust_binderfs_create_file(dir, "freeze_notification", + &binder_features_fops, + &binder_features.freeze_notification); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + return 0; +} + +static int init_binder_logs(struct super_block *sb) +{ + struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; + struct binderfs_info *info; + int ret = 0; + + binder_logs_root_dir = binderfs_create_dir(sb->s_root, + "binder_logs"); + if (IS_ERR(binder_logs_root_dir)) { + ret = PTR_ERR(binder_logs_root_dir); + goto out; + } + + dentry = rust_binderfs_create_file(binder_logs_root_dir, "stats", + &rust_binder_stats_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = rust_binderfs_create_file(binder_logs_root_dir, "state", + &rust_binder_state_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = rust_binderfs_create_file(binder_logs_root_dir, "transactions", + &rust_binder_transactions_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); + if (IS_ERR(proc_log_dir)) { + ret = PTR_ERR(proc_log_dir); + goto out; + } + info = sb->s_fs_info; + info->proc_log_dir = proc_log_dir; + +out: + return ret; +} + +static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) +{ + int ret; + struct binderfs_info *info; + struct binderfs_mount_opts *ctx = fc->fs_private; + struct inode *inode = NULL; + struct binderfs_device device_info = {}; + const char *name; + size_t len; + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + + /* + * The binderfs filesystem can be mounted by userns root in a + * non-initial userns. By default such mounts have the SB_I_NODEV flag + * set in s_iflags to prevent security issues where userns root can + * just create random device nodes via mknod() since it owns the + * filesystem mount. But binderfs does not allow to create any files + * including devices nodes. The only way to create binder devices nodes + * is through the binder-control device which userns root is explicitly + * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both + * necessary and safe. + */ + sb->s_iflags &= ~SB_I_NODEV; + sb->s_iflags |= SB_I_NOEXEC; + sb->s_magic = RUST_BINDERFS_SUPER_MAGIC; + sb->s_op = &binderfs_super_ops; + sb->s_time_gran = 1; + + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); + + info->root_gid = make_kgid(sb->s_user_ns, 0); + if (!gid_valid(info->root_gid)) + info->root_gid = GLOBAL_ROOT_GID; + info->root_uid = make_kuid(sb->s_user_ns, 0); + if (!uid_valid(info->root_uid)) + info->root_uid = GLOBAL_ROOT_UID; + info->mount_opts.max = ctx->max; + info->mount_opts.stats_mode = ctx->stats_mode; + + inode = new_inode(sb); + if (!inode) + return -ENOMEM; + + inode->i_ino = FIRST_INODE; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | 0755; + simple_inode_init_ts(inode); + inode->i_op = &binderfs_dir_inode_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + ret = binderfs_binder_ctl_create(sb); + if (ret) + return ret; + + name = rust_binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + strscpy(device_info.name, name, len + 1); + ret = binderfs_binder_device_create(inode, NULL, &device_info); + if (ret) + return ret; + name += len; + if (*name == ',') + name++; + } + + ret = init_binder_features(sb); + if (ret) + return ret; + + if (info->mount_opts.stats_mode == binderfs_stats_mode_global) + return init_binder_logs(sb); + + return 0; +} + +static int binderfs_fs_context_get_tree(struct fs_context *fc) +{ + return get_tree_nodev(fc, binderfs_fill_super); +} + +static void binderfs_fs_context_free(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx = fc->fs_private; + + kfree(ctx); +} + +static const struct fs_context_operations binderfs_fs_context_ops = { + .free = binderfs_fs_context_free, + .get_tree = binderfs_fs_context_get_tree, + .parse_param = binderfs_fs_context_parse_param, + .reconfigure = binderfs_fs_context_reconfigure, +}; + +static int binderfs_init_fs_context(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx; + + ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->max = BINDERFS_MAX_MINOR; + ctx->stats_mode = binderfs_stats_mode_unset; + + fc->fs_private = ctx; + fc->ops = &binderfs_fs_context_ops; + + return 0; +} + +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + /* + * During inode eviction struct binderfs_info is needed. + * So first wipe the super_block then free struct binderfs_info. + */ + kill_anon_super(sb); + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); +} + +static struct file_system_type binder_fs_type = { + .name = "binder", + .init_fs_context = binderfs_init_fs_context, + .parameters = binderfs_fs_parameters, + .kill_sb = binderfs_kill_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +int init_rust_binderfs(void) +{ + int ret; + const char *name; + size_t len; + + /* Verify that the default binderfs device names are valid. */ + name = rust_binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + if (len > BINDERFS_MAX_NAME) + return -E2BIG; + name += len; + if (*name == ',') + name++; + } + + /* Allocate new major number for binderfs. */ + ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, + "rust_binder"); + if (ret) + return ret; + + ret = register_filesystem(&binder_fs_type); + if (ret) { + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); + return ret; + } + + return ret; +} diff --git a/drivers/android/binder/stats.rs b/drivers/android/binder/stats.rs new file mode 100644 index 000000000000..037002651941 --- /dev/null +++ b/drivers/android/binder/stats.rs @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! Keep track of statistics for binder_logs. + +use crate::defs::*; +use core::sync::atomic::{AtomicU32, Ordering::Relaxed}; +use kernel::{ioctl::_IOC_NR, seq_file::SeqFile, seq_print}; + +const BC_COUNT: usize = _IOC_NR(BC_REPLY_SG) as usize + 1; +const BR_COUNT: usize = _IOC_NR(BR_TRANSACTION_PENDING_FROZEN) as usize + 1; + +pub(crate) static GLOBAL_STATS: BinderStats = BinderStats::new(); + +pub(crate) struct BinderStats { + bc: [AtomicU32; BC_COUNT], + br: [AtomicU32; BR_COUNT], +} + +impl BinderStats { + pub(crate) const fn new() -> Self { + #[expect(clippy::declare_interior_mutable_const)] + const ZERO: AtomicU32 = AtomicU32::new(0); + + Self { + bc: [ZERO; BC_COUNT], + br: [ZERO; BR_COUNT], + } + } + + pub(crate) fn inc_bc(&self, bc: u32) { + let idx = _IOC_NR(bc) as usize; + if let Some(bc_ref) = self.bc.get(idx) { + bc_ref.fetch_add(1, Relaxed); + } + } + + pub(crate) fn inc_br(&self, br: u32) { + let idx = _IOC_NR(br) as usize; + if let Some(br_ref) = self.br.get(idx) { + br_ref.fetch_add(1, Relaxed); + } + } + + pub(crate) fn debug_print(&self, prefix: &str, m: &SeqFile) { + for (i, cnt) in self.bc.iter().enumerate() { + let cnt = cnt.load(Relaxed); + if cnt > 0 { + seq_print!(m, "{}{}: {}\n", prefix, command_string(i), cnt); + } + } + for (i, cnt) in self.br.iter().enumerate() { + let cnt = cnt.load(Relaxed); + if cnt > 0 { + seq_print!(m, "{}{}: {}\n", prefix, return_string(i), cnt); + } + } + } +} + +mod strings { + use core::str::from_utf8_unchecked; + use kernel::str::{CStr, CStrExt as _}; + + extern "C" { + static binder_command_strings: [*const u8; super::BC_COUNT]; + static binder_return_strings: [*const u8; super::BR_COUNT]; + } + + pub(super) fn command_string(i: usize) -> &'static str { + // SAFETY: Accessing `binder_command_strings` is always safe. + let c_str_ptr = unsafe { binder_command_strings[i] }; + // SAFETY: The `binder_command_strings` array only contains nul-terminated strings. + let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.to_bytes(); + // SAFETY: The `binder_command_strings` array only contains strings with ascii-chars. + unsafe { from_utf8_unchecked(bytes) } + } + + pub(super) fn return_string(i: usize) -> &'static str { + // SAFETY: Accessing `binder_return_strings` is always safe. + let c_str_ptr = unsafe { binder_return_strings[i] }; + // SAFETY: The `binder_command_strings` array only contains nul-terminated strings. + let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.to_bytes(); + // SAFETY: The `binder_command_strings` array only contains strings with ascii-chars. + unsafe { from_utf8_unchecked(bytes) } + } +} +use strings::{command_string, return_string}; diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs new file mode 100644 index 000000000000..1a8e6fdc0dc4 --- /dev/null +++ b/drivers/android/binder/thread.rs @@ -0,0 +1,1596 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! This module defines the `Thread` type, which represents a userspace thread that is using +//! binder. +//! +//! The `Process` object stores all of the threads in an rb tree. + +use kernel::{ + bindings, + fs::{File, LocalFile}, + list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc}, + prelude::*, + security, + seq_file::SeqFile, + seq_print, + sync::poll::{PollCondVar, PollTable}, + sync::{Arc, SpinLock}, + task::Task, + types::ARef, + uaccess::UserSlice, + uapi, +}; + +use crate::{ + allocation::{Allocation, AllocationView, BinderObject, BinderObjectRef, NewAllocation}, + defs::*, + error::BinderResult, + process::{GetWorkOrRegister, Process}, + ptr_align, + stats::GLOBAL_STATS, + transaction::Transaction, + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverCode, DeliverToRead, +}; + +use core::{ + mem::size_of, + sync::atomic::{AtomicU32, Ordering}, +}; + +/// Stores the layout of the scatter-gather entries. This is used during the `translate_objects` +/// call and is discarded when it returns. +struct ScatterGatherState { + /// A struct that tracks the amount of unused buffer space. + unused_buffer_space: UnusedBufferSpace, + /// Scatter-gather entries to copy. + sg_entries: KVec<ScatterGatherEntry>, + /// Indexes into `sg_entries` corresponding to the last binder_buffer_object that + /// was processed and all of its ancestors. The array is in sorted order. + ancestors: KVec<usize>, +} + +/// This entry specifies an additional buffer that should be copied using the scatter-gather +/// mechanism. +struct ScatterGatherEntry { + /// The index in the offset array of the BINDER_TYPE_PTR that this entry originates from. + obj_index: usize, + /// Offset in target buffer. + offset: usize, + /// User address in source buffer. + sender_uaddr: usize, + /// Number of bytes to copy. + length: usize, + /// The minimum offset of the next fixup in this buffer. + fixup_min_offset: usize, + /// The offsets within this buffer that contain pointers which should be translated. + pointer_fixups: KVec<PointerFixupEntry>, +} + +/// This entry specifies that a fixup should happen at `target_offset` of the +/// buffer. If `skip` is nonzero, then the fixup is a `binder_fd_array_object` +/// and is applied later. Otherwise if `skip` is zero, then the size of the +/// fixup is `sizeof::<u64>()` and `pointer_value` is written to the buffer. +struct PointerFixupEntry { + /// The number of bytes to skip, or zero for a `binder_buffer_object` fixup. + skip: usize, + /// The translated pointer to write when `skip` is zero. + pointer_value: u64, + /// The offset at which the value should be written. The offset is relative + /// to the original buffer. + target_offset: usize, +} + +/// Return type of `apply_and_validate_fixup_in_parent`. +struct ParentFixupInfo { + /// The index of the parent buffer in `sg_entries`. + parent_sg_index: usize, + /// The number of ancestors of the buffer. + /// + /// The buffer is considered an ancestor of itself, so this is always at + /// least one. + num_ancestors: usize, + /// New value of `fixup_min_offset` if this fixup is applied. + new_min_offset: usize, + /// The offset of the fixup in the target buffer. + target_offset: usize, +} + +impl ScatterGatherState { + /// Called when a `binder_buffer_object` or `binder_fd_array_object` tries + /// to access a region in its parent buffer. These accesses have various + /// restrictions, which this method verifies. + /// + /// The `parent_offset` and `length` arguments describe the offset and + /// length of the access in the parent buffer. + /// + /// # Detailed restrictions + /// + /// Obviously the fixup must be in-bounds for the parent buffer. + /// + /// For safety reasons, we only allow fixups inside a buffer to happen + /// at increasing offsets; additionally, we only allow fixup on the last + /// buffer object that was verified, or one of its parents. + /// + /// Example of what is allowed: + /// + /// A + /// B (parent = A, offset = 0) + /// C (parent = A, offset = 16) + /// D (parent = C, offset = 0) + /// E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) + /// + /// Examples of what is not allowed: + /// + /// Decreasing offsets within the same parent: + /// A + /// C (parent = A, offset = 16) + /// B (parent = A, offset = 0) // decreasing offset within A + /// + /// Arcerring to a parent that wasn't the last object or any of its parents: + /// A + /// B (parent = A, offset = 0) + /// C (parent = A, offset = 0) + /// C (parent = A, offset = 16) + /// D (parent = B, offset = 0) // B is not A or any of A's parents + fn validate_parent_fixup( + &self, + parent: usize, + parent_offset: usize, + length: usize, + ) -> Result<ParentFixupInfo> { + // Using `position` would also be correct, but `rposition` avoids + // quadratic running times. + let ancestors_i = self + .ancestors + .iter() + .copied() + .rposition(|sg_idx| self.sg_entries[sg_idx].obj_index == parent) + .ok_or(EINVAL)?; + let sg_idx = self.ancestors[ancestors_i]; + let sg_entry = match self.sg_entries.get(sg_idx) { + Some(sg_entry) => sg_entry, + None => { + pr_err!( + "self.ancestors[{}] is {}, but self.sg_entries.len() is {}", + ancestors_i, + sg_idx, + self.sg_entries.len() + ); + return Err(EINVAL); + } + }; + if sg_entry.fixup_min_offset > parent_offset { + pr_warn!( + "validate_parent_fixup: fixup_min_offset={}, parent_offset={}", + sg_entry.fixup_min_offset, + parent_offset + ); + return Err(EINVAL); + } + let new_min_offset = parent_offset.checked_add(length).ok_or(EINVAL)?; + if new_min_offset > sg_entry.length { + pr_warn!( + "validate_parent_fixup: new_min_offset={}, sg_entry.length={}", + new_min_offset, + sg_entry.length + ); + return Err(EINVAL); + } + let target_offset = sg_entry.offset.checked_add(parent_offset).ok_or(EINVAL)?; + // The `ancestors_i + 1` operation can't overflow since the output of the addition is at + // most `self.ancestors.len()`, which also fits in a usize. + Ok(ParentFixupInfo { + parent_sg_index: sg_idx, + num_ancestors: ancestors_i + 1, + new_min_offset, + target_offset, + }) + } +} + +/// Keeps track of how much unused buffer space is left. The initial amount is the number of bytes +/// requested by the user using the `buffers_size` field of `binder_transaction_data_sg`. Each time +/// we translate an object of type `BINDER_TYPE_PTR`, some of the unused buffer space is consumed. +struct UnusedBufferSpace { + /// The start of the remaining space. + offset: usize, + /// The end of the remaining space. + limit: usize, +} +impl UnusedBufferSpace { + /// Claim the next `size` bytes from the unused buffer space. The offset for the claimed chunk + /// into the buffer is returned. + fn claim_next(&mut self, size: usize) -> Result<usize> { + // We require every chunk to be aligned. + let size = ptr_align(size).ok_or(EINVAL)?; + let new_offset = self.offset.checked_add(size).ok_or(EINVAL)?; + + if new_offset <= self.limit { + let offset = self.offset; + self.offset = new_offset; + Ok(offset) + } else { + Err(EINVAL) + } + } +} + +pub(crate) enum PushWorkRes { + Ok, + FailedDead(DLArc<dyn DeliverToRead>), +} + +impl PushWorkRes { + fn is_ok(&self) -> bool { + match self { + PushWorkRes::Ok => true, + PushWorkRes::FailedDead(_) => false, + } + } +} + +/// The fields of `Thread` protected by the spinlock. +struct InnerThread { + /// Determines the looper state of the thread. It is a bit-wise combination of the constants + /// prefixed with `LOOPER_`. + looper_flags: u32, + + /// Determines whether the looper should return. + looper_need_return: bool, + + /// Determines if thread is dead. + is_dead: bool, + + /// Work item used to deliver error codes to the thread that started a transaction. Stored here + /// so that it can be reused. + reply_work: DArc<ThreadError>, + + /// Work item used to deliver error codes to the current thread. Stored here so that it can be + /// reused. + return_work: DArc<ThreadError>, + + /// Determines whether the work list below should be processed. When set to false, `work_list` + /// is treated as if it were empty. + process_work_list: bool, + /// List of work items to deliver to userspace. + work_list: List<DTRWrap<dyn DeliverToRead>>, + current_transaction: Option<DArc<Transaction>>, + + /// Extended error information for this thread. + extended_error: ExtendedError, +} + +const LOOPER_REGISTERED: u32 = 0x01; +const LOOPER_ENTERED: u32 = 0x02; +const LOOPER_EXITED: u32 = 0x04; +const LOOPER_INVALID: u32 = 0x08; +const LOOPER_WAITING: u32 = 0x10; +const LOOPER_WAITING_PROC: u32 = 0x20; +const LOOPER_POLL: u32 = 0x40; + +impl InnerThread { + fn new() -> Result<Self> { + fn next_err_id() -> u32 { + static EE_ID: AtomicU32 = AtomicU32::new(0); + EE_ID.fetch_add(1, Ordering::Relaxed) + } + + Ok(Self { + looper_flags: 0, + looper_need_return: false, + is_dead: false, + process_work_list: false, + reply_work: ThreadError::try_new()?, + return_work: ThreadError::try_new()?, + work_list: List::new(), + current_transaction: None, + extended_error: ExtendedError::new(next_err_id(), BR_OK, 0), + }) + } + + fn pop_work(&mut self) -> Option<DLArc<dyn DeliverToRead>> { + if !self.process_work_list { + return None; + } + + let ret = self.work_list.pop_front(); + self.process_work_list = !self.work_list.is_empty(); + ret + } + + fn push_work(&mut self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes { + if self.is_dead { + PushWorkRes::FailedDead(work) + } else { + self.work_list.push_back(work); + self.process_work_list = true; + PushWorkRes::Ok + } + } + + fn push_reply_work(&mut self, code: u32) { + if let Ok(work) = ListArc::try_from_arc(self.reply_work.clone()) { + work.set_error_code(code); + self.push_work(work); + } else { + pr_warn!("Thread reply work is already in use."); + } + } + + fn push_return_work(&mut self, reply: u32) { + if let Ok(work) = ListArc::try_from_arc(self.return_work.clone()) { + work.set_error_code(reply); + self.push_work(work); + } else { + pr_warn!("Thread return work is already in use."); + } + } + + /// Used to push work items that do not need to be processed immediately and can wait until the + /// thread gets another work item. + fn push_work_deferred(&mut self, work: DLArc<dyn DeliverToRead>) { + self.work_list.push_back(work); + } + + /// Fetches the transaction this thread can reply to. If the thread has a pending transaction + /// (that it could respond to) but it has also issued a transaction, it must first wait for the + /// previously-issued transaction to complete. + /// + /// The `thread` parameter should be the thread containing this `ThreadInner`. + fn pop_transaction_to_reply(&mut self, thread: &Thread) -> Result<DArc<Transaction>> { + let transaction = self.current_transaction.take().ok_or(EINVAL)?; + if core::ptr::eq(thread, transaction.from.as_ref()) { + self.current_transaction = Some(transaction); + return Err(EINVAL); + } + // Find a new current transaction for this thread. + self.current_transaction = transaction.find_from(thread).cloned(); + Ok(transaction) + } + + fn pop_transaction_replied(&mut self, transaction: &DArc<Transaction>) -> bool { + match self.current_transaction.take() { + None => false, + Some(old) => { + if !Arc::ptr_eq(transaction, &old) { + self.current_transaction = Some(old); + return false; + } + self.current_transaction = old.clone_next(); + true + } + } + } + + fn looper_enter(&mut self) { + self.looper_flags |= LOOPER_ENTERED; + if self.looper_flags & LOOPER_REGISTERED != 0 { + self.looper_flags |= LOOPER_INVALID; + } + } + + fn looper_register(&mut self, valid: bool) { + self.looper_flags |= LOOPER_REGISTERED; + if !valid || self.looper_flags & LOOPER_ENTERED != 0 { + self.looper_flags |= LOOPER_INVALID; + } + } + + fn looper_exit(&mut self) { + self.looper_flags |= LOOPER_EXITED; + } + + /// Determines whether the thread is part of a pool, i.e., if it is a looper. + fn is_looper(&self) -> bool { + self.looper_flags & (LOOPER_ENTERED | LOOPER_REGISTERED) != 0 + } + + /// Determines whether the thread should attempt to fetch work items from the process queue. + /// This is generally case when the thread is registered as a looper and not part of a + /// transaction stack. But if there is local work, we want to return to userspace before we + /// deliver any remote work. + fn should_use_process_work_queue(&self) -> bool { + self.current_transaction.is_none() && !self.process_work_list && self.is_looper() + } + + fn poll(&mut self) -> u32 { + self.looper_flags |= LOOPER_POLL; + if self.process_work_list || self.looper_need_return { + bindings::POLLIN + } else { + 0 + } + } +} + +/// This represents a thread that's used with binder. +#[pin_data] +pub(crate) struct Thread { + pub(crate) id: i32, + pub(crate) process: Arc<Process>, + pub(crate) task: ARef<Task>, + #[pin] + inner: SpinLock<InnerThread>, + #[pin] + work_condvar: PollCondVar, + /// Used to insert this thread into the process' `ready_threads` list. + /// + /// INVARIANT: May never be used for any other list than the `self.process.ready_threads`. + #[pin] + links: ListLinks, + #[pin] + links_track: AtomicTracker, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for Thread { + tracked_by links_track: AtomicTracker; + } +} +kernel::list::impl_list_item! { + impl ListItem<0> for Thread { + using ListLinks { self.links }; + } +} + +impl Thread { + pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> { + let inner = InnerThread::new()?; + + Arc::pin_init( + try_pin_init!(Thread { + id, + process, + task: ARef::from(&**kernel::current!()), + inner <- kernel::new_spinlock!(inner, "Thread::inner"), + work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"), + links <- ListLinks::new(), + links_track <- AtomicTracker::new(), + }), + GFP_KERNEL, + ) + } + + #[inline(never)] + pub(crate) fn debug_print(self: &Arc<Self>, m: &SeqFile, print_all: bool) -> Result<()> { + let inner = self.inner.lock(); + + if print_all || inner.current_transaction.is_some() || !inner.work_list.is_empty() { + seq_print!( + m, + " thread {}: l {:02x} need_return {}\n", + self.id, + inner.looper_flags, + inner.looper_need_return, + ); + } + + let mut t_opt = inner.current_transaction.as_ref(); + while let Some(t) = t_opt { + if Arc::ptr_eq(&t.from, self) { + t.debug_print_inner(m, " outgoing transaction "); + t_opt = t.from_parent.as_ref(); + } else if Arc::ptr_eq(&t.to, &self.process) { + t.debug_print_inner(m, " incoming transaction "); + t_opt = t.find_from(self); + } else { + t.debug_print_inner(m, " bad transaction "); + t_opt = None; + } + } + + for work in &inner.work_list { + work.debug_print(m, " ", " pending transaction ")?; + } + Ok(()) + } + + pub(crate) fn get_extended_error(&self, data: UserSlice) -> Result { + let mut writer = data.writer(); + let ee = self.inner.lock().extended_error; + writer.write(&ee)?; + Ok(()) + } + + pub(crate) fn set_current_transaction(&self, transaction: DArc<Transaction>) { + self.inner.lock().current_transaction = Some(transaction); + } + + pub(crate) fn has_current_transaction(&self) -> bool { + self.inner.lock().current_transaction.is_some() + } + + /// Attempts to fetch a work item from the thread-local queue. The behaviour if the queue is + /// empty depends on `wait`: if it is true, the function waits for some work to be queued (or a + /// signal); otherwise it returns indicating that none is available. + fn get_work_local(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> { + { + let mut inner = self.inner.lock(); + if inner.looper_need_return { + return Ok(inner.pop_work()); + } + } + + // Try once if the caller does not want to wait. + if !wait { + return self.inner.lock().pop_work().ok_or(EAGAIN).map(Some); + } + + // Loop waiting only on the local queue (i.e., not registering with the process queue). + let mut inner = self.inner.lock(); + loop { + if let Some(work) = inner.pop_work() { + return Ok(Some(work)); + } + + inner.looper_flags |= LOOPER_WAITING; + let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner); + inner.looper_flags &= !LOOPER_WAITING; + + if signal_pending { + return Err(EINTR); + } + if inner.looper_need_return { + return Ok(None); + } + } + } + + /// Attempts to fetch a work item from the thread-local queue, falling back to the process-wide + /// queue if none is available locally. + /// + /// This must only be called when the thread is not participating in a transaction chain. If it + /// is, the local version (`get_work_local`) should be used instead. + fn get_work(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> { + // Try to get work from the thread's work queue, using only a local lock. + { + let mut inner = self.inner.lock(); + if let Some(work) = inner.pop_work() { + return Ok(Some(work)); + } + if inner.looper_need_return { + drop(inner); + return Ok(self.process.get_work()); + } + } + + // If the caller doesn't want to wait, try to grab work from the process queue. + // + // We know nothing will have been queued directly to the thread queue because it is not in + // a transaction and it is not in the process' ready list. + if !wait { + return self.process.get_work().ok_or(EAGAIN).map(Some); + } + + // Get work from the process queue. If none is available, atomically register as ready. + let reg = match self.process.get_work_or_register(self) { + GetWorkOrRegister::Work(work) => return Ok(Some(work)), + GetWorkOrRegister::Register(reg) => reg, + }; + + let mut inner = self.inner.lock(); + loop { + if let Some(work) = inner.pop_work() { + return Ok(Some(work)); + } + + inner.looper_flags |= LOOPER_WAITING | LOOPER_WAITING_PROC; + let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner); + inner.looper_flags &= !(LOOPER_WAITING | LOOPER_WAITING_PROC); + + if signal_pending || inner.looper_need_return { + // We need to return now. We need to pull the thread off the list of ready threads + // (by dropping `reg`), then check the state again after it's off the list to + // ensure that something was not queued in the meantime. If something has been + // queued, we just return it (instead of the error). + drop(inner); + drop(reg); + + let res = match self.inner.lock().pop_work() { + Some(work) => Ok(Some(work)), + None if signal_pending => Err(EINTR), + None => Ok(None), + }; + return res; + } + } + } + + /// Push the provided work item to be delivered to user space via this thread. + /// + /// Returns whether the item was successfully pushed. This can only fail if the thread is dead. + pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes { + let sync = work.should_sync_wakeup(); + + let res = self.inner.lock().push_work(work); + + if res.is_ok() { + if sync { + self.work_condvar.notify_sync(); + } else { + self.work_condvar.notify_one(); + } + } + + res + } + + /// Attempts to push to given work item to the thread if it's a looper thread (i.e., if it's + /// part of a thread pool) and is alive. Otherwise, push the work item to the process instead. + pub(crate) fn push_work_if_looper(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult { + let mut inner = self.inner.lock(); + if inner.is_looper() && !inner.is_dead { + inner.push_work(work); + Ok(()) + } else { + drop(inner); + self.process.push_work(work) + } + } + + pub(crate) fn push_work_deferred(&self, work: DLArc<dyn DeliverToRead>) { + self.inner.lock().push_work_deferred(work); + } + + pub(crate) fn push_return_work(&self, reply: u32) { + self.inner.lock().push_return_work(reply); + } + + fn translate_object( + &self, + obj_index: usize, + offset: usize, + object: BinderObjectRef<'_>, + view: &mut AllocationView<'_>, + allow_fds: bool, + sg_state: &mut ScatterGatherState, + ) -> BinderResult { + match object { + BinderObjectRef::Binder(obj) => { + let strong = obj.hdr.type_ == BINDER_TYPE_BINDER; + // SAFETY: `binder` is a `binder_uintptr_t`; any bit pattern is a valid + // representation. + let ptr = unsafe { obj.__bindgen_anon_1.binder } as _; + let cookie = obj.cookie as _; + let flags = obj.flags as _; + let node = self + .process + .as_arc_borrow() + .get_node(ptr, cookie, flags, strong, self)?; + security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?; + view.transfer_binder_object(offset, obj, strong, node)?; + } + BinderObjectRef::Handle(obj) => { + let strong = obj.hdr.type_ == BINDER_TYPE_HANDLE; + // SAFETY: `handle` is a `u32`; any bit pattern is a valid representation. + let handle = unsafe { obj.__bindgen_anon_1.handle } as _; + let node = self.process.get_node_from_handle(handle, strong)?; + security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?; + view.transfer_binder_object(offset, obj, strong, node)?; + } + BinderObjectRef::Fd(obj) => { + if !allow_fds { + return Err(EPERM.into()); + } + + // SAFETY: `fd` is a `u32`; any bit pattern is a valid representation. + let fd = unsafe { obj.__bindgen_anon_1.fd }; + let file = LocalFile::fget(fd)?; + // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an + // ioctl, so there are no active calls to `fdget_pos` on this thread. + let file = unsafe { LocalFile::assume_no_fdget_pos(file) }; + security::binder_transfer_file( + &self.process.cred, + &view.alloc.process.cred, + &file, + )?; + + let mut obj_write = BinderFdObject::default(); + obj_write.hdr.type_ = BINDER_TYPE_FD; + // This will be overwritten with the actual fd when the transaction is received. + obj_write.__bindgen_anon_1.fd = u32::MAX; + obj_write.cookie = obj.cookie; + view.write::<BinderFdObject>(offset, &obj_write)?; + + const FD_FIELD_OFFSET: usize = + core::mem::offset_of!(uapi::binder_fd_object, __bindgen_anon_1.fd); + + let field_offset = offset + FD_FIELD_OFFSET; + + view.alloc.info_add_fd(file, field_offset, false)?; + } + BinderObjectRef::Ptr(obj) => { + let obj_length = obj.length.try_into().map_err(|_| EINVAL)?; + let alloc_offset = match sg_state.unused_buffer_space.claim_next(obj_length) { + Ok(alloc_offset) => alloc_offset, + Err(err) => { + pr_warn!( + "Failed to claim space for a BINDER_TYPE_PTR. (offset: {}, limit: {}, size: {})", + sg_state.unused_buffer_space.offset, + sg_state.unused_buffer_space.limit, + obj_length, + ); + return Err(err.into()); + } + }; + + let sg_state_idx = sg_state.sg_entries.len(); + sg_state.sg_entries.push( + ScatterGatherEntry { + obj_index, + offset: alloc_offset, + sender_uaddr: obj.buffer as _, + length: obj_length, + pointer_fixups: KVec::new(), + fixup_min_offset: 0, + }, + GFP_KERNEL, + )?; + + let buffer_ptr_in_user_space = (view.alloc.ptr + alloc_offset) as u64; + + if obj.flags & uapi::BINDER_BUFFER_FLAG_HAS_PARENT == 0 { + sg_state.ancestors.clear(); + sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?; + } else { + // Another buffer also has a pointer to this buffer, and we need to fixup that + // pointer too. + + let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?; + let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?; + + let info = sg_state.validate_parent_fixup( + parent_index, + parent_offset, + size_of::<u64>(), + )?; + + sg_state.ancestors.truncate(info.num_ancestors); + sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?; + + let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) { + Some(parent_entry) => parent_entry, + None => { + pr_err!( + "validate_parent_fixup returned index out of bounds for sg.entries" + ); + return Err(EINVAL.into()); + } + }; + + parent_entry.fixup_min_offset = info.new_min_offset; + parent_entry.pointer_fixups.push( + PointerFixupEntry { + skip: 0, + pointer_value: buffer_ptr_in_user_space, + target_offset: info.target_offset, + }, + GFP_KERNEL, + )?; + } + + let mut obj_write = BinderBufferObject::default(); + obj_write.hdr.type_ = BINDER_TYPE_PTR; + obj_write.flags = obj.flags; + obj_write.buffer = buffer_ptr_in_user_space; + obj_write.length = obj.length; + obj_write.parent = obj.parent; + obj_write.parent_offset = obj.parent_offset; + view.write::<BinderBufferObject>(offset, &obj_write)?; + } + BinderObjectRef::Fda(obj) => { + if !allow_fds { + return Err(EPERM.into()); + } + let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?; + let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?; + let num_fds = usize::try_from(obj.num_fds).map_err(|_| EINVAL)?; + let fds_len = num_fds.checked_mul(size_of::<u32>()).ok_or(EINVAL)?; + + let info = sg_state.validate_parent_fixup(parent_index, parent_offset, fds_len)?; + view.alloc.info_add_fd_reserve(num_fds)?; + + sg_state.ancestors.truncate(info.num_ancestors); + let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) { + Some(parent_entry) => parent_entry, + None => { + pr_err!( + "validate_parent_fixup returned index out of bounds for sg.entries" + ); + return Err(EINVAL.into()); + } + }; + + parent_entry.fixup_min_offset = info.new_min_offset; + parent_entry + .pointer_fixups + .push( + PointerFixupEntry { + skip: fds_len, + pointer_value: 0, + target_offset: info.target_offset, + }, + GFP_KERNEL, + ) + .map_err(|_| ENOMEM)?; + + let fda_uaddr = parent_entry + .sender_uaddr + .checked_add(parent_offset) + .ok_or(EINVAL)?; + let mut fda_bytes = KVec::new(); + UserSlice::new(UserPtr::from_addr(fda_uaddr as _), fds_len) + .read_all(&mut fda_bytes, GFP_KERNEL)?; + + if fds_len != fda_bytes.len() { + pr_err!("UserSlice::read_all returned wrong length in BINDER_TYPE_FDA"); + return Err(EINVAL.into()); + } + + for i in (0..fds_len).step_by(size_of::<u32>()) { + let fd = { + let mut fd_bytes = [0u8; size_of::<u32>()]; + fd_bytes.copy_from_slice(&fda_bytes[i..i + size_of::<u32>()]); + u32::from_ne_bytes(fd_bytes) + }; + + let file = LocalFile::fget(fd)?; + // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an + // ioctl, so there are no active calls to `fdget_pos` on this thread. + let file = unsafe { LocalFile::assume_no_fdget_pos(file) }; + security::binder_transfer_file( + &self.process.cred, + &view.alloc.process.cred, + &file, + )?; + + // The `validate_parent_fixup` call ensuers that this addition will not + // overflow. + view.alloc.info_add_fd(file, info.target_offset + i, true)?; + } + drop(fda_bytes); + + let mut obj_write = BinderFdArrayObject::default(); + obj_write.hdr.type_ = BINDER_TYPE_FDA; + obj_write.num_fds = obj.num_fds; + obj_write.parent = obj.parent; + obj_write.parent_offset = obj.parent_offset; + view.write::<BinderFdArrayObject>(offset, &obj_write)?; + } + } + Ok(()) + } + + fn apply_sg(&self, alloc: &mut Allocation, sg_state: &mut ScatterGatherState) -> BinderResult { + for sg_entry in &mut sg_state.sg_entries { + let mut end_of_previous_fixup = sg_entry.offset; + let offset_end = sg_entry.offset.checked_add(sg_entry.length).ok_or(EINVAL)?; + + let mut reader = + UserSlice::new(UserPtr::from_addr(sg_entry.sender_uaddr), sg_entry.length).reader(); + for fixup in &mut sg_entry.pointer_fixups { + let fixup_len = if fixup.skip == 0 { + size_of::<u64>() + } else { + fixup.skip + }; + + let target_offset_end = fixup.target_offset.checked_add(fixup_len).ok_or(EINVAL)?; + if fixup.target_offset < end_of_previous_fixup || offset_end < target_offset_end { + pr_warn!( + "Fixups oob {} {} {} {}", + fixup.target_offset, + end_of_previous_fixup, + offset_end, + target_offset_end + ); + return Err(EINVAL.into()); + } + + let copy_off = end_of_previous_fixup; + let copy_len = fixup.target_offset - end_of_previous_fixup; + if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) { + pr_warn!("Failed copying into alloc: {:?}", err); + return Err(err.into()); + } + if fixup.skip == 0 { + let res = alloc.write::<u64>(fixup.target_offset, &fixup.pointer_value); + if let Err(err) = res { + pr_warn!("Failed copying ptr into alloc: {:?}", err); + return Err(err.into()); + } + } + if let Err(err) = reader.skip(fixup_len) { + pr_warn!("Failed skipping {} from reader: {:?}", fixup_len, err); + return Err(err.into()); + } + end_of_previous_fixup = target_offset_end; + } + let copy_off = end_of_previous_fixup; + let copy_len = offset_end - end_of_previous_fixup; + if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) { + pr_warn!("Failed copying remainder into alloc: {:?}", err); + return Err(err.into()); + } + } + Ok(()) + } + + /// This method copies the payload of a transaction into the target process. + /// + /// The resulting payload will have several different components, which will be stored next to + /// each other in the allocation. Furthermore, various objects can be embedded in the payload, + /// and those objects have to be translated so that they make sense to the target transaction. + pub(crate) fn copy_transaction_data( + &self, + to_process: Arc<Process>, + tr: &BinderTransactionDataSg, + debug_id: usize, + allow_fds: bool, + txn_security_ctx_offset: Option<&mut usize>, + ) -> BinderResult<NewAllocation> { + let trd = &tr.transaction_data; + let is_oneway = trd.flags & TF_ONE_WAY != 0; + let mut secctx = if let Some(offset) = txn_security_ctx_offset { + let secid = self.process.cred.get_secid(); + let ctx = match security::SecurityCtx::from_secid(secid) { + Ok(ctx) => ctx, + Err(err) => { + pr_warn!("Failed to get security ctx for id {}: {:?}", secid, err); + return Err(err.into()); + } + }; + Some((offset, ctx)) + } else { + None + }; + + let data_size = trd.data_size.try_into().map_err(|_| EINVAL)?; + let aligned_data_size = ptr_align(data_size).ok_or(EINVAL)?; + let offsets_size = trd.offsets_size.try_into().map_err(|_| EINVAL)?; + let aligned_offsets_size = ptr_align(offsets_size).ok_or(EINVAL)?; + let buffers_size = tr.buffers_size.try_into().map_err(|_| EINVAL)?; + let aligned_buffers_size = ptr_align(buffers_size).ok_or(EINVAL)?; + let aligned_secctx_size = match secctx.as_ref() { + Some((_offset, ctx)) => ptr_align(ctx.len()).ok_or(EINVAL)?, + None => 0, + }; + + // This guarantees that at least `sizeof(usize)` bytes will be allocated. + let len = usize::max( + aligned_data_size + .checked_add(aligned_offsets_size) + .and_then(|sum| sum.checked_add(aligned_buffers_size)) + .and_then(|sum| sum.checked_add(aligned_secctx_size)) + .ok_or(ENOMEM)?, + size_of::<usize>(), + ); + let secctx_off = aligned_data_size + aligned_offsets_size + aligned_buffers_size; + let mut alloc = + match to_process.buffer_alloc(debug_id, len, is_oneway, self.process.task.pid()) { + Ok(alloc) => alloc, + Err(err) => { + pr_warn!( + "Failed to allocate buffer. len:{}, is_oneway:{}", + len, + is_oneway + ); + return Err(err); + } + }; + + // SAFETY: This accesses a union field, but it's okay because the field's type is valid for + // all bit-patterns. + let trd_data_ptr = unsafe { &trd.data.ptr }; + let mut buffer_reader = + UserSlice::new(UserPtr::from_addr(trd_data_ptr.buffer as _), data_size).reader(); + let mut end_of_previous_object = 0; + let mut sg_state = None; + + // Copy offsets if there are any. + if offsets_size > 0 { + { + let mut reader = + UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size) + .reader(); + alloc.copy_into(&mut reader, aligned_data_size, offsets_size)?; + } + + let offsets_start = aligned_data_size; + let offsets_end = aligned_data_size + aligned_offsets_size; + + // This state is used for BINDER_TYPE_PTR objects. + let sg_state = sg_state.insert(ScatterGatherState { + unused_buffer_space: UnusedBufferSpace { + offset: offsets_end, + limit: len, + }, + sg_entries: KVec::new(), + ancestors: KVec::new(), + }); + + // Traverse the objects specified. + let mut view = AllocationView::new(&mut alloc, data_size); + for (index, index_offset) in (offsets_start..offsets_end) + .step_by(size_of::<usize>()) + .enumerate() + { + let offset = view.alloc.read(index_offset)?; + + if offset < end_of_previous_object { + pr_warn!("Got transaction with invalid offset."); + return Err(EINVAL.into()); + } + + // Copy data between two objects. + if end_of_previous_object < offset { + view.copy_into( + &mut buffer_reader, + end_of_previous_object, + offset - end_of_previous_object, + )?; + } + + let mut object = BinderObject::read_from(&mut buffer_reader)?; + + match self.translate_object( + index, + offset, + object.as_ref(), + &mut view, + allow_fds, + sg_state, + ) { + Ok(()) => end_of_previous_object = offset + object.size(), + Err(err) => { + pr_warn!("Error while translating object."); + return Err(err); + } + } + + // Update the indexes containing objects to clean up. + let offset_after_object = index_offset + size_of::<usize>(); + view.alloc + .set_info_offsets(offsets_start..offset_after_object); + } + } + + // Copy remaining raw data. + alloc.copy_into( + &mut buffer_reader, + end_of_previous_object, + data_size - end_of_previous_object, + )?; + + if let Some(sg_state) = sg_state.as_mut() { + if let Err(err) = self.apply_sg(&mut alloc, sg_state) { + pr_warn!("Failure in apply_sg: {:?}", err); + return Err(err); + } + } + + if let Some((off_out, secctx)) = secctx.as_mut() { + if let Err(err) = alloc.write(secctx_off, secctx.as_bytes()) { + pr_warn!("Failed to write security context: {:?}", err); + return Err(err.into()); + } + **off_out = secctx_off; + } + Ok(alloc) + } + + fn unwind_transaction_stack(self: &Arc<Self>) { + let mut thread = self.clone(); + while let Ok(transaction) = { + let mut inner = thread.inner.lock(); + inner.pop_transaction_to_reply(thread.as_ref()) + } { + let reply = Err(BR_DEAD_REPLY); + if !transaction.from.deliver_single_reply(reply, &transaction) { + break; + } + + thread = transaction.from.clone(); + } + } + + pub(crate) fn deliver_reply( + &self, + reply: Result<DLArc<Transaction>, u32>, + transaction: &DArc<Transaction>, + ) { + if self.deliver_single_reply(reply, transaction) { + transaction.from.unwind_transaction_stack(); + } + } + + /// Delivers a reply to the thread that started a transaction. The reply can either be a + /// reply-transaction or an error code to be delivered instead. + /// + /// Returns whether the thread is dead. If it is, the caller is expected to unwind the + /// transaction stack by completing transactions for threads that are dead. + fn deliver_single_reply( + &self, + reply: Result<DLArc<Transaction>, u32>, + transaction: &DArc<Transaction>, + ) -> bool { + if let Ok(transaction) = &reply { + transaction.set_outstanding(&mut self.process.inner.lock()); + } + + { + let mut inner = self.inner.lock(); + if !inner.pop_transaction_replied(transaction) { + return false; + } + + if inner.is_dead { + return true; + } + + match reply { + Ok(work) => { + inner.push_work(work); + } + Err(code) => inner.push_reply_work(code), + } + } + + // Notify the thread now that we've released the inner lock. + self.work_condvar.notify_sync(); + false + } + + /// Determines if the given transaction is the current transaction for this thread. + fn is_current_transaction(&self, transaction: &DArc<Transaction>) -> bool { + let inner = self.inner.lock(); + match &inner.current_transaction { + None => false, + Some(current) => Arc::ptr_eq(current, transaction), + } + } + + /// Determines the current top of the transaction stack. It fails if the top is in another + /// thread (i.e., this thread belongs to a stack but it has called another thread). The top is + /// [`None`] if the thread is not currently participating in a transaction stack. + fn top_of_transaction_stack(&self) -> Result<Option<DArc<Transaction>>> { + let inner = self.inner.lock(); + if let Some(cur) = &inner.current_transaction { + if core::ptr::eq(self, cur.from.as_ref()) { + pr_warn!("got new transaction with bad transaction stack"); + return Err(EINVAL); + } + Ok(Some(cur.clone())) + } else { + Ok(None) + } + } + + fn transaction<T>(self: &Arc<Self>, tr: &BinderTransactionDataSg, inner: T) + where + T: FnOnce(&Arc<Self>, &BinderTransactionDataSg) -> BinderResult, + { + if let Err(err) = inner(self, tr) { + if err.should_pr_warn() { + let mut ee = self.inner.lock().extended_error; + ee.command = err.reply; + ee.param = err.as_errno(); + pr_warn!( + "Transaction failed: {:?} my_pid:{}", + err, + self.process.pid_in_current_ns() + ); + } + + self.push_return_work(err.reply); + } + } + + fn transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { + // SAFETY: Handle's type has no invalid bit patterns. + let handle = unsafe { tr.transaction_data.target.handle }; + let node_ref = self.process.get_transaction_node(handle)?; + security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?; + // TODO: We need to ensure that there isn't a pending transaction in the work queue. How + // could this happen? + let top = self.top_of_transaction_stack()?; + let list_completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?; + let completion = list_completion.clone_arc(); + let transaction = Transaction::new(node_ref, top, self, tr)?; + + // Check that the transaction stack hasn't changed while the lock was released, then update + // it with the new transaction. + { + let mut inner = self.inner.lock(); + if !transaction.is_stacked_on(&inner.current_transaction) { + pr_warn!("Transaction stack changed during transaction!"); + return Err(EINVAL.into()); + } + inner.current_transaction = Some(transaction.clone_arc()); + // We push the completion as a deferred work so that we wait for the reply before + // returning to userland. + inner.push_work_deferred(list_completion); + } + + if let Err(e) = transaction.submit() { + completion.skip(); + // Define `transaction` first to drop it after `inner`. + let transaction; + let mut inner = self.inner.lock(); + transaction = inner.current_transaction.take().unwrap(); + inner.current_transaction = transaction.clone_next(); + Err(e) + } else { + Ok(()) + } + } + + fn reply_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { + let orig = self.inner.lock().pop_transaction_to_reply(self)?; + if !orig.from.is_current_transaction(&orig) { + return Err(EINVAL.into()); + } + + // We need to complete the transaction even if we cannot complete building the reply. + let out = (|| -> BinderResult<_> { + let completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?; + let process = orig.from.process.clone(); + let allow_fds = orig.flags & TF_ACCEPT_FDS != 0; + let reply = Transaction::new_reply(self, process, tr, allow_fds)?; + self.inner.lock().push_work(completion); + orig.from.deliver_reply(Ok(reply), &orig); + Ok(()) + })() + .map_err(|mut err| { + // At this point we only return `BR_TRANSACTION_COMPLETE` to the caller, and we must let + // the sender know that the transaction has completed (with an error in this case). + pr_warn!( + "Failure {:?} during reply - delivering BR_FAILED_REPLY to sender.", + err + ); + let reply = Err(BR_FAILED_REPLY); + orig.from.deliver_reply(reply, &orig); + err.reply = BR_TRANSACTION_COMPLETE; + err + }); + + out + } + + fn oneway_transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult { + // SAFETY: The `handle` field is valid for all possible byte values, so reading from the + // union is okay. + let handle = unsafe { tr.transaction_data.target.handle }; + let node_ref = self.process.get_transaction_node(handle)?; + security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?; + let transaction = Transaction::new(node_ref, None, self, tr)?; + let code = if self.process.is_oneway_spam_detection_enabled() + && transaction.oneway_spam_detected + { + BR_ONEWAY_SPAM_SUSPECT + } else { + BR_TRANSACTION_COMPLETE + }; + let list_completion = DTRWrap::arc_try_new(DeliverCode::new(code))?; + let completion = list_completion.clone_arc(); + self.inner.lock().push_work(list_completion); + match transaction.submit() { + Ok(()) => Ok(()), + Err(err) => { + completion.skip(); + Err(err) + } + } + } + + fn write(self: &Arc<Self>, req: &mut BinderWriteRead) -> Result { + let write_start = req.write_buffer.wrapping_add(req.write_consumed); + let write_len = req.write_size.saturating_sub(req.write_consumed); + let mut reader = + UserSlice::new(UserPtr::from_addr(write_start as _), write_len as _).reader(); + + while reader.len() >= size_of::<u32>() && self.inner.lock().return_work.is_unused() { + let before = reader.len(); + let cmd = reader.read::<u32>()?; + GLOBAL_STATS.inc_bc(cmd); + self.process.stats.inc_bc(cmd); + match cmd { + BC_TRANSACTION => { + let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0); + if tr.transaction_data.flags & TF_ONE_WAY != 0 { + self.transaction(&tr, Self::oneway_transaction_inner); + } else { + self.transaction(&tr, Self::transaction_inner); + } + } + BC_TRANSACTION_SG => { + let tr = reader.read::<BinderTransactionDataSg>()?; + if tr.transaction_data.flags & TF_ONE_WAY != 0 { + self.transaction(&tr, Self::oneway_transaction_inner); + } else { + self.transaction(&tr, Self::transaction_inner); + } + } + BC_REPLY => { + let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0); + self.transaction(&tr, Self::reply_inner) + } + BC_REPLY_SG => { + let tr = reader.read::<BinderTransactionDataSg>()?; + self.transaction(&tr, Self::reply_inner) + } + BC_FREE_BUFFER => { + let buffer = self.process.buffer_get(reader.read()?); + if let Some(buffer) = buffer { + if buffer.looper_need_return_on_free() { + self.inner.lock().looper_need_return = true; + } + drop(buffer); + } + } + BC_INCREFS => { + self.process + .as_arc_borrow() + .update_ref(reader.read()?, true, false)? + } + BC_ACQUIRE => { + self.process + .as_arc_borrow() + .update_ref(reader.read()?, true, true)? + } + BC_RELEASE => { + self.process + .as_arc_borrow() + .update_ref(reader.read()?, false, true)? + } + BC_DECREFS => { + self.process + .as_arc_borrow() + .update_ref(reader.read()?, false, false)? + } + BC_INCREFS_DONE => self.process.inc_ref_done(&mut reader, false)?, + BC_ACQUIRE_DONE => self.process.inc_ref_done(&mut reader, true)?, + BC_REQUEST_DEATH_NOTIFICATION => self.process.request_death(&mut reader, self)?, + BC_CLEAR_DEATH_NOTIFICATION => self.process.clear_death(&mut reader, self)?, + BC_DEAD_BINDER_DONE => self.process.dead_binder_done(reader.read()?, self), + BC_REGISTER_LOOPER => { + let valid = self.process.register_thread(); + self.inner.lock().looper_register(valid); + } + BC_ENTER_LOOPER => self.inner.lock().looper_enter(), + BC_EXIT_LOOPER => self.inner.lock().looper_exit(), + BC_REQUEST_FREEZE_NOTIFICATION => self.process.request_freeze_notif(&mut reader)?, + BC_CLEAR_FREEZE_NOTIFICATION => self.process.clear_freeze_notif(&mut reader)?, + BC_FREEZE_NOTIFICATION_DONE => self.process.freeze_notif_done(&mut reader)?, + + // Fail if given an unknown error code. + // BC_ATTEMPT_ACQUIRE and BC_ACQUIRE_RESULT are no longer supported. + _ => return Err(EINVAL), + } + // Update the number of write bytes consumed. + req.write_consumed += (before - reader.len()) as u64; + } + + Ok(()) + } + + fn read(self: &Arc<Self>, req: &mut BinderWriteRead, wait: bool) -> Result { + let read_start = req.read_buffer.wrapping_add(req.read_consumed); + let read_len = req.read_size.saturating_sub(req.read_consumed); + let mut writer = BinderReturnWriter::new( + UserSlice::new(UserPtr::from_addr(read_start as _), read_len as _).writer(), + self, + ); + let (in_pool, use_proc_queue) = { + let inner = self.inner.lock(); + (inner.is_looper(), inner.should_use_process_work_queue()) + }; + + let getter = if use_proc_queue { + Self::get_work + } else { + Self::get_work_local + }; + + // Reserve some room at the beginning of the read buffer so that we can send a + // BR_SPAWN_LOOPER if we need to. + let mut has_noop_placeholder = false; + if req.read_consumed == 0 { + if let Err(err) = writer.write_code(BR_NOOP) { + pr_warn!("Failure when writing BR_NOOP at beginning of buffer."); + return Err(err); + } + has_noop_placeholder = true; + } + + // Loop doing work while there is room in the buffer. + let initial_len = writer.len(); + while writer.len() >= size_of::<uapi::binder_transaction_data_secctx>() + 4 { + match getter(self, wait && initial_len == writer.len()) { + Ok(Some(work)) => match work.into_arc().do_work(self, &mut writer) { + Ok(true) => {} + Ok(false) => break, + Err(err) => { + return Err(err); + } + }, + Ok(None) => { + break; + } + Err(err) => { + // Propagate the error if we haven't written anything else. + if err != EINTR && err != EAGAIN { + pr_warn!("Failure in work getter: {:?}", err); + } + if initial_len == writer.len() { + return Err(err); + } else { + break; + } + } + } + } + + req.read_consumed += read_len - writer.len() as u64; + + // Write BR_SPAWN_LOOPER if the process needs more threads for its pool. + if has_noop_placeholder && in_pool && self.process.needs_thread() { + let mut writer = + UserSlice::new(UserPtr::from_addr(req.read_buffer as _), req.read_size as _) + .writer(); + writer.write(&BR_SPAWN_LOOPER)?; + } + Ok(()) + } + + pub(crate) fn write_read(self: &Arc<Self>, data: UserSlice, wait: bool) -> Result { + let (mut reader, mut writer) = data.reader_writer(); + let mut req = reader.read::<BinderWriteRead>()?; + + // Go through the write buffer. + let mut ret = Ok(()); + if req.write_size > 0 { + ret = self.write(&mut req); + if let Err(err) = ret { + pr_warn!( + "Write failure {:?} in pid:{}", + err, + self.process.pid_in_current_ns() + ); + req.read_consumed = 0; + writer.write(&req)?; + self.inner.lock().looper_need_return = false; + return ret; + } + } + + // Go through the work queue. + if req.read_size > 0 { + ret = self.read(&mut req, wait); + if ret.is_err() && ret != Err(EINTR) { + pr_warn!( + "Read failure {:?} in pid:{}", + ret, + self.process.pid_in_current_ns() + ); + } + } + + // Write the request back so that the consumed fields are visible to the caller. + writer.write(&req)?; + + self.inner.lock().looper_need_return = false; + + ret + } + + pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) { + table.register_wait(file, &self.work_condvar); + let mut inner = self.inner.lock(); + (inner.should_use_process_work_queue(), inner.poll()) + } + + /// Make the call to `get_work` or `get_work_local` return immediately, if any. + pub(crate) fn exit_looper(&self) { + let mut inner = self.inner.lock(); + let should_notify = inner.looper_flags & LOOPER_WAITING != 0; + if should_notify { + inner.looper_need_return = true; + } + drop(inner); + + if should_notify { + self.work_condvar.notify_one(); + } + } + + pub(crate) fn notify_if_poll_ready(&self, sync: bool) { + // Determine if we need to notify. This requires the lock. + let inner = self.inner.lock(); + let notify = inner.looper_flags & LOOPER_POLL != 0 && inner.should_use_process_work_queue(); + drop(inner); + + // Now that the lock is no longer held, notify the waiters if we have to. + if notify { + if sync { + self.work_condvar.notify_sync(); + } else { + self.work_condvar.notify_one(); + } + } + } + + pub(crate) fn release(self: &Arc<Self>) { + self.inner.lock().is_dead = true; + + //self.work_condvar.clear(); + self.unwind_transaction_stack(); + + // Cancel all pending work items. + while let Ok(Some(work)) = self.get_work_local(false) { + work.into_arc().cancel(); + } + } +} + +#[pin_data] +struct ThreadError { + error_code: AtomicU32, + #[pin] + links_track: AtomicTracker, +} + +impl ThreadError { + fn try_new() -> Result<DArc<Self>> { + DTRWrap::arc_pin_init(pin_init!(Self { + error_code: AtomicU32::new(BR_OK), + links_track <- AtomicTracker::new(), + })) + .map(ListArc::into_arc) + } + + fn set_error_code(&self, code: u32) { + self.error_code.store(code, Ordering::Relaxed); + } + + fn is_unused(&self) -> bool { + self.error_code.load(Ordering::Relaxed) == BR_OK + } +} + +impl DeliverToRead for ThreadError { + fn do_work( + self: DArc<Self>, + _thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let code = self.error_code.load(Ordering::Relaxed); + self.error_code.store(BR_OK, Ordering::Relaxed); + writer.write_code(code)?; + Ok(true) + } + + fn cancel(self: DArc<Self>) {} + + fn should_sync_wakeup(&self) -> bool { + false + } + + fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> { + seq_print!( + m, + "{}transaction error: {}\n", + prefix, + self.error_code.load(Ordering::Relaxed) + ); + Ok(()) + } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for ThreadError { + tracked_by links_track: AtomicTracker; + } +} diff --git a/drivers/android/binder/trace.rs b/drivers/android/binder/trace.rs new file mode 100644 index 000000000000..af0e4392805e --- /dev/null +++ b/drivers/android/binder/trace.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use kernel::ffi::{c_uint, c_ulong}; +use kernel::tracepoint::declare_trace; + +declare_trace! { + unsafe fn rust_binder_ioctl(cmd: c_uint, arg: c_ulong); +} + +#[inline] +pub(crate) fn trace_ioctl(cmd: u32, arg: usize) { + // SAFETY: Always safe to call. + unsafe { rust_binder_ioctl(cmd, arg as c_ulong) } +} diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs new file mode 100644 index 000000000000..4bd3c0e417eb --- /dev/null +++ b/drivers/android/binder/transaction.rs @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +use core::sync::atomic::{AtomicBool, Ordering}; +use kernel::{ + prelude::*, + seq_file::SeqFile, + seq_print, + sync::{Arc, SpinLock}, + task::Kuid, + time::{Instant, Monotonic}, + types::ScopeGuard, +}; + +use crate::{ + allocation::{Allocation, TranslatedFds}, + defs::*, + error::{BinderError, BinderResult}, + node::{Node, NodeRef}, + process::{Process, ProcessInner}, + ptr_align, + thread::{PushWorkRes, Thread}, + BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead, +}; + +#[pin_data(PinnedDrop)] +pub(crate) struct Transaction { + pub(crate) debug_id: usize, + target_node: Option<DArc<Node>>, + pub(crate) from_parent: Option<DArc<Transaction>>, + pub(crate) from: Arc<Thread>, + pub(crate) to: Arc<Process>, + #[pin] + allocation: SpinLock<Option<Allocation>>, + is_outstanding: AtomicBool, + code: u32, + pub(crate) flags: u32, + data_size: usize, + offsets_size: usize, + data_address: usize, + sender_euid: Kuid, + txn_security_ctx_off: Option<usize>, + pub(crate) oneway_spam_detected: bool, + start_time: Instant<Monotonic>, +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<0> for Transaction { untracked; } +} + +impl Transaction { + pub(crate) fn new( + node_ref: NodeRef, + from_parent: Option<DArc<Transaction>>, + from: &Arc<Thread>, + tr: &BinderTransactionDataSg, + ) -> BinderResult<DLArc<Self>> { + let debug_id = super::next_debug_id(); + let trd = &tr.transaction_data; + let allow_fds = node_ref.node.flags & FLAT_BINDER_FLAG_ACCEPTS_FDS != 0; + let txn_security_ctx = node_ref.node.flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX != 0; + let mut txn_security_ctx_off = if txn_security_ctx { Some(0) } else { None }; + let to = node_ref.node.owner.clone(); + let mut alloc = match from.copy_transaction_data( + to.clone(), + tr, + debug_id, + allow_fds, + txn_security_ctx_off.as_mut(), + ) { + Ok(alloc) => alloc, + Err(err) => { + if !err.is_dead() { + pr_warn!("Failure in copy_transaction_data: {:?}", err); + } + return Err(err); + } + }; + let oneway_spam_detected = alloc.oneway_spam_detected; + if trd.flags & TF_ONE_WAY != 0 { + if from_parent.is_some() { + pr_warn!("Oneway transaction should not be in a transaction stack."); + return Err(EINVAL.into()); + } + alloc.set_info_oneway_node(node_ref.node.clone()); + } + if trd.flags & TF_CLEAR_BUF != 0 { + alloc.set_info_clear_on_drop(); + } + let target_node = node_ref.node.clone(); + alloc.set_info_target_node(node_ref); + let data_address = alloc.ptr; + + Ok(DTRWrap::arc_pin_init(pin_init!(Transaction { + debug_id, + target_node: Some(target_node), + from_parent, + sender_euid: from.process.task.euid(), + from: from.clone(), + to, + code: trd.code, + flags: trd.flags, + data_size: trd.data_size as _, + offsets_size: trd.offsets_size as _, + data_address, + allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"), + is_outstanding: AtomicBool::new(false), + txn_security_ctx_off, + oneway_spam_detected, + start_time: Instant::now(), + }))?) + } + + pub(crate) fn new_reply( + from: &Arc<Thread>, + to: Arc<Process>, + tr: &BinderTransactionDataSg, + allow_fds: bool, + ) -> BinderResult<DLArc<Self>> { + let debug_id = super::next_debug_id(); + let trd = &tr.transaction_data; + let mut alloc = match from.copy_transaction_data(to.clone(), tr, debug_id, allow_fds, None) + { + Ok(alloc) => alloc, + Err(err) => { + pr_warn!("Failure in copy_transaction_data: {:?}", err); + return Err(err); + } + }; + let oneway_spam_detected = alloc.oneway_spam_detected; + if trd.flags & TF_CLEAR_BUF != 0 { + alloc.set_info_clear_on_drop(); + } + Ok(DTRWrap::arc_pin_init(pin_init!(Transaction { + debug_id, + target_node: None, + from_parent: None, + sender_euid: from.process.task.euid(), + from: from.clone(), + to, + code: trd.code, + flags: trd.flags, + data_size: trd.data_size as _, + offsets_size: trd.offsets_size as _, + data_address: alloc.ptr, + allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"), + is_outstanding: AtomicBool::new(false), + txn_security_ctx_off: None, + oneway_spam_detected, + start_time: Instant::now(), + }))?) + } + + #[inline(never)] + pub(crate) fn debug_print_inner(&self, m: &SeqFile, prefix: &str) { + seq_print!( + m, + "{}{}: from {}:{} to {} code {:x} flags {:x} elapsed {}ms", + prefix, + self.debug_id, + self.from.process.task.pid(), + self.from.id, + self.to.task.pid(), + self.code, + self.flags, + self.start_time.elapsed().as_millis(), + ); + if let Some(target_node) = &self.target_node { + seq_print!(m, " node {}", target_node.debug_id); + } + seq_print!(m, " size {}:{}\n", self.data_size, self.offsets_size); + } + + /// Determines if the transaction is stacked on top of the given transaction. + pub(crate) fn is_stacked_on(&self, onext: &Option<DArc<Self>>) -> bool { + match (&self.from_parent, onext) { + (None, None) => true, + (Some(from_parent), Some(next)) => Arc::ptr_eq(from_parent, next), + _ => false, + } + } + + /// Returns a pointer to the next transaction on the transaction stack, if there is one. + pub(crate) fn clone_next(&self) -> Option<DArc<Self>> { + Some(self.from_parent.as_ref()?.clone()) + } + + /// Searches in the transaction stack for a thread that belongs to the target process. This is + /// useful when finding a target for a new transaction: if the node belongs to a process that + /// is already part of the transaction stack, we reuse the thread. + fn find_target_thread(&self) -> Option<Arc<Thread>> { + let mut it = &self.from_parent; + while let Some(transaction) = it { + if Arc::ptr_eq(&transaction.from.process, &self.to) { + return Some(transaction.from.clone()); + } + it = &transaction.from_parent; + } + None + } + + /// Searches in the transaction stack for a transaction originating at the given thread. + pub(crate) fn find_from(&self, thread: &Thread) -> Option<&DArc<Transaction>> { + let mut it = &self.from_parent; + while let Some(transaction) = it { + if core::ptr::eq(thread, transaction.from.as_ref()) { + return Some(transaction); + } + + it = &transaction.from_parent; + } + None + } + + pub(crate) fn set_outstanding(&self, to_process: &mut ProcessInner) { + // No race because this method is only called once. + if !self.is_outstanding.load(Ordering::Relaxed) { + self.is_outstanding.store(true, Ordering::Relaxed); + to_process.add_outstanding_txn(); + } + } + + /// Decrement `outstanding_txns` in `to` if it hasn't already been decremented. + fn drop_outstanding_txn(&self) { + // No race because this is called at most twice, and one of the calls are in the + // destructor, which is guaranteed to not race with any other operations on the + // transaction. It also cannot race with `set_outstanding`, since submission happens + // before delivery. + if self.is_outstanding.load(Ordering::Relaxed) { + self.is_outstanding.store(false, Ordering::Relaxed); + self.to.drop_outstanding_txn(); + } + } + + /// Submits the transaction to a work queue. Uses a thread if there is one in the transaction + /// stack, otherwise uses the destination process. + /// + /// Not used for replies. + pub(crate) fn submit(self: DLArc<Self>) -> BinderResult { + // Defined before `process_inner` so that the destructor runs after releasing the lock. + let mut _t_outdated; + + let oneway = self.flags & TF_ONE_WAY != 0; + let process = self.to.clone(); + let mut process_inner = process.inner.lock(); + + self.set_outstanding(&mut process_inner); + + if oneway { + if let Some(target_node) = self.target_node.clone() { + if process_inner.is_frozen.is_frozen() { + process_inner.async_recv = true; + if self.flags & TF_UPDATE_TXN != 0 { + if let Some(t_outdated) = + target_node.take_outdated_transaction(&self, &mut process_inner) + { + // Save the transaction to be dropped after locks are released. + _t_outdated = t_outdated; + } + } + } + match target_node.submit_oneway(self, &mut process_inner) { + Ok(()) => {} + Err((err, work)) => { + drop(process_inner); + // Drop work after releasing process lock. + drop(work); + return Err(err); + } + } + + if process_inner.is_frozen.is_frozen() { + return Err(BinderError::new_frozen_oneway()); + } else { + return Ok(()); + } + } else { + pr_err!("Failed to submit oneway transaction to node."); + } + } + + if process_inner.is_frozen.is_frozen() { + process_inner.sync_recv = true; + return Err(BinderError::new_frozen()); + } + + let res = if let Some(thread) = self.find_target_thread() { + match thread.push_work(self) { + PushWorkRes::Ok => Ok(()), + PushWorkRes::FailedDead(me) => Err((BinderError::new_dead(), me)), + } + } else { + process_inner.push_work(self) + }; + drop(process_inner); + + match res { + Ok(()) => Ok(()), + Err((err, work)) => { + // Drop work after releasing process lock. + drop(work); + Err(err) + } + } + } + + /// Check whether one oneway transaction can supersede another. + pub(crate) fn can_replace(&self, old: &Transaction) -> bool { + if self.from.process.task.pid() != old.from.process.task.pid() { + return false; + } + + if self.flags & old.flags & (TF_ONE_WAY | TF_UPDATE_TXN) != (TF_ONE_WAY | TF_UPDATE_TXN) { + return false; + } + + let target_node_match = match (self.target_node.as_ref(), old.target_node.as_ref()) { + (None, None) => true, + (Some(tn1), Some(tn2)) => Arc::ptr_eq(tn1, tn2), + _ => false, + }; + + self.code == old.code && self.flags == old.flags && target_node_match + } + + fn prepare_file_list(&self) -> Result<TranslatedFds> { + let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?; + + match alloc.translate_fds() { + Ok(translated) => { + *self.allocation.lock() = Some(alloc); + Ok(translated) + } + Err(err) => { + // Free the allocation eagerly. + drop(alloc); + Err(err) + } + } + } +} + +impl DeliverToRead for Transaction { + fn do_work( + self: DArc<Self>, + thread: &Thread, + writer: &mut BinderReturnWriter<'_>, + ) -> Result<bool> { + let send_failed_reply = ScopeGuard::new(|| { + if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 { + let reply = Err(BR_FAILED_REPLY); + self.from.deliver_reply(reply, &self); + } + self.drop_outstanding_txn(); + }); + + let files = if let Ok(list) = self.prepare_file_list() { + list + } else { + // On failure to process the list, we send a reply back to the sender and ignore the + // transaction on the recipient. + return Ok(true); + }; + + let mut tr_sec = BinderTransactionDataSecctx::default(); + let tr = tr_sec.tr_data(); + if let Some(target_node) = &self.target_node { + let (ptr, cookie) = target_node.get_id(); + tr.target.ptr = ptr as _; + tr.cookie = cookie as _; + }; + tr.code = self.code; + tr.flags = self.flags; + tr.data_size = self.data_size as _; + tr.data.ptr.buffer = self.data_address as _; + tr.offsets_size = self.offsets_size as _; + if tr.offsets_size > 0 { + tr.data.ptr.offsets = (self.data_address + ptr_align(self.data_size).unwrap()) as _; + } + tr.sender_euid = self.sender_euid.into_uid_in_current_ns(); + tr.sender_pid = 0; + if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 { + // Not a reply and not one-way. + tr.sender_pid = self.from.process.pid_in_current_ns(); + } + let code = if self.target_node.is_none() { + BR_REPLY + } else if self.txn_security_ctx_off.is_some() { + BR_TRANSACTION_SEC_CTX + } else { + BR_TRANSACTION + }; + + // Write the transaction code and data to the user buffer. + writer.write_code(code)?; + if let Some(off) = self.txn_security_ctx_off { + tr_sec.secctx = (self.data_address + off) as u64; + writer.write_payload(&tr_sec)?; + } else { + writer.write_payload(&*tr)?; + } + + let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?; + + // Dismiss the completion of transaction with a failure. No failure paths are allowed from + // here on out. + send_failed_reply.dismiss(); + + // Commit files, and set FDs in FDA to be closed on buffer free. + let close_on_free = files.commit(); + alloc.set_info_close_on_free(close_on_free); + + // It is now the user's responsibility to clear the allocation. + alloc.keep_alive(); + + self.drop_outstanding_txn(); + + // When this is not a reply and not a oneway transaction, update `current_transaction`. If + // it's a reply, `current_transaction` has already been updated appropriately. + if self.target_node.is_some() && tr_sec.transaction_data.flags & TF_ONE_WAY == 0 { + thread.set_current_transaction(self); + } + + Ok(false) + } + + fn cancel(self: DArc<Self>) { + let allocation = self.allocation.lock().take(); + drop(allocation); + + // If this is not a reply or oneway transaction, then send a dead reply. + if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 { + let reply = Err(BR_DEAD_REPLY); + self.from.deliver_reply(reply, &self); + } + + self.drop_outstanding_txn(); + } + + fn should_sync_wakeup(&self) -> bool { + self.flags & TF_ONE_WAY == 0 + } + + fn debug_print(&self, m: &SeqFile, _prefix: &str, tprefix: &str) -> Result<()> { + self.debug_print_inner(m, tprefix); + Ok(()) + } +} + +#[pinned_drop] +impl PinnedDrop for Transaction { + fn drop(self: Pin<&mut Self>) { + self.drop_outstanding_txn(); + } +} diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c new file mode 100644 index 000000000000..979c96b74cad --- /dev/null +++ b/drivers/android/binder_alloc.c @@ -0,0 +1,1410 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* binder_alloc.c + * + * Android IPC Subsystem + * + * Copyright (C) 2007-2017 Google, Inc. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/list.h> +#include <linux/sched/mm.h> +#include <linux/module.h> +#include <linux/rtmutex.h> +#include <linux/rbtree.h> +#include <linux/seq_file.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/list_lru.h> +#include <linux/ratelimit.h> +#include <asm/cacheflush.h> +#include <linux/uaccess.h> +#include <linux/highmem.h> +#include <linux/sizes.h> +#include <kunit/visibility.h> +#include "binder_alloc.h" +#include "binder_trace.h" + +static struct list_lru binder_freelist; + +static DEFINE_MUTEX(binder_alloc_mmap_lock); + +enum { + BINDER_DEBUG_USER_ERROR = 1U << 0, + BINDER_DEBUG_OPEN_CLOSE = 1U << 1, + BINDER_DEBUG_BUFFER_ALLOC = 1U << 2, + BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3, +}; +static uint32_t binder_alloc_debug_mask = BINDER_DEBUG_USER_ERROR; + +module_param_named(debug_mask, binder_alloc_debug_mask, + uint, 0644); + +#define binder_alloc_debug(mask, x...) \ + do { \ + if (binder_alloc_debug_mask & mask) \ + pr_info_ratelimited(x); \ + } while (0) + +static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer) +{ + return list_entry(buffer->entry.next, struct binder_buffer, entry); +} + +static struct binder_buffer *binder_buffer_prev(struct binder_buffer *buffer) +{ + return list_entry(buffer->entry.prev, struct binder_buffer, entry); +} + +VISIBLE_IF_KUNIT size_t binder_alloc_buffer_size(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + if (list_is_last(&buffer->entry, &alloc->buffers)) + return alloc->vm_start + alloc->buffer_size - buffer->user_data; + return binder_buffer_next(buffer)->user_data - buffer->user_data; +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_buffer_size); + +static void binder_insert_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->free_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + size_t buffer_size; + size_t new_buffer_size; + + BUG_ON(!new_buffer->free); + + new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: add free buffer, size %zd, at %pK\n", + alloc->pid, new_buffer_size, new_buffer); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (new_buffer_size < buffer_size) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers); +} + +static void binder_insert_allocated_buffer_locked( + struct binder_alloc *alloc, struct binder_buffer *new_buffer) +{ + struct rb_node **p = &alloc->allocated_buffers.rb_node; + struct rb_node *parent = NULL; + struct binder_buffer *buffer; + + BUG_ON(new_buffer->free); + + while (*p) { + parent = *p; + buffer = rb_entry(parent, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (new_buffer->user_data < buffer->user_data) + p = &parent->rb_left; + else if (new_buffer->user_data > buffer->user_data) + p = &parent->rb_right; + else + BUG(); + } + rb_link_node(&new_buffer->rb_node, parent, p); + rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers); +} + +static struct binder_buffer *binder_alloc_prepare_to_free_locked( + struct binder_alloc *alloc, + unsigned long user_ptr) +{ + struct rb_node *n = alloc->allocated_buffers.rb_node; + struct binder_buffer *buffer; + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(buffer->free); + + if (user_ptr < buffer->user_data) { + n = n->rb_left; + } else if (user_ptr > buffer->user_data) { + n = n->rb_right; + } else { + /* + * Guard against user threads attempting to + * free the buffer when in use by kernel or + * after it's already been freed. + */ + if (!buffer->allow_user_free) + return ERR_PTR(-EPERM); + buffer->allow_user_free = 0; + return buffer; + } + } + return NULL; +} + +/** + * binder_alloc_prepare_to_free() - get buffer given user ptr + * @alloc: binder_alloc for this proc + * @user_ptr: User pointer to buffer data + * + * Validate userspace pointer to buffer data and return buffer corresponding to + * that user pointer. Search the rb tree for buffer that matches user data + * pointer. + * + * Return: Pointer to buffer or NULL + */ +struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc, + unsigned long user_ptr) +{ + guard(mutex)(&alloc->mutex); + return binder_alloc_prepare_to_free_locked(alloc, user_ptr); +} + +static inline void +binder_set_installed_page(struct binder_alloc *alloc, + unsigned long index, + struct page *page) +{ + /* Pairs with acquire in binder_get_installed_page() */ + smp_store_release(&alloc->pages[index], page); +} + +static inline struct page * +binder_get_installed_page(struct binder_alloc *alloc, unsigned long index) +{ + /* Pairs with release in binder_set_installed_page() */ + return smp_load_acquire(&alloc->pages[index]); +} + +static void binder_lru_freelist_add(struct binder_alloc *alloc, + unsigned long start, unsigned long end) +{ + unsigned long page_addr; + struct page *page; + + trace_binder_update_page_range(alloc, false, start, end); + + for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { + size_t index; + int ret; + + index = (page_addr - alloc->vm_start) / PAGE_SIZE; + page = binder_get_installed_page(alloc, index); + if (!page) + continue; + + trace_binder_free_lru_start(alloc, index); + + ret = list_lru_add(alloc->freelist, + page_to_lru(page), + page_to_nid(page), + NULL); + WARN_ON(!ret); + + trace_binder_free_lru_end(alloc, index); + } +} + +static inline +void binder_alloc_set_mapped(struct binder_alloc *alloc, bool state) +{ + /* pairs with smp_load_acquire in binder_alloc_is_mapped() */ + smp_store_release(&alloc->mapped, state); +} + +static inline bool binder_alloc_is_mapped(struct binder_alloc *alloc) +{ + /* pairs with smp_store_release in binder_alloc_set_mapped() */ + return smp_load_acquire(&alloc->mapped); +} + +static struct page *binder_page_lookup(struct binder_alloc *alloc, + unsigned long addr) +{ + struct mm_struct *mm = alloc->mm; + struct page *page; + long npages = 0; + + /* + * Find an existing page in the remote mm. If missing, + * don't attempt to fault-in just propagate an error. + */ + mmap_read_lock(mm); + if (binder_alloc_is_mapped(alloc)) + npages = get_user_pages_remote(mm, addr, 1, FOLL_NOFAULT, + &page, NULL); + mmap_read_unlock(mm); + + return npages > 0 ? page : NULL; +} + +static int binder_page_insert(struct binder_alloc *alloc, + unsigned long addr, + struct page *page) +{ + struct mm_struct *mm = alloc->mm; + struct vm_area_struct *vma; + int ret = -ESRCH; + + /* attempt per-vma lock first */ + vma = lock_vma_under_rcu(mm, addr); + if (vma) { + if (binder_alloc_is_mapped(alloc)) + ret = vm_insert_page(vma, addr, page); + vma_end_read(vma); + return ret; + } + + /* fall back to mmap_lock */ + mmap_read_lock(mm); + vma = vma_lookup(mm, addr); + if (vma && binder_alloc_is_mapped(alloc)) + ret = vm_insert_page(vma, addr, page); + mmap_read_unlock(mm); + + return ret; +} + +static struct page *binder_page_alloc(struct binder_alloc *alloc, + unsigned long index) +{ + struct binder_shrinker_mdata *mdata; + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!page) + return NULL; + + /* allocate and install shrinker metadata under page->private */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) { + __free_page(page); + return NULL; + } + + mdata->alloc = alloc; + mdata->page_index = index; + INIT_LIST_HEAD(&mdata->lru); + set_page_private(page, (unsigned long)mdata); + + return page; +} + +static void binder_free_page(struct page *page) +{ + kfree((struct binder_shrinker_mdata *)page_private(page)); + __free_page(page); +} + +static int binder_install_single_page(struct binder_alloc *alloc, + unsigned long index, + unsigned long addr) +{ + struct page *page; + int ret; + + if (!mmget_not_zero(alloc->mm)) + return -ESRCH; + + page = binder_page_alloc(alloc, index); + if (!page) { + ret = -ENOMEM; + goto out; + } + + ret = binder_page_insert(alloc, addr, page); + switch (ret) { + case -EBUSY: + /* + * EBUSY is ok. Someone installed the pte first but the + * alloc->pages[index] has not been updated yet. Discard + * our page and look up the one already installed. + */ + ret = 0; + binder_free_page(page); + page = binder_page_lookup(alloc, addr); + if (!page) { + pr_err("%d: failed to find page at offset %lx\n", + alloc->pid, addr - alloc->vm_start); + ret = -ESRCH; + break; + } + fallthrough; + case 0: + /* Mark page installation complete and safe to use */ + binder_set_installed_page(alloc, index, page); + break; + default: + binder_free_page(page); + pr_err("%d: %s failed to insert page at offset %lx with %d\n", + alloc->pid, __func__, addr - alloc->vm_start, ret); + break; + } +out: + mmput_async(alloc->mm); + return ret; +} + +static int binder_install_buffer_pages(struct binder_alloc *alloc, + struct binder_buffer *buffer, + size_t size) +{ + unsigned long start, final; + unsigned long page_addr; + + start = buffer->user_data & PAGE_MASK; + final = PAGE_ALIGN(buffer->user_data + size); + + for (page_addr = start; page_addr < final; page_addr += PAGE_SIZE) { + unsigned long index; + int ret; + + index = (page_addr - alloc->vm_start) / PAGE_SIZE; + if (binder_get_installed_page(alloc, index)) + continue; + + trace_binder_alloc_page_start(alloc, index); + + ret = binder_install_single_page(alloc, index, page_addr); + if (ret) + return ret; + + trace_binder_alloc_page_end(alloc, index); + } + + return 0; +} + +/* The range of pages should exclude those shared with other buffers */ +static void binder_lru_freelist_del(struct binder_alloc *alloc, + unsigned long start, unsigned long end) +{ + unsigned long page_addr; + struct page *page; + + trace_binder_update_page_range(alloc, true, start, end); + + for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { + unsigned long index; + bool on_lru; + + index = (page_addr - alloc->vm_start) / PAGE_SIZE; + page = binder_get_installed_page(alloc, index); + + if (page) { + trace_binder_alloc_lru_start(alloc, index); + + on_lru = list_lru_del(alloc->freelist, + page_to_lru(page), + page_to_nid(page), + NULL); + WARN_ON(!on_lru); + + trace_binder_alloc_lru_end(alloc, index); + continue; + } + + if (index + 1 > alloc->pages_high) + alloc->pages_high = index + 1; + } +} + +static void debug_no_space_locked(struct binder_alloc *alloc) +{ + size_t largest_alloc_size = 0; + struct binder_buffer *buffer; + size_t allocated_buffers = 0; + size_t largest_free_size = 0; + size_t total_alloc_size = 0; + size_t total_free_size = 0; + size_t free_buffers = 0; + size_t buffer_size; + struct rb_node *n; + + for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + allocated_buffers++; + total_alloc_size += buffer_size; + if (buffer_size > largest_alloc_size) + largest_alloc_size = buffer_size; + } + + for (n = rb_first(&alloc->free_buffers); n; n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + free_buffers++; + total_free_size += buffer_size; + if (buffer_size > largest_free_size) + largest_free_size = buffer_size; + } + + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n", + total_alloc_size, allocated_buffers, + largest_alloc_size, total_free_size, + free_buffers, largest_free_size); +} + +static bool debug_low_async_space_locked(struct binder_alloc *alloc) +{ + /* + * Find the amount and size of buffers allocated by the current caller; + * The idea is that once we cross the threshold, whoever is responsible + * for the low async space is likely to try to send another async txn, + * and at some point we'll catch them in the act. This is more efficient + * than keeping a map per pid. + */ + struct binder_buffer *buffer; + size_t total_alloc_size = 0; + int pid = current->tgid; + size_t num_buffers = 0; + struct rb_node *n; + + /* + * Only start detecting spammers once we have less than 20% of async + * space left (which is less than 10% of total buffer size). + */ + if (alloc->free_async_space >= alloc->buffer_size / 10) { + alloc->oneway_spam_detected = false; + return false; + } + + for (n = rb_first(&alloc->allocated_buffers); n != NULL; + n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + if (buffer->pid != pid) + continue; + if (!buffer->async_transaction) + continue; + total_alloc_size += binder_alloc_buffer_size(alloc, buffer); + num_buffers++; + } + + /* + * Warn if this pid has more than 50 transactions, or more than 50% of + * async space (which is 25% of total buffer size). Oneway spam is only + * detected when the threshold is exceeded. + */ + if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) { + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n", + alloc->pid, pid, num_buffers, total_alloc_size); + if (!alloc->oneway_spam_detected) { + alloc->oneway_spam_detected = true; + return true; + } + } + return false; +} + +/* Callers preallocate @new_buffer, it is freed by this function if unused */ +static struct binder_buffer *binder_alloc_new_buf_locked( + struct binder_alloc *alloc, + struct binder_buffer *new_buffer, + size_t size, + int is_async) +{ + struct rb_node *n = alloc->free_buffers.rb_node; + struct rb_node *best_fit = NULL; + struct binder_buffer *buffer; + unsigned long next_used_page; + unsigned long curr_last_page; + size_t buffer_size; + + if (is_async && alloc->free_async_space < size) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd failed, no async space left\n", + alloc->pid, size); + buffer = ERR_PTR(-ENOSPC); + goto out; + } + + while (n) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + BUG_ON(!buffer->free); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + if (size < buffer_size) { + best_fit = n; + n = n->rb_left; + } else if (size > buffer_size) { + n = n->rb_right; + } else { + best_fit = n; + break; + } + } + + if (unlikely(!best_fit)) { + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%d: binder_alloc_buf size %zd failed, no address space\n", + alloc->pid, size); + debug_no_space_locked(alloc); + buffer = ERR_PTR(-ENOSPC); + goto out; + } + + if (buffer_size != size) { + /* Found an oversized buffer and needs to be split */ + buffer = rb_entry(best_fit, struct binder_buffer, rb_node); + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + WARN_ON(n || buffer_size == size); + new_buffer->user_data = buffer->user_data + size; + list_add(&new_buffer->entry, &buffer->entry); + new_buffer->free = 1; + binder_insert_free_buffer(alloc, new_buffer); + new_buffer = NULL; + } + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n", + alloc->pid, size, buffer, buffer_size); + + /* + * Now we remove the pages from the freelist. A clever calculation + * with buffer_size determines if the last page is shared with an + * adjacent in-use buffer. In such case, the page has been already + * removed from the freelist so we trim our range short. + */ + next_used_page = (buffer->user_data + buffer_size) & PAGE_MASK; + curr_last_page = PAGE_ALIGN(buffer->user_data + size); + binder_lru_freelist_del(alloc, PAGE_ALIGN(buffer->user_data), + min(next_used_page, curr_last_page)); + + rb_erase(&buffer->rb_node, &alloc->free_buffers); + buffer->free = 0; + buffer->allow_user_free = 0; + binder_insert_allocated_buffer_locked(alloc, buffer); + buffer->async_transaction = is_async; + buffer->oneway_spam_suspect = false; + if (is_async) { + alloc->free_async_space -= size; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_alloc_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + if (debug_low_async_space_locked(alloc)) + buffer->oneway_spam_suspect = true; + } + +out: + /* Discard possibly unused new_buffer */ + kfree(new_buffer); + return buffer; +} + +/* Calculate the sanitized total size, returns 0 for invalid request */ +static inline size_t sanitized_size(size_t data_size, + size_t offsets_size, + size_t extra_buffers_size) +{ + size_t total, tmp; + + /* Align to pointer size and check for overflows */ + tmp = ALIGN(data_size, sizeof(void *)) + + ALIGN(offsets_size, sizeof(void *)); + if (tmp < data_size || tmp < offsets_size) + return 0; + total = tmp + ALIGN(extra_buffers_size, sizeof(void *)); + if (total < tmp || total < extra_buffers_size) + return 0; + + /* Pad 0-sized buffers so they get a unique address */ + total = max(total, sizeof(void *)); + + return total; +} + +/** + * binder_alloc_new_buf() - Allocate a new binder buffer + * @alloc: binder_alloc for this proc + * @data_size: size of user data buffer + * @offsets_size: user specified buffer offset + * @extra_buffers_size: size of extra space for meta-data (eg, security context) + * @is_async: buffer for async transaction + * + * Allocate a new buffer given the requested sizes. Returns + * the kernel version of the buffer pointer. The size allocated + * is the sum of the three given sizes (each rounded up to + * pointer-sized boundary) + * + * Return: The allocated buffer or %ERR_PTR(-errno) if error + */ +struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async) +{ + struct binder_buffer *buffer, *next; + size_t size; + int ret; + + /* Check binder_alloc is fully initialized */ + if (!binder_alloc_is_mapped(alloc)) { + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%d: binder_alloc_buf, no vma\n", + alloc->pid); + return ERR_PTR(-ESRCH); + } + + size = sanitized_size(data_size, offsets_size, extra_buffers_size); + if (unlikely(!size)) { + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: got transaction with invalid size %zd-%zd-%zd\n", + alloc->pid, data_size, offsets_size, + extra_buffers_size); + return ERR_PTR(-EINVAL); + } + + /* Preallocate the next buffer */ + next = kzalloc(sizeof(*next), GFP_KERNEL); + if (!next) + return ERR_PTR(-ENOMEM); + + mutex_lock(&alloc->mutex); + buffer = binder_alloc_new_buf_locked(alloc, next, size, is_async); + if (IS_ERR(buffer)) { + mutex_unlock(&alloc->mutex); + goto out; + } + + buffer->data_size = data_size; + buffer->offsets_size = offsets_size; + buffer->extra_buffers_size = extra_buffers_size; + buffer->pid = current->tgid; + mutex_unlock(&alloc->mutex); + + ret = binder_install_buffer_pages(alloc, buffer, size); + if (ret) { + binder_alloc_free_buf(alloc, buffer); + buffer = ERR_PTR(ret); + } +out: + return buffer; +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_new_buf); + +static unsigned long buffer_start_page(struct binder_buffer *buffer) +{ + return buffer->user_data & PAGE_MASK; +} + +static unsigned long prev_buffer_end_page(struct binder_buffer *buffer) +{ + return (buffer->user_data - 1) & PAGE_MASK; +} + +static void binder_delete_free_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + struct binder_buffer *prev, *next; + + if (PAGE_ALIGNED(buffer->user_data)) + goto skip_freelist; + + BUG_ON(alloc->buffers.next == &buffer->entry); + prev = binder_buffer_prev(buffer); + BUG_ON(!prev->free); + if (prev_buffer_end_page(prev) == buffer_start_page(buffer)) + goto skip_freelist; + + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + next = binder_buffer_next(buffer); + if (buffer_start_page(next) == buffer_start_page(buffer)) + goto skip_freelist; + } + + binder_lru_freelist_add(alloc, buffer_start_page(buffer), + buffer_start_page(buffer) + PAGE_SIZE); +skip_freelist: + list_del(&buffer->entry); + kfree(buffer); +} + +static void binder_free_buf_locked(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t size, buffer_size; + + buffer_size = binder_alloc_buffer_size(alloc, buffer); + + size = ALIGN(buffer->data_size, sizeof(void *)) + + ALIGN(buffer->offsets_size, sizeof(void *)) + + ALIGN(buffer->extra_buffers_size, sizeof(void *)); + + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%d: binder_free_buf %pK size %zd buffer_size %zd\n", + alloc->pid, buffer, size, buffer_size); + + BUG_ON(buffer->free); + BUG_ON(size > buffer_size); + BUG_ON(buffer->transaction != NULL); + BUG_ON(buffer->user_data < alloc->vm_start); + BUG_ON(buffer->user_data > alloc->vm_start + alloc->buffer_size); + + if (buffer->async_transaction) { + alloc->free_async_space += buffer_size; + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, + "%d: binder_free_buf size %zd async free %zd\n", + alloc->pid, size, alloc->free_async_space); + } + + binder_lru_freelist_add(alloc, PAGE_ALIGN(buffer->user_data), + (buffer->user_data + buffer_size) & PAGE_MASK); + + rb_erase(&buffer->rb_node, &alloc->allocated_buffers); + buffer->free = 1; + if (!list_is_last(&buffer->entry, &alloc->buffers)) { + struct binder_buffer *next = binder_buffer_next(buffer); + + if (next->free) { + rb_erase(&next->rb_node, &alloc->free_buffers); + binder_delete_free_buffer(alloc, next); + } + } + if (alloc->buffers.next != &buffer->entry) { + struct binder_buffer *prev = binder_buffer_prev(buffer); + + if (prev->free) { + binder_delete_free_buffer(alloc, buffer); + rb_erase(&prev->rb_node, &alloc->free_buffers); + buffer = prev; + } + } + binder_insert_free_buffer(alloc, buffer); +} + +/** + * binder_alloc_get_page() - get kernel pointer for given buffer offset + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be accessed + * @buffer_offset: offset into @buffer data + * @pgoffp: address to copy final page offset to + * + * Lookup the struct page corresponding to the address + * at @buffer_offset into @buffer->user_data. If @pgoffp is not + * NULL, the byte-offset into the page is written there. + * + * The caller is responsible to ensure that the offset points + * to a valid address within the @buffer and that @buffer is + * not freeable by the user. Since it can't be freed, we are + * guaranteed that the corresponding elements of @alloc->pages[] + * cannot change. + * + * Return: struct page + */ +static struct page *binder_alloc_get_page(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + pgoff_t *pgoffp) +{ + binder_size_t buffer_space_offset = buffer_offset + + (buffer->user_data - alloc->vm_start); + pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK; + size_t index = buffer_space_offset >> PAGE_SHIFT; + + *pgoffp = pgoff; + + return alloc->pages[index]; +} + +/** + * binder_alloc_clear_buf() - zero out buffer + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be cleared + * + * memset the given buffer to 0 + */ +static void binder_alloc_clear_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t bytes = binder_alloc_buffer_size(alloc, buffer); + binder_size_t buffer_offset = 0; + + while (bytes) { + unsigned long size; + struct page *page; + pgoff_t pgoff; + + page = binder_alloc_get_page(alloc, buffer, + buffer_offset, &pgoff); + size = min_t(size_t, bytes, PAGE_SIZE - pgoff); + memset_page(page, pgoff, 0, size); + bytes -= size; + buffer_offset += size; + } +} + +/** + * binder_alloc_free_buf() - free a binder buffer + * @alloc: binder_alloc for this proc + * @buffer: kernel pointer to buffer + * + * Free the buffer allocated via binder_alloc_new_buf() + */ +void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + /* + * We could eliminate the call to binder_alloc_clear_buf() + * from binder_alloc_deferred_release() by moving this to + * binder_free_buf_locked(). However, that could + * increase contention for the alloc mutex if clear_on_free + * is used frequently for large buffers. The mutex is not + * needed for correctness here. + */ + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } + mutex_lock(&alloc->mutex); + binder_free_buf_locked(alloc, buffer); + mutex_unlock(&alloc->mutex); +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_free_buf); + +/** + * binder_alloc_mmap_handler() - map virtual address space for proc + * @alloc: alloc structure for this proc + * @vma: vma passed to mmap() + * + * Called by binder_mmap() to initialize the space specified in + * vma for allocating binder buffers + * + * Return: + * 0 = success + * -EBUSY = address space already mapped + * -ENOMEM = failed to map memory to given address space + */ +int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma) +{ + struct binder_buffer *buffer; + const char *failure_string; + int ret; + + if (unlikely(vma->vm_mm != alloc->mm)) { + ret = -EINVAL; + failure_string = "invalid vma->vm_mm"; + goto err_invalid_mm; + } + + mutex_lock(&binder_alloc_mmap_lock); + if (alloc->buffer_size) { + ret = -EBUSY; + failure_string = "already mapped"; + goto err_already_mapped; + } + alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start, + SZ_4M); + mutex_unlock(&binder_alloc_mmap_lock); + + alloc->vm_start = vma->vm_start; + + alloc->pages = kvcalloc(alloc->buffer_size / PAGE_SIZE, + sizeof(alloc->pages[0]), + GFP_KERNEL); + if (!alloc->pages) { + ret = -ENOMEM; + failure_string = "alloc page array"; + goto err_alloc_pages_failed; + } + + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + failure_string = "alloc buffer struct"; + goto err_alloc_buf_struct_failed; + } + + buffer->user_data = alloc->vm_start; + list_add(&buffer->entry, &alloc->buffers); + buffer->free = 1; + binder_insert_free_buffer(alloc, buffer); + alloc->free_async_space = alloc->buffer_size / 2; + + /* Signal binder_alloc is fully initialized */ + binder_alloc_set_mapped(alloc, true); + + return 0; + +err_alloc_buf_struct_failed: + kvfree(alloc->pages); + alloc->pages = NULL; +err_alloc_pages_failed: + alloc->vm_start = 0; + mutex_lock(&binder_alloc_mmap_lock); + alloc->buffer_size = 0; +err_already_mapped: + mutex_unlock(&binder_alloc_mmap_lock); +err_invalid_mm: + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%s: %d %lx-%lx %s failed %d\n", __func__, + alloc->pid, vma->vm_start, vma->vm_end, + failure_string, ret); + return ret; +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_mmap_handler); + +void binder_alloc_deferred_release(struct binder_alloc *alloc) +{ + struct rb_node *n; + int buffers, page_count; + struct binder_buffer *buffer; + + buffers = 0; + mutex_lock(&alloc->mutex); + BUG_ON(alloc->mapped); + + while ((n = rb_first(&alloc->allocated_buffers))) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + + /* Transaction should already have been freed */ + BUG_ON(buffer->transaction); + + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } + binder_free_buf_locked(alloc, buffer); + buffers++; + } + + while (!list_empty(&alloc->buffers)) { + buffer = list_first_entry(&alloc->buffers, + struct binder_buffer, entry); + WARN_ON(!buffer->free); + + list_del(&buffer->entry); + WARN_ON_ONCE(!list_empty(&alloc->buffers)); + kfree(buffer); + } + + page_count = 0; + if (alloc->pages) { + int i; + + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + struct page *page; + bool on_lru; + + page = binder_get_installed_page(alloc, i); + if (!page) + continue; + + on_lru = list_lru_del(alloc->freelist, + page_to_lru(page), + page_to_nid(page), + NULL); + binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, + "%s: %d: page %d %s\n", + __func__, alloc->pid, i, + on_lru ? "on lru" : "active"); + binder_free_page(page); + page_count++; + } + } + mutex_unlock(&alloc->mutex); + kvfree(alloc->pages); + if (alloc->mm) + mmdrop(alloc->mm); + + binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, + "%s: %d buffers %d, pages %d\n", + __func__, alloc->pid, buffers, page_count); +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_deferred_release); + +/** + * binder_alloc_print_allocated() - print buffer info + * @m: seq_file for output via seq_printf() + * @alloc: binder_alloc for this proc + * + * Prints information about every buffer associated with + * the binder_alloc state to the given seq_file + */ +void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc) +{ + struct binder_buffer *buffer; + struct rb_node *n; + + guard(mutex)(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) { + buffer = rb_entry(n, struct binder_buffer, rb_node); + seq_printf(m, " buffer %d: %lx size %zd:%zd:%zd %s\n", + buffer->debug_id, + buffer->user_data - alloc->vm_start, + buffer->data_size, buffer->offsets_size, + buffer->extra_buffers_size, + buffer->transaction ? "active" : "delivered"); + } +} + +/** + * binder_alloc_print_pages() - print page usage + * @m: seq_file for output via seq_printf() + * @alloc: binder_alloc for this proc + */ +void binder_alloc_print_pages(struct seq_file *m, + struct binder_alloc *alloc) +{ + struct page *page; + int i; + int active = 0; + int lru = 0; + int free = 0; + + mutex_lock(&alloc->mutex); + /* + * Make sure the binder_alloc is fully initialized, otherwise we might + * read inconsistent state. + */ + if (binder_alloc_is_mapped(alloc)) { + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { + page = binder_get_installed_page(alloc, i); + if (!page) + free++; + else if (list_empty(page_to_lru(page))) + active++; + else + lru++; + } + } + mutex_unlock(&alloc->mutex); + seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); + seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); +} + +/** + * binder_alloc_get_allocated_count() - return count of buffers + * @alloc: binder_alloc for this proc + * + * Return: count of allocated buffers + */ +int binder_alloc_get_allocated_count(struct binder_alloc *alloc) +{ + struct rb_node *n; + int count = 0; + + guard(mutex)(&alloc->mutex); + for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n)) + count++; + return count; +} + + +/** + * binder_alloc_vma_close() - invalidate address space + * @alloc: binder_alloc for this proc + * + * Called from binder_vma_close() when releasing address space. + * Clears alloc->mapped to prevent new incoming transactions from + * allocating more buffers. + */ +void binder_alloc_vma_close(struct binder_alloc *alloc) +{ + binder_alloc_set_mapped(alloc, false); +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_vma_close); + +/** + * binder_alloc_free_page() - shrinker callback to free pages + * @item: item to free + * @lru: list_lru instance of the item + * @cb_arg: callback argument + * + * Called from list_lru_walk() in binder_shrink_scan() to free + * up pages when the system is under memory pressure. + */ +enum lru_status binder_alloc_free_page(struct list_head *item, + struct list_lru_one *lru, + void *cb_arg) + __must_hold(&lru->lock) +{ + struct binder_shrinker_mdata *mdata = container_of(item, typeof(*mdata), lru); + struct binder_alloc *alloc = mdata->alloc; + struct mm_struct *mm = alloc->mm; + struct vm_area_struct *vma; + struct page *page_to_free; + unsigned long page_addr; + int mm_locked = 0; + size_t index; + + if (!mmget_not_zero(mm)) + goto err_mmget; + + index = mdata->page_index; + page_addr = alloc->vm_start + index * PAGE_SIZE; + + /* attempt per-vma lock first */ + vma = lock_vma_under_rcu(mm, page_addr); + if (!vma) { + /* fall back to mmap_lock */ + if (!mmap_read_trylock(mm)) + goto err_mmap_read_lock_failed; + mm_locked = 1; + vma = vma_lookup(mm, page_addr); + } + + if (!mutex_trylock(&alloc->mutex)) + goto err_get_alloc_mutex_failed; + + /* + * Since a binder_alloc can only be mapped once, we ensure + * the vma corresponds to this mapping by checking whether + * the binder_alloc is still mapped. + */ + if (vma && !binder_alloc_is_mapped(alloc)) + goto err_invalid_vma; + + trace_binder_unmap_kernel_start(alloc, index); + + page_to_free = alloc->pages[index]; + binder_set_installed_page(alloc, index, NULL); + + trace_binder_unmap_kernel_end(alloc, index); + + list_lru_isolate(lru, item); + spin_unlock(&lru->lock); + + if (vma) { + trace_binder_unmap_user_start(alloc, index); + + zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL); + + trace_binder_unmap_user_end(alloc, index); + } + + mutex_unlock(&alloc->mutex); + if (mm_locked) + mmap_read_unlock(mm); + else + vma_end_read(vma); + mmput_async(mm); + binder_free_page(page_to_free); + + return LRU_REMOVED_RETRY; + +err_invalid_vma: + mutex_unlock(&alloc->mutex); +err_get_alloc_mutex_failed: + if (mm_locked) + mmap_read_unlock(mm); + else + vma_end_read(vma); +err_mmap_read_lock_failed: + mmput_async(mm); +err_mmget: + return LRU_SKIP; +} +EXPORT_SYMBOL_IF_KUNIT(binder_alloc_free_page); + +static unsigned long +binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + return list_lru_count(&binder_freelist); +} + +static unsigned long +binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return list_lru_walk(&binder_freelist, binder_alloc_free_page, + NULL, sc->nr_to_scan); +} + +static struct shrinker *binder_shrinker; + +VISIBLE_IF_KUNIT void __binder_alloc_init(struct binder_alloc *alloc, + struct list_lru *freelist) +{ + alloc->pid = current->group_leader->pid; + alloc->mm = current->mm; + mmgrab(alloc->mm); + mutex_init(&alloc->mutex); + INIT_LIST_HEAD(&alloc->buffers); + alloc->freelist = freelist; +} +EXPORT_SYMBOL_IF_KUNIT(__binder_alloc_init); + +/** + * binder_alloc_init() - called by binder_open() for per-proc initialization + * @alloc: binder_alloc for this proc + * + * Called from binder_open() to initialize binder_alloc fields for + * new binder proc + */ +void binder_alloc_init(struct binder_alloc *alloc) +{ + __binder_alloc_init(alloc, &binder_freelist); +} + +int binder_alloc_shrinker_init(void) +{ + int ret; + + ret = list_lru_init(&binder_freelist); + if (ret) + return ret; + + binder_shrinker = shrinker_alloc(0, "android-binder"); + if (!binder_shrinker) { + list_lru_destroy(&binder_freelist); + return -ENOMEM; + } + + binder_shrinker->count_objects = binder_shrink_count; + binder_shrinker->scan_objects = binder_shrink_scan; + + shrinker_register(binder_shrinker); + + return 0; +} + +void binder_alloc_shrinker_exit(void) +{ + shrinker_free(binder_shrinker); + list_lru_destroy(&binder_freelist); +} + +/** + * check_buffer() - verify that buffer/offset is safe to access + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be accessed + * @offset: offset into @buffer data + * @bytes: bytes to access from offset + * + * Check that the @offset/@bytes are within the size of the given + * @buffer and that the buffer is currently active and not freeable. + * Offsets must also be multiples of sizeof(u32). The kernel is + * allowed to touch the buffer in two cases: + * + * 1) when the buffer is being created: + * (buffer->free == 0 && buffer->allow_user_free == 0) + * 2) when the buffer is being torn down: + * (buffer->free == 0 && buffer->transaction == NULL). + * + * Return: true if the buffer is safe to access + */ +static inline bool check_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t offset, size_t bytes) +{ + size_t buffer_size = binder_alloc_buffer_size(alloc, buffer); + + return buffer_size >= bytes && + offset <= buffer_size - bytes && + IS_ALIGNED(offset, sizeof(u32)) && + !buffer->free && + (!buffer->allow_user_free || !buffer->transaction); +} + +/** + * binder_alloc_copy_user_to_buffer() - copy src user to tgt user + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be accessed + * @buffer_offset: offset into @buffer data + * @from: userspace pointer to source buffer + * @bytes: bytes to copy + * + * Copy bytes from source userspace to target buffer. + * + * Return: bytes remaining to be copied + */ +unsigned long +binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + const void __user *from, + size_t bytes) +{ + if (!check_buffer(alloc, buffer, buffer_offset, bytes)) + return bytes; + + while (bytes) { + unsigned long size; + unsigned long ret; + struct page *page; + pgoff_t pgoff; + void *kptr; + + page = binder_alloc_get_page(alloc, buffer, + buffer_offset, &pgoff); + size = min_t(size_t, bytes, PAGE_SIZE - pgoff); + kptr = kmap_local_page(page) + pgoff; + ret = copy_from_user(kptr, from, size); + kunmap_local(kptr); + if (ret) + return bytes - size + ret; + bytes -= size; + from += size; + buffer_offset += size; + } + return 0; +} + +static int binder_alloc_do_buffer_copy(struct binder_alloc *alloc, + bool to_buffer, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + void *ptr, + size_t bytes) +{ + /* All copies must be 32-bit aligned and 32-bit size */ + if (!check_buffer(alloc, buffer, buffer_offset, bytes)) + return -EINVAL; + + while (bytes) { + unsigned long size; + struct page *page; + pgoff_t pgoff; + + page = binder_alloc_get_page(alloc, buffer, + buffer_offset, &pgoff); + size = min_t(size_t, bytes, PAGE_SIZE - pgoff); + if (to_buffer) + memcpy_to_page(page, pgoff, ptr, size); + else + memcpy_from_page(ptr, page, pgoff, size); + bytes -= size; + pgoff = 0; + ptr = ptr + size; + buffer_offset += size; + } + return 0; +} + +int binder_alloc_copy_to_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + void *src, + size_t bytes) +{ + return binder_alloc_do_buffer_copy(alloc, true, buffer, buffer_offset, + src, bytes); +} + +int binder_alloc_copy_from_buffer(struct binder_alloc *alloc, + void *dest, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + size_t bytes) +{ + return binder_alloc_do_buffer_copy(alloc, false, buffer, buffer_offset, + dest, bytes); +} diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h new file mode 100644 index 000000000000..d6f1f6f2d00e --- /dev/null +++ b/drivers/android/binder_alloc.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 Google, Inc. + */ + +#ifndef _LINUX_BINDER_ALLOC_H +#define _LINUX_BINDER_ALLOC_H + +#include <linux/rbtree.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/rtmutex.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/list_lru.h> +#include <uapi/linux/android/binder.h> + +struct binder_transaction; + +/** + * struct binder_buffer - buffer used for binder transactions + * @entry: entry alloc->buffers + * @rb_node: node for allocated_buffers/free_buffers rb trees + * @free: %true if buffer is free + * @clear_on_free: %true if buffer must be zeroed after use + * @allow_user_free: %true if user is allowed to free buffer + * @async_transaction: %true if buffer is in use for an async txn + * @oneway_spam_suspect: %true if total async allocate size just exceed + * spamming detect threshold + * @debug_id: unique ID for debugging + * @transaction: pointer to associated struct binder_transaction + * @target_node: struct binder_node associated with this buffer + * @data_size: size of @transaction data + * @offsets_size: size of array of offsets + * @extra_buffers_size: size of space for other objects (like sg lists) + * @user_data: user pointer to base of buffer space + * @pid: pid to attribute the buffer to (caller) + * + * Bookkeeping structure for binder transaction buffers + */ +struct binder_buffer { + struct list_head entry; /* free and allocated entries by address */ + struct rb_node rb_node; /* free entry by size or allocated entry */ + /* by address */ + unsigned free:1; + unsigned clear_on_free:1; + unsigned allow_user_free:1; + unsigned async_transaction:1; + unsigned oneway_spam_suspect:1; + unsigned debug_id:27; + struct binder_transaction *transaction; + struct binder_node *target_node; + size_t data_size; + size_t offsets_size; + size_t extra_buffers_size; + unsigned long user_data; + int pid; +}; + +/** + * struct binder_shrinker_mdata - binder metadata used to reclaim pages + * @lru: LRU entry in binder_freelist + * @alloc: binder_alloc owning the page to reclaim + * @page_index: offset in @alloc->pages[] into the page to reclaim + */ +struct binder_shrinker_mdata { + struct list_head lru; + struct binder_alloc *alloc; + unsigned long page_index; +}; + +static inline struct list_head *page_to_lru(struct page *p) +{ + struct binder_shrinker_mdata *mdata; + + mdata = (struct binder_shrinker_mdata *)page_private(p); + + return &mdata->lru; +} + +/** + * struct binder_alloc - per-binder proc state for binder allocator + * @mutex: protects binder_alloc fields + * @mm: copy of task->mm (invariant after open) + * @vm_start: base of per-proc address space mapped via mmap + * @buffers: list of all buffers for this proc + * @free_buffers: rb tree of buffers available for allocation + * sorted by size + * @allocated_buffers: rb tree of allocated buffers sorted by address + * @free_async_space: VA space available for async buffers. This is + * initialized at mmap time to 1/2 the full VA space + * @pages: array of struct page * + * @freelist: lru list to use for free pages (invariant after init) + * @buffer_size: size of address space specified via mmap + * @pid: pid for associated binder_proc (invariant after init) + * @pages_high: high watermark of offset in @pages + * @mapped: whether the vm area is mapped, each binder instance is + * allowed a single mapping throughout its lifetime + * @oneway_spam_detected: %true if oneway spam detection fired, clear that + * flag once the async buffer has returned to a healthy state + * + * Bookkeeping structure for per-proc address space management for binder + * buffers. It is normally initialized during binder_init() and binder_mmap() + * calls. The address space is used for both user-visible buffers and for + * struct binder_buffer objects used to track the user buffers + */ +struct binder_alloc { + struct mutex mutex; + struct mm_struct *mm; + unsigned long vm_start; + struct list_head buffers; + struct rb_root free_buffers; + struct rb_root allocated_buffers; + size_t free_async_space; + struct page **pages; + struct list_lru *freelist; + size_t buffer_size; + int pid; + size_t pages_high; + bool mapped; + bool oneway_spam_detected; +}; + +enum lru_status binder_alloc_free_page(struct list_head *item, + struct list_lru_one *lru, + void *cb_arg); +struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc, + size_t data_size, + size_t offsets_size, + size_t extra_buffers_size, + int is_async); +void binder_alloc_init(struct binder_alloc *alloc); +int binder_alloc_shrinker_init(void); +void binder_alloc_shrinker_exit(void); +void binder_alloc_vma_close(struct binder_alloc *alloc); +struct binder_buffer * +binder_alloc_prepare_to_free(struct binder_alloc *alloc, + unsigned long user_ptr); +void binder_alloc_free_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer); +int binder_alloc_mmap_handler(struct binder_alloc *alloc, + struct vm_area_struct *vma); +void binder_alloc_deferred_release(struct binder_alloc *alloc); +int binder_alloc_get_allocated_count(struct binder_alloc *alloc); +void binder_alloc_print_allocated(struct seq_file *m, + struct binder_alloc *alloc); +void binder_alloc_print_pages(struct seq_file *m, + struct binder_alloc *alloc); + +/** + * binder_alloc_get_free_async_space() - get free space available for async + * @alloc: binder_alloc for this proc + * + * Return: the bytes remaining in the address-space for async transactions + */ +static inline size_t +binder_alloc_get_free_async_space(struct binder_alloc *alloc) +{ + guard(mutex)(&alloc->mutex); + return alloc->free_async_space; +} + +unsigned long +binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + const void __user *from, + size_t bytes); + +int binder_alloc_copy_to_buffer(struct binder_alloc *alloc, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + void *src, + size_t bytes); + +int binder_alloc_copy_from_buffer(struct binder_alloc *alloc, + void *dest, + struct binder_buffer *buffer, + binder_size_t buffer_offset, + size_t bytes); + +#if IS_ENABLED(CONFIG_KUNIT) +void __binder_alloc_init(struct binder_alloc *alloc, struct list_lru *freelist); +size_t binder_alloc_buffer_size(struct binder_alloc *alloc, + struct binder_buffer *buffer); +#endif + +#endif /* _LINUX_BINDER_ALLOC_H */ + diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h new file mode 100644 index 000000000000..342574bfd28a --- /dev/null +++ b/drivers/android/binder_internal.h @@ -0,0 +1,597 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_BINDER_INTERNAL_H +#define _LINUX_BINDER_INTERNAL_H + +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> +#include <linux/refcount.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/uidgid.h> +#include <uapi/linux/android/binderfs.h> +#include "binder_alloc.h" +#include "dbitmap.h" + +struct binder_context { + struct binder_node *binder_context_mgr_node; + struct mutex context_mgr_node_lock; + kuid_t binder_context_mgr_uid; + const char *name; +}; + +/** + * struct binder_device - information about a binder device node + * @hlist: list of binder devices + * @miscdev: information about a binder character device node + * @context: binder context information + * @binderfs_inode: This is the inode of the root dentry of the super block + * belonging to a binderfs mount. + */ +struct binder_device { + struct hlist_node hlist; + struct miscdevice miscdev; + struct binder_context context; + struct inode *binderfs_inode; + refcount_t ref; +}; + +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + * @stats_mode: enable binder stats in binderfs. + */ +struct binderfs_mount_opts { + int max; + int stats_mode; +}; + +/** + * binderfs_info - information about a binderfs mount + * @ipc_ns: The ipc namespace the binderfs mount belongs to. + * @control_dentry: This records the dentry of this binderfs mount + * binder-control device. + * @root_uid: uid that needs to be used when a new binder device is + * created. + * @root_gid: gid that needs to be used when a new binder device is + * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. + * @proc_log_dir: Pointer to the directory dentry containing process-specific + * logs. + */ +struct binderfs_info { + struct ipc_namespace *ipc_ns; + struct dentry *control_dentry; + kuid_t root_uid; + kgid_t root_gid; + struct binderfs_mount_opts mount_opts; + int device_count; + struct dentry *proc_log_dir; +}; + +extern const struct file_operations binder_fops; + +extern char *binder_devices_param; + +#ifdef CONFIG_ANDROID_BINDERFS +extern bool is_binderfs_device(const struct inode *inode); +extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name, + const struct file_operations *fops, + void *data); +#else +static inline bool is_binderfs_device(const struct inode *inode) +{ + return false; +} +static inline struct dentry *binderfs_create_file(struct dentry *dir, + const char *name, + const struct file_operations *fops, + void *data) +{ + return NULL; +} +#endif + +#ifdef CONFIG_ANDROID_BINDERFS +extern int __init init_binderfs(void); +#else +static inline int __init init_binderfs(void) +{ + return 0; +} +#endif + +struct binder_debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations *fops; + void *data; +}; + +extern const struct binder_debugfs_entry binder_debugfs_entries[]; + +#define binder_for_each_debugfs_entry(entry) \ + for ((entry) = binder_debugfs_entries; \ + (entry)->name; \ + (entry)++) + +enum binder_stat_types { + BINDER_STAT_PROC, + BINDER_STAT_THREAD, + BINDER_STAT_NODE, + BINDER_STAT_REF, + BINDER_STAT_DEATH, + BINDER_STAT_TRANSACTION, + BINDER_STAT_TRANSACTION_COMPLETE, + BINDER_STAT_FREEZE, + BINDER_STAT_COUNT +}; + +struct binder_stats { + atomic_t br[_IOC_NR(BR_CLEAR_FREEZE_NOTIFICATION_DONE) + 1]; + atomic_t bc[_IOC_NR(BC_FREEZE_NOTIFICATION_DONE) + 1]; + atomic_t obj_created[BINDER_STAT_COUNT]; + atomic_t obj_deleted[BINDER_STAT_COUNT]; +}; + +/** + * struct binder_work - work enqueued on a worklist + * @entry: node enqueued on list + * @type: type of work to be performed + * + * There are separate work lists for proc, thread, and node (async). + */ +struct binder_work { + struct list_head entry; + + enum binder_work_type { + BINDER_WORK_TRANSACTION = 1, + BINDER_WORK_TRANSACTION_COMPLETE, + BINDER_WORK_TRANSACTION_PENDING, + BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT, + BINDER_WORK_RETURN_ERROR, + BINDER_WORK_NODE, + BINDER_WORK_DEAD_BINDER, + BINDER_WORK_DEAD_BINDER_AND_CLEAR, + BINDER_WORK_CLEAR_DEATH_NOTIFICATION, + BINDER_WORK_FROZEN_BINDER, + BINDER_WORK_CLEAR_FREEZE_NOTIFICATION, + } type; +}; + +struct binder_error { + struct binder_work work; + uint32_t cmd; +}; + +/** + * struct binder_node - binder node bookkeeping + * @debug_id: unique ID for debugging + * (invariant after initialized) + * @lock: lock for node fields + * @work: worklist element for node work + * (protected by @proc->inner_lock) + * @rb_node: element for proc->nodes tree + * (protected by @proc->inner_lock) + * @dead_node: element for binder_dead_nodes list + * (protected by binder_dead_nodes_lock) + * @proc: binder_proc that owns this node + * (invariant after initialized) + * @refs: list of references on this node + * (protected by @lock) + * @internal_strong_refs: used to take strong references when + * initiating a transaction + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @local_weak_refs: weak user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @local_strong_refs: strong user refs from local process + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @tmp_refs: temporary kernel refs + * (protected by @proc->inner_lock while @proc + * is valid, and by binder_dead_nodes_lock + * if @proc is NULL. During inc/dec and node release + * it is also protected by @lock to provide safety + * as the node dies and @proc becomes NULL) + * @ptr: userspace pointer for node + * (invariant, no lock needed) + * @cookie: userspace cookie for node + * (invariant, no lock needed) + * @has_strong_ref: userspace notified of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @pending_strong_ref: userspace has acked notification of strong ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @has_weak_ref: userspace notified of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @pending_weak_ref: userspace has acked notification of weak ref + * (protected by @proc->inner_lock if @proc + * and by @lock) + * @has_async_transaction: async transaction to node in progress + * (protected by @lock) + * @accept_fds: file descriptor operations supported for node + * (invariant after initialized) + * @min_priority: minimum scheduling priority + * (invariant after initialized) + * @txn_security_ctx: require sender's security context + * (invariant after initialized) + * @async_todo: list of async work items + * (protected by @proc->inner_lock) + * + * Bookkeeping structure for binder nodes. + */ +struct binder_node { + int debug_id; + spinlock_t lock; + struct binder_work work; + union { + struct rb_node rb_node; + struct hlist_node dead_node; + }; + struct binder_proc *proc; + struct hlist_head refs; + int internal_strong_refs; + int local_weak_refs; + int local_strong_refs; + int tmp_refs; + binder_uintptr_t ptr; + binder_uintptr_t cookie; + struct { + /* + * bitfield elements protected by + * proc inner_lock + */ + u8 has_strong_ref:1; + u8 pending_strong_ref:1; + u8 has_weak_ref:1; + u8 pending_weak_ref:1; + }; + struct { + /* + * invariant after initialization + */ + u8 accept_fds:1; + u8 txn_security_ctx:1; + u8 min_priority; + }; + bool has_async_transaction; + struct list_head async_todo; +}; + +struct binder_ref_death { + /** + * @work: worklist element for death notifications + * (protected by inner_lock of the proc that + * this ref belongs to) + */ + struct binder_work work; + binder_uintptr_t cookie; +}; + +struct binder_ref_freeze { + struct binder_work work; + binder_uintptr_t cookie; + bool is_frozen:1; + bool sent:1; + bool resend:1; +}; + +/** + * struct binder_ref_data - binder_ref counts and id + * @debug_id: unique ID for the ref + * @desc: unique userspace handle for ref + * @strong: strong ref count (debugging only if not locked) + * @weak: weak ref count (debugging only if not locked) + * + * Structure to hold ref count and ref id information. Since + * the actual ref can only be accessed with a lock, this structure + * is used to return information about the ref to callers of + * ref inc/dec functions. + */ +struct binder_ref_data { + int debug_id; + uint32_t desc; + int strong; + int weak; +}; + +/** + * struct binder_ref - struct to track references on nodes + * @data: binder_ref_data containing id, handle, and current refcounts + * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree + * @rb_node_node: node for lookup by @node in proc's rb_tree + * @node_entry: list entry for node->refs list in target node + * (protected by @node->lock) + * @proc: binder_proc containing ref + * @node: binder_node of target node. When cleaning up a + * ref for deletion in binder_cleanup_ref, a non-NULL + * @node indicates the node must be freed + * @death: pointer to death notification (ref_death) if requested + * (protected by @node->lock) + * @freeze: pointer to freeze notification (ref_freeze) if requested + * (protected by @node->lock) + * + * Structure to track references from procA to target node (on procB). This + * structure is unsafe to access without holding @proc->outer_lock. + */ +struct binder_ref { + /* Lookups needed: */ + /* node + proc => ref (transaction) */ + /* desc + proc => ref (transaction, inc/dec ref) */ + /* node => refs + procs (proc exit) */ + struct binder_ref_data data; + struct rb_node rb_node_desc; + struct rb_node rb_node_node; + struct hlist_node node_entry; + struct binder_proc *proc; + struct binder_node *node; + struct binder_ref_death *death; + struct binder_ref_freeze *freeze; +}; + +/** + * struct binder_proc - binder process bookkeeping + * @proc_node: element for binder_procs list + * @threads: rbtree of binder_threads in this proc + * (protected by @inner_lock) + * @nodes: rbtree of binder nodes associated with + * this proc ordered by node->ptr + * (protected by @inner_lock) + * @refs_by_desc: rbtree of refs ordered by ref->desc + * (protected by @outer_lock) + * @refs_by_node: rbtree of refs ordered by ref->node + * (protected by @outer_lock) + * @waiting_threads: threads currently waiting for proc work + * (protected by @inner_lock) + * @pid PID of group_leader of process + * (invariant after initialized) + * @tsk task_struct for group_leader of process + * (invariant after initialized) + * @cred struct cred associated with the `struct file` + * in binder_open() + * (invariant after initialized) + * @deferred_work_node: element for binder_deferred_list + * (protected by binder_deferred_lock) + * @deferred_work: bitmap of deferred work to perform + * (protected by binder_deferred_lock) + * @outstanding_txns: number of transactions to be transmitted before + * processes in freeze_wait are woken up + * (protected by @inner_lock) + * @is_dead: process is dead and awaiting free + * when outstanding transactions are cleaned up + * (protected by @inner_lock) + * @is_frozen: process is frozen and unable to service + * binder transactions + * (protected by @inner_lock) + * @sync_recv: process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + * (protected by @inner_lock) + * @async_recv: process received async transactions since last frozen + * (protected by @inner_lock) + * @freeze_wait: waitqueue of processes waiting for all outstanding + * transactions to be processed + * (protected by @inner_lock) + * @dmap dbitmap to manage available reference descriptors + * (protected by @outer_lock) + * @todo: list of work for this process + * (protected by @inner_lock) + * @stats: per-process binder statistics + * (atomics, no lock needed) + * @delivered_death: list of delivered death notification + * (protected by @inner_lock) + * @delivered_freeze: list of delivered freeze notification + * (protected by @inner_lock) + * @max_threads: cap on number of binder threads + * (protected by @inner_lock) + * @requested_threads: number of binder threads requested but not + * yet started. In current implementation, can + * only be 0 or 1. + * (protected by @inner_lock) + * @requested_threads_started: number binder threads started + * (protected by @inner_lock) + * @tmp_ref: temporary reference to indicate proc is in use + * (protected by @inner_lock) + * @default_priority: default scheduler priority + * (invariant after initialized) + * @debugfs_entry: debugfs node + * @alloc: binder allocator bookkeeping + * @context: binder_context for this proc + * (invariant after initialized) + * @inner_lock: can nest under outer_lock and/or node lock + * @outer_lock: no nesting under innor or node lock + * Lock order: 1) outer, 2) node, 3) inner + * @binderfs_entry: process-specific binderfs log file + * @oneway_spam_detection_enabled: process enabled oneway spam detection + * or not + * + * Bookkeeping structure for binder processes + */ +struct binder_proc { + struct hlist_node proc_node; + struct rb_root threads; + struct rb_root nodes; + struct rb_root refs_by_desc; + struct rb_root refs_by_node; + struct list_head waiting_threads; + int pid; + struct task_struct *tsk; + const struct cred *cred; + struct hlist_node deferred_work_node; + int deferred_work; + int outstanding_txns; + bool is_dead; + bool is_frozen; + bool sync_recv; + bool async_recv; + wait_queue_head_t freeze_wait; + struct dbitmap dmap; + struct list_head todo; + struct binder_stats stats; + struct list_head delivered_death; + struct list_head delivered_freeze; + u32 max_threads; + int requested_threads; + int requested_threads_started; + int tmp_ref; + long default_priority; + struct dentry *debugfs_entry; + struct binder_alloc alloc; + struct binder_context *context; + spinlock_t inner_lock; + spinlock_t outer_lock; + struct dentry *binderfs_entry; + bool oneway_spam_detection_enabled; +}; + +/** + * struct binder_thread - binder thread bookkeeping + * @proc: binder process for this thread + * (invariant after initialization) + * @rb_node: element for proc->threads rbtree + * (protected by @proc->inner_lock) + * @waiting_thread_node: element for @proc->waiting_threads list + * (protected by @proc->inner_lock) + * @pid: PID for this thread + * (invariant after initialization) + * @looper: bitmap of looping state + * (only accessed by this thread) + * @looper_needs_return: looping thread needs to exit driver + * (no lock needed) + * @transaction_stack: stack of in-progress transactions for this thread + * (protected by @proc->inner_lock) + * @todo: list of work to do for this thread + * (protected by @proc->inner_lock) + * @process_todo: whether work in @todo should be processed + * (protected by @proc->inner_lock) + * @return_error: transaction errors reported by this thread + * (only accessed by this thread) + * @reply_error: transaction errors reported by target thread + * (protected by @proc->inner_lock) + * @ee: extended error information from this thread + * (protected by @proc->inner_lock) + * @wait: wait queue for thread work + * @stats: per-thread statistics + * (atomics, no lock needed) + * @tmp_ref: temporary reference to indicate thread is in use + * (atomic since @proc->inner_lock cannot + * always be acquired) + * @is_dead: thread is dead and awaiting free + * when outstanding transactions are cleaned up + * (protected by @proc->inner_lock) + * + * Bookkeeping structure for binder threads. + */ +struct binder_thread { + struct binder_proc *proc; + struct rb_node rb_node; + struct list_head waiting_thread_node; + int pid; + int looper; /* only modified by this thread */ + bool looper_need_return; /* can be written by other thread */ + struct binder_transaction *transaction_stack; + struct list_head todo; + bool process_todo; + struct binder_error return_error; + struct binder_error reply_error; + struct binder_extended_error ee; + wait_queue_head_t wait; + struct binder_stats stats; + atomic_t tmp_ref; + bool is_dead; +}; + +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * @target_fd: fd to use by the target to install @file + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; + int target_fd; +}; + +struct binder_transaction { + int debug_id; + struct binder_work work; + struct binder_thread *from; + pid_t from_pid; + pid_t from_tid; + struct binder_transaction *from_parent; + struct binder_proc *to_proc; + struct binder_thread *to_thread; + struct binder_transaction *to_parent; + unsigned is_async:1; + unsigned is_reply:1; + + struct binder_buffer *buffer; + unsigned int code; + unsigned int flags; + long priority; + long saved_priority; + kuid_t sender_euid; + ktime_t start_time; + struct list_head fd_fixups; + binder_uintptr_t security_ctx; + /** + * @lock: protects @from, @to_proc, and @to_thread + * + * @from, @to_proc, and @to_thread can be set to NULL + * during thread teardown + */ + spinlock_t lock; +}; + +/** + * struct binder_object - union of flat binder object types + * @hdr: generic object header + * @fbo: binder object (nodes and refs) + * @fdo: file descriptor object + * @bbo: binder buffer pointer + * @fdao: file descriptor array + * + * Used for type-independent object copies + */ +struct binder_object { + union { + struct binder_object_header hdr; + struct flat_binder_object fbo; + struct binder_fd_object fdo; + struct binder_buffer_object bbo; + struct binder_fd_array_object fdao; + }; +}; + +/** + * Add a binder device to binder_devices + * @device: the new binder device to add to the global list + */ +void binder_add_device(struct binder_device *device); + +/** + * Remove a binder device to binder_devices + * @device: the binder device to remove from the global list + */ +void binder_remove_device(struct binder_device *device); + +#if IS_ENABLED(CONFIG_KUNIT) +vm_fault_t binder_vm_fault(struct vm_fault *vmf); +#endif + +#endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binder_netlink.c b/drivers/android/binder_netlink.c new file mode 100644 index 000000000000..81e8432b5904 --- /dev/null +++ b/drivers/android/binder_netlink.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/binder.yaml */ +/* YNL-GEN kernel source */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "binder_netlink.h" + +#include <uapi/linux/android/binder_netlink.h> + +/* Ops table for binder */ +static const struct genl_split_ops binder_nl_ops[] = { +}; + +static const struct genl_multicast_group binder_nl_mcgrps[] = { + [BINDER_NLGRP_REPORT] = { "report", }, +}; + +struct genl_family binder_nl_family __ro_after_init = { + .name = BINDER_FAMILY_NAME, + .version = BINDER_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = binder_nl_ops, + .n_split_ops = ARRAY_SIZE(binder_nl_ops), + .mcgrps = binder_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(binder_nl_mcgrps), +}; diff --git a/drivers/android/binder_netlink.h b/drivers/android/binder_netlink.h new file mode 100644 index 000000000000..57399942a5e3 --- /dev/null +++ b/drivers/android/binder_netlink.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/binder.yaml */ +/* YNL-GEN kernel header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#ifndef _LINUX_BINDER_GEN_H +#define _LINUX_BINDER_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/android/binder_netlink.h> + +enum { + BINDER_NLGRP_REPORT, +}; + +extern struct genl_family binder_nl_family; + +#endif /* _LINUX_BINDER_GEN_H */ diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h new file mode 100644 index 000000000000..fa5eb61cf580 --- /dev/null +++ b/drivers/android/binder_trace.h @@ -0,0 +1,448 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Google, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM binder + +#if !defined(_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _BINDER_TRACE_H + +#include <linux/tracepoint.h> + +struct binder_buffer; +struct binder_node; +struct binder_proc; +struct binder_alloc; +struct binder_ref_data; +struct binder_thread; +struct binder_transaction; + +TRACE_EVENT(binder_ioctl, + TP_PROTO(unsigned int cmd, unsigned long arg), + TP_ARGS(cmd, arg), + + TP_STRUCT__entry( + __field(unsigned int, cmd) + __field(unsigned long, arg) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->arg = arg; + ), + TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg) +); + +DECLARE_EVENT_CLASS(binder_function_return_class, + TP_PROTO(int ret), + TP_ARGS(ret), + TP_STRUCT__entry( + __field(int, ret) + ), + TP_fast_assign( + __entry->ret = ret; + ), + TP_printk("ret=%d", __entry->ret) +); + +#define DEFINE_BINDER_FUNCTION_RETURN_EVENT(name) \ +DEFINE_EVENT(binder_function_return_class, name, \ + TP_PROTO(int ret), \ + TP_ARGS(ret)) + +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_ioctl_done); +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done); +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done); + +TRACE_EVENT(binder_wait_for_work, + TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo), + TP_ARGS(proc_work, transaction_stack, thread_todo), + + TP_STRUCT__entry( + __field(bool, proc_work) + __field(bool, transaction_stack) + __field(bool, thread_todo) + ), + TP_fast_assign( + __entry->proc_work = proc_work; + __entry->transaction_stack = transaction_stack; + __entry->thread_todo = thread_todo; + ), + TP_printk("proc_work=%d transaction_stack=%d thread_todo=%d", + __entry->proc_work, __entry->transaction_stack, + __entry->thread_todo) +); + +TRACE_EVENT(binder_txn_latency_free, + TP_PROTO(struct binder_transaction *t, + int from_proc, int from_thread, + int to_proc, int to_thread), + TP_ARGS(t, from_proc, from_thread, to_proc, to_thread), + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, from_proc) + __field(int, from_thread) + __field(int, to_proc) + __field(int, to_thread) + __field(unsigned int, code) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->from_proc = from_proc; + __entry->from_thread = from_thread; + __entry->to_proc = to_proc; + __entry->to_thread = to_thread; + __entry->code = t->code; + __entry->flags = t->flags; + ), + TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x", + __entry->debug_id, __entry->from_proc, __entry->from_thread, + __entry->to_proc, __entry->to_thread, __entry->code, + __entry->flags) +); + +TRACE_EVENT(binder_transaction, + TP_PROTO(bool reply, struct binder_transaction *t, + struct binder_node *target_node), + TP_ARGS(reply, t, target_node), + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, target_node) + __field(int, to_proc) + __field(int, to_thread) + __field(int, reply) + __field(unsigned int, code) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->target_node = target_node ? target_node->debug_id : 0; + __entry->to_proc = t->to_proc->pid; + __entry->to_thread = t->to_thread ? t->to_thread->pid : 0; + __entry->reply = reply; + __entry->code = t->code; + __entry->flags = t->flags; + ), + TP_printk("transaction=%d dest_node=%d dest_proc=%d dest_thread=%d reply=%d flags=0x%x code=0x%x", + __entry->debug_id, __entry->target_node, + __entry->to_proc, __entry->to_thread, + __entry->reply, __entry->flags, __entry->code) +); + +TRACE_EVENT(binder_transaction_received, + TP_PROTO(struct binder_transaction *t), + TP_ARGS(t), + + TP_STRUCT__entry( + __field(int, debug_id) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + ), + TP_printk("transaction=%d", __entry->debug_id) +); + +TRACE_EVENT(binder_transaction_node_to_ref, + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, node_debug_id) + __field(binder_uintptr_t, node_ptr) + __field(int, ref_debug_id) + __field(uint32_t, ref_desc) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->node_debug_id = node->debug_id; + __entry->node_ptr = node->ptr; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; + ), + TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d", + __entry->debug_id, __entry->node_debug_id, + (u64)__entry->node_ptr, + __entry->ref_debug_id, __entry->ref_desc) +); + +TRACE_EVENT(binder_transaction_ref_to_node, + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *rdata), + TP_ARGS(t, node, rdata), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, ref_debug_id) + __field(uint32_t, ref_desc) + __field(int, node_debug_id) + __field(binder_uintptr_t, node_ptr) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->ref_debug_id = rdata->debug_id; + __entry->ref_desc = rdata->desc; + __entry->node_debug_id = node->debug_id; + __entry->node_ptr = node->ptr; + ), + TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%016llx", + __entry->debug_id, __entry->node_debug_id, + __entry->ref_debug_id, __entry->ref_desc, + (u64)__entry->node_ptr) +); + +TRACE_EVENT(binder_transaction_ref_to_ref, + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref_data *src_ref, + struct binder_ref_data *dest_ref), + TP_ARGS(t, node, src_ref, dest_ref), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, node_debug_id) + __field(int, src_ref_debug_id) + __field(uint32_t, src_ref_desc) + __field(int, dest_ref_debug_id) + __field(uint32_t, dest_ref_desc) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->node_debug_id = node->debug_id; + __entry->src_ref_debug_id = src_ref->debug_id; + __entry->src_ref_desc = src_ref->desc; + __entry->dest_ref_debug_id = dest_ref->debug_id; + __entry->dest_ref_desc = dest_ref->desc; + ), + TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ref=%d dest_desc=%d", + __entry->debug_id, __entry->node_debug_id, + __entry->src_ref_debug_id, __entry->src_ref_desc, + __entry->dest_ref_debug_id, __entry->dest_ref_desc) +); + +TRACE_EVENT(binder_transaction_fd_send, + TP_PROTO(struct binder_transaction *t, int fd, size_t offset), + TP_ARGS(t, fd, offset), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, fd) + __field(size_t, offset) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->fd = fd; + __entry->offset = offset; + ), + TP_printk("transaction=%d src_fd=%d offset=%zu", + __entry->debug_id, __entry->fd, __entry->offset) +); + +TRACE_EVENT(binder_transaction_fd_recv, + TP_PROTO(struct binder_transaction *t, int fd, size_t offset), + TP_ARGS(t, fd, offset), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, fd) + __field(size_t, offset) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->fd = fd; + __entry->offset = offset; + ), + TP_printk("transaction=%d dest_fd=%d offset=%zu", + __entry->debug_id, __entry->fd, __entry->offset) +); + +DECLARE_EVENT_CLASS(binder_buffer_class, + TP_PROTO(struct binder_buffer *buf), + TP_ARGS(buf), + TP_STRUCT__entry( + __field(int, debug_id) + __field(size_t, data_size) + __field(size_t, offsets_size) + __field(size_t, extra_buffers_size) + ), + TP_fast_assign( + __entry->debug_id = buf->debug_id; + __entry->data_size = buf->data_size; + __entry->offsets_size = buf->offsets_size; + __entry->extra_buffers_size = buf->extra_buffers_size; + ), + TP_printk("transaction=%d data_size=%zd offsets_size=%zd extra_buffers_size=%zd", + __entry->debug_id, __entry->data_size, __entry->offsets_size, + __entry->extra_buffers_size) +); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_alloc_buf, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_buffer_release, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_failed_buffer_release, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_update_buffer_release, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +TRACE_EVENT(binder_update_page_range, + TP_PROTO(struct binder_alloc *alloc, bool allocate, + unsigned long start, unsigned long end), + TP_ARGS(alloc, allocate, start, end), + TP_STRUCT__entry( + __field(int, proc) + __field(bool, allocate) + __field(size_t, offset) + __field(size_t, size) + ), + TP_fast_assign( + __entry->proc = alloc->pid; + __entry->allocate = allocate; + __entry->offset = start - alloc->vm_start; + __entry->size = end - start; + ), + TP_printk("proc=%d allocate=%d offset=%zu size=%zu", + __entry->proc, __entry->allocate, + __entry->offset, __entry->size) +); + +DECLARE_EVENT_CLASS(binder_lru_page_class, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index), + TP_STRUCT__entry( + __field(int, proc) + __field(size_t, page_index) + ), + TP_fast_assign( + __entry->proc = alloc->pid; + __entry->page_index = page_index; + ), + TP_printk("proc=%d page_index=%zu", + __entry->proc, __entry->page_index) +); + +DEFINE_EVENT(binder_lru_page_class, binder_alloc_lru_start, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_alloc_lru_end, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_free_lru_start, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_free_lru_end, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_alloc_page_start, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_alloc_page_end, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_unmap_user_start, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_unmap_user_end, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_unmap_kernel_start, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +DEFINE_EVENT(binder_lru_page_class, binder_unmap_kernel_end, + TP_PROTO(const struct binder_alloc *alloc, size_t page_index), + TP_ARGS(alloc, page_index)); + +TRACE_EVENT(binder_command, + TP_PROTO(uint32_t cmd), + TP_ARGS(cmd), + TP_STRUCT__entry( + __field(uint32_t, cmd) + ), + TP_fast_assign( + __entry->cmd = cmd; + ), + TP_printk("cmd=0x%x %s", + __entry->cmd, + _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_command_strings) ? + binder_command_strings[_IOC_NR(__entry->cmd)] : + "unknown") +); + +TRACE_EVENT(binder_return, + TP_PROTO(uint32_t cmd), + TP_ARGS(cmd), + TP_STRUCT__entry( + __field(uint32_t, cmd) + ), + TP_fast_assign( + __entry->cmd = cmd; + ), + TP_printk("cmd=0x%x %s", + __entry->cmd, + _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_return_strings) ? + binder_return_strings[_IOC_NR(__entry->cmd)] : + "unknown") +); + +TRACE_EVENT(binder_netlink_report, + TP_PROTO(const char *context, + struct binder_transaction *t, + u32 data_size, + u32 error), + TP_ARGS(context, t, data_size, error), + TP_STRUCT__entry( + __field(const char *, context) + __field(u32, error) + __field(int, from_pid) + __field(int, from_tid) + __field(int, to_pid) + __field(int, to_tid) + __field(bool, is_reply) + __field(unsigned int, flags) + __field(unsigned int, code) + __field(size_t, data_size) + ), + TP_fast_assign( + __entry->context = context; + __entry->error = error; + __entry->from_pid = t->from_pid; + __entry->from_tid = t->from_tid; + __entry->to_pid = t->to_proc ? t->to_proc->pid : 0; + __entry->to_tid = t->to_thread ? t->to_thread->pid : 0; + __entry->is_reply = t->is_reply; + __entry->flags = t->flags; + __entry->code = t->code; + __entry->data_size = data_size; + ), + TP_printk("from %d:%d to %d:%d context=%s error=%d is_reply=%d flags=0x%x code=0x%x size=%zu", + __entry->from_pid, __entry->from_tid, + __entry->to_pid, __entry->to_tid, + __entry->context, __entry->error, __entry->is_reply, + __entry->flags, __entry->code, __entry->data_size) +); + +#endif /* _BINDER_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE binder_trace +#include <trace/define_trace.h> diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c new file mode 100644 index 000000000000..b46bcb91072d --- /dev/null +++ b/drivers/android/binderfs.c @@ -0,0 +1,786 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compiler_types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/gfp.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/ipc_namespace.h> +#include <linux/kdev_t.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/namei.h> +#include <linux/magic.h> +#include <linux/major.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/mount.h> +#include <linux/fs_parser.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/spinlock_types.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/user_namespace.h> +#include <linux/xarray.h> +#include <uapi/linux/android/binder.h> +#include <uapi/linux/android/binderfs.h> + +#include "binder_internal.h" + +#define FIRST_INODE 1 +#define SECOND_INODE 2 +#define INODE_OFFSET 3 +#define BINDERFS_MAX_MINOR (1U << MINORBITS) +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) + +static dev_t binderfs_dev; +static DEFINE_MUTEX(binderfs_minors_mutex); +static DEFINE_IDA(binderfs_minors); + +enum binderfs_param { + Opt_max, + Opt_stats_mode, +}; + +enum binderfs_stats_mode { + binderfs_stats_mode_unset, + binderfs_stats_mode_global, +}; + +struct binder_features { + bool oneway_spam_detection; + bool extended_error; + bool freeze_notification; + bool transaction_report; +}; + +static const struct constant_table binderfs_param_stats[] = { + { "global", binderfs_stats_mode_global }, + {} +}; + +static const struct fs_parameter_spec binderfs_fs_parameters[] = { + fsparam_u32("max", Opt_max), + fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats), + {} +}; + +static struct binder_features binder_features = { + .oneway_spam_detection = true, + .extended_error = true, + .freeze_notification = true, + .transaction_report = true, +}; + +static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) +{ + return sb->s_fs_info; +} + +bool is_binderfs_device(const struct inode *inode) +{ + if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC) + return true; + + return false; +} + +/** + * binderfs_binder_device_create - allocate inode from super block of a + * binderfs mount + * @ref_inode: inode from which the super block will be taken + * @userp: buffer to copy information about new device for userspace to + * @req: struct binderfs_device as copied from userspace + * + * This function allocates a new binder_device and reserves a new minor + * number for it. + * Minor numbers are limited and tracked globally in binderfs_minors. The + * function will stash a struct binder_device for the specific binder + * device in i_private of the inode. + * It will go on to allocate a new inode from the super block of the + * filesystem mount, stash a struct binder_device in its i_private field + * and attach a dentry to that inode. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_device_create(struct inode *ref_inode, + struct binderfs_device __user *userp, + struct binderfs_device *req) +{ + int minor, ret; + struct dentry *dentry, *root; + struct binder_device *device; + char *name = NULL; + struct inode *inode = NULL; + struct super_block *sb = ref_inode->i_sb; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + /* Reserve new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + if (++info->device_count <= info->mount_opts.max) + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); + return minor; + } + mutex_unlock(&binderfs_minors_mutex); + + ret = -ENOMEM; + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + goto err; + + inode = new_inode(sb); + if (!inode) + goto err; + + inode->i_ino = minor + INODE_OFFSET; + simple_inode_init_ts(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + name = kstrdup(req->name, GFP_KERNEL); + if (!name) + goto err; + + refcount_set(&device->ref, 1); + device->binderfs_inode = inode; + device->context.binder_context_mgr_uid = INVALID_UID; + device->context.name = name; + device->miscdev.name = name; + device->miscdev.minor = minor; + mutex_init(&device->context.context_mgr_node_lock); + + req->major = MAJOR(binderfs_dev); + req->minor = minor; + + if (userp && copy_to_user(userp, req, sizeof(*req))) { + ret = -EFAULT; + goto err; + } + + root = sb->s_root; + dentry = simple_start_creating(root, name); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err; + } + inode->i_private = device; + d_make_persistent(dentry, inode); + fsnotify_create(root->d_inode, dentry); + simple_done_creating(dentry); + + binder_add_device(device); + + return 0; + +err: + kfree(name); + kfree(device); + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_free(&binderfs_minors, minor); + mutex_unlock(&binderfs_minors_mutex); + iput(inode); + + return ret; +} + +/** + * binder_ctl_ioctl - handle binder device node allocation requests + * @file: The file pointer for the binder-control device node. + * @cmd: The ioctl command. + * @arg: The ioctl argument. + * + * The request handler for the binder-control device. All requests operate on + * the binderfs mount the binder-control device resides in: + * - BINDER_CTL_ADD + * Allocate a new binder device. + * + * Return: %0 on success, negative errno on failure. + */ +static long binder_ctl_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + struct inode *inode = file_inode(file); + struct binderfs_device __user *device = (struct binderfs_device __user *)arg; + struct binderfs_device device_req; + + switch (cmd) { + case BINDER_CTL_ADD: + ret = copy_from_user(&device_req, device, sizeof(device_req)); + if (ret) { + ret = -EFAULT; + break; + } + + ret = binderfs_binder_device_create(inode, device, &device_req); + break; + default: + break; + } + + return ret; +} + +static void binderfs_evict_inode(struct inode *inode) +{ + struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_SB(inode->i_sb); + + clear_inode(inode); + + if (!S_ISCHR(inode->i_mode) || !device) + return; + + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_free(&binderfs_minors, device->miscdev.minor); + mutex_unlock(&binderfs_minors_mutex); + + if (refcount_dec_and_test(&device->ref)) { + binder_remove_device(device); + kfree(device->context.name); + kfree(device); + } +} + +static int binderfs_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + int opt; + struct binderfs_mount_opts *ctx = fc->fs_private; + struct fs_parse_result result; + + opt = fs_parse(fc, binderfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_max: + if (result.uint_32 > BINDERFS_MAX_MINOR) + return invalfc(fc, "Bad value for '%s'", param->key); + + ctx->max = result.uint_32; + break; + case Opt_stats_mode: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ctx->stats_mode = result.uint_32; + break; + default: + return invalfc(fc, "Unsupported parameter '%s'", param->key); + } + + return 0; +} + +static int binderfs_fs_context_reconfigure(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx = fc->fs_private; + struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb); + + if (info->mount_opts.stats_mode != ctx->stats_mode) + return invalfc(fc, "Binderfs stats mode cannot be changed during a remount"); + + info->mount_opts.stats_mode = ctx->stats_mode; + info->mount_opts.max = ctx->max; + return 0; +} + +static int binderfs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info = BINDERFS_SB(root->d_sb); + + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + + switch (info->mount_opts.stats_mode) { + case binderfs_stats_mode_unset: + break; + case binderfs_stats_mode_global: + seq_printf(seq, ",stats=global"); + break; + } + + return 0; +} + +static const struct super_operations binderfs_super_ops = { + .evict_inode = binderfs_evict_inode, + .show_options = binderfs_show_options, + .statfs = simple_statfs, +}; + +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + + return info->control_dentry == dentry; +} + +static int binderfs_rename(struct mnt_idmap *idmap, + struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) + return -EPERM; + + return simple_rename(idmap, old_dir, old_dentry, new_dir, + new_dentry, flags); +} + +static int binderfs_unlink(struct inode *dir, struct dentry *dentry) +{ + if (is_binderfs_control_device(dentry)) + return -EPERM; + + return simple_unlink(dir, dentry); +} + +static const struct file_operations binder_ctl_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = binder_ctl_ioctl, + .compat_ioctl = binder_ctl_ioctl, + .llseek = noop_llseek, +}; + +/** + * binderfs_binder_ctl_create - create a new binder-control device + * @sb: super block of the binderfs mount + * + * This function creates a new binder-control device node in the binderfs mount + * referred to by @sb. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_ctl_create(struct super_block *sb) +{ + int minor, ret; + struct dentry *dentry; + struct binder_device *device; + struct inode *inode = NULL; + struct dentry *root = sb->s_root; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto out; + + /* Reserve a new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); + mutex_unlock(&binderfs_minors_mutex); + if (minor < 0) { + ret = minor; + goto out; + } + + inode->i_ino = SECOND_INODE; + simple_inode_init_ts(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_ctl_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + refcount_set(&device->ref, 1); + device->binderfs_inode = inode; + device->miscdev.minor = minor; + + dentry = d_alloc_name(root, "binder-control"); + if (!dentry) + goto out; + + inode->i_private = device; + info->control_dentry = dentry; + d_make_persistent(dentry, inode); + dput(dentry); + + return 0; + +out: + kfree(device); + iput(inode); + + return ret; +} + +static const struct inode_operations binderfs_dir_inode_operations = { + .lookup = simple_lookup, + .rename = binderfs_rename, + .unlink = binderfs_unlink, +}; + +static struct inode *binderfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret; + + ret = new_inode(sb); + if (ret) { + ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); + ret->i_mode = mode; + simple_inode_init_ts(ret); + } + return ret; +} + +struct dentry *binderfs_create_file(struct dentry *parent, const char *name, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) + return dentry; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFREG | 0444); + if (!new_inode) { + simple_done_creating(dentry); + return ERR_PTR(-ENOMEM); + } + + new_inode->i_fop = fops; + new_inode->i_private = data; + d_make_persistent(dentry, new_inode); + fsnotify_create(parent_inode, dentry); + simple_done_creating(dentry); + return dentry; // borrowed +} + +static struct dentry *binderfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) + return dentry; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); + if (!new_inode) { + simple_done_creating(dentry); + return ERR_PTR(-ENOMEM); + } + + new_inode->i_fop = &simple_dir_operations; + new_inode->i_op = &simple_dir_inode_operations; + + set_nlink(new_inode, 2); + d_make_persistent(dentry, new_inode); + inc_nlink(parent_inode); + fsnotify_mkdir(parent_inode, dentry); + simple_done_creating(dentry); + return dentry; +} + +static int binder_features_show(struct seq_file *m, void *unused) +{ + bool *feature = m->private; + + seq_printf(m, "%d\n", *feature); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(binder_features); + +static int init_binder_features(struct super_block *sb) +{ + struct dentry *dentry, *dir; + + dir = binderfs_create_dir(sb->s_root, "features"); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + dentry = binderfs_create_file(dir, "oneway_spam_detection", + &binder_features_fops, + &binder_features.oneway_spam_detection); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + dentry = binderfs_create_file(dir, "extended_error", + &binder_features_fops, + &binder_features.extended_error); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + dentry = binderfs_create_file(dir, "freeze_notification", + &binder_features_fops, + &binder_features.freeze_notification); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + dentry = binderfs_create_file(dir, "transaction_report", + &binder_features_fops, + &binder_features.transaction_report); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + return 0; +} + +static int init_binder_logs(struct super_block *sb) +{ + struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; + const struct binder_debugfs_entry *db_entry; + struct binderfs_info *info; + int ret = 0; + + binder_logs_root_dir = binderfs_create_dir(sb->s_root, + "binder_logs"); + if (IS_ERR(binder_logs_root_dir)) { + ret = PTR_ERR(binder_logs_root_dir); + goto out; + } + + binder_for_each_debugfs_entry(db_entry) { + dentry = binderfs_create_file(binder_logs_root_dir, + db_entry->name, + db_entry->fops, + db_entry->data); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + } + + proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); + if (IS_ERR(proc_log_dir)) { + ret = PTR_ERR(proc_log_dir); + goto out; + } + info = sb->s_fs_info; + info->proc_log_dir = proc_log_dir; + +out: + return ret; +} + +static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) +{ + int ret; + struct binderfs_info *info; + struct binderfs_mount_opts *ctx = fc->fs_private; + struct inode *inode = NULL; + struct binderfs_device device_info = {}; + const char *name; + size_t len; + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + + /* + * The binderfs filesystem can be mounted by userns root in a + * non-initial userns. By default such mounts have the SB_I_NODEV flag + * set in s_iflags to prevent security issues where userns root can + * just create random device nodes via mknod() since it owns the + * filesystem mount. But binderfs does not allow to create any files + * including devices nodes. The only way to create binder devices nodes + * is through the binder-control device which userns root is explicitly + * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both + * necessary and safe. + */ + sb->s_iflags &= ~SB_I_NODEV; + sb->s_iflags |= SB_I_NOEXEC; + sb->s_magic = BINDERFS_SUPER_MAGIC; + sb->s_op = &binderfs_super_ops; + sb->s_time_gran = 1; + + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); + + info->root_gid = make_kgid(sb->s_user_ns, 0); + if (!gid_valid(info->root_gid)) + info->root_gid = GLOBAL_ROOT_GID; + info->root_uid = make_kuid(sb->s_user_ns, 0); + if (!uid_valid(info->root_uid)) + info->root_uid = GLOBAL_ROOT_UID; + info->mount_opts.max = ctx->max; + info->mount_opts.stats_mode = ctx->stats_mode; + + inode = new_inode(sb); + if (!inode) + return -ENOMEM; + + inode->i_ino = FIRST_INODE; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | 0755; + simple_inode_init_ts(inode); + inode->i_op = &binderfs_dir_inode_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + ret = binderfs_binder_ctl_create(sb); + if (ret) + return ret; + + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + strscpy(device_info.name, name, len + 1); + ret = binderfs_binder_device_create(inode, NULL, &device_info); + if (ret) + return ret; + name += len; + if (*name == ',') + name++; + } + + ret = init_binder_features(sb); + if (ret) + return ret; + + if (info->mount_opts.stats_mode == binderfs_stats_mode_global) + return init_binder_logs(sb); + + return 0; +} + +static int binderfs_fs_context_get_tree(struct fs_context *fc) +{ + return get_tree_nodev(fc, binderfs_fill_super); +} + +static void binderfs_fs_context_free(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx = fc->fs_private; + + kfree(ctx); +} + +static const struct fs_context_operations binderfs_fs_context_ops = { + .free = binderfs_fs_context_free, + .get_tree = binderfs_fs_context_get_tree, + .parse_param = binderfs_fs_context_parse_param, + .reconfigure = binderfs_fs_context_reconfigure, +}; + +static int binderfs_init_fs_context(struct fs_context *fc) +{ + struct binderfs_mount_opts *ctx; + + ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->max = BINDERFS_MAX_MINOR; + ctx->stats_mode = binderfs_stats_mode_unset; + + fc->fs_private = ctx; + fc->ops = &binderfs_fs_context_ops; + + return 0; +} + +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + /* + * During inode eviction struct binderfs_info is needed. + * So first wipe the super_block then free struct binderfs_info. + */ + kill_anon_super(sb); + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); +} + +static struct file_system_type binder_fs_type = { + .name = "binder", + .init_fs_context = binderfs_init_fs_context, + .parameters = binderfs_fs_parameters, + .kill_sb = binderfs_kill_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +int __init init_binderfs(void) +{ + int ret; + const char *name; + size_t len; + + /* Verify that the default binderfs device names are valid. */ + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + if (len > BINDERFS_MAX_NAME) + return -E2BIG; + name += len; + if (*name == ',') + name++; + } + + /* Allocate new major number for binderfs. */ + ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, + "binder"); + if (ret) + return ret; + + ret = register_filesystem(&binder_fs_type); + if (ret) { + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); + return ret; + } + + return ret; +} diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h new file mode 100644 index 000000000000..c7299ce8b374 --- /dev/null +++ b/drivers/android/dbitmap.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2024 Google LLC + * + * dbitmap - dynamically sized bitmap library. + * + * Used by the binder driver to optimize the allocation of the smallest + * available descriptor ID. Each bit in the bitmap represents the state + * of an ID. + * + * A dbitmap can grow or shrink as needed. This part has been designed + * considering that users might need to briefly release their locks in + * order to allocate memory for the new bitmap. These operations then, + * are verified to determine if the grow or shrink is sill valid. + * + * This library does not provide protection against concurrent access + * by itself. Binder uses the proc->outer_lock for this purpose. + */ + +#ifndef _LINUX_DBITMAP_H +#define _LINUX_DBITMAP_H +#include <linux/bitmap.h> + +#define NBITS_MIN BITS_PER_TYPE(unsigned long) + +struct dbitmap { + unsigned int nbits; + unsigned long *map; +}; + +static inline int dbitmap_enabled(struct dbitmap *dmap) +{ + return !!dmap->nbits; +} + +static inline void dbitmap_free(struct dbitmap *dmap) +{ + dmap->nbits = 0; + kfree(dmap->map); + dmap->map = NULL; +} + +/* Returns the nbits that a dbitmap can shrink to, 0 if not possible. */ +static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) +{ + unsigned int bit; + + if (dmap->nbits <= NBITS_MIN) + return 0; + + /* + * Determine if the bitmap can shrink based on the position of + * its last set bit. If the bit is within the first quarter of + * the bitmap then shrinking is possible. In this case, the + * bitmap should shrink to half its current size. + */ + bit = find_last_bit(dmap->map, dmap->nbits); + if (bit < (dmap->nbits >> 2)) + return dmap->nbits >> 1; + + /* find_last_bit() returns dmap->nbits when no bits are set. */ + if (bit == dmap->nbits) + return NBITS_MIN; + + return 0; +} + +/* Replace the internal bitmap with a new one of different size */ +static inline void +dbitmap_replace(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) +{ + bitmap_copy(new, dmap->map, min(dmap->nbits, nbits)); + kfree(dmap->map); + dmap->map = new; + dmap->nbits = nbits; +} + +static inline void +dbitmap_shrink(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) +{ + if (!new) + return; + + /* + * Verify that shrinking to @nbits is still possible. The @new + * bitmap might have been allocated without locks, so this call + * could now be outdated. In this case, free @new and move on. + */ + if (!dbitmap_enabled(dmap) || dbitmap_shrink_nbits(dmap) != nbits) { + kfree(new); + return; + } + + dbitmap_replace(dmap, new, nbits); +} + +/* Returns the nbits that a dbitmap can grow to. */ +static inline unsigned int dbitmap_grow_nbits(struct dbitmap *dmap) +{ + return dmap->nbits << 1; +} + +static inline void +dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) +{ + /* + * Verify that growing to @nbits is still possible. The @new + * bitmap might have been allocated without locks, so this call + * could now be outdated. In this case, free @new and move on. + */ + if (!dbitmap_enabled(dmap) || nbits <= dmap->nbits) { + kfree(new); + return; + } + + /* + * Check for ENOMEM after confirming the grow operation is still + * required. This ensures we only disable the dbitmap when it's + * necessary. Once the dbitmap is disabled, binder will fallback + * to slow_desc_lookup_olocked(). + */ + if (!new) { + dbitmap_free(dmap); + return; + } + + dbitmap_replace(dmap, new, nbits); +} + +/* + * Finds and sets the next zero bit in the bitmap. Upon success @bit + * is populated with the index and 0 is returned. Otherwise, -ENOSPC + * is returned to indicate that a dbitmap_grow() is needed. + */ +static inline int +dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, + unsigned long *bit) +{ + unsigned long n; + + n = find_next_zero_bit(dmap->map, dmap->nbits, offset); + if (n == dmap->nbits) + return -ENOSPC; + + *bit = n; + set_bit(n, dmap->map); + + return 0; +} + +static inline void +dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) +{ + clear_bit(bit, dmap->map); +} + +static inline int dbitmap_init(struct dbitmap *dmap) +{ + dmap->map = bitmap_zalloc(NBITS_MIN, GFP_KERNEL); + if (!dmap->map) { + dmap->nbits = 0; + return -ENOMEM; + } + + dmap->nbits = NBITS_MIN; + + return 0; +} +#endif diff --git a/drivers/android/tests/.kunitconfig b/drivers/android/tests/.kunitconfig new file mode 100644 index 000000000000..39b76bab9d9a --- /dev/null +++ b/drivers/android/tests/.kunitconfig @@ -0,0 +1,7 @@ +# +# Copyright 2025 Google LLC. +# + +CONFIG_KUNIT=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST=y diff --git a/drivers/android/tests/Makefile b/drivers/android/tests/Makefile new file mode 100644 index 000000000000..27268418eb03 --- /dev/null +++ b/drivers/android/tests/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright 2025 Google LLC. +# + +obj-$(CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST) += binder_alloc_kunit.o diff --git a/drivers/android/tests/binder_alloc_kunit.c b/drivers/android/tests/binder_alloc_kunit.c new file mode 100644 index 000000000000..7f9cc003bbe3 --- /dev/null +++ b/drivers/android/tests/binder_alloc_kunit.c @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test cases for binder allocator code. + * + * Copyright 2025 Google LLC. + * Author: Tiffany Yang <ynaffit@google.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/anon_inodes.h> +#include <linux/err.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/seq_buf.h> +#include <linux/sizes.h> + +#include "../binder_alloc.h" +#include "../binder_internal.h" + +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); + +#define BINDER_MMAP_SIZE SZ_128K + +#define BUFFER_NUM 5 +#define BUFFER_MIN_SIZE (PAGE_SIZE / 8) + +#define FREESEQ_BUFLEN ((3 * BUFFER_NUM) + 1) + +#define ALIGN_TYPE_STRLEN (12) + +#define ALIGNMENTS_BUFLEN (((ALIGN_TYPE_STRLEN + 6) * BUFFER_NUM) + 1) + +#define PRINT_ALL_CASES (0) + +/* 5^5 alignment combinations * 2 places to share pages * 5! free sequences */ +#define TOTAL_EXHAUSTIVE_CASES (3125 * 2 * 120) + +/** + * enum buf_end_align_type - Page alignment of a buffer + * end with regard to the end of the previous buffer. + * + * In the pictures below, buf2 refers to the buffer we + * are aligning. buf1 refers to previous buffer by addr. + * Symbol [ means the start of a buffer, ] means the end + * of a buffer, and | means page boundaries. + */ +enum buf_end_align_type { + /** + * @SAME_PAGE_UNALIGNED: The end of this buffer is on + * the same page as the end of the previous buffer and + * is not page aligned. Examples: + * buf1 ][ buf2 ][ ... + * buf1 ]|[ buf2 ][ ... + */ + SAME_PAGE_UNALIGNED = 0, + /** + * @SAME_PAGE_ALIGNED: When the end of the previous buffer + * is not page aligned, the end of this buffer is on the + * same page as the end of the previous buffer and is page + * aligned. When the previous buffer is page aligned, the + * end of this buffer is aligned to the next page boundary. + * Examples: + * buf1 ][ buf2 ]| ... + * buf1 ]|[ buf2 ]| ... + */ + SAME_PAGE_ALIGNED, + /** + * @NEXT_PAGE_UNALIGNED: The end of this buffer is on + * the page next to the end of the previous buffer and + * is not page aligned. Examples: + * buf1 ][ buf2 | buf2 ][ ... + * buf1 ]|[ buf2 | buf2 ][ ... + */ + NEXT_PAGE_UNALIGNED, + /** + * @NEXT_PAGE_ALIGNED: The end of this buffer is on + * the page next to the end of the previous buffer and + * is page aligned. Examples: + * buf1 ][ buf2 | buf2 ]| ... + * buf1 ]|[ buf2 | buf2 ]| ... + */ + NEXT_PAGE_ALIGNED, + /** + * @NEXT_NEXT_UNALIGNED: The end of this buffer is on + * the page that follows the page after the end of the + * previous buffer and is not page aligned. Examples: + * buf1 ][ buf2 | buf2 | buf2 ][ ... + * buf1 ]|[ buf2 | buf2 | buf2 ][ ... + */ + NEXT_NEXT_UNALIGNED, + /** + * @LOOP_END: The number of enum values in &buf_end_align_type. + * It is used for controlling loop termination. + */ + LOOP_END, +}; + +static const char *const buf_end_align_type_strs[LOOP_END] = { + [SAME_PAGE_UNALIGNED] = "SP_UNALIGNED", + [SAME_PAGE_ALIGNED] = " SP_ALIGNED ", + [NEXT_PAGE_UNALIGNED] = "NP_UNALIGNED", + [NEXT_PAGE_ALIGNED] = " NP_ALIGNED ", + [NEXT_NEXT_UNALIGNED] = "NN_UNALIGNED", +}; + +struct binder_alloc_test_case_info { + char alignments[ALIGNMENTS_BUFLEN]; + struct seq_buf alignments_sb; + size_t *buffer_sizes; + int *free_sequence; + bool front_pages; +}; + +static void stringify_free_seq(struct kunit *test, int *seq, struct seq_buf *sb) +{ + int i; + + for (i = 0; i < BUFFER_NUM; i++) + seq_buf_printf(sb, "[%d]", seq[i]); + + KUNIT_EXPECT_FALSE(test, seq_buf_has_overflowed(sb)); +} + +static void stringify_alignments(struct kunit *test, int *alignments, + struct seq_buf *sb) +{ + int i; + + for (i = 0; i < BUFFER_NUM; i++) + seq_buf_printf(sb, "[ %d:%s ]", i, + buf_end_align_type_strs[alignments[i]]); + + KUNIT_EXPECT_FALSE(test, seq_buf_has_overflowed(sb)); +} + +static bool check_buffer_pages_allocated(struct kunit *test, + struct binder_alloc *alloc, + struct binder_buffer *buffer, + size_t size) +{ + unsigned long page_addr; + unsigned long end; + int page_index; + + end = PAGE_ALIGN(buffer->user_data + size); + page_addr = buffer->user_data; + for (; page_addr < end; page_addr += PAGE_SIZE) { + page_index = (page_addr - alloc->vm_start) / PAGE_SIZE; + if (!alloc->pages[page_index] || + !list_empty(page_to_lru(alloc->pages[page_index]))) { + kunit_err(test, "expect alloc but is %s at page index %d\n", + alloc->pages[page_index] ? + "lru" : "free", page_index); + return false; + } + } + return true; +} + +static unsigned long binder_alloc_test_alloc_buf(struct kunit *test, + struct binder_alloc *alloc, + struct binder_buffer *buffers[], + size_t *sizes, int *seq) +{ + unsigned long failures = 0; + int i; + + for (i = 0; i < BUFFER_NUM; i++) { + buffers[i] = binder_alloc_new_buf(alloc, sizes[i], 0, 0, 0); + if (IS_ERR(buffers[i]) || + !check_buffer_pages_allocated(test, alloc, buffers[i], sizes[i])) + failures++; + } + + return failures; +} + +static unsigned long binder_alloc_test_free_buf(struct kunit *test, + struct binder_alloc *alloc, + struct binder_buffer *buffers[], + size_t *sizes, int *seq, size_t end) +{ + unsigned long failures = 0; + int i; + + for (i = 0; i < BUFFER_NUM; i++) + binder_alloc_free_buf(alloc, buffers[seq[i]]); + + for (i = 0; i <= (end - 1) / PAGE_SIZE; i++) { + if (list_empty(page_to_lru(alloc->pages[i]))) { + kunit_err(test, "expect lru but is %s at page index %d\n", + alloc->pages[i] ? "alloc" : "free", i); + failures++; + } + } + + return failures; +} + +static unsigned long binder_alloc_test_free_page(struct kunit *test, + struct binder_alloc *alloc) +{ + unsigned long failures = 0; + unsigned long count; + int i; + + while ((count = list_lru_count(alloc->freelist))) { + list_lru_walk(alloc->freelist, binder_alloc_free_page, + NULL, count); + } + + for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) { + if (alloc->pages[i]) { + kunit_err(test, "expect free but is %s at page index %d\n", + list_empty(page_to_lru(alloc->pages[i])) ? + "alloc" : "lru", i); + failures++; + } + } + + return failures; +} + +/* Executes one full test run for the given test case. */ +static bool binder_alloc_test_alloc_free(struct kunit *test, + struct binder_alloc *alloc, + struct binder_alloc_test_case_info *tc, + size_t end) +{ + unsigned long pages = PAGE_ALIGN(end) / PAGE_SIZE; + struct binder_buffer *buffers[BUFFER_NUM]; + unsigned long failures; + bool failed = false; + + failures = binder_alloc_test_alloc_buf(test, alloc, buffers, + tc->buffer_sizes, + tc->free_sequence); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "Initial allocation failed: %lu/%u buffers with errors", + failures, BUFFER_NUM); + + failures = binder_alloc_test_free_buf(test, alloc, buffers, + tc->buffer_sizes, + tc->free_sequence, end); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "Initial buffers not freed correctly: %lu/%lu pages not on lru list", + failures, pages); + + /* Allocate from lru. */ + failures = binder_alloc_test_alloc_buf(test, alloc, buffers, + tc->buffer_sizes, + tc->free_sequence); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "Reallocation failed: %lu/%u buffers with errors", + failures, BUFFER_NUM); + + failures = list_lru_count(alloc->freelist); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "lru list should be empty after reallocation but still has %lu pages", + failures); + + failures = binder_alloc_test_free_buf(test, alloc, buffers, + tc->buffer_sizes, + tc->free_sequence, end); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "Reallocated buffers not freed correctly: %lu/%lu pages not on lru list", + failures, pages); + + failures = binder_alloc_test_free_page(test, alloc); + failed = failed || failures; + KUNIT_EXPECT_EQ_MSG(test, failures, 0, + "Failed to clean up allocated pages: %lu/%lu pages still installed", + failures, (alloc->buffer_size / PAGE_SIZE)); + + return failed; +} + +static bool is_dup(int *seq, int index, int val) +{ + int i; + + for (i = 0; i < index; i++) { + if (seq[i] == val) + return true; + } + return false; +} + +/* Generate BUFFER_NUM factorial free orders. */ +static void permute_frees(struct kunit *test, struct binder_alloc *alloc, + struct binder_alloc_test_case_info *tc, + unsigned long *runs, unsigned long *failures, + int index, size_t end) +{ + bool case_failed; + int i; + + if (index == BUFFER_NUM) { + DECLARE_SEQ_BUF(freeseq_sb, FREESEQ_BUFLEN); + + case_failed = binder_alloc_test_alloc_free(test, alloc, tc, end); + *runs += 1; + *failures += case_failed; + + if (case_failed || PRINT_ALL_CASES) { + stringify_free_seq(test, tc->free_sequence, + &freeseq_sb); + kunit_err(test, "case %lu: [%s] | %s - %s - %s", *runs, + case_failed ? "FAILED" : "PASSED", + tc->front_pages ? "front" : "back ", + seq_buf_str(&tc->alignments_sb), + seq_buf_str(&freeseq_sb)); + } + + return; + } + for (i = 0; i < BUFFER_NUM; i++) { + if (is_dup(tc->free_sequence, index, i)) + continue; + tc->free_sequence[index] = i; + permute_frees(test, alloc, tc, runs, failures, index + 1, end); + } +} + +static void gen_buf_sizes(struct kunit *test, + struct binder_alloc *alloc, + struct binder_alloc_test_case_info *tc, + size_t *end_offset, unsigned long *runs, + unsigned long *failures) +{ + size_t last_offset, offset = 0; + size_t front_sizes[BUFFER_NUM]; + size_t back_sizes[BUFFER_NUM]; + int seq[BUFFER_NUM] = {0}; + int i; + + tc->free_sequence = seq; + for (i = 0; i < BUFFER_NUM; i++) { + last_offset = offset; + offset = end_offset[i]; + front_sizes[i] = offset - last_offset; + back_sizes[BUFFER_NUM - i - 1] = front_sizes[i]; + } + back_sizes[0] += alloc->buffer_size - end_offset[BUFFER_NUM - 1]; + + /* + * Buffers share the first or last few pages. + * Only BUFFER_NUM - 1 buffer sizes are adjustable since + * we need one giant buffer before getting to the last page. + */ + tc->front_pages = true; + tc->buffer_sizes = front_sizes; + permute_frees(test, alloc, tc, runs, failures, 0, + end_offset[BUFFER_NUM - 1]); + + tc->front_pages = false; + tc->buffer_sizes = back_sizes; + permute_frees(test, alloc, tc, runs, failures, 0, alloc->buffer_size); +} + +static void gen_buf_offsets(struct kunit *test, struct binder_alloc *alloc, + size_t *end_offset, int *alignments, + unsigned long *runs, unsigned long *failures, + int index) +{ + size_t end, prev; + int align; + + if (index == BUFFER_NUM) { + struct binder_alloc_test_case_info tc = {0}; + + seq_buf_init(&tc.alignments_sb, tc.alignments, + ALIGNMENTS_BUFLEN); + stringify_alignments(test, alignments, &tc.alignments_sb); + + gen_buf_sizes(test, alloc, &tc, end_offset, runs, failures); + return; + } + prev = index == 0 ? 0 : end_offset[index - 1]; + end = prev; + + BUILD_BUG_ON(BUFFER_MIN_SIZE * BUFFER_NUM >= PAGE_SIZE); + + for (align = SAME_PAGE_UNALIGNED; align < LOOP_END; align++) { + if (align % 2) + end = ALIGN(end, PAGE_SIZE); + else + end += BUFFER_MIN_SIZE; + end_offset[index] = end; + alignments[index] = align; + gen_buf_offsets(test, alloc, end_offset, alignments, runs, + failures, index + 1); + } +} + +struct binder_alloc_test { + struct binder_alloc alloc; + struct list_lru binder_test_freelist; + struct file *filp; + unsigned long mmap_uaddr; +}; + +static void binder_alloc_test_init_freelist(struct kunit *test) +{ + struct binder_alloc_test *priv = test->priv; + + KUNIT_EXPECT_PTR_EQ(test, priv->alloc.freelist, + &priv->binder_test_freelist); +} + +static void binder_alloc_test_mmap(struct kunit *test) +{ + struct binder_alloc_test *priv = test->priv; + struct binder_alloc *alloc = &priv->alloc; + struct binder_buffer *buf; + struct rb_node *n; + + KUNIT_EXPECT_EQ(test, alloc->mapped, true); + KUNIT_EXPECT_EQ(test, alloc->buffer_size, BINDER_MMAP_SIZE); + + n = rb_first(&alloc->allocated_buffers); + KUNIT_EXPECT_PTR_EQ(test, n, NULL); + + n = rb_first(&alloc->free_buffers); + buf = rb_entry(n, struct binder_buffer, rb_node); + KUNIT_EXPECT_EQ(test, binder_alloc_buffer_size(alloc, buf), + BINDER_MMAP_SIZE); + KUNIT_EXPECT_TRUE(test, list_is_last(&buf->entry, &alloc->buffers)); +} + +/** + * binder_alloc_exhaustive_test() - Exhaustively test alloc and free of buffer pages. + * @test: The test context object. + * + * Allocate BUFFER_NUM buffers to cover all page alignment cases, + * then free them in all orders possible. Check that pages are + * correctly allocated, put onto lru when buffers are freed, and + * are freed when binder_alloc_free_page() is called. + */ +static void binder_alloc_exhaustive_test(struct kunit *test) +{ + struct binder_alloc_test *priv = test->priv; + size_t end_offset[BUFFER_NUM]; + int alignments[BUFFER_NUM]; + unsigned long failures = 0; + unsigned long runs = 0; + + gen_buf_offsets(test, &priv->alloc, end_offset, alignments, &runs, + &failures, 0); + + KUNIT_EXPECT_EQ(test, runs, TOTAL_EXHAUSTIVE_CASES); + KUNIT_EXPECT_EQ(test, failures, 0); +} + +/* ===== End test cases ===== */ + +static void binder_alloc_test_vma_close(struct vm_area_struct *vma) +{ + struct binder_alloc *alloc = vma->vm_private_data; + + binder_alloc_vma_close(alloc); +} + +static const struct vm_operations_struct binder_alloc_test_vm_ops = { + .close = binder_alloc_test_vma_close, + .fault = binder_vm_fault, +}; + +static int binder_alloc_test_mmap_handler(struct file *filp, + struct vm_area_struct *vma) +{ + struct binder_alloc *alloc = filp->private_data; + + vm_flags_mod(vma, VM_DONTCOPY | VM_MIXEDMAP, VM_MAYWRITE); + + vma->vm_ops = &binder_alloc_test_vm_ops; + vma->vm_private_data = alloc; + + return binder_alloc_mmap_handler(alloc, vma); +} + +static const struct file_operations binder_alloc_test_fops = { + .mmap = binder_alloc_test_mmap_handler, +}; + +static int binder_alloc_test_init(struct kunit *test) +{ + struct binder_alloc_test *priv; + int ret; + + priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + test->priv = priv; + + ret = list_lru_init(&priv->binder_test_freelist); + if (ret) { + kunit_err(test, "Failed to initialize test freelist\n"); + return ret; + } + + /* __binder_alloc_init requires mm to be attached */ + ret = kunit_attach_mm(); + if (ret) { + kunit_err(test, "Failed to attach mm\n"); + return ret; + } + __binder_alloc_init(&priv->alloc, &priv->binder_test_freelist); + + priv->filp = anon_inode_getfile("binder_alloc_kunit", + &binder_alloc_test_fops, &priv->alloc, + O_RDWR | O_CLOEXEC); + if (IS_ERR_OR_NULL(priv->filp)) { + kunit_err(test, "Failed to open binder alloc test driver file\n"); + return priv->filp ? PTR_ERR(priv->filp) : -ENOMEM; + } + + priv->mmap_uaddr = kunit_vm_mmap(test, priv->filp, 0, BINDER_MMAP_SIZE, + PROT_READ, MAP_PRIVATE | MAP_NORESERVE, + 0); + if (!priv->mmap_uaddr) { + kunit_err(test, "Could not map the test's transaction memory\n"); + return -ENOMEM; + } + + return 0; +} + +static void binder_alloc_test_exit(struct kunit *test) +{ + struct binder_alloc_test *priv = test->priv; + + /* Close the backing file to make sure binder_alloc_vma_close runs */ + if (!IS_ERR_OR_NULL(priv->filp)) + fput(priv->filp); + + if (priv->alloc.mm) + binder_alloc_deferred_release(&priv->alloc); + + /* Make sure freelist is empty */ + KUNIT_EXPECT_EQ(test, list_lru_count(&priv->binder_test_freelist), 0); + list_lru_destroy(&priv->binder_test_freelist); +} + +static struct kunit_case binder_alloc_test_cases[] = { + KUNIT_CASE(binder_alloc_test_init_freelist), + KUNIT_CASE(binder_alloc_test_mmap), + KUNIT_CASE_SLOW(binder_alloc_exhaustive_test), + {} +}; + +static struct kunit_suite binder_alloc_test_suite = { + .name = "binder_alloc", + .test_cases = binder_alloc_test_cases, + .init = binder_alloc_test_init, + .exit = binder_alloc_test_exit, +}; + +kunit_test_suite(binder_alloc_test_suite); + +MODULE_AUTHOR("Tiffany Yang <ynaffit@google.com>"); +MODULE_DESCRIPTION("Binder Alloc KUnit tests"); +MODULE_LICENSE("GPL"); |
