summaryrefslogtreecommitdiff
path: root/drivers/android
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/android')
-rw-r--r--drivers/android/Kconfig65
-rw-r--r--drivers/android/Makefile7
-rw-r--r--drivers/android/binder.c7160
-rw-r--r--drivers/android/binder/Makefile9
-rw-r--r--drivers/android/binder/allocation.rs602
-rw-r--r--drivers/android/binder/context.rs180
-rw-r--r--drivers/android/binder/deferred_close.rs204
-rw-r--r--drivers/android/binder/defs.rs182
-rw-r--r--drivers/android/binder/error.rs100
-rw-r--r--drivers/android/binder/freeze.rs398
-rw-r--r--drivers/android/binder/node.rs1131
-rw-r--r--drivers/android/binder/node/wrapper.rs78
-rw-r--r--drivers/android/binder/page_range.rs734
-rw-r--r--drivers/android/binder/page_range_helper.c24
-rw-r--r--drivers/android/binder/page_range_helper.h15
-rw-r--r--drivers/android/binder/process.rs1745
-rw-r--r--drivers/android/binder/range_alloc/array.rs251
-rw-r--r--drivers/android/binder/range_alloc/mod.rs329
-rw-r--r--drivers/android/binder/range_alloc/tree.rs488
-rw-r--r--drivers/android/binder/rust_binder.h23
-rw-r--r--drivers/android/binder/rust_binder_events.c59
-rw-r--r--drivers/android/binder/rust_binder_events.h36
-rw-r--r--drivers/android/binder/rust_binder_internal.h87
-rw-r--r--drivers/android/binder/rust_binder_main.rs611
-rw-r--r--drivers/android/binder/rust_binderfs.c795
-rw-r--r--drivers/android/binder/stats.rs89
-rw-r--r--drivers/android/binder/thread.rs1596
-rw-r--r--drivers/android/binder/trace.rs16
-rw-r--r--drivers/android/binder/transaction.rs456
-rw-r--r--drivers/android/binder_alloc.c1410
-rw-r--r--drivers/android/binder_alloc.h189
-rw-r--r--drivers/android/binder_internal.h597
-rw-r--r--drivers/android/binder_netlink.c32
-rw-r--r--drivers/android/binder_netlink.h21
-rw-r--r--drivers/android/binder_trace.h448
-rw-r--r--drivers/android/binderfs.c786
-rw-r--r--drivers/android/dbitmap.h169
-rw-r--r--drivers/android/tests/.kunitconfig7
-rw-r--r--drivers/android/tests/Makefile6
-rw-r--r--drivers/android/tests/binder_alloc_kunit.c572
40 files changed, 21707 insertions, 0 deletions
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
new file mode 100644
index 000000000000..e2e402c9d175
--- /dev/null
+++ b/drivers/android/Kconfig
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Android"
+
+config ANDROID_BINDER_IPC
+ bool "Android Binder IPC Driver"
+ depends on MMU
+ depends on NET
+ default n
+ help
+ Binder is used in Android for both communication between processes,
+ and remote method invocation.
+
+ This means one Android process can call a method/routine in another
+ Android process, using Binder to identify, invoke and pass arguments
+ between said processes.
+
+config ANDROID_BINDER_IPC_RUST
+ bool "Rust version of Android Binder IPC Driver"
+ depends on RUST && MMU && !ANDROID_BINDER_IPC
+ help
+ This enables the Rust implementation of the Binder driver.
+
+ Binder is used in Android for both communication between processes,
+ and remote method invocation.
+
+ This means one Android process can call a method/routine in another
+ Android process, using Binder to identify, invoke and pass arguments
+ between said processes.
+
+config ANDROID_BINDERFS
+ bool "Android Binderfs filesystem"
+ depends on ANDROID_BINDER_IPC
+ default n
+ help
+ Binderfs is a pseudo-filesystem for the Android Binder IPC driver
+ which can be mounted per-ipc namespace allowing to run multiple
+ instances of Android.
+ Each binderfs mount initially only contains a binder-control device.
+ It can be used to dynamically allocate new binder IPC devices via
+ ioctls.
+
+config ANDROID_BINDER_DEVICES
+ string "Android Binder devices"
+ depends on ANDROID_BINDER_IPC || ANDROID_BINDER_IPC_RUST
+ default "binder,hwbinder,vndbinder"
+ help
+ Default value for the binder.devices parameter.
+
+ The binder.devices parameter is a comma-separated list of strings
+ that specifies the names of the binder device nodes that will be
+ created. Each binder device has its own context manager, and is
+ therefore logically separated from the other devices.
+
+config ANDROID_BINDER_ALLOC_KUNIT_TEST
+ tristate "KUnit Tests for Android Binder Alloc" if !KUNIT_ALL_TESTS
+ depends on ANDROID_BINDER_IPC && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This feature builds the binder alloc KUnit tests.
+
+ Each test case runs using a pared-down binder_alloc struct and
+ test-specific freelist, which allows this KUnit module to be loaded
+ for testing without interfering with a running system.
+
+endmenu
diff --git a/drivers/android/Makefile b/drivers/android/Makefile
new file mode 100644
index 000000000000..e0c650d3898e
--- /dev/null
+++ b/drivers/android/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y += -I$(src) # needed for trace events
+
+obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o
+obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o binder_netlink.o
+obj-$(CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST) += tests/
+obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += binder/
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
new file mode 100644
index 000000000000..535fc881c8da
--- /dev/null
+++ b/drivers/android/binder.c
@@ -0,0 +1,7160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* binder.c
+ *
+ * Android IPC Subsystem
+ *
+ * Copyright (C) 2007-2008 Google, Inc.
+ */
+
+/*
+ * Locking overview
+ *
+ * There are 3 main spinlocks which must be acquired in the
+ * order shown:
+ *
+ * 1) proc->outer_lock : protects binder_ref
+ * binder_proc_lock() and binder_proc_unlock() are
+ * used to acq/rel.
+ * 2) node->lock : protects most fields of binder_node.
+ * binder_node_lock() and binder_node_unlock() are
+ * used to acq/rel
+ * 3) proc->inner_lock : protects the thread and node lists
+ * (proc->threads, proc->waiting_threads, proc->nodes)
+ * and all todo lists associated with the binder_proc
+ * (proc->todo, thread->todo, proc->delivered_death and
+ * node->async_todo), as well as thread->transaction_stack
+ * binder_inner_proc_lock() and binder_inner_proc_unlock()
+ * are used to acq/rel
+ *
+ * Any lock under procA must never be nested under any lock at the same
+ * level or below on procB.
+ *
+ * Functions that require a lock held on entry indicate which lock
+ * in the suffix of the function name:
+ *
+ * foo_olocked() : requires node->outer_lock
+ * foo_nlocked() : requires node->lock
+ * foo_ilocked() : requires proc->inner_lock
+ * foo_oilocked(): requires proc->outer_lock and proc->inner_lock
+ * foo_nilocked(): requires node->lock and proc->inner_lock
+ * ...
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/freezer.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/poll.h>
+#include <linux/debugfs.h>
+#include <linux/rbtree.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/pid_namespace.h>
+#include <linux/security.h>
+#include <linux/spinlock.h>
+#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
+#include <linux/task_work.h>
+#include <linux/sizes.h>
+#include <linux/ktime.h>
+
+#include <kunit/visibility.h>
+
+#include <uapi/linux/android/binder.h>
+
+#include <linux/cacheflush.h>
+
+#include "binder_netlink.h"
+#include "binder_internal.h"
+#include "binder_trace.h"
+
+static HLIST_HEAD(binder_deferred_list);
+static DEFINE_MUTEX(binder_deferred_lock);
+
+static HLIST_HEAD(binder_devices);
+static DEFINE_SPINLOCK(binder_devices_lock);
+
+static HLIST_HEAD(binder_procs);
+static DEFINE_MUTEX(binder_procs_lock);
+
+static HLIST_HEAD(binder_dead_nodes);
+static DEFINE_SPINLOCK(binder_dead_nodes_lock);
+
+static struct dentry *binder_debugfs_dir_entry_root;
+static struct dentry *binder_debugfs_dir_entry_proc;
+static atomic_t binder_last_id;
+
+static int proc_show(struct seq_file *m, void *unused);
+DEFINE_SHOW_ATTRIBUTE(proc);
+
+#define FORBIDDEN_MMAP_FLAGS (VM_WRITE)
+
+enum {
+ BINDER_DEBUG_USER_ERROR = 1U << 0,
+ BINDER_DEBUG_FAILED_TRANSACTION = 1U << 1,
+ BINDER_DEBUG_DEAD_TRANSACTION = 1U << 2,
+ BINDER_DEBUG_OPEN_CLOSE = 1U << 3,
+ BINDER_DEBUG_DEAD_BINDER = 1U << 4,
+ BINDER_DEBUG_DEATH_NOTIFICATION = 1U << 5,
+ BINDER_DEBUG_READ_WRITE = 1U << 6,
+ BINDER_DEBUG_USER_REFS = 1U << 7,
+ BINDER_DEBUG_THREADS = 1U << 8,
+ BINDER_DEBUG_TRANSACTION = 1U << 9,
+ BINDER_DEBUG_TRANSACTION_COMPLETE = 1U << 10,
+ BINDER_DEBUG_FREE_BUFFER = 1U << 11,
+ BINDER_DEBUG_INTERNAL_REFS = 1U << 12,
+ BINDER_DEBUG_PRIORITY_CAP = 1U << 13,
+ BINDER_DEBUG_SPINLOCKS = 1U << 14,
+};
+static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR |
+ BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION;
+module_param_named(debug_mask, binder_debug_mask, uint, 0644);
+
+char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
+module_param_named(devices, binder_devices_param, charp, 0444);
+
+static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
+static int binder_stop_on_user_error;
+
+static int binder_set_stop_on_user_error(const char *val,
+ const struct kernel_param *kp)
+{
+ int ret;
+
+ ret = param_set_int(val, kp);
+ if (binder_stop_on_user_error < 2)
+ wake_up(&binder_user_error_wait);
+ return ret;
+}
+module_param_call(stop_on_user_error, binder_set_stop_on_user_error,
+ param_get_int, &binder_stop_on_user_error, 0644);
+
+static __printf(2, 3) void binder_debug(int mask, const char *format, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (binder_debug_mask & mask) {
+ va_start(args, format);
+ vaf.va = &args;
+ vaf.fmt = format;
+ pr_info_ratelimited("%pV", &vaf);
+ va_end(args);
+ }
+}
+
+#define binder_txn_error(x...) \
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, x)
+
+static __printf(1, 2) void binder_user_error(const char *format, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (binder_debug_mask & BINDER_DEBUG_USER_ERROR) {
+ va_start(args, format);
+ vaf.va = &args;
+ vaf.fmt = format;
+ pr_info_ratelimited("%pV", &vaf);
+ va_end(args);
+ }
+
+ if (binder_stop_on_user_error)
+ binder_stop_on_user_error = 2;
+}
+
+#define binder_set_extended_error(ee, _id, _command, _param) \
+ do { \
+ (ee)->id = _id; \
+ (ee)->command = _command; \
+ (ee)->param = _param; \
+ } while (0)
+
+#define to_flat_binder_object(hdr) \
+ container_of(hdr, struct flat_binder_object, hdr)
+
+#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr)
+
+#define to_binder_buffer_object(hdr) \
+ container_of(hdr, struct binder_buffer_object, hdr)
+
+#define to_binder_fd_array_object(hdr) \
+ container_of(hdr, struct binder_fd_array_object, hdr)
+
+static struct binder_stats binder_stats;
+
+static inline void binder_stats_deleted(enum binder_stat_types type)
+{
+ atomic_inc(&binder_stats.obj_deleted[type]);
+}
+
+static inline void binder_stats_created(enum binder_stat_types type)
+{
+ atomic_inc(&binder_stats.obj_created[type]);
+}
+
+struct binder_transaction_log_entry {
+ int debug_id;
+ int debug_id_done;
+ int call_type;
+ int from_proc;
+ int from_thread;
+ int target_handle;
+ int to_proc;
+ int to_thread;
+ int to_node;
+ int data_size;
+ int offsets_size;
+ int return_error_line;
+ uint32_t return_error;
+ uint32_t return_error_param;
+ char context_name[BINDERFS_MAX_NAME + 1];
+};
+
+struct binder_transaction_log {
+ atomic_t cur;
+ bool full;
+ struct binder_transaction_log_entry entry[32];
+};
+
+static struct binder_transaction_log binder_transaction_log;
+static struct binder_transaction_log binder_transaction_log_failed;
+
+static struct binder_transaction_log_entry *binder_transaction_log_add(
+ struct binder_transaction_log *log)
+{
+ struct binder_transaction_log_entry *e;
+ unsigned int cur = atomic_inc_return(&log->cur);
+
+ if (cur >= ARRAY_SIZE(log->entry))
+ log->full = true;
+ e = &log->entry[cur % ARRAY_SIZE(log->entry)];
+ WRITE_ONCE(e->debug_id_done, 0);
+ /*
+ * write-barrier to synchronize access to e->debug_id_done.
+ * We make sure the initialized 0 value is seen before
+ * memset() other fields are zeroed by memset.
+ */
+ smp_wmb();
+ memset(e, 0, sizeof(*e));
+ return e;
+}
+
+enum binder_deferred_state {
+ BINDER_DEFERRED_FLUSH = 0x01,
+ BINDER_DEFERRED_RELEASE = 0x02,
+};
+
+enum {
+ BINDER_LOOPER_STATE_REGISTERED = 0x01,
+ BINDER_LOOPER_STATE_ENTERED = 0x02,
+ BINDER_LOOPER_STATE_EXITED = 0x04,
+ BINDER_LOOPER_STATE_INVALID = 0x08,
+ BINDER_LOOPER_STATE_WAITING = 0x10,
+ BINDER_LOOPER_STATE_POLL = 0x20,
+};
+
+/**
+ * binder_proc_lock() - Acquire outer lock for given binder_proc
+ * @proc: struct binder_proc to acquire
+ *
+ * Acquires proc->outer_lock. Used to protect binder_ref
+ * structures associated with the given proc.
+ */
+#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__)
+static void
+_binder_proc_lock(struct binder_proc *proc, int line)
+ __acquires(&proc->outer_lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_lock(&proc->outer_lock);
+}
+
+/**
+ * binder_proc_unlock() - Release outer lock for given binder_proc
+ * @proc: struct binder_proc to acquire
+ *
+ * Release lock acquired via binder_proc_lock()
+ */
+#define binder_proc_unlock(proc) _binder_proc_unlock(proc, __LINE__)
+static void
+_binder_proc_unlock(struct binder_proc *proc, int line)
+ __releases(&proc->outer_lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_unlock(&proc->outer_lock);
+}
+
+/**
+ * binder_inner_proc_lock() - Acquire inner lock for given binder_proc
+ * @proc: struct binder_proc to acquire
+ *
+ * Acquires proc->inner_lock. Used to protect todo lists
+ */
+#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__)
+static void
+_binder_inner_proc_lock(struct binder_proc *proc, int line)
+ __acquires(&proc->inner_lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_lock(&proc->inner_lock);
+}
+
+/**
+ * binder_inner_proc_unlock() - Release inner lock for given binder_proc
+ * @proc: struct binder_proc to acquire
+ *
+ * Release lock acquired via binder_inner_proc_lock()
+ */
+#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__)
+static void
+_binder_inner_proc_unlock(struct binder_proc *proc, int line)
+ __releases(&proc->inner_lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_unlock(&proc->inner_lock);
+}
+
+/**
+ * binder_node_lock() - Acquire spinlock for given binder_node
+ * @node: struct binder_node to acquire
+ *
+ * Acquires node->lock. Used to protect binder_node fields
+ */
+#define binder_node_lock(node) _binder_node_lock(node, __LINE__)
+static void
+_binder_node_lock(struct binder_node *node, int line)
+ __acquires(&node->lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_lock(&node->lock);
+}
+
+/**
+ * binder_node_unlock() - Release spinlock for given binder_proc
+ * @node: struct binder_node to acquire
+ *
+ * Release lock acquired via binder_node_lock()
+ */
+#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__)
+static void
+_binder_node_unlock(struct binder_node *node, int line)
+ __releases(&node->lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_unlock(&node->lock);
+}
+
+/**
+ * binder_node_inner_lock() - Acquire node and inner locks
+ * @node: struct binder_node to acquire
+ *
+ * Acquires node->lock. If node->proc also acquires
+ * proc->inner_lock. Used to protect binder_node fields
+ */
+#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__)
+static void
+_binder_node_inner_lock(struct binder_node *node, int line)
+ __acquires(&node->lock) __acquires(&node->proc->inner_lock)
+{
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ spin_lock(&node->lock);
+ if (node->proc)
+ binder_inner_proc_lock(node->proc);
+ else
+ /* annotation for sparse */
+ __acquire(&node->proc->inner_lock);
+}
+
+/**
+ * binder_node_inner_unlock() - Release node and inner locks
+ * @node: struct binder_node to acquire
+ *
+ * Release lock acquired via binder_node_lock()
+ */
+#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__)
+static void
+_binder_node_inner_unlock(struct binder_node *node, int line)
+ __releases(&node->lock) __releases(&node->proc->inner_lock)
+{
+ struct binder_proc *proc = node->proc;
+
+ binder_debug(BINDER_DEBUG_SPINLOCKS,
+ "%s: line=%d\n", __func__, line);
+ if (proc)
+ binder_inner_proc_unlock(proc);
+ else
+ /* annotation for sparse */
+ __release(&node->proc->inner_lock);
+ spin_unlock(&node->lock);
+}
+
+static bool binder_worklist_empty_ilocked(struct list_head *list)
+{
+ return list_empty(list);
+}
+
+/**
+ * binder_worklist_empty() - Check if no items on the work list
+ * @proc: binder_proc associated with list
+ * @list: list to check
+ *
+ * Return: true if there are no items on list, else false
+ */
+static bool binder_worklist_empty(struct binder_proc *proc,
+ struct list_head *list)
+{
+ bool ret;
+
+ binder_inner_proc_lock(proc);
+ ret = binder_worklist_empty_ilocked(list);
+ binder_inner_proc_unlock(proc);
+ return ret;
+}
+
+/**
+ * binder_enqueue_work_ilocked() - Add an item to the work list
+ * @work: struct binder_work to add to list
+ * @target_list: list to add work to
+ *
+ * Adds the work to the specified list. Asserts that work
+ * is not already on a list.
+ *
+ * Requires the proc->inner_lock to be held.
+ */
+static void
+binder_enqueue_work_ilocked(struct binder_work *work,
+ struct list_head *target_list)
+{
+ BUG_ON(target_list == NULL);
+ BUG_ON(work->entry.next && !list_empty(&work->entry));
+ list_add_tail(&work->entry, target_list);
+}
+
+/**
+ * binder_enqueue_deferred_thread_work_ilocked() - Add deferred thread work
+ * @thread: thread to queue work to
+ * @work: struct binder_work to add to list
+ *
+ * Adds the work to the todo list of the thread. Doesn't set the process_todo
+ * flag, which means that (if it wasn't already set) the thread will go to
+ * sleep without handling this work when it calls read.
+ *
+ * Requires the proc->inner_lock to be held.
+ */
+static void
+binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread,
+ struct binder_work *work)
+{
+ WARN_ON(!list_empty(&thread->waiting_thread_node));
+ binder_enqueue_work_ilocked(work, &thread->todo);
+}
+
+/**
+ * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list
+ * @thread: thread to queue work to
+ * @work: struct binder_work to add to list
+ *
+ * Adds the work to the todo list of the thread, and enables processing
+ * of the todo queue.
+ *
+ * Requires the proc->inner_lock to be held.
+ */
+static void
+binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
+ struct binder_work *work)
+{
+ WARN_ON(!list_empty(&thread->waiting_thread_node));
+ binder_enqueue_work_ilocked(work, &thread->todo);
+
+ /* (e)poll-based threads require an explicit wakeup signal when
+ * queuing their own work; they rely on these events to consume
+ * messages without I/O block. Without it, threads risk waiting
+ * indefinitely without handling the work.
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+ thread->pid == current->pid && !thread->process_todo)
+ wake_up_interruptible_sync(&thread->wait);
+
+ thread->process_todo = true;
+}
+
+/**
+ * binder_enqueue_thread_work() - Add an item to the thread work list
+ * @thread: thread to queue work to
+ * @work: struct binder_work to add to list
+ *
+ * Adds the work to the todo list of the thread, and enables processing
+ * of the todo queue.
+ */
+static void
+binder_enqueue_thread_work(struct binder_thread *thread,
+ struct binder_work *work)
+{
+ binder_inner_proc_lock(thread->proc);
+ binder_enqueue_thread_work_ilocked(thread, work);
+ binder_inner_proc_unlock(thread->proc);
+}
+
+static void
+binder_dequeue_work_ilocked(struct binder_work *work)
+{
+ list_del_init(&work->entry);
+}
+
+/**
+ * binder_dequeue_work() - Removes an item from the work list
+ * @proc: binder_proc associated with list
+ * @work: struct binder_work to remove from list
+ *
+ * Removes the specified work item from whatever list it is on.
+ * Can safely be called if work is not on any list.
+ */
+static void
+binder_dequeue_work(struct binder_proc *proc, struct binder_work *work)
+{
+ binder_inner_proc_lock(proc);
+ binder_dequeue_work_ilocked(work);
+ binder_inner_proc_unlock(proc);
+}
+
+static struct binder_work *binder_dequeue_work_head_ilocked(
+ struct list_head *list)
+{
+ struct binder_work *w;
+
+ w = list_first_entry_or_null(list, struct binder_work, entry);
+ if (w)
+ list_del_init(&w->entry);
+ return w;
+}
+
+static void
+binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer);
+static void binder_free_thread(struct binder_thread *thread);
+static void binder_free_proc(struct binder_proc *proc);
+static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
+
+static bool binder_has_work_ilocked(struct binder_thread *thread,
+ bool do_proc_work)
+{
+ return thread->process_todo ||
+ thread->looper_need_return ||
+ (do_proc_work &&
+ !binder_worklist_empty_ilocked(&thread->proc->todo));
+}
+
+static bool binder_has_work(struct binder_thread *thread, bool do_proc_work)
+{
+ bool has_work;
+
+ binder_inner_proc_lock(thread->proc);
+ has_work = binder_has_work_ilocked(thread, do_proc_work);
+ binder_inner_proc_unlock(thread->proc);
+
+ return has_work;
+}
+
+static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread)
+{
+ return !thread->transaction_stack &&
+ binder_worklist_empty_ilocked(&thread->todo);
+}
+
+static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc,
+ bool sync)
+{
+ struct rb_node *n;
+ struct binder_thread *thread;
+
+ for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
+ thread = rb_entry(n, struct binder_thread, rb_node);
+ if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+ binder_available_for_proc_work_ilocked(thread)) {
+ if (sync)
+ wake_up_interruptible_sync(&thread->wait);
+ else
+ wake_up_interruptible(&thread->wait);
+ }
+ }
+}
+
+/**
+ * binder_select_thread_ilocked() - selects a thread for doing proc work.
+ * @proc: process to select a thread from
+ *
+ * Note that calling this function moves the thread off the waiting_threads
+ * list, so it can only be woken up by the caller of this function, or a
+ * signal. Therefore, callers *should* always wake up the thread this function
+ * returns.
+ *
+ * Return: If there's a thread currently waiting for process work,
+ * returns that thread. Otherwise returns NULL.
+ */
+static struct binder_thread *
+binder_select_thread_ilocked(struct binder_proc *proc)
+{
+ struct binder_thread *thread;
+
+ assert_spin_locked(&proc->inner_lock);
+ thread = list_first_entry_or_null(&proc->waiting_threads,
+ struct binder_thread,
+ waiting_thread_node);
+
+ if (thread)
+ list_del_init(&thread->waiting_thread_node);
+
+ return thread;
+}
+
+/**
+ * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work.
+ * @proc: process to wake up a thread in
+ * @thread: specific thread to wake-up (may be NULL)
+ * @sync: whether to do a synchronous wake-up
+ *
+ * This function wakes up a thread in the @proc process.
+ * The caller may provide a specific thread to wake-up in
+ * the @thread parameter. If @thread is NULL, this function
+ * will wake up threads that have called poll().
+ *
+ * Note that for this function to work as expected, callers
+ * should first call binder_select_thread() to find a thread
+ * to handle the work (if they don't have a thread already),
+ * and pass the result into the @thread parameter.
+ */
+static void binder_wakeup_thread_ilocked(struct binder_proc *proc,
+ struct binder_thread *thread,
+ bool sync)
+{
+ assert_spin_locked(&proc->inner_lock);
+
+ if (thread) {
+ if (sync)
+ wake_up_interruptible_sync(&thread->wait);
+ else
+ wake_up_interruptible(&thread->wait);
+ return;
+ }
+
+ /* Didn't find a thread waiting for proc work; this can happen
+ * in two scenarios:
+ * 1. All threads are busy handling transactions
+ * In that case, one of those threads should call back into
+ * the kernel driver soon and pick up this work.
+ * 2. Threads are using the (e)poll interface, in which case
+ * they may be blocked on the waitqueue without having been
+ * added to waiting_threads. For this case, we just iterate
+ * over all threads not handling transaction work, and
+ * wake them all up. We wake all because we don't know whether
+ * a thread that called into (e)poll is handling non-binder
+ * work currently.
+ */
+ binder_wakeup_poll_threads_ilocked(proc, sync);
+}
+
+static void binder_wakeup_proc_ilocked(struct binder_proc *proc)
+{
+ struct binder_thread *thread = binder_select_thread_ilocked(proc);
+
+ binder_wakeup_thread_ilocked(proc, thread, /* sync = */false);
+}
+
+static void binder_set_nice(long nice)
+{
+ long min_nice;
+
+ if (can_nice(current, nice)) {
+ set_user_nice(current, nice);
+ return;
+ }
+ min_nice = rlimit_to_nice(rlimit(RLIMIT_NICE));
+ binder_debug(BINDER_DEBUG_PRIORITY_CAP,
+ "%d: nice value %ld not allowed use %ld instead\n",
+ current->pid, nice, min_nice);
+ set_user_nice(current, min_nice);
+ if (min_nice <= MAX_NICE)
+ return;
+ binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
+}
+
+static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc,
+ binder_uintptr_t ptr)
+{
+ struct rb_node *n = proc->nodes.rb_node;
+ struct binder_node *node;
+
+ assert_spin_locked(&proc->inner_lock);
+
+ while (n) {
+ node = rb_entry(n, struct binder_node, rb_node);
+
+ if (ptr < node->ptr)
+ n = n->rb_left;
+ else if (ptr > node->ptr)
+ n = n->rb_right;
+ else {
+ /*
+ * take an implicit weak reference
+ * to ensure node stays alive until
+ * call to binder_put_node()
+ */
+ binder_inc_node_tmpref_ilocked(node);
+ return node;
+ }
+ }
+ return NULL;
+}
+
+static struct binder_node *binder_get_node(struct binder_proc *proc,
+ binder_uintptr_t ptr)
+{
+ struct binder_node *node;
+
+ binder_inner_proc_lock(proc);
+ node = binder_get_node_ilocked(proc, ptr);
+ binder_inner_proc_unlock(proc);
+ return node;
+}
+
+static struct binder_node *binder_init_node_ilocked(
+ struct binder_proc *proc,
+ struct binder_node *new_node,
+ struct flat_binder_object *fp)
+{
+ struct rb_node **p = &proc->nodes.rb_node;
+ struct rb_node *parent = NULL;
+ struct binder_node *node;
+ binder_uintptr_t ptr = fp ? fp->binder : 0;
+ binder_uintptr_t cookie = fp ? fp->cookie : 0;
+ __u32 flags = fp ? fp->flags : 0;
+
+ assert_spin_locked(&proc->inner_lock);
+
+ while (*p) {
+
+ parent = *p;
+ node = rb_entry(parent, struct binder_node, rb_node);
+
+ if (ptr < node->ptr)
+ p = &(*p)->rb_left;
+ else if (ptr > node->ptr)
+ p = &(*p)->rb_right;
+ else {
+ /*
+ * A matching node is already in
+ * the rb tree. Abandon the init
+ * and return it.
+ */
+ binder_inc_node_tmpref_ilocked(node);
+ return node;
+ }
+ }
+ node = new_node;
+ binder_stats_created(BINDER_STAT_NODE);
+ node->tmp_refs++;
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color(&node->rb_node, &proc->nodes);
+ node->debug_id = atomic_inc_return(&binder_last_id);
+ node->proc = proc;
+ node->ptr = ptr;
+ node->cookie = cookie;
+ node->work.type = BINDER_WORK_NODE;
+ node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
+ node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
+ node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX);
+ spin_lock_init(&node->lock);
+ INIT_LIST_HEAD(&node->work.entry);
+ INIT_LIST_HEAD(&node->async_todo);
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "%d:%d node %d u%016llx c%016llx created\n",
+ proc->pid, current->pid, node->debug_id,
+ (u64)node->ptr, (u64)node->cookie);
+
+ return node;
+}
+
+static struct binder_node *binder_new_node(struct binder_proc *proc,
+ struct flat_binder_object *fp)
+{
+ struct binder_node *node;
+ struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+ if (!new_node)
+ return NULL;
+ binder_inner_proc_lock(proc);
+ node = binder_init_node_ilocked(proc, new_node, fp);
+ binder_inner_proc_unlock(proc);
+ if (node != new_node)
+ /*
+ * The node was already added by another thread
+ */
+ kfree(new_node);
+
+ return node;
+}
+
+static void binder_free_node(struct binder_node *node)
+{
+ kfree(node);
+ binder_stats_deleted(BINDER_STAT_NODE);
+}
+
+static int binder_inc_node_nilocked(struct binder_node *node, int strong,
+ int internal,
+ struct list_head *target_list)
+{
+ struct binder_proc *proc = node->proc;
+
+ assert_spin_locked(&node->lock);
+ if (proc)
+ assert_spin_locked(&proc->inner_lock);
+ if (strong) {
+ if (internal) {
+ if (target_list == NULL &&
+ node->internal_strong_refs == 0 &&
+ !(node->proc &&
+ node == node->proc->context->binder_context_mgr_node &&
+ node->has_strong_ref)) {
+ pr_err("invalid inc strong node for %d\n",
+ node->debug_id);
+ return -EINVAL;
+ }
+ node->internal_strong_refs++;
+ } else
+ node->local_strong_refs++;
+ if (!node->has_strong_ref && target_list) {
+ struct binder_thread *thread = container_of(target_list,
+ struct binder_thread, todo);
+ binder_dequeue_work_ilocked(&node->work);
+ BUG_ON(&thread->todo != target_list);
+ binder_enqueue_deferred_thread_work_ilocked(thread,
+ &node->work);
+ }
+ } else {
+ if (!internal)
+ node->local_weak_refs++;
+ if (!node->has_weak_ref && target_list && list_empty(&node->work.entry))
+ binder_enqueue_work_ilocked(&node->work, target_list);
+ }
+ return 0;
+}
+
+static int binder_inc_node(struct binder_node *node, int strong, int internal,
+ struct list_head *target_list)
+{
+ int ret;
+
+ binder_node_inner_lock(node);
+ ret = binder_inc_node_nilocked(node, strong, internal, target_list);
+ binder_node_inner_unlock(node);
+
+ return ret;
+}
+
+static bool binder_dec_node_nilocked(struct binder_node *node,
+ int strong, int internal)
+{
+ struct binder_proc *proc = node->proc;
+
+ assert_spin_locked(&node->lock);
+ if (proc)
+ assert_spin_locked(&proc->inner_lock);
+ if (strong) {
+ if (internal)
+ node->internal_strong_refs--;
+ else
+ node->local_strong_refs--;
+ if (node->local_strong_refs || node->internal_strong_refs)
+ return false;
+ } else {
+ if (!internal)
+ node->local_weak_refs--;
+ if (node->local_weak_refs || node->tmp_refs ||
+ !hlist_empty(&node->refs))
+ return false;
+ }
+
+ if (proc && (node->has_strong_ref || node->has_weak_ref)) {
+ if (list_empty(&node->work.entry)) {
+ binder_enqueue_work_ilocked(&node->work, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ }
+ } else {
+ if (hlist_empty(&node->refs) && !node->local_strong_refs &&
+ !node->local_weak_refs && !node->tmp_refs) {
+ if (proc) {
+ binder_dequeue_work_ilocked(&node->work);
+ rb_erase(&node->rb_node, &proc->nodes);
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "refless node %d deleted\n",
+ node->debug_id);
+ } else {
+ BUG_ON(!list_empty(&node->work.entry));
+ spin_lock(&binder_dead_nodes_lock);
+ /*
+ * tmp_refs could have changed so
+ * check it again
+ */
+ if (node->tmp_refs) {
+ spin_unlock(&binder_dead_nodes_lock);
+ return false;
+ }
+ hlist_del(&node->dead_node);
+ spin_unlock(&binder_dead_nodes_lock);
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "dead node %d deleted\n",
+ node->debug_id);
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+static void binder_dec_node(struct binder_node *node, int strong, int internal)
+{
+ bool free_node;
+
+ binder_node_inner_lock(node);
+ free_node = binder_dec_node_nilocked(node, strong, internal);
+ binder_node_inner_unlock(node);
+ if (free_node)
+ binder_free_node(node);
+}
+
+static void binder_inc_node_tmpref_ilocked(struct binder_node *node)
+{
+ /*
+ * No call to binder_inc_node() is needed since we
+ * don't need to inform userspace of any changes to
+ * tmp_refs
+ */
+ node->tmp_refs++;
+}
+
+/**
+ * binder_inc_node_tmpref() - take a temporary reference on node
+ * @node: node to reference
+ *
+ * Take reference on node to prevent the node from being freed
+ * while referenced only by a local variable. The inner lock is
+ * needed to serialize with the node work on the queue (which
+ * isn't needed after the node is dead). If the node is dead
+ * (node->proc is NULL), use binder_dead_nodes_lock to protect
+ * node->tmp_refs against dead-node-only cases where the node
+ * lock cannot be acquired (eg traversing the dead node list to
+ * print nodes)
+ */
+static void binder_inc_node_tmpref(struct binder_node *node)
+{
+ binder_node_lock(node);
+ if (node->proc)
+ binder_inner_proc_lock(node->proc);
+ else
+ spin_lock(&binder_dead_nodes_lock);
+ binder_inc_node_tmpref_ilocked(node);
+ if (node->proc)
+ binder_inner_proc_unlock(node->proc);
+ else
+ spin_unlock(&binder_dead_nodes_lock);
+ binder_node_unlock(node);
+}
+
+/**
+ * binder_dec_node_tmpref() - remove a temporary reference on node
+ * @node: node to reference
+ *
+ * Release temporary reference on node taken via binder_inc_node_tmpref()
+ */
+static void binder_dec_node_tmpref(struct binder_node *node)
+{
+ bool free_node;
+
+ binder_node_inner_lock(node);
+ if (!node->proc)
+ spin_lock(&binder_dead_nodes_lock);
+ else
+ __acquire(&binder_dead_nodes_lock);
+ node->tmp_refs--;
+ BUG_ON(node->tmp_refs < 0);
+ if (!node->proc)
+ spin_unlock(&binder_dead_nodes_lock);
+ else
+ __release(&binder_dead_nodes_lock);
+ /*
+ * Call binder_dec_node() to check if all refcounts are 0
+ * and cleanup is needed. Calling with strong=0 and internal=1
+ * causes no actual reference to be released in binder_dec_node().
+ * If that changes, a change is needed here too.
+ */
+ free_node = binder_dec_node_nilocked(node, 0, 1);
+ binder_node_inner_unlock(node);
+ if (free_node)
+ binder_free_node(node);
+}
+
+static void binder_put_node(struct binder_node *node)
+{
+ binder_dec_node_tmpref(node);
+}
+
+static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc,
+ u32 desc, bool need_strong_ref)
+{
+ struct rb_node *n = proc->refs_by_desc.rb_node;
+ struct binder_ref *ref;
+
+ while (n) {
+ ref = rb_entry(n, struct binder_ref, rb_node_desc);
+
+ if (desc < ref->data.desc) {
+ n = n->rb_left;
+ } else if (desc > ref->data.desc) {
+ n = n->rb_right;
+ } else if (need_strong_ref && !ref->data.strong) {
+ binder_user_error("tried to use weak ref as strong ref\n");
+ return NULL;
+ } else {
+ return ref;
+ }
+ }
+ return NULL;
+}
+
+/* Find the smallest unused descriptor the "slow way" */
+static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset)
+{
+ struct binder_ref *ref;
+ struct rb_node *n;
+ u32 desc;
+
+ desc = offset;
+ for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) {
+ ref = rb_entry(n, struct binder_ref, rb_node_desc);
+ if (ref->data.desc > desc)
+ break;
+ desc = ref->data.desc + 1;
+ }
+
+ return desc;
+}
+
+/*
+ * Find an available reference descriptor ID. The proc->outer_lock might
+ * be released in the process, in which case -EAGAIN is returned and the
+ * @desc should be considered invalid.
+ */
+static int get_ref_desc_olocked(struct binder_proc *proc,
+ struct binder_node *node,
+ u32 *desc)
+{
+ struct dbitmap *dmap = &proc->dmap;
+ unsigned int nbits, offset;
+ unsigned long *new, bit;
+
+ /* 0 is reserved for the context manager */
+ offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1;
+
+ if (!dbitmap_enabled(dmap)) {
+ *desc = slow_desc_lookup_olocked(proc, offset);
+ return 0;
+ }
+
+ if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) {
+ *desc = bit;
+ return 0;
+ }
+
+ /*
+ * The dbitmap is full and needs to grow. The proc->outer_lock
+ * is briefly released to allocate the new bitmap safely.
+ */
+ nbits = dbitmap_grow_nbits(dmap);
+ binder_proc_unlock(proc);
+ new = bitmap_zalloc(nbits, GFP_KERNEL);
+ binder_proc_lock(proc);
+ dbitmap_grow(dmap, new, nbits);
+
+ return -EAGAIN;
+}
+
+/**
+ * binder_get_ref_for_node_olocked() - get the ref associated with given node
+ * @proc: binder_proc that owns the ref
+ * @node: binder_node of target
+ * @new_ref: newly allocated binder_ref to be initialized or %NULL
+ *
+ * Look up the ref for the given node and return it if it exists
+ *
+ * If it doesn't exist and the caller provides a newly allocated
+ * ref, initialize the fields of the newly allocated ref and insert
+ * into the given proc rb_trees and node refs list.
+ *
+ * Return: the ref for node. It is possible that another thread
+ * allocated/initialized the ref first in which case the
+ * returned ref would be different than the passed-in
+ * new_ref. new_ref must be kfree'd by the caller in
+ * this case.
+ */
+static struct binder_ref *binder_get_ref_for_node_olocked(
+ struct binder_proc *proc,
+ struct binder_node *node,
+ struct binder_ref *new_ref)
+{
+ struct binder_ref *ref;
+ struct rb_node *parent;
+ struct rb_node **p;
+ u32 desc;
+
+retry:
+ p = &proc->refs_by_node.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ ref = rb_entry(parent, struct binder_ref, rb_node_node);
+
+ if (node < ref->node)
+ p = &(*p)->rb_left;
+ else if (node > ref->node)
+ p = &(*p)->rb_right;
+ else
+ return ref;
+ }
+ if (!new_ref)
+ return NULL;
+
+ /* might release the proc->outer_lock */
+ if (get_ref_desc_olocked(proc, node, &desc) == -EAGAIN)
+ goto retry;
+
+ binder_stats_created(BINDER_STAT_REF);
+ new_ref->data.debug_id = atomic_inc_return(&binder_last_id);
+ new_ref->proc = proc;
+ new_ref->node = node;
+ rb_link_node(&new_ref->rb_node_node, parent, p);
+ rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node);
+
+ new_ref->data.desc = desc;
+ p = &proc->refs_by_desc.rb_node;
+ while (*p) {
+ parent = *p;
+ ref = rb_entry(parent, struct binder_ref, rb_node_desc);
+
+ if (new_ref->data.desc < ref->data.desc)
+ p = &(*p)->rb_left;
+ else if (new_ref->data.desc > ref->data.desc)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+ rb_link_node(&new_ref->rb_node_desc, parent, p);
+ rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc);
+
+ binder_node_lock(node);
+ hlist_add_head(&new_ref->node_entry, &node->refs);
+
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "%d new ref %d desc %d for node %d\n",
+ proc->pid, new_ref->data.debug_id, new_ref->data.desc,
+ node->debug_id);
+ binder_node_unlock(node);
+ return new_ref;
+}
+
+static void binder_cleanup_ref_olocked(struct binder_ref *ref)
+{
+ struct dbitmap *dmap = &ref->proc->dmap;
+ bool delete_node = false;
+
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "%d delete ref %d desc %d for node %d\n",
+ ref->proc->pid, ref->data.debug_id, ref->data.desc,
+ ref->node->debug_id);
+
+ if (dbitmap_enabled(dmap))
+ dbitmap_clear_bit(dmap, ref->data.desc);
+ rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc);
+ rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node);
+
+ binder_node_inner_lock(ref->node);
+ if (ref->data.strong)
+ binder_dec_node_nilocked(ref->node, 1, 1);
+
+ hlist_del(&ref->node_entry);
+ delete_node = binder_dec_node_nilocked(ref->node, 0, 1);
+ binder_node_inner_unlock(ref->node);
+ /*
+ * Clear ref->node unless we want the caller to free the node
+ */
+ if (!delete_node) {
+ /*
+ * The caller uses ref->node to determine
+ * whether the node needs to be freed. Clear
+ * it since the node is still alive.
+ */
+ ref->node = NULL;
+ }
+
+ if (ref->death) {
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "%d delete ref %d desc %d has death notification\n",
+ ref->proc->pid, ref->data.debug_id,
+ ref->data.desc);
+ binder_dequeue_work(ref->proc, &ref->death->work);
+ binder_stats_deleted(BINDER_STAT_DEATH);
+ }
+
+ if (ref->freeze) {
+ binder_dequeue_work(ref->proc, &ref->freeze->work);
+ binder_stats_deleted(BINDER_STAT_FREEZE);
+ }
+
+ binder_stats_deleted(BINDER_STAT_REF);
+}
+
+/**
+ * binder_inc_ref_olocked() - increment the ref for given handle
+ * @ref: ref to be incremented
+ * @strong: if true, strong increment, else weak
+ * @target_list: list to queue node work on
+ *
+ * Increment the ref. @ref->proc->outer_lock must be held on entry
+ *
+ * Return: 0, if successful, else errno
+ */
+static int binder_inc_ref_olocked(struct binder_ref *ref, int strong,
+ struct list_head *target_list)
+{
+ int ret;
+
+ if (strong) {
+ if (ref->data.strong == 0) {
+ ret = binder_inc_node(ref->node, 1, 1, target_list);
+ if (ret)
+ return ret;
+ }
+ ref->data.strong++;
+ } else {
+ if (ref->data.weak == 0) {
+ ret = binder_inc_node(ref->node, 0, 1, target_list);
+ if (ret)
+ return ret;
+ }
+ ref->data.weak++;
+ }
+ return 0;
+}
+
+/**
+ * binder_dec_ref_olocked() - dec the ref for given handle
+ * @ref: ref to be decremented
+ * @strong: if true, strong decrement, else weak
+ *
+ * Decrement the ref.
+ *
+ * Return: %true if ref is cleaned up and ready to be freed.
+ */
+static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong)
+{
+ if (strong) {
+ if (ref->data.strong == 0) {
+ binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n",
+ ref->proc->pid, ref->data.debug_id,
+ ref->data.desc, ref->data.strong,
+ ref->data.weak);
+ return false;
+ }
+ ref->data.strong--;
+ if (ref->data.strong == 0)
+ binder_dec_node(ref->node, strong, 1);
+ } else {
+ if (ref->data.weak == 0) {
+ binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n",
+ ref->proc->pid, ref->data.debug_id,
+ ref->data.desc, ref->data.strong,
+ ref->data.weak);
+ return false;
+ }
+ ref->data.weak--;
+ }
+ if (ref->data.strong == 0 && ref->data.weak == 0) {
+ binder_cleanup_ref_olocked(ref);
+ return true;
+ }
+ return false;
+}
+
+/**
+ * binder_get_node_from_ref() - get the node from the given proc/desc
+ * @proc: proc containing the ref
+ * @desc: the handle associated with the ref
+ * @need_strong_ref: if true, only return node if ref is strong
+ * @rdata: the id/refcount data for the ref
+ *
+ * Given a proc and ref handle, return the associated binder_node
+ *
+ * Return: a binder_node or NULL if not found or not strong when strong required
+ */
+static struct binder_node *binder_get_node_from_ref(
+ struct binder_proc *proc,
+ u32 desc, bool need_strong_ref,
+ struct binder_ref_data *rdata)
+{
+ struct binder_node *node;
+ struct binder_ref *ref;
+
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, desc, need_strong_ref);
+ if (!ref)
+ goto err_no_ref;
+ node = ref->node;
+ /*
+ * Take an implicit reference on the node to ensure
+ * it stays alive until the call to binder_put_node()
+ */
+ binder_inc_node_tmpref(node);
+ if (rdata)
+ *rdata = ref->data;
+ binder_proc_unlock(proc);
+
+ return node;
+
+err_no_ref:
+ binder_proc_unlock(proc);
+ return NULL;
+}
+
+/**
+ * binder_free_ref() - free the binder_ref
+ * @ref: ref to free
+ *
+ * Free the binder_ref. Free the binder_node indicated by ref->node
+ * (if non-NULL) and the binder_ref_death indicated by ref->death.
+ */
+static void binder_free_ref(struct binder_ref *ref)
+{
+ if (ref->node)
+ binder_free_node(ref->node);
+ kfree(ref->death);
+ kfree(ref->freeze);
+ kfree(ref);
+}
+
+/* shrink descriptor bitmap if needed */
+static void try_shrink_dmap(struct binder_proc *proc)
+{
+ unsigned long *new;
+ int nbits;
+
+ binder_proc_lock(proc);
+ nbits = dbitmap_shrink_nbits(&proc->dmap);
+ binder_proc_unlock(proc);
+
+ if (!nbits)
+ return;
+
+ new = bitmap_zalloc(nbits, GFP_KERNEL);
+ binder_proc_lock(proc);
+ dbitmap_shrink(&proc->dmap, new, nbits);
+ binder_proc_unlock(proc);
+}
+
+/**
+ * binder_update_ref_for_handle() - inc/dec the ref for given handle
+ * @proc: proc containing the ref
+ * @desc: the handle associated with the ref
+ * @increment: true=inc reference, false=dec reference
+ * @strong: true=strong reference, false=weak reference
+ * @rdata: the id/refcount data for the ref
+ *
+ * Given a proc and ref handle, increment or decrement the ref
+ * according to "increment" arg.
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_update_ref_for_handle(struct binder_proc *proc,
+ uint32_t desc, bool increment, bool strong,
+ struct binder_ref_data *rdata)
+{
+ int ret = 0;
+ struct binder_ref *ref;
+ bool delete_ref = false;
+
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, desc, strong);
+ if (!ref) {
+ ret = -EINVAL;
+ goto err_no_ref;
+ }
+ if (increment)
+ ret = binder_inc_ref_olocked(ref, strong, NULL);
+ else
+ delete_ref = binder_dec_ref_olocked(ref, strong);
+
+ if (rdata)
+ *rdata = ref->data;
+ binder_proc_unlock(proc);
+
+ if (delete_ref) {
+ binder_free_ref(ref);
+ try_shrink_dmap(proc);
+ }
+ return ret;
+
+err_no_ref:
+ binder_proc_unlock(proc);
+ return ret;
+}
+
+/**
+ * binder_dec_ref_for_handle() - dec the ref for given handle
+ * @proc: proc containing the ref
+ * @desc: the handle associated with the ref
+ * @strong: true=strong reference, false=weak reference
+ * @rdata: the id/refcount data for the ref
+ *
+ * Just calls binder_update_ref_for_handle() to decrement the ref.
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_dec_ref_for_handle(struct binder_proc *proc,
+ uint32_t desc, bool strong, struct binder_ref_data *rdata)
+{
+ return binder_update_ref_for_handle(proc, desc, false, strong, rdata);
+}
+
+
+/**
+ * binder_inc_ref_for_node() - increment the ref for given proc/node
+ * @proc: proc containing the ref
+ * @node: target node
+ * @strong: true=strong reference, false=weak reference
+ * @target_list: worklist to use if node is incremented
+ * @rdata: the id/refcount data for the ref
+ *
+ * Given a proc and node, increment the ref. Create the ref if it
+ * doesn't already exist
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_inc_ref_for_node(struct binder_proc *proc,
+ struct binder_node *node,
+ bool strong,
+ struct list_head *target_list,
+ struct binder_ref_data *rdata)
+{
+ struct binder_ref *ref;
+ struct binder_ref *new_ref = NULL;
+ int ret = 0;
+
+ binder_proc_lock(proc);
+ ref = binder_get_ref_for_node_olocked(proc, node, NULL);
+ if (!ref) {
+ binder_proc_unlock(proc);
+ new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+ if (!new_ref)
+ return -ENOMEM;
+ binder_proc_lock(proc);
+ ref = binder_get_ref_for_node_olocked(proc, node, new_ref);
+ }
+ ret = binder_inc_ref_olocked(ref, strong, target_list);
+ *rdata = ref->data;
+ if (ret && ref == new_ref) {
+ /*
+ * Cleanup the failed reference here as the target
+ * could now be dead and have already released its
+ * references by now. Calling on the new reference
+ * with strong=0 and a tmp_refs will not decrement
+ * the node. The new_ref gets kfree'd below.
+ */
+ binder_cleanup_ref_olocked(new_ref);
+ ref = NULL;
+ }
+
+ binder_proc_unlock(proc);
+ if (new_ref && ref != new_ref)
+ /*
+ * Another thread created the ref first so
+ * free the one we allocated
+ */
+ kfree(new_ref);
+ return ret;
+}
+
+static void binder_pop_transaction_ilocked(struct binder_thread *target_thread,
+ struct binder_transaction *t)
+{
+ BUG_ON(!target_thread);
+ assert_spin_locked(&target_thread->proc->inner_lock);
+ BUG_ON(target_thread->transaction_stack != t);
+ BUG_ON(target_thread->transaction_stack->from != target_thread);
+ target_thread->transaction_stack =
+ target_thread->transaction_stack->from_parent;
+ t->from = NULL;
+}
+
+/**
+ * binder_thread_dec_tmpref() - decrement thread->tmp_ref
+ * @thread: thread to decrement
+ *
+ * A thread needs to be kept alive while being used to create or
+ * handle a transaction. binder_get_txn_from() is used to safely
+ * extract t->from from a binder_transaction and keep the thread
+ * indicated by t->from from being freed. When done with that
+ * binder_thread, this function is called to decrement the
+ * tmp_ref and free if appropriate (thread has been released
+ * and no transaction being processed by the driver)
+ */
+static void binder_thread_dec_tmpref(struct binder_thread *thread)
+{
+ /*
+ * atomic is used to protect the counter value while
+ * it cannot reach zero or thread->is_dead is false
+ */
+ binder_inner_proc_lock(thread->proc);
+ atomic_dec(&thread->tmp_ref);
+ if (thread->is_dead && !atomic_read(&thread->tmp_ref)) {
+ binder_inner_proc_unlock(thread->proc);
+ binder_free_thread(thread);
+ return;
+ }
+ binder_inner_proc_unlock(thread->proc);
+}
+
+/**
+ * binder_proc_dec_tmpref() - decrement proc->tmp_ref
+ * @proc: proc to decrement
+ *
+ * A binder_proc needs to be kept alive while being used to create or
+ * handle a transaction. proc->tmp_ref is incremented when
+ * creating a new transaction or the binder_proc is currently in-use
+ * by threads that are being released. When done with the binder_proc,
+ * this function is called to decrement the counter and free the
+ * proc if appropriate (proc has been released, all threads have
+ * been released and not currently in-use to process a transaction).
+ */
+static void binder_proc_dec_tmpref(struct binder_proc *proc)
+{
+ binder_inner_proc_lock(proc);
+ proc->tmp_ref--;
+ if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) &&
+ !proc->tmp_ref) {
+ binder_inner_proc_unlock(proc);
+ binder_free_proc(proc);
+ return;
+ }
+ binder_inner_proc_unlock(proc);
+}
+
+/**
+ * binder_get_txn_from() - safely extract the "from" thread in transaction
+ * @t: binder transaction for t->from
+ *
+ * Atomically return the "from" thread and increment the tmp_ref
+ * count for the thread to ensure it stays alive until
+ * binder_thread_dec_tmpref() is called.
+ *
+ * Return: the value of t->from
+ */
+static struct binder_thread *binder_get_txn_from(
+ struct binder_transaction *t)
+{
+ struct binder_thread *from;
+
+ guard(spinlock)(&t->lock);
+ from = t->from;
+ if (from)
+ atomic_inc(&from->tmp_ref);
+ return from;
+}
+
+/**
+ * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock
+ * @t: binder transaction for t->from
+ *
+ * Same as binder_get_txn_from() except it also acquires the proc->inner_lock
+ * to guarantee that the thread cannot be released while operating on it.
+ * The caller must call binder_inner_proc_unlock() to release the inner lock
+ * as well as call binder_dec_thread_txn() to release the reference.
+ *
+ * Return: the value of t->from
+ */
+static struct binder_thread *binder_get_txn_from_and_acq_inner(
+ struct binder_transaction *t)
+ __acquires(&t->from->proc->inner_lock)
+{
+ struct binder_thread *from;
+
+ from = binder_get_txn_from(t);
+ if (!from) {
+ __acquire(&from->proc->inner_lock);
+ return NULL;
+ }
+ binder_inner_proc_lock(from->proc);
+ if (t->from) {
+ BUG_ON(from != t->from);
+ return from;
+ }
+ binder_inner_proc_unlock(from->proc);
+ __acquire(&from->proc->inner_lock);
+ binder_thread_dec_tmpref(from);
+ return NULL;
+}
+
+/**
+ * binder_free_txn_fixups() - free unprocessed fd fixups
+ * @t: binder transaction for t->from
+ *
+ * If the transaction is being torn down prior to being
+ * processed by the target process, free all of the
+ * fd fixups and fput the file structs. It is safe to
+ * call this function after the fixups have been
+ * processed -- in that case, the list will be empty.
+ */
+static void binder_free_txn_fixups(struct binder_transaction *t)
+{
+ struct binder_txn_fd_fixup *fixup, *tmp;
+
+ list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
+ fput(fixup->file);
+ if (fixup->target_fd >= 0)
+ put_unused_fd(fixup->target_fd);
+ list_del(&fixup->fixup_entry);
+ kfree(fixup);
+ }
+}
+
+static void binder_txn_latency_free(struct binder_transaction *t)
+{
+ int from_proc, from_thread, to_proc, to_thread;
+
+ spin_lock(&t->lock);
+ from_proc = t->from ? t->from->proc->pid : 0;
+ from_thread = t->from ? t->from->pid : 0;
+ to_proc = t->to_proc ? t->to_proc->pid : 0;
+ to_thread = t->to_thread ? t->to_thread->pid : 0;
+ spin_unlock(&t->lock);
+
+ trace_binder_txn_latency_free(t, from_proc, from_thread, to_proc, to_thread);
+}
+
+static void binder_free_transaction(struct binder_transaction *t)
+{
+ struct binder_proc *target_proc = t->to_proc;
+
+ if (target_proc) {
+ binder_inner_proc_lock(target_proc);
+ target_proc->outstanding_txns--;
+ if (target_proc->outstanding_txns < 0)
+ pr_warn("%s: Unexpected outstanding_txns %d\n",
+ __func__, target_proc->outstanding_txns);
+ if (!target_proc->outstanding_txns && target_proc->is_frozen)
+ wake_up_interruptible_all(&target_proc->freeze_wait);
+ if (t->buffer)
+ t->buffer->transaction = NULL;
+ binder_inner_proc_unlock(target_proc);
+ }
+ if (trace_binder_txn_latency_free_enabled())
+ binder_txn_latency_free(t);
+ /*
+ * If the transaction has no target_proc, then
+ * t->buffer->transaction has already been cleared.
+ */
+ binder_free_txn_fixups(t);
+ kfree(t);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION);
+}
+
+static void binder_send_failed_reply(struct binder_transaction *t,
+ uint32_t error_code)
+{
+ struct binder_thread *target_thread;
+ struct binder_transaction *next;
+
+ BUG_ON(t->flags & TF_ONE_WAY);
+ while (1) {
+ target_thread = binder_get_txn_from_and_acq_inner(t);
+ if (target_thread) {
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "send failed reply for transaction %d to %d:%d\n",
+ t->debug_id,
+ target_thread->proc->pid,
+ target_thread->pid);
+
+ binder_pop_transaction_ilocked(target_thread, t);
+ if (target_thread->reply_error.cmd == BR_OK) {
+ target_thread->reply_error.cmd = error_code;
+ binder_enqueue_thread_work_ilocked(
+ target_thread,
+ &target_thread->reply_error.work);
+ wake_up_interruptible(&target_thread->wait);
+ } else {
+ /*
+ * Cannot get here for normal operation, but
+ * we can if multiple synchronous transactions
+ * are sent without blocking for responses.
+ * Just ignore the 2nd error in this case.
+ */
+ pr_warn("Unexpected reply error: %u\n",
+ target_thread->reply_error.cmd);
+ }
+ binder_inner_proc_unlock(target_thread->proc);
+ binder_thread_dec_tmpref(target_thread);
+ binder_free_transaction(t);
+ return;
+ }
+ __release(&target_thread->proc->inner_lock);
+ next = t->from_parent;
+
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "send failed reply for transaction %d, target dead\n",
+ t->debug_id);
+
+ binder_free_transaction(t);
+ if (next == NULL) {
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "reply failed, no target thread at root\n");
+ return;
+ }
+ t = next;
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "reply failed, no target thread -- retry %d\n",
+ t->debug_id);
+ }
+}
+
+/**
+ * binder_cleanup_transaction() - cleans up undelivered transaction
+ * @t: transaction that needs to be cleaned up
+ * @reason: reason the transaction wasn't delivered
+ * @error_code: error to return to caller (if synchronous call)
+ */
+static void binder_cleanup_transaction(struct binder_transaction *t,
+ const char *reason,
+ uint32_t error_code)
+{
+ if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) {
+ binder_send_failed_reply(t, error_code);
+ } else {
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "undelivered transaction %d, %s\n",
+ t->debug_id, reason);
+ binder_free_transaction(t);
+ }
+}
+
+/**
+ * binder_get_object() - gets object and checks for valid metadata
+ * @proc: binder_proc owning the buffer
+ * @u: sender's user pointer to base of buffer
+ * @buffer: binder_buffer that we're parsing.
+ * @offset: offset in the @buffer at which to validate an object.
+ * @object: struct binder_object to read into
+ *
+ * Copy the binder object at the given offset into @object. If @u is
+ * provided then the copy is from the sender's buffer. If not, then
+ * it is copied from the target's @buffer.
+ *
+ * Return: If there's a valid metadata object at @offset, the
+ * size of that object. Otherwise, it returns zero. The object
+ * is read into the struct binder_object pointed to by @object.
+ */
+static size_t binder_get_object(struct binder_proc *proc,
+ const void __user *u,
+ struct binder_buffer *buffer,
+ unsigned long offset,
+ struct binder_object *object)
+{
+ size_t read_size;
+ struct binder_object_header *hdr;
+ size_t object_size = 0;
+
+ read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
+ if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+ !IS_ALIGNED(offset, sizeof(u32)))
+ return 0;
+
+ if (u) {
+ if (copy_from_user(object, u + offset, read_size))
+ return 0;
+ } else {
+ if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
+ offset, read_size))
+ return 0;
+ }
+
+ /* Ok, now see if we read a complete object. */
+ hdr = &object->hdr;
+ switch (hdr->type) {
+ case BINDER_TYPE_BINDER:
+ case BINDER_TYPE_WEAK_BINDER:
+ case BINDER_TYPE_HANDLE:
+ case BINDER_TYPE_WEAK_HANDLE:
+ object_size = sizeof(struct flat_binder_object);
+ break;
+ case BINDER_TYPE_FD:
+ object_size = sizeof(struct binder_fd_object);
+ break;
+ case BINDER_TYPE_PTR:
+ object_size = sizeof(struct binder_buffer_object);
+ break;
+ case BINDER_TYPE_FDA:
+ object_size = sizeof(struct binder_fd_array_object);
+ break;
+ default:
+ return 0;
+ }
+ if (offset <= buffer->data_size - object_size &&
+ buffer->data_size >= object_size)
+ return object_size;
+ else
+ return 0;
+}
+
+/**
+ * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer.
+ * @proc: binder_proc owning the buffer
+ * @b: binder_buffer containing the object
+ * @object: struct binder_object to read into
+ * @index: index in offset array at which the binder_buffer_object is
+ * located
+ * @start_offset: points to the start of the offset array
+ * @object_offsetp: offset of @object read from @b
+ * @num_valid: the number of valid offsets in the offset array
+ *
+ * Return: If @index is within the valid range of the offset array
+ * described by @start and @num_valid, and if there's a valid
+ * binder_buffer_object at the offset found in index @index
+ * of the offset array, that object is returned. Otherwise,
+ * %NULL is returned.
+ * Note that the offset found in index @index itself is not
+ * verified; this function assumes that @num_valid elements
+ * from @start were previously verified to have valid offsets.
+ * If @object_offsetp is non-NULL, then the offset within
+ * @b is written to it.
+ */
+static struct binder_buffer_object *binder_validate_ptr(
+ struct binder_proc *proc,
+ struct binder_buffer *b,
+ struct binder_object *object,
+ binder_size_t index,
+ binder_size_t start_offset,
+ binder_size_t *object_offsetp,
+ binder_size_t num_valid)
+{
+ size_t object_size;
+ binder_size_t object_offset;
+ unsigned long buffer_offset;
+
+ if (index >= num_valid)
+ return NULL;
+
+ buffer_offset = start_offset + sizeof(binder_size_t) * index;
+ if (binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
+ b, buffer_offset,
+ sizeof(object_offset)))
+ return NULL;
+ object_size = binder_get_object(proc, NULL, b, object_offset, object);
+ if (!object_size || object->hdr.type != BINDER_TYPE_PTR)
+ return NULL;
+ if (object_offsetp)
+ *object_offsetp = object_offset;
+
+ return &object->bbo;
+}
+
+/**
+ * binder_validate_fixup() - validates pointer/fd fixups happen in order.
+ * @proc: binder_proc owning the buffer
+ * @b: transaction buffer
+ * @objects_start_offset: offset to start of objects buffer
+ * @buffer_obj_offset: offset to binder_buffer_object in which to fix up
+ * @fixup_offset: start offset in @buffer to fix up
+ * @last_obj_offset: offset to last binder_buffer_object that we fixed
+ * @last_min_offset: minimum fixup offset in object at @last_obj_offset
+ *
+ * Return: %true if a fixup in buffer @buffer at offset @offset is
+ * allowed.
+ *
+ * For safety reasons, we only allow fixups inside a buffer to happen
+ * at increasing offsets; additionally, we only allow fixup on the last
+ * buffer object that was verified, or one of its parents.
+ *
+ * Example of what is allowed:
+ *
+ * A
+ * B (parent = A, offset = 0)
+ * C (parent = A, offset = 16)
+ * D (parent = C, offset = 0)
+ * E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset)
+ *
+ * Examples of what is not allowed:
+ *
+ * Decreasing offsets within the same parent:
+ * A
+ * C (parent = A, offset = 16)
+ * B (parent = A, offset = 0) // decreasing offset within A
+ *
+ * Referring to a parent that wasn't the last object or any of its parents:
+ * A
+ * B (parent = A, offset = 0)
+ * C (parent = A, offset = 0)
+ * C (parent = A, offset = 16)
+ * D (parent = B, offset = 0) // B is not A or any of A's parents
+ */
+static bool binder_validate_fixup(struct binder_proc *proc,
+ struct binder_buffer *b,
+ binder_size_t objects_start_offset,
+ binder_size_t buffer_obj_offset,
+ binder_size_t fixup_offset,
+ binder_size_t last_obj_offset,
+ binder_size_t last_min_offset)
+{
+ if (!last_obj_offset) {
+ /* Nothing to fix up in */
+ return false;
+ }
+
+ while (last_obj_offset != buffer_obj_offset) {
+ unsigned long buffer_offset;
+ struct binder_object last_object;
+ struct binder_buffer_object *last_bbo;
+ size_t object_size = binder_get_object(proc, NULL, b,
+ last_obj_offset,
+ &last_object);
+ if (object_size != sizeof(*last_bbo))
+ return false;
+
+ last_bbo = &last_object.bbo;
+ /*
+ * Safe to retrieve the parent of last_obj, since it
+ * was already previously verified by the driver.
+ */
+ if ((last_bbo->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0)
+ return false;
+ last_min_offset = last_bbo->parent_offset + sizeof(uintptr_t);
+ buffer_offset = objects_start_offset +
+ sizeof(binder_size_t) * last_bbo->parent;
+ if (binder_alloc_copy_from_buffer(&proc->alloc,
+ &last_obj_offset,
+ b, buffer_offset,
+ sizeof(last_obj_offset)))
+ return false;
+ }
+ return (fixup_offset >= last_min_offset);
+}
+
+/**
+ * struct binder_task_work_cb - for deferred close
+ *
+ * @twork: callback_head for task work
+ * @file: file to close
+ *
+ * Structure to pass task work to be handled after
+ * returning from binder_ioctl() via task_work_add().
+ */
+struct binder_task_work_cb {
+ struct callback_head twork;
+ struct file *file;
+};
+
+/**
+ * binder_do_fd_close() - close list of file descriptors
+ * @twork: callback head for task work
+ *
+ * It is not safe to call ksys_close() during the binder_ioctl()
+ * function if there is a chance that binder's own file descriptor
+ * might be closed. This is to meet the requirements for using
+ * fdget() (see comments for __fget_light()). Therefore use
+ * task_work_add() to schedule the close operation once we have
+ * returned from binder_ioctl(). This function is a callback
+ * for that mechanism and does the actual ksys_close() on the
+ * given file descriptor.
+ */
+static void binder_do_fd_close(struct callback_head *twork)
+{
+ struct binder_task_work_cb *twcb = container_of(twork,
+ struct binder_task_work_cb, twork);
+
+ fput(twcb->file);
+ kfree(twcb);
+}
+
+/**
+ * binder_deferred_fd_close() - schedule a close for the given file-descriptor
+ * @fd: file-descriptor to close
+ *
+ * See comments in binder_do_fd_close(). This function is used to schedule
+ * a file-descriptor to be closed after returning from binder_ioctl().
+ */
+static void binder_deferred_fd_close(int fd)
+{
+ struct binder_task_work_cb *twcb;
+
+ twcb = kzalloc(sizeof(*twcb), GFP_KERNEL);
+ if (!twcb)
+ return;
+ init_task_work(&twcb->twork, binder_do_fd_close);
+ twcb->file = file_close_fd(fd);
+ if (twcb->file) {
+ // pin it until binder_do_fd_close(); see comments there
+ get_file(twcb->file);
+ filp_close(twcb->file, current->files);
+ task_work_add(current, &twcb->twork, TWA_RESUME);
+ } else {
+ kfree(twcb);
+ }
+}
+
+static void binder_transaction_buffer_release(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+ binder_size_t off_end_offset,
+ bool is_failure)
+{
+ int debug_id = buffer->debug_id;
+ binder_size_t off_start_offset, buffer_offset;
+
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "%d buffer release %d, size %zd-%zd, failed at %llx\n",
+ proc->pid, buffer->debug_id,
+ buffer->data_size, buffer->offsets_size,
+ (unsigned long long)off_end_offset);
+
+ if (buffer->target_node)
+ binder_dec_node(buffer->target_node, 1, 0);
+
+ off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
+
+ for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
+ buffer_offset += sizeof(binder_size_t)) {
+ struct binder_object_header *hdr;
+ size_t object_size = 0;
+ struct binder_object object;
+ binder_size_t object_offset;
+
+ if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
+ buffer, buffer_offset,
+ sizeof(object_offset)))
+ object_size = binder_get_object(proc, NULL, buffer,
+ object_offset, &object);
+ if (object_size == 0) {
+ pr_err("transaction release %d bad object at offset %lld, size %zd\n",
+ debug_id, (u64)object_offset, buffer->data_size);
+ continue;
+ }
+ hdr = &object.hdr;
+ switch (hdr->type) {
+ case BINDER_TYPE_BINDER:
+ case BINDER_TYPE_WEAK_BINDER: {
+ struct flat_binder_object *fp;
+ struct binder_node *node;
+
+ fp = to_flat_binder_object(hdr);
+ node = binder_get_node(proc, fp->binder);
+ if (node == NULL) {
+ pr_err("transaction release %d bad node %016llx\n",
+ debug_id, (u64)fp->binder);
+ break;
+ }
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ " node %d u%016llx\n",
+ node->debug_id, (u64)node->ptr);
+ binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER,
+ 0);
+ binder_put_node(node);
+ } break;
+ case BINDER_TYPE_HANDLE:
+ case BINDER_TYPE_WEAK_HANDLE: {
+ struct flat_binder_object *fp;
+ struct binder_ref_data rdata;
+ int ret;
+
+ fp = to_flat_binder_object(hdr);
+ ret = binder_dec_ref_for_handle(proc, fp->handle,
+ hdr->type == BINDER_TYPE_HANDLE, &rdata);
+
+ if (ret) {
+ pr_err("transaction release %d bad handle %d, ret = %d\n",
+ debug_id, fp->handle, ret);
+ break;
+ }
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ " ref %d desc %d\n",
+ rdata.debug_id, rdata.desc);
+ } break;
+
+ case BINDER_TYPE_FD: {
+ /*
+ * No need to close the file here since user-space
+ * closes it for successfully delivered
+ * transactions. For transactions that weren't
+ * delivered, the new fd was never allocated so
+ * there is no need to close and the fput on the
+ * file is done when the transaction is torn
+ * down.
+ */
+ } break;
+ case BINDER_TYPE_PTR:
+ /*
+ * Nothing to do here, this will get cleaned up when the
+ * transaction buffer gets freed
+ */
+ break;
+ case BINDER_TYPE_FDA: {
+ struct binder_fd_array_object *fda;
+ struct binder_buffer_object *parent;
+ struct binder_object ptr_object;
+ binder_size_t fda_offset;
+ size_t fd_index;
+ binder_size_t fd_buf_size;
+ binder_size_t num_valid;
+
+ if (is_failure) {
+ /*
+ * The fd fixups have not been applied so no
+ * fds need to be closed.
+ */
+ continue;
+ }
+
+ num_valid = (buffer_offset - off_start_offset) /
+ sizeof(binder_size_t);
+ fda = to_binder_fd_array_object(hdr);
+ parent = binder_validate_ptr(proc, buffer, &ptr_object,
+ fda->parent,
+ off_start_offset,
+ NULL,
+ num_valid);
+ if (!parent) {
+ pr_err("transaction release %d bad parent offset\n",
+ debug_id);
+ continue;
+ }
+ fd_buf_size = sizeof(u32) * fda->num_fds;
+ if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
+ pr_err("transaction release %d invalid number of fds (%lld)\n",
+ debug_id, (u64)fda->num_fds);
+ continue;
+ }
+ if (fd_buf_size > parent->length ||
+ fda->parent_offset > parent->length - fd_buf_size) {
+ /* No space for all file descriptors here. */
+ pr_err("transaction release %d not enough space for %lld fds in buffer\n",
+ debug_id, (u64)fda->num_fds);
+ continue;
+ }
+ /*
+ * the source data for binder_buffer_object is visible
+ * to user-space and the @buffer element is the user
+ * pointer to the buffer_object containing the fd_array.
+ * Convert the address to an offset relative to
+ * the base of the transaction buffer.
+ */
+ fda_offset = parent->buffer - buffer->user_data +
+ fda->parent_offset;
+ for (fd_index = 0; fd_index < fda->num_fds;
+ fd_index++) {
+ u32 fd;
+ int err;
+ binder_size_t offset = fda_offset +
+ fd_index * sizeof(fd);
+
+ err = binder_alloc_copy_from_buffer(
+ &proc->alloc, &fd, buffer,
+ offset, sizeof(fd));
+ WARN_ON(err);
+ if (!err) {
+ binder_deferred_fd_close(fd);
+ /*
+ * Need to make sure the thread goes
+ * back to userspace to complete the
+ * deferred close
+ */
+ if (thread)
+ thread->looper_need_return = true;
+ }
+ }
+ } break;
+ default:
+ pr_err("transaction release %d bad object type %x\n",
+ debug_id, hdr->type);
+ break;
+ }
+ }
+}
+
+/* Clean up all the objects in the buffer */
+static inline void binder_release_entire_buffer(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+ bool is_failure)
+{
+ binder_size_t off_end_offset;
+
+ off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
+ off_end_offset += buffer->offsets_size;
+
+ binder_transaction_buffer_release(proc, thread, buffer,
+ off_end_offset, is_failure);
+}
+
+static int binder_translate_binder(struct flat_binder_object *fp,
+ struct binder_transaction *t,
+ struct binder_thread *thread)
+{
+ struct binder_node *node;
+ struct binder_proc *proc = thread->proc;
+ struct binder_proc *target_proc = t->to_proc;
+ struct binder_ref_data rdata;
+ int ret = 0;
+
+ node = binder_get_node(proc, fp->binder);
+ if (!node) {
+ node = binder_new_node(proc, fp);
+ if (!node)
+ return -ENOMEM;
+ }
+ if (fp->cookie != node->cookie) {
+ binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n",
+ proc->pid, thread->pid, (u64)fp->binder,
+ node->debug_id, (u64)fp->cookie,
+ (u64)node->cookie);
+ ret = -EINVAL;
+ goto done;
+ }
+ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
+ ret = -EPERM;
+ goto done;
+ }
+
+ ret = binder_inc_ref_for_node(target_proc, node,
+ fp->hdr.type == BINDER_TYPE_BINDER,
+ &thread->todo, &rdata);
+ if (ret)
+ goto done;
+
+ if (fp->hdr.type == BINDER_TYPE_BINDER)
+ fp->hdr.type = BINDER_TYPE_HANDLE;
+ else
+ fp->hdr.type = BINDER_TYPE_WEAK_HANDLE;
+ fp->binder = 0;
+ fp->handle = rdata.desc;
+ fp->cookie = 0;
+
+ trace_binder_transaction_node_to_ref(t, node, &rdata);
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ " node %d u%016llx -> ref %d desc %d\n",
+ node->debug_id, (u64)node->ptr,
+ rdata.debug_id, rdata.desc);
+done:
+ binder_put_node(node);
+ return ret;
+}
+
+static int binder_translate_handle(struct flat_binder_object *fp,
+ struct binder_transaction *t,
+ struct binder_thread *thread)
+{
+ struct binder_proc *proc = thread->proc;
+ struct binder_proc *target_proc = t->to_proc;
+ struct binder_node *node;
+ struct binder_ref_data src_rdata;
+ int ret = 0;
+
+ node = binder_get_node_from_ref(proc, fp->handle,
+ fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata);
+ if (!node) {
+ binder_user_error("%d:%d got transaction with invalid handle, %d\n",
+ proc->pid, thread->pid, fp->handle);
+ return -EINVAL;
+ }
+ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
+ ret = -EPERM;
+ goto done;
+ }
+
+ binder_node_lock(node);
+ if (node->proc == target_proc) {
+ if (fp->hdr.type == BINDER_TYPE_HANDLE)
+ fp->hdr.type = BINDER_TYPE_BINDER;
+ else
+ fp->hdr.type = BINDER_TYPE_WEAK_BINDER;
+ fp->binder = node->ptr;
+ fp->cookie = node->cookie;
+ if (node->proc)
+ binder_inner_proc_lock(node->proc);
+ else
+ __acquire(&node->proc->inner_lock);
+ binder_inc_node_nilocked(node,
+ fp->hdr.type == BINDER_TYPE_BINDER,
+ 0, NULL);
+ if (node->proc)
+ binder_inner_proc_unlock(node->proc);
+ else
+ __release(&node->proc->inner_lock);
+ trace_binder_transaction_ref_to_node(t, node, &src_rdata);
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ " ref %d desc %d -> node %d u%016llx\n",
+ src_rdata.debug_id, src_rdata.desc, node->debug_id,
+ (u64)node->ptr);
+ binder_node_unlock(node);
+ } else {
+ struct binder_ref_data dest_rdata;
+
+ binder_node_unlock(node);
+ ret = binder_inc_ref_for_node(target_proc, node,
+ fp->hdr.type == BINDER_TYPE_HANDLE,
+ NULL, &dest_rdata);
+ if (ret)
+ goto done;
+
+ fp->binder = 0;
+ fp->handle = dest_rdata.desc;
+ fp->cookie = 0;
+ trace_binder_transaction_ref_to_ref(t, node, &src_rdata,
+ &dest_rdata);
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ " ref %d desc %d -> ref %d desc %d (node %d)\n",
+ src_rdata.debug_id, src_rdata.desc,
+ dest_rdata.debug_id, dest_rdata.desc,
+ node->debug_id);
+ }
+done:
+ binder_put_node(node);
+ return ret;
+}
+
+static int binder_translate_fd(u32 fd, binder_size_t fd_offset,
+ struct binder_transaction *t,
+ struct binder_thread *thread,
+ struct binder_transaction *in_reply_to)
+{
+ struct binder_proc *proc = thread->proc;
+ struct binder_proc *target_proc = t->to_proc;
+ struct binder_txn_fd_fixup *fixup;
+ struct file *file;
+ int ret = 0;
+ bool target_allows_fd;
+
+ if (in_reply_to)
+ target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS);
+ else
+ target_allows_fd = t->buffer->target_node->accept_fds;
+ if (!target_allows_fd) {
+ binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n",
+ proc->pid, thread->pid,
+ in_reply_to ? "reply" : "transaction",
+ fd);
+ ret = -EPERM;
+ goto err_fd_not_accepted;
+ }
+
+ file = fget(fd);
+ if (!file) {
+ binder_user_error("%d:%d got transaction with invalid fd, %d\n",
+ proc->pid, thread->pid, fd);
+ ret = -EBADF;
+ goto err_fget;
+ }
+ ret = security_binder_transfer_file(proc->cred, target_proc->cred, file);
+ if (ret < 0) {
+ ret = -EPERM;
+ goto err_security;
+ }
+
+ /*
+ * Add fixup record for this transaction. The allocation
+ * of the fd in the target needs to be done from a
+ * target thread.
+ */
+ fixup = kzalloc(sizeof(*fixup), GFP_KERNEL);
+ if (!fixup) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+ fixup->file = file;
+ fixup->offset = fd_offset;
+ fixup->target_fd = -1;
+ trace_binder_transaction_fd_send(t, fd, fixup->offset);
+ list_add_tail(&fixup->fixup_entry, &t->fd_fixups);
+
+ return ret;
+
+err_alloc:
+err_security:
+ fput(file);
+err_fget:
+err_fd_not_accepted:
+ return ret;
+}
+
+/**
+ * struct binder_ptr_fixup - data to be fixed-up in target buffer
+ * @offset: offset in target buffer to fixup
+ * @skip_size: bytes to skip in copy (fixup will be written later)
+ * @fixup_data: data to write at fixup offset
+ * @node: list node
+ *
+ * This is used for the pointer fixup list (pf) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_ptr_fixup {
+ binder_size_t offset;
+ size_t skip_size;
+ binder_uintptr_t fixup_data;
+ struct list_head node;
+};
+
+/**
+ * struct binder_sg_copy - scatter-gather data to be copied
+ * @offset: offset in target buffer
+ * @sender_uaddr: user address in source buffer
+ * @length: bytes to copy
+ * @node: list node
+ *
+ * This is used for the sg copy list (sgc) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_sg_copy {
+ binder_size_t offset;
+ const void __user *sender_uaddr;
+ size_t length;
+ struct list_head node;
+};
+
+/**
+ * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data
+ * @alloc: binder_alloc associated with @buffer
+ * @buffer: binder buffer in target process
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Processes all elements of @sgc_head, applying fixups from @pf_head
+ * and copying the scatter-gather data from the source process' user
+ * buffer to the target's buffer. It is expected that the list creation
+ * and processing all occurs during binder_transaction() so these lists
+ * are only accessed in local context.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ int ret = 0;
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *tmppf;
+ struct binder_ptr_fixup *pf =
+ list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
+ node);
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < sgc->length) {
+ size_t copy_size;
+ size_t bytes_left = sgc->length - bytes_copied;
+ size_t offset = sgc->offset + bytes_copied;
+
+ /*
+ * We copy up to the fixup (pointed to by pf)
+ */
+ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset)
+ : bytes_left;
+ if (!ret && copy_size)
+ ret = binder_alloc_copy_user_to_buffer(
+ alloc, buffer,
+ offset,
+ sgc->sender_uaddr + bytes_copied,
+ copy_size);
+ bytes_copied += copy_size;
+ if (copy_size != bytes_left) {
+ BUG_ON(!pf);
+ /* we stopped at a fixup offset */
+ if (pf->skip_size) {
+ /*
+ * we are just skipping. This is for
+ * BINDER_TYPE_FDA where the translated
+ * fds will be fixed up when we get
+ * to target context.
+ */
+ bytes_copied += pf->skip_size;
+ } else {
+ /* apply the fixup indicated by pf */
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(
+ alloc, buffer,
+ pf->offset,
+ &pf->fixup_data,
+ sizeof(pf->fixup_data));
+ bytes_copied += sizeof(pf->fixup_data);
+ }
+ list_del(&pf->node);
+ kfree(pf);
+ pf = list_first_entry_or_null(pf_head,
+ struct binder_ptr_fixup, node);
+ }
+ }
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ BUG_ON(pf->skip_size == 0);
+ list_del(&pf->node);
+ kfree(pf);
+ }
+ BUG_ON(!list_empty(sgc_head));
+
+ return ret > 0 ? -EINVAL : ret;
+}
+
+/**
+ * binder_cleanup_deferred_txn_lists() - free specified lists
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Called to clean up @sgc_head and @pf_head if there is an
+ * error.
+ */
+static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *pf, *tmppf;
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ list_del(&pf->node);
+ kfree(pf);
+ }
+}
+
+/**
+ * binder_defer_copy() - queue a scatter-gather buffer for copy
+ * @sgc_head: list_head of scatter-gather copy list
+ * @offset: binder buffer offset in target process
+ * @sender_uaddr: user address in source process
+ * @length: bytes to copy
+ *
+ * Specify a scatter-gather block to be copied. The actual copy must
+ * be deferred until all the needed fixups are identified and queued.
+ * Then the copy and fixups are done together so un-translated values
+ * from the source are never visible in the target buffer.
+ *
+ * We are guaranteed that repeated calls to this function will have
+ * monotonically increasing @offset values so the list will naturally
+ * be ordered.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset,
+ const void __user *sender_uaddr, size_t length)
+{
+ struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL);
+
+ if (!bc)
+ return -ENOMEM;
+
+ bc->offset = offset;
+ bc->sender_uaddr = sender_uaddr;
+ bc->length = length;
+ INIT_LIST_HEAD(&bc->node);
+
+ /*
+ * We are guaranteed that the deferred copies are in-order
+ * so just add to the tail.
+ */
+ list_add_tail(&bc->node, sgc_head);
+
+ return 0;
+}
+
+/**
+ * binder_add_fixup() - queue a fixup to be applied to sg copy
+ * @pf_head: list_head of binder ptr fixup list
+ * @offset: binder buffer offset in target process
+ * @fixup: bytes to be copied for fixup
+ * @skip_size: bytes to skip when copying (fixup will be applied later)
+ *
+ * Add the specified fixup to a list ordered by @offset. When copying
+ * the scatter-gather buffers, the fixup will be copied instead of
+ * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup
+ * will be applied later (in target process context), so we just skip
+ * the bytes specified by @skip_size. If @skip_size is 0, we copy the
+ * value in @fixup.
+ *
+ * This function is called *mostly* in @offset order, but there are
+ * exceptions. Since out-of-order inserts are relatively uncommon,
+ * we insert the new element by searching backward from the tail of
+ * the list.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset,
+ binder_uintptr_t fixup, size_t skip_size)
+{
+ struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+ struct binder_ptr_fixup *tmppf;
+
+ if (!pf)
+ return -ENOMEM;
+
+ pf->offset = offset;
+ pf->fixup_data = fixup;
+ pf->skip_size = skip_size;
+ INIT_LIST_HEAD(&pf->node);
+
+ /* Fixups are *mostly* added in-order, but there are some
+ * exceptions. Look backwards through list for insertion point.
+ */
+ list_for_each_entry_reverse(tmppf, pf_head, node) {
+ if (tmppf->offset < pf->offset) {
+ list_add(&pf->node, &tmppf->node);
+ return 0;
+ }
+ }
+ /*
+ * if we get here, then the new offset is the lowest so
+ * insert at the head
+ */
+ list_add(&pf->node, pf_head);
+ return 0;
+}
+
+static int binder_translate_fd_array(struct list_head *pf_head,
+ struct binder_fd_array_object *fda,
+ const void __user *sender_ubuffer,
+ struct binder_buffer_object *parent,
+ struct binder_buffer_object *sender_uparent,
+ struct binder_transaction *t,
+ struct binder_thread *thread,
+ struct binder_transaction *in_reply_to)
+{
+ binder_size_t fdi, fd_buf_size;
+ binder_size_t fda_offset;
+ const void __user *sender_ufda_base;
+ struct binder_proc *proc = thread->proc;
+ int ret;
+
+ if (fda->num_fds == 0)
+ return 0;
+
+ fd_buf_size = sizeof(u32) * fda->num_fds;
+ if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
+ binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n",
+ proc->pid, thread->pid, (u64)fda->num_fds);
+ return -EINVAL;
+ }
+ if (fd_buf_size > parent->length ||
+ fda->parent_offset > parent->length - fd_buf_size) {
+ /* No space for all file descriptors here. */
+ binder_user_error("%d:%d not enough space to store %lld fds in buffer\n",
+ proc->pid, thread->pid, (u64)fda->num_fds);
+ return -EINVAL;
+ }
+ /*
+ * the source data for binder_buffer_object is visible
+ * to user-space and the @buffer element is the user
+ * pointer to the buffer_object containing the fd_array.
+ * Convert the address to an offset relative to
+ * the base of the transaction buffer.
+ */
+ fda_offset = parent->buffer - t->buffer->user_data +
+ fda->parent_offset;
+ sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer +
+ fda->parent_offset;
+
+ if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) ||
+ !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) {
+ binder_user_error("%d:%d parent offset not aligned correctly.\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
+ ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32));
+ if (ret)
+ return ret;
+
+ for (fdi = 0; fdi < fda->num_fds; fdi++) {
+ u32 fd;
+ binder_size_t offset = fda_offset + fdi * sizeof(fd);
+ binder_size_t sender_uoffset = fdi * sizeof(fd);
+
+ ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd));
+ if (!ret)
+ ret = binder_translate_fd(fd, offset, t, thread,
+ in_reply_to);
+ if (ret)
+ return ret > 0 ? -EINVAL : ret;
+ }
+ return 0;
+}
+
+static int binder_fixup_parent(struct list_head *pf_head,
+ struct binder_transaction *t,
+ struct binder_thread *thread,
+ struct binder_buffer_object *bp,
+ binder_size_t off_start_offset,
+ binder_size_t num_valid,
+ binder_size_t last_fixup_obj_off,
+ binder_size_t last_fixup_min_off)
+{
+ struct binder_buffer_object *parent;
+ struct binder_buffer *b = t->buffer;
+ struct binder_proc *proc = thread->proc;
+ struct binder_proc *target_proc = t->to_proc;
+ struct binder_object object;
+ binder_size_t buffer_offset;
+ binder_size_t parent_offset;
+
+ if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT))
+ return 0;
+
+ parent = binder_validate_ptr(target_proc, b, &object, bp->parent,
+ off_start_offset, &parent_offset,
+ num_valid);
+ if (!parent) {
+ binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
+
+ if (!binder_validate_fixup(target_proc, b, off_start_offset,
+ parent_offset, bp->parent_offset,
+ last_fixup_obj_off,
+ last_fixup_min_off)) {
+ binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
+
+ if (parent->length < sizeof(binder_uintptr_t) ||
+ bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) {
+ /* No space for a pointer here! */
+ binder_user_error("%d:%d got transaction with invalid parent offset\n",
+ proc->pid, thread->pid);
+ return -EINVAL;
+ }
+
+ buffer_offset = bp->parent_offset + parent->buffer - b->user_data;
+
+ return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0);
+}
+
+/**
+ * binder_can_update_transaction() - Can a txn be superseded by an updated one?
+ * @t1: the pending async txn in the frozen process
+ * @t2: the new async txn to supersede the outdated pending one
+ *
+ * Return: true if t2 can supersede t1
+ * false if t2 can not supersede t1
+ */
+static bool binder_can_update_transaction(struct binder_transaction *t1,
+ struct binder_transaction *t2)
+{
+ if ((t1->flags & t2->flags & (TF_ONE_WAY | TF_UPDATE_TXN)) !=
+ (TF_ONE_WAY | TF_UPDATE_TXN) || !t1->to_proc || !t2->to_proc)
+ return false;
+ if (t1->to_proc->tsk == t2->to_proc->tsk && t1->code == t2->code &&
+ t1->flags == t2->flags && t1->buffer->pid == t2->buffer->pid &&
+ t1->buffer->target_node->ptr == t2->buffer->target_node->ptr &&
+ t1->buffer->target_node->cookie == t2->buffer->target_node->cookie)
+ return true;
+ return false;
+}
+
+/**
+ * binder_find_outdated_transaction_ilocked() - Find the outdated transaction
+ * @t: new async transaction
+ * @target_list: list to find outdated transaction
+ *
+ * Return: the outdated transaction if found
+ * NULL if no outdated transacton can be found
+ *
+ * Requires the proc->inner_lock to be held.
+ */
+static struct binder_transaction *
+binder_find_outdated_transaction_ilocked(struct binder_transaction *t,
+ struct list_head *target_list)
+{
+ struct binder_work *w;
+
+ list_for_each_entry(w, target_list, entry) {
+ struct binder_transaction *t_queued;
+
+ if (w->type != BINDER_WORK_TRANSACTION)
+ continue;
+ t_queued = container_of(w, struct binder_transaction, work);
+ if (binder_can_update_transaction(t_queued, t))
+ return t_queued;
+ }
+ return NULL;
+}
+
+/**
+ * binder_proc_transaction() - sends a transaction to a process and wakes it up
+ * @t: transaction to send
+ * @proc: process to send the transaction to
+ * @thread: thread in @proc to send the transaction to (may be NULL)
+ *
+ * This function queues a transaction to the specified process. It will try
+ * to find a thread in the target process to handle the transaction and
+ * wake it up. If no thread is found, the work is queued to the proc
+ * waitqueue.
+ *
+ * If the @thread parameter is not NULL, the transaction is always queued
+ * to the waitlist of that specific thread.
+ *
+ * Return: 0 if the transaction was successfully queued
+ * BR_DEAD_REPLY if the target process or thread is dead
+ * BR_FROZEN_REPLY if the target process or thread is frozen and
+ * the sync transaction was rejected
+ * BR_TRANSACTION_PENDING_FROZEN if the target process is frozen
+ * and the async transaction was successfully queued
+ */
+static int binder_proc_transaction(struct binder_transaction *t,
+ struct binder_proc *proc,
+ struct binder_thread *thread)
+{
+ struct binder_node *node = t->buffer->target_node;
+ bool oneway = !!(t->flags & TF_ONE_WAY);
+ bool pending_async = false;
+ struct binder_transaction *t_outdated = NULL;
+ bool frozen = false;
+
+ BUG_ON(!node);
+ binder_node_lock(node);
+ if (oneway) {
+ BUG_ON(thread);
+ if (node->has_async_transaction)
+ pending_async = true;
+ else
+ node->has_async_transaction = true;
+ }
+
+ binder_inner_proc_lock(proc);
+ if (proc->is_frozen) {
+ frozen = true;
+ proc->sync_recv |= !oneway;
+ proc->async_recv |= oneway;
+ }
+
+ if ((frozen && !oneway) || proc->is_dead ||
+ (thread && thread->is_dead)) {
+ binder_inner_proc_unlock(proc);
+ binder_node_unlock(node);
+ return frozen ? BR_FROZEN_REPLY : BR_DEAD_REPLY;
+ }
+
+ if (!thread && !pending_async)
+ thread = binder_select_thread_ilocked(proc);
+
+ if (thread) {
+ binder_enqueue_thread_work_ilocked(thread, &t->work);
+ } else if (!pending_async) {
+ binder_enqueue_work_ilocked(&t->work, &proc->todo);
+ } else {
+ if ((t->flags & TF_UPDATE_TXN) && frozen) {
+ t_outdated = binder_find_outdated_transaction_ilocked(t,
+ &node->async_todo);
+ if (t_outdated) {
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "txn %d supersedes %d\n",
+ t->debug_id, t_outdated->debug_id);
+ list_del_init(&t_outdated->work.entry);
+ proc->outstanding_txns--;
+ }
+ }
+ binder_enqueue_work_ilocked(&t->work, &node->async_todo);
+ }
+
+ if (!pending_async)
+ binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */);
+
+ proc->outstanding_txns++;
+ binder_inner_proc_unlock(proc);
+ binder_node_unlock(node);
+
+ /*
+ * To reduce potential contention, free the outdated transaction and
+ * buffer after releasing the locks.
+ */
+ if (t_outdated) {
+ struct binder_buffer *buffer = t_outdated->buffer;
+
+ t_outdated->buffer = NULL;
+ buffer->transaction = NULL;
+ trace_binder_transaction_update_buffer_release(buffer);
+ binder_release_entire_buffer(proc, NULL, buffer, false);
+ binder_alloc_free_buf(&proc->alloc, buffer);
+ kfree(t_outdated);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION);
+ }
+
+ if (oneway && frozen)
+ return BR_TRANSACTION_PENDING_FROZEN;
+
+ return 0;
+}
+
+/**
+ * binder_get_node_refs_for_txn() - Get required refs on node for txn
+ * @node: struct binder_node for which to get refs
+ * @procp: returns @node->proc if valid
+ * @error: if no @procp then returns BR_DEAD_REPLY
+ *
+ * User-space normally keeps the node alive when creating a transaction
+ * since it has a reference to the target. The local strong ref keeps it
+ * alive if the sending process dies before the target process processes
+ * the transaction. If the source process is malicious or has a reference
+ * counting bug, relying on the local strong ref can fail.
+ *
+ * Since user-space can cause the local strong ref to go away, we also take
+ * a tmpref on the node to ensure it survives while we are constructing
+ * the transaction. We also need a tmpref on the proc while we are
+ * constructing the transaction, so we take that here as well.
+ *
+ * Return: The target_node with refs taken or NULL if no @node->proc is NULL.
+ * Also sets @procp if valid. If the @node->proc is NULL indicating that the
+ * target proc has died, @error is set to BR_DEAD_REPLY.
+ */
+static struct binder_node *binder_get_node_refs_for_txn(
+ struct binder_node *node,
+ struct binder_proc **procp,
+ uint32_t *error)
+{
+ struct binder_node *target_node = NULL;
+
+ binder_node_inner_lock(node);
+ if (node->proc) {
+ target_node = node;
+ binder_inc_node_nilocked(node, 1, 0, NULL);
+ binder_inc_node_tmpref_ilocked(node);
+ node->proc->tmp_ref++;
+ *procp = node->proc;
+ } else
+ *error = BR_DEAD_REPLY;
+ binder_node_inner_unlock(node);
+
+ return target_node;
+}
+
+static void binder_set_txn_from_error(struct binder_transaction *t, int id,
+ uint32_t command, int32_t param)
+{
+ struct binder_thread *from = binder_get_txn_from_and_acq_inner(t);
+
+ if (!from) {
+ /* annotation for sparse */
+ __release(&from->proc->inner_lock);
+ return;
+ }
+
+ /* don't override existing errors */
+ if (from->ee.command == BR_OK)
+ binder_set_extended_error(&from->ee, id, command, param);
+ binder_inner_proc_unlock(from->proc);
+ binder_thread_dec_tmpref(from);
+}
+
+/**
+ * binder_netlink_report() - report a transaction failure via netlink
+ * @proc: the binder proc sending the transaction
+ * @t: the binder transaction that failed
+ * @data_size: the user provided data size for the transaction
+ * @error: enum binder_driver_return_protocol returned to sender
+ */
+static void binder_netlink_report(struct binder_proc *proc,
+ struct binder_transaction *t,
+ u32 data_size,
+ u32 error)
+{
+ const char *context = proc->context->name;
+ struct sk_buff *skb;
+ void *hdr;
+
+ if (!genl_has_listeners(&binder_nl_family, &init_net,
+ BINDER_NLGRP_REPORT))
+ return;
+
+ trace_binder_netlink_report(context, t, data_size, error);
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ hdr = genlmsg_put(skb, 0, 0, &binder_nl_family, 0, BINDER_CMD_REPORT);
+ if (!hdr)
+ goto free_skb;
+
+ if (nla_put_u32(skb, BINDER_A_REPORT_ERROR, error) ||
+ nla_put_string(skb, BINDER_A_REPORT_CONTEXT, context) ||
+ nla_put_u32(skb, BINDER_A_REPORT_FROM_PID, t->from_pid) ||
+ nla_put_u32(skb, BINDER_A_REPORT_FROM_TID, t->from_tid))
+ goto cancel_skb;
+
+ if (t->to_proc &&
+ nla_put_u32(skb, BINDER_A_REPORT_TO_PID, t->to_proc->pid))
+ goto cancel_skb;
+
+ if (t->to_thread &&
+ nla_put_u32(skb, BINDER_A_REPORT_TO_TID, t->to_thread->pid))
+ goto cancel_skb;
+
+ if (t->is_reply && nla_put_flag(skb, BINDER_A_REPORT_IS_REPLY))
+ goto cancel_skb;
+
+ if (nla_put_u32(skb, BINDER_A_REPORT_FLAGS, t->flags) ||
+ nla_put_u32(skb, BINDER_A_REPORT_CODE, t->code) ||
+ nla_put_u32(skb, BINDER_A_REPORT_DATA_SIZE, data_size))
+ goto cancel_skb;
+
+ genlmsg_end(skb, hdr);
+ genlmsg_multicast(&binder_nl_family, skb, 0, BINDER_NLGRP_REPORT,
+ GFP_KERNEL);
+ return;
+
+cancel_skb:
+ genlmsg_cancel(skb, hdr);
+free_skb:
+ nlmsg_free(skb);
+}
+
+static void binder_transaction(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_transaction_data *tr, int reply,
+ binder_size_t extra_buffers_size)
+{
+ int ret;
+ struct binder_transaction *t;
+ struct binder_work *w;
+ struct binder_work *tcomplete;
+ binder_size_t buffer_offset = 0;
+ binder_size_t off_start_offset, off_end_offset;
+ binder_size_t off_min;
+ binder_size_t sg_buf_offset, sg_buf_end_offset;
+ binder_size_t user_offset = 0;
+ struct binder_proc *target_proc = NULL;
+ struct binder_thread *target_thread = NULL;
+ struct binder_node *target_node = NULL;
+ struct binder_transaction *in_reply_to = NULL;
+ struct binder_transaction_log_entry *e;
+ uint32_t return_error = 0;
+ uint32_t return_error_param = 0;
+ uint32_t return_error_line = 0;
+ binder_size_t last_fixup_obj_off = 0;
+ binder_size_t last_fixup_min_off = 0;
+ struct binder_context *context = proc->context;
+ int t_debug_id = atomic_inc_return(&binder_last_id);
+ ktime_t t_start_time = ktime_get();
+ struct lsm_context lsmctx = { };
+ struct list_head sgc_head;
+ struct list_head pf_head;
+ const void __user *user_buffer = (const void __user *)
+ (uintptr_t)tr->data.ptr.buffer;
+ INIT_LIST_HEAD(&sgc_head);
+ INIT_LIST_HEAD(&pf_head);
+
+ e = binder_transaction_log_add(&binder_transaction_log);
+ e->debug_id = t_debug_id;
+ e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY);
+ e->from_proc = proc->pid;
+ e->from_thread = thread->pid;
+ e->target_handle = tr->target.handle;
+ e->data_size = tr->data_size;
+ e->offsets_size = tr->offsets_size;
+ strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME);
+
+ binder_inner_proc_lock(proc);
+ binder_set_extended_error(&thread->ee, t_debug_id, BR_OK, 0);
+ binder_inner_proc_unlock(proc);
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t) {
+ binder_txn_error("%d:%d cannot allocate transaction\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -ENOMEM;
+ return_error_line = __LINE__;
+ goto err_alloc_t_failed;
+ }
+ INIT_LIST_HEAD(&t->fd_fixups);
+ binder_stats_created(BINDER_STAT_TRANSACTION);
+ spin_lock_init(&t->lock);
+ t->debug_id = t_debug_id;
+ t->start_time = t_start_time;
+ t->from_pid = proc->pid;
+ t->from_tid = thread->pid;
+ t->sender_euid = task_euid(proc->tsk);
+ t->code = tr->code;
+ t->flags = tr->flags;
+ t->priority = task_nice(current);
+ t->work.type = BINDER_WORK_TRANSACTION;
+ t->is_async = !reply && (tr->flags & TF_ONE_WAY);
+ t->is_reply = reply;
+ if (!reply && !(tr->flags & TF_ONE_WAY))
+ t->from = thread;
+
+ if (reply) {
+ binder_inner_proc_lock(proc);
+ in_reply_to = thread->transaction_stack;
+ if (in_reply_to == NULL) {
+ binder_inner_proc_unlock(proc);
+ binder_user_error("%d:%d got reply transaction with no transaction stack\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPROTO;
+ return_error_line = __LINE__;
+ goto err_empty_call_stack;
+ }
+ if (in_reply_to->to_thread != thread) {
+ spin_lock(&in_reply_to->lock);
+ binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n",
+ proc->pid, thread->pid, in_reply_to->debug_id,
+ in_reply_to->to_proc ?
+ in_reply_to->to_proc->pid : 0,
+ in_reply_to->to_thread ?
+ in_reply_to->to_thread->pid : 0);
+ spin_unlock(&in_reply_to->lock);
+ binder_inner_proc_unlock(proc);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPROTO;
+ return_error_line = __LINE__;
+ in_reply_to = NULL;
+ goto err_bad_call_stack;
+ }
+ thread->transaction_stack = in_reply_to->to_parent;
+ binder_inner_proc_unlock(proc);
+ binder_set_nice(in_reply_to->saved_priority);
+ target_thread = binder_get_txn_from_and_acq_inner(in_reply_to);
+ if (target_thread == NULL) {
+ /* annotation for sparse */
+ __release(&target_thread->proc->inner_lock);
+ binder_txn_error("%d:%d reply target not found\n",
+ thread->pid, proc->pid);
+ return_error = BR_DEAD_REPLY;
+ return_error_line = __LINE__;
+ goto err_dead_binder;
+ }
+ if (target_thread->transaction_stack != in_reply_to) {
+ binder_user_error("%d:%d got reply transaction with bad target transaction stack %d, expected %d\n",
+ proc->pid, thread->pid,
+ target_thread->transaction_stack ?
+ target_thread->transaction_stack->debug_id : 0,
+ in_reply_to->debug_id);
+ binder_inner_proc_unlock(target_thread->proc);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPROTO;
+ return_error_line = __LINE__;
+ in_reply_to = NULL;
+ target_thread = NULL;
+ goto err_dead_binder;
+ }
+ target_proc = target_thread->proc;
+ target_proc->tmp_ref++;
+ binder_inner_proc_unlock(target_thread->proc);
+ } else {
+ if (tr->target.handle) {
+ struct binder_ref *ref;
+
+ /*
+ * There must already be a strong ref
+ * on this node. If so, do a strong
+ * increment on the node to ensure it
+ * stays alive until the transaction is
+ * done.
+ */
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, tr->target.handle,
+ true);
+ if (ref) {
+ target_node = binder_get_node_refs_for_txn(
+ ref->node, &target_proc,
+ &return_error);
+ } else {
+ binder_user_error("%d:%d got transaction to invalid handle, %u\n",
+ proc->pid, thread->pid, tr->target.handle);
+ return_error = BR_FAILED_REPLY;
+ }
+ binder_proc_unlock(proc);
+ } else {
+ mutex_lock(&context->context_mgr_node_lock);
+ target_node = context->binder_context_mgr_node;
+ if (target_node)
+ target_node = binder_get_node_refs_for_txn(
+ target_node, &target_proc,
+ &return_error);
+ else
+ return_error = BR_DEAD_REPLY;
+ mutex_unlock(&context->context_mgr_node_lock);
+ if (target_node && target_proc->pid == proc->pid) {
+ binder_user_error("%d:%d got transaction to context manager from process owning it\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
+ }
+ if (!target_node) {
+ binder_txn_error("%d:%d cannot find target node\n",
+ proc->pid, thread->pid);
+ /* return_error is set above */
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_dead_binder;
+ }
+ e->to_node = target_node->debug_id;
+ if (WARN_ON(proc == target_proc)) {
+ binder_txn_error("%d:%d self transactions not allowed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
+ if (security_binder_transaction(proc->cred,
+ target_proc->cred) < 0) {
+ binder_txn_error("%d:%d transaction credentials failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPERM;
+ return_error_line = __LINE__;
+ goto err_invalid_target_handle;
+ }
+ binder_inner_proc_lock(proc);
+
+ w = list_first_entry_or_null(&thread->todo,
+ struct binder_work, entry);
+ if (!(tr->flags & TF_ONE_WAY) && w &&
+ w->type == BINDER_WORK_TRANSACTION) {
+ /*
+ * Do not allow new outgoing transaction from a
+ * thread that has a transaction at the head of
+ * its todo list. Only need to check the head
+ * because binder_select_thread_ilocked picks a
+ * thread from proc->waiting_threads to enqueue
+ * the transaction, and nothing is queued to the
+ * todo list while the thread is on waiting_threads.
+ */
+ binder_user_error("%d:%d new transaction not allowed when there is a transaction on thread todo\n",
+ proc->pid, thread->pid);
+ binder_inner_proc_unlock(proc);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPROTO;
+ return_error_line = __LINE__;
+ goto err_bad_todo_list;
+ }
+
+ if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) {
+ struct binder_transaction *tmp;
+
+ tmp = thread->transaction_stack;
+ if (tmp->to_thread != thread) {
+ spin_lock(&tmp->lock);
+ binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n",
+ proc->pid, thread->pid, tmp->debug_id,
+ tmp->to_proc ? tmp->to_proc->pid : 0,
+ tmp->to_thread ?
+ tmp->to_thread->pid : 0);
+ spin_unlock(&tmp->lock);
+ binder_inner_proc_unlock(proc);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EPROTO;
+ return_error_line = __LINE__;
+ goto err_bad_call_stack;
+ }
+ while (tmp) {
+ struct binder_thread *from;
+
+ spin_lock(&tmp->lock);
+ from = tmp->from;
+ if (from && from->proc == target_proc) {
+ atomic_inc(&from->tmp_ref);
+ target_thread = from;
+ spin_unlock(&tmp->lock);
+ break;
+ }
+ spin_unlock(&tmp->lock);
+ tmp = tmp->from_parent;
+ }
+ }
+ binder_inner_proc_unlock(proc);
+ }
+
+ t->to_proc = target_proc;
+ t->to_thread = target_thread;
+ if (target_thread)
+ e->to_thread = target_thread->pid;
+ e->to_proc = target_proc->pid;
+
+ tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL);
+ if (tcomplete == NULL) {
+ binder_txn_error("%d:%d cannot allocate work for transaction\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -ENOMEM;
+ return_error_line = __LINE__;
+ goto err_alloc_tcomplete_failed;
+ }
+ binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE);
+
+ if (reply)
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "%d:%d BC_REPLY %d -> %d:%d, data size %lld-%lld-%lld\n",
+ proc->pid, thread->pid, t->debug_id,
+ target_proc->pid, target_thread->pid,
+ (u64)tr->data_size, (u64)tr->offsets_size,
+ (u64)extra_buffers_size);
+ else
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "%d:%d BC_TRANSACTION %d -> %d - node %d, data size %lld-%lld-%lld\n",
+ proc->pid, thread->pid, t->debug_id,
+ target_proc->pid, target_node->debug_id,
+ (u64)tr->data_size, (u64)tr->offsets_size,
+ (u64)extra_buffers_size);
+
+ if (target_node && target_node->txn_security_ctx) {
+ u32 secid;
+ size_t added_size;
+
+ security_cred_getsecid(proc->cred, &secid);
+ ret = security_secid_to_secctx(secid, &lsmctx);
+ if (ret < 0) {
+ binder_txn_error("%d:%d failed to get security context\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_get_secctx_failed;
+ }
+ added_size = ALIGN(lsmctx.len, sizeof(u64));
+ extra_buffers_size += added_size;
+ if (extra_buffers_size < added_size) {
+ binder_txn_error("%d:%d integer overflow of extra_buffers_size\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_extra_size;
+ }
+ }
+
+ trace_binder_transaction(reply, t, target_node);
+
+ t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size,
+ tr->offsets_size, extra_buffers_size,
+ !reply && (t->flags & TF_ONE_WAY));
+ if (IS_ERR(t->buffer)) {
+ char *s;
+
+ ret = PTR_ERR(t->buffer);
+ s = (ret == -ESRCH) ? ": vma cleared, target dead or dying"
+ : (ret == -ENOSPC) ? ": no space left"
+ : (ret == -ENOMEM) ? ": memory allocation failed"
+ : "";
+ binder_txn_error("cannot allocate buffer%s", s);
+
+ return_error_param = PTR_ERR(t->buffer);
+ return_error = return_error_param == -ESRCH ?
+ BR_DEAD_REPLY : BR_FAILED_REPLY;
+ return_error_line = __LINE__;
+ t->buffer = NULL;
+ goto err_binder_alloc_buf_failed;
+ }
+ if (lsmctx.context) {
+ int err;
+ size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) +
+ ALIGN(tr->offsets_size, sizeof(void *)) +
+ ALIGN(extra_buffers_size, sizeof(void *)) -
+ ALIGN(lsmctx.len, sizeof(u64));
+
+ t->security_ctx = t->buffer->user_data + buf_offset;
+ err = binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer, buf_offset,
+ lsmctx.context, lsmctx.len);
+ if (err) {
+ t->security_ctx = 0;
+ WARN_ON(1);
+ }
+ security_release_secctx(&lsmctx);
+ lsmctx.context = NULL;
+ }
+ t->buffer->debug_id = t->debug_id;
+ t->buffer->transaction = t;
+ t->buffer->target_node = target_node;
+ t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF);
+ trace_binder_transaction_alloc_buf(t->buffer);
+
+ if (binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer,
+ ALIGN(tr->data_size, sizeof(void *)),
+ (const void __user *)
+ (uintptr_t)tr->data.ptr.offsets,
+ tr->offsets_size)) {
+ binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+ if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) {
+ binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n",
+ proc->pid, thread->pid, (u64)tr->offsets_size);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+ if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) {
+ binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n",
+ proc->pid, thread->pid,
+ (u64)extra_buffers_size);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+ off_start_offset = ALIGN(tr->data_size, sizeof(void *));
+ buffer_offset = off_start_offset;
+ off_end_offset = off_start_offset + tr->offsets_size;
+ sg_buf_offset = ALIGN(off_end_offset, sizeof(void *));
+ sg_buf_end_offset = sg_buf_offset + extra_buffers_size -
+ ALIGN(lsmctx.len, sizeof(u64));
+ off_min = 0;
+ for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
+ buffer_offset += sizeof(binder_size_t)) {
+ struct binder_object_header *hdr;
+ size_t object_size;
+ struct binder_object object;
+ binder_size_t object_offset;
+ binder_size_t copy_size;
+
+ if (binder_alloc_copy_from_buffer(&target_proc->alloc,
+ &object_offset,
+ t->buffer,
+ buffer_offset,
+ sizeof(object_offset))) {
+ binder_txn_error("%d:%d copy offset from buffer failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+
+ /*
+ * Copy the source user buffer up to the next object
+ * that will be processed.
+ */
+ copy_size = object_offset - user_offset;
+ if (copy_size && (user_offset > object_offset ||
+ object_offset > tr->data_size ||
+ binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ copy_size))) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+ object_size = binder_get_object(target_proc, user_buffer,
+ t->buffer, object_offset, &object);
+ if (object_size == 0 || object_offset < off_min) {
+ binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
+ proc->pid, thread->pid,
+ (u64)object_offset,
+ (u64)off_min,
+ (u64)t->buffer->data_size);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+ /*
+ * Set offset to the next buffer fragment to be
+ * copied
+ */
+ user_offset = object_offset + object_size;
+
+ hdr = &object.hdr;
+ off_min = object_offset + object_size;
+ switch (hdr->type) {
+ case BINDER_TYPE_BINDER:
+ case BINDER_TYPE_WEAK_BINDER: {
+ struct flat_binder_object *fp;
+
+ fp = to_flat_binder_object(hdr);
+ ret = binder_translate_binder(fp, t, thread);
+
+ if (ret < 0 ||
+ binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fp, sizeof(*fp))) {
+ binder_txn_error("%d:%d translate binder failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ } break;
+ case BINDER_TYPE_HANDLE:
+ case BINDER_TYPE_WEAK_HANDLE: {
+ struct flat_binder_object *fp;
+
+ fp = to_flat_binder_object(hdr);
+ ret = binder_translate_handle(fp, t, thread);
+ if (ret < 0 ||
+ binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fp, sizeof(*fp))) {
+ binder_txn_error("%d:%d translate handle failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ } break;
+
+ case BINDER_TYPE_FD: {
+ struct binder_fd_object *fp = to_binder_fd_object(hdr);
+ binder_size_t fd_offset = object_offset +
+ (uintptr_t)&fp->fd - (uintptr_t)fp;
+ int ret = binder_translate_fd(fp->fd, fd_offset, t,
+ thread, in_reply_to);
+
+ fp->pad_binder = 0;
+ if (ret < 0 ||
+ binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fp, sizeof(*fp))) {
+ binder_txn_error("%d:%d translate fd failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ } break;
+ case BINDER_TYPE_FDA: {
+ struct binder_object ptr_object;
+ binder_size_t parent_offset;
+ struct binder_object user_object;
+ size_t user_parent_size;
+ struct binder_fd_array_object *fda =
+ to_binder_fd_array_object(hdr);
+ size_t num_valid = (buffer_offset - off_start_offset) /
+ sizeof(binder_size_t);
+ struct binder_buffer_object *parent =
+ binder_validate_ptr(target_proc, t->buffer,
+ &ptr_object, fda->parent,
+ off_start_offset,
+ &parent_offset,
+ num_valid);
+ if (!parent) {
+ binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+ if (!binder_validate_fixup(target_proc, t->buffer,
+ off_start_offset,
+ parent_offset,
+ fda->parent_offset,
+ last_fixup_obj_off,
+ last_fixup_min_off)) {
+ binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+ /*
+ * We need to read the user version of the parent
+ * object to get the original user offset
+ */
+ user_parent_size =
+ binder_get_object(proc, user_buffer, t->buffer,
+ parent_offset, &user_object);
+ if (user_parent_size != sizeof(user_object.bbo)) {
+ binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n",
+ proc->pid, thread->pid,
+ user_parent_size,
+ sizeof(user_object.bbo));
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+ ret = binder_translate_fd_array(&pf_head, fda,
+ user_buffer, parent,
+ &user_object.bbo, t,
+ thread, in_reply_to);
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fda, sizeof(*fda));
+ if (ret) {
+ binder_txn_error("%d:%d translate fd array failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret > 0 ? -EINVAL : ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ last_fixup_obj_off = parent_offset;
+ last_fixup_min_off =
+ fda->parent_offset + sizeof(u32) * fda->num_fds;
+ } break;
+ case BINDER_TYPE_PTR: {
+ struct binder_buffer_object *bp =
+ to_binder_buffer_object(hdr);
+ size_t buf_left = sg_buf_end_offset - sg_buf_offset;
+ size_t num_valid;
+
+ if (bp->length > buf_left) {
+ binder_user_error("%d:%d got transaction with too large buffer\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_offset;
+ }
+ ret = binder_defer_copy(&sgc_head, sg_buf_offset,
+ (const void __user *)(uintptr_t)bp->buffer,
+ bp->length);
+ if (ret) {
+ binder_txn_error("%d:%d deferred copy failed\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ /* Fixup buffer pointer to target proc address space */
+ bp->buffer = t->buffer->user_data + sg_buf_offset;
+ sg_buf_offset += ALIGN(bp->length, sizeof(u64));
+
+ num_valid = (buffer_offset - off_start_offset) /
+ sizeof(binder_size_t);
+ ret = binder_fixup_parent(&pf_head, t,
+ thread, bp,
+ off_start_offset,
+ num_valid,
+ last_fixup_obj_off,
+ last_fixup_min_off);
+ if (ret < 0 ||
+ binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ bp, sizeof(*bp))) {
+ binder_txn_error("%d:%d failed to fixup parent\n",
+ thread->pid, proc->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_translate_failed;
+ }
+ last_fixup_obj_off = object_offset;
+ last_fixup_min_off = 0;
+ } break;
+ default:
+ binder_user_error("%d:%d got transaction with invalid object type, %x\n",
+ proc->pid, thread->pid, hdr->type);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_object_type;
+ }
+ }
+ /* Done processing objects, copy the rest of the buffer */
+ if (binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ tr->data_size - user_offset)) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+
+ ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer,
+ &sgc_head, &pf_head);
+ if (ret) {
+ binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+ if (t->buffer->oneway_spam_suspect) {
+ tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+ binder_netlink_report(proc, t, tr->data_size,
+ BR_ONEWAY_SPAM_SUSPECT);
+ } else {
+ tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+ }
+
+ if (reply) {
+ binder_enqueue_thread_work(thread, tcomplete);
+ binder_inner_proc_lock(target_proc);
+ if (target_thread->is_dead) {
+ return_error = BR_DEAD_REPLY;
+ binder_inner_proc_unlock(target_proc);
+ goto err_dead_proc_or_thread;
+ }
+ BUG_ON(t->buffer->async_transaction != 0);
+ binder_pop_transaction_ilocked(target_thread, in_reply_to);
+ binder_enqueue_thread_work_ilocked(target_thread, &t->work);
+ target_proc->outstanding_txns++;
+ binder_inner_proc_unlock(target_proc);
+ wake_up_interruptible_sync(&target_thread->wait);
+ binder_free_transaction(in_reply_to);
+ } else if (!(t->flags & TF_ONE_WAY)) {
+ BUG_ON(t->buffer->async_transaction != 0);
+ binder_inner_proc_lock(proc);
+ /*
+ * Defer the TRANSACTION_COMPLETE, so we don't return to
+ * userspace immediately; this allows the target process to
+ * immediately start processing this transaction, reducing
+ * latency. We will then return the TRANSACTION_COMPLETE when
+ * the target replies (or there is an error).
+ */
+ binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete);
+ t->from_parent = thread->transaction_stack;
+ thread->transaction_stack = t;
+ binder_inner_proc_unlock(proc);
+ return_error = binder_proc_transaction(t,
+ target_proc, target_thread);
+ if (return_error) {
+ binder_inner_proc_lock(proc);
+ binder_pop_transaction_ilocked(thread, t);
+ binder_inner_proc_unlock(proc);
+ goto err_dead_proc_or_thread;
+ }
+ } else {
+ BUG_ON(target_node == NULL);
+ BUG_ON(t->buffer->async_transaction != 1);
+ return_error = binder_proc_transaction(t, target_proc, NULL);
+ /*
+ * Let the caller know when async transaction reaches a frozen
+ * process and is put in a pending queue, waiting for the target
+ * process to be unfrozen.
+ */
+ if (return_error == BR_TRANSACTION_PENDING_FROZEN) {
+ tcomplete->type = BINDER_WORK_TRANSACTION_PENDING;
+ binder_netlink_report(proc, t, tr->data_size,
+ return_error);
+ }
+ binder_enqueue_thread_work(thread, tcomplete);
+ if (return_error &&
+ return_error != BR_TRANSACTION_PENDING_FROZEN)
+ goto err_dead_proc_or_thread;
+ }
+ if (target_thread)
+ binder_thread_dec_tmpref(target_thread);
+ binder_proc_dec_tmpref(target_proc);
+ if (target_node)
+ binder_dec_node_tmpref(target_node);
+ /*
+ * write barrier to synchronize with initialization
+ * of log entry
+ */
+ smp_wmb();
+ WRITE_ONCE(e->debug_id_done, t_debug_id);
+ return;
+
+err_dead_proc_or_thread:
+ binder_txn_error("%d:%d dead process or thread\n",
+ thread->pid, proc->pid);
+ return_error_line = __LINE__;
+ binder_dequeue_work(proc, tcomplete);
+err_translate_failed:
+err_bad_object_type:
+err_bad_offset:
+err_bad_parent:
+err_copy_data_failed:
+ binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head);
+ binder_free_txn_fixups(t);
+ trace_binder_transaction_failed_buffer_release(t->buffer);
+ binder_transaction_buffer_release(target_proc, NULL, t->buffer,
+ buffer_offset, true);
+ if (target_node)
+ binder_dec_node_tmpref(target_node);
+ target_node = NULL;
+ t->buffer->transaction = NULL;
+ binder_alloc_free_buf(&target_proc->alloc, t->buffer);
+err_binder_alloc_buf_failed:
+err_bad_extra_size:
+ if (lsmctx.context)
+ security_release_secctx(&lsmctx);
+err_get_secctx_failed:
+ kfree(tcomplete);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
+err_alloc_tcomplete_failed:
+ if (trace_binder_txn_latency_free_enabled())
+ binder_txn_latency_free(t);
+err_bad_todo_list:
+err_bad_call_stack:
+err_empty_call_stack:
+err_dead_binder:
+err_invalid_target_handle:
+ if (target_node) {
+ binder_dec_node(target_node, 1, 0);
+ binder_dec_node_tmpref(target_node);
+ }
+
+ binder_netlink_report(proc, t, tr->data_size, return_error);
+ kfree(t);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION);
+err_alloc_t_failed:
+
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "%d:%d transaction %s to %d:%d failed %d/%d/%d, code %u size %lld-%lld line %d\n",
+ proc->pid, thread->pid, reply ? "reply" :
+ (tr->flags & TF_ONE_WAY ? "async" : "call"),
+ target_proc ? target_proc->pid : 0,
+ target_thread ? target_thread->pid : 0,
+ t_debug_id, return_error, return_error_param,
+ tr->code, (u64)tr->data_size, (u64)tr->offsets_size,
+ return_error_line);
+
+ if (target_thread)
+ binder_thread_dec_tmpref(target_thread);
+ if (target_proc)
+ binder_proc_dec_tmpref(target_proc);
+
+ {
+ struct binder_transaction_log_entry *fe;
+
+ e->return_error = return_error;
+ e->return_error_param = return_error_param;
+ e->return_error_line = return_error_line;
+ fe = binder_transaction_log_add(&binder_transaction_log_failed);
+ *fe = *e;
+ /*
+ * write barrier to synchronize with initialization
+ * of log entry
+ */
+ smp_wmb();
+ WRITE_ONCE(e->debug_id_done, t_debug_id);
+ WRITE_ONCE(fe->debug_id_done, t_debug_id);
+ }
+
+ BUG_ON(thread->return_error.cmd != BR_OK);
+ if (in_reply_to) {
+ binder_set_txn_from_error(in_reply_to, t_debug_id,
+ return_error, return_error_param);
+ thread->return_error.cmd = BR_TRANSACTION_COMPLETE;
+ binder_enqueue_thread_work(thread, &thread->return_error.work);
+ binder_send_failed_reply(in_reply_to, return_error);
+ } else {
+ binder_inner_proc_lock(proc);
+ binder_set_extended_error(&thread->ee, t_debug_id,
+ return_error, return_error_param);
+ binder_inner_proc_unlock(proc);
+ thread->return_error.cmd = return_error;
+ binder_enqueue_thread_work(thread, &thread->return_error.work);
+ }
+}
+
+static int
+binder_request_freeze_notification(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_handle_cookie *handle_cookie)
+{
+ struct binder_ref_freeze *freeze;
+ struct binder_ref *ref;
+
+ freeze = kzalloc(sizeof(*freeze), GFP_KERNEL);
+ if (!freeze)
+ return -ENOMEM;
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, handle_cookie->handle, false);
+ if (!ref) {
+ binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION invalid ref %d\n",
+ proc->pid, thread->pid, handle_cookie->handle);
+ binder_proc_unlock(proc);
+ kfree(freeze);
+ return -EINVAL;
+ }
+
+ binder_node_lock(ref->node);
+ if (ref->freeze) {
+ binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION already set\n",
+ proc->pid, thread->pid);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ kfree(freeze);
+ return -EINVAL;
+ }
+
+ binder_stats_created(BINDER_STAT_FREEZE);
+ INIT_LIST_HEAD(&freeze->work.entry);
+ freeze->cookie = handle_cookie->cookie;
+ freeze->work.type = BINDER_WORK_FROZEN_BINDER;
+ ref->freeze = freeze;
+
+ if (ref->node->proc) {
+ binder_inner_proc_lock(ref->node->proc);
+ freeze->is_frozen = ref->node->proc->is_frozen;
+ binder_inner_proc_unlock(ref->node->proc);
+
+ binder_inner_proc_lock(proc);
+ binder_enqueue_work_ilocked(&freeze->work, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ binder_inner_proc_unlock(proc);
+ }
+
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ return 0;
+}
+
+static int
+binder_clear_freeze_notification(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_handle_cookie *handle_cookie)
+{
+ struct binder_ref_freeze *freeze;
+ struct binder_ref *ref;
+
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, handle_cookie->handle, false);
+ if (!ref) {
+ binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION invalid ref %d\n",
+ proc->pid, thread->pid, handle_cookie->handle);
+ binder_proc_unlock(proc);
+ return -EINVAL;
+ }
+
+ binder_node_lock(ref->node);
+
+ if (!ref->freeze) {
+ binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification not active\n",
+ proc->pid, thread->pid);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ return -EINVAL;
+ }
+ freeze = ref->freeze;
+ binder_inner_proc_lock(proc);
+ if (freeze->cookie != handle_cookie->cookie) {
+ binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification cookie mismatch %016llx != %016llx\n",
+ proc->pid, thread->pid, (u64)freeze->cookie,
+ (u64)handle_cookie->cookie);
+ binder_inner_proc_unlock(proc);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ return -EINVAL;
+ }
+ ref->freeze = NULL;
+ /*
+ * Take the existing freeze object and overwrite its work type. There are three cases here:
+ * 1. No pending notification. In this case just add the work to the queue.
+ * 2. A notification was sent and is pending an ack from userspace. Once an ack arrives, we
+ * should resend with the new work type.
+ * 3. A notification is pending to be sent. Since the work is already in the queue, nothing
+ * needs to be done here.
+ */
+ freeze->work.type = BINDER_WORK_CLEAR_FREEZE_NOTIFICATION;
+ if (list_empty(&freeze->work.entry)) {
+ binder_enqueue_work_ilocked(&freeze->work, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ } else if (freeze->sent) {
+ freeze->resend = true;
+ }
+ binder_inner_proc_unlock(proc);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ return 0;
+}
+
+static int
+binder_freeze_notification_done(struct binder_proc *proc,
+ struct binder_thread *thread,
+ binder_uintptr_t cookie)
+{
+ struct binder_ref_freeze *freeze = NULL;
+ struct binder_work *w;
+
+ binder_inner_proc_lock(proc);
+ list_for_each_entry(w, &proc->delivered_freeze, entry) {
+ struct binder_ref_freeze *tmp_freeze =
+ container_of(w, struct binder_ref_freeze, work);
+
+ if (tmp_freeze->cookie == cookie) {
+ freeze = tmp_freeze;
+ break;
+ }
+ }
+ if (!freeze) {
+ binder_user_error("%d:%d BC_FREEZE_NOTIFICATION_DONE %016llx not found\n",
+ proc->pid, thread->pid, (u64)cookie);
+ binder_inner_proc_unlock(proc);
+ return -EINVAL;
+ }
+ binder_dequeue_work_ilocked(&freeze->work);
+ freeze->sent = false;
+ if (freeze->resend) {
+ freeze->resend = false;
+ binder_enqueue_work_ilocked(&freeze->work, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ }
+ binder_inner_proc_unlock(proc);
+ return 0;
+}
+
+/**
+ * binder_free_buf() - free the specified buffer
+ * @proc: binder proc that owns buffer
+ * @thread: binder thread performing the buffer release
+ * @buffer: buffer to be freed
+ * @is_failure: failed to send transaction
+ *
+ * If the buffer is for an async transaction, enqueue the next async
+ * transaction from the node.
+ *
+ * Cleanup the buffer and free it.
+ */
+static void
+binder_free_buf(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer, bool is_failure)
+{
+ binder_inner_proc_lock(proc);
+ if (buffer->transaction) {
+ buffer->transaction->buffer = NULL;
+ buffer->transaction = NULL;
+ }
+ binder_inner_proc_unlock(proc);
+ if (buffer->async_transaction && buffer->target_node) {
+ struct binder_node *buf_node;
+ struct binder_work *w;
+
+ buf_node = buffer->target_node;
+ binder_node_inner_lock(buf_node);
+ BUG_ON(!buf_node->has_async_transaction);
+ BUG_ON(buf_node->proc != proc);
+ w = binder_dequeue_work_head_ilocked(
+ &buf_node->async_todo);
+ if (!w) {
+ buf_node->has_async_transaction = false;
+ } else {
+ binder_enqueue_work_ilocked(
+ w, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ }
+ binder_node_inner_unlock(buf_node);
+ }
+ trace_binder_transaction_buffer_release(buffer);
+ binder_release_entire_buffer(proc, thread, buffer, is_failure);
+ binder_alloc_free_buf(&proc->alloc, buffer);
+}
+
+static int binder_thread_write(struct binder_proc *proc,
+ struct binder_thread *thread,
+ binder_uintptr_t binder_buffer, size_t size,
+ binder_size_t *consumed)
+{
+ uint32_t cmd;
+ struct binder_context *context = proc->context;
+ void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
+ void __user *ptr = buffer + *consumed;
+ void __user *end = buffer + size;
+
+ while (ptr < end && thread->return_error.cmd == BR_OK) {
+ int ret;
+
+ if (get_user(cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ trace_binder_command(cmd);
+ if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) {
+ atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]);
+ atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]);
+ atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]);
+ }
+ switch (cmd) {
+ case BC_INCREFS:
+ case BC_ACQUIRE:
+ case BC_RELEASE:
+ case BC_DECREFS: {
+ uint32_t target;
+ const char *debug_string;
+ bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE;
+ bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE;
+ struct binder_ref_data rdata;
+
+ if (get_user(target, (uint32_t __user *)ptr))
+ return -EFAULT;
+
+ ptr += sizeof(uint32_t);
+ ret = -1;
+ if (increment && !target) {
+ struct binder_node *ctx_mgr_node;
+
+ mutex_lock(&context->context_mgr_node_lock);
+ ctx_mgr_node = context->binder_context_mgr_node;
+ if (ctx_mgr_node) {
+ if (ctx_mgr_node->proc == proc) {
+ binder_user_error("%d:%d context manager tried to acquire desc 0\n",
+ proc->pid, thread->pid);
+ mutex_unlock(&context->context_mgr_node_lock);
+ return -EINVAL;
+ }
+ ret = binder_inc_ref_for_node(
+ proc, ctx_mgr_node,
+ strong, NULL, &rdata);
+ }
+ mutex_unlock(&context->context_mgr_node_lock);
+ }
+ if (ret)
+ ret = binder_update_ref_for_handle(
+ proc, target, increment, strong,
+ &rdata);
+ if (!ret && rdata.desc != target) {
+ binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n",
+ proc->pid, thread->pid,
+ target, rdata.desc);
+ }
+ switch (cmd) {
+ case BC_INCREFS:
+ debug_string = "IncRefs";
+ break;
+ case BC_ACQUIRE:
+ debug_string = "Acquire";
+ break;
+ case BC_RELEASE:
+ debug_string = "Release";
+ break;
+ case BC_DECREFS:
+ default:
+ debug_string = "DecRefs";
+ break;
+ }
+ if (ret) {
+ binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n",
+ proc->pid, thread->pid, debug_string,
+ strong, target, ret);
+ break;
+ }
+ binder_debug(BINDER_DEBUG_USER_REFS,
+ "%d:%d %s ref %d desc %d s %d w %d\n",
+ proc->pid, thread->pid, debug_string,
+ rdata.debug_id, rdata.desc, rdata.strong,
+ rdata.weak);
+ break;
+ }
+ case BC_INCREFS_DONE:
+ case BC_ACQUIRE_DONE: {
+ binder_uintptr_t node_ptr;
+ binder_uintptr_t cookie;
+ struct binder_node *node;
+ bool free_node;
+
+ if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+ node = binder_get_node(proc, node_ptr);
+ if (node == NULL) {
+ binder_user_error("%d:%d %s u%016llx no match\n",
+ proc->pid, thread->pid,
+ cmd == BC_INCREFS_DONE ?
+ "BC_INCREFS_DONE" :
+ "BC_ACQUIRE_DONE",
+ (u64)node_ptr);
+ break;
+ }
+ if (cookie != node->cookie) {
+ binder_user_error("%d:%d %s u%016llx node %d cookie mismatch %016llx != %016llx\n",
+ proc->pid, thread->pid,
+ cmd == BC_INCREFS_DONE ?
+ "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
+ (u64)node_ptr, node->debug_id,
+ (u64)cookie, (u64)node->cookie);
+ binder_put_node(node);
+ break;
+ }
+ binder_node_inner_lock(node);
+ if (cmd == BC_ACQUIRE_DONE) {
+ if (node->pending_strong_ref == 0) {
+ binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n",
+ proc->pid, thread->pid,
+ node->debug_id);
+ binder_node_inner_unlock(node);
+ binder_put_node(node);
+ break;
+ }
+ node->pending_strong_ref = 0;
+ } else {
+ if (node->pending_weak_ref == 0) {
+ binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n",
+ proc->pid, thread->pid,
+ node->debug_id);
+ binder_node_inner_unlock(node);
+ binder_put_node(node);
+ break;
+ }
+ node->pending_weak_ref = 0;
+ }
+ free_node = binder_dec_node_nilocked(node,
+ cmd == BC_ACQUIRE_DONE, 0);
+ WARN_ON(free_node);
+ binder_debug(BINDER_DEBUG_USER_REFS,
+ "%d:%d %s node %d ls %d lw %d tr %d\n",
+ proc->pid, thread->pid,
+ cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
+ node->debug_id, node->local_strong_refs,
+ node->local_weak_refs, node->tmp_refs);
+ binder_node_inner_unlock(node);
+ binder_put_node(node);
+ break;
+ }
+ case BC_ATTEMPT_ACQUIRE:
+ pr_err("BC_ATTEMPT_ACQUIRE not supported\n");
+ return -EINVAL;
+ case BC_ACQUIRE_RESULT:
+ pr_err("BC_ACQUIRE_RESULT not supported\n");
+ return -EINVAL;
+
+ case BC_FREE_BUFFER: {
+ binder_uintptr_t data_ptr;
+ struct binder_buffer *buffer;
+
+ if (get_user(data_ptr, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+
+ buffer = binder_alloc_prepare_to_free(&proc->alloc,
+ data_ptr);
+ if (IS_ERR_OR_NULL(buffer)) {
+ if (PTR_ERR(buffer) == -EPERM) {
+ binder_user_error(
+ "%d:%d BC_FREE_BUFFER matched unreturned or currently freeing buffer at offset %lx\n",
+ proc->pid, thread->pid,
+ (unsigned long)data_ptr - proc->alloc.vm_start);
+ } else {
+ binder_user_error(
+ "%d:%d BC_FREE_BUFFER no match for buffer at offset %lx\n",
+ proc->pid, thread->pid,
+ (unsigned long)data_ptr - proc->alloc.vm_start);
+ }
+ break;
+ }
+ binder_debug(BINDER_DEBUG_FREE_BUFFER,
+ "%d:%d BC_FREE_BUFFER at offset %lx found buffer %d for %s transaction\n",
+ proc->pid, thread->pid,
+ (unsigned long)data_ptr - proc->alloc.vm_start,
+ buffer->debug_id,
+ buffer->transaction ? "active" : "finished");
+ binder_free_buf(proc, thread, buffer, false);
+ break;
+ }
+
+ case BC_TRANSACTION_SG:
+ case BC_REPLY_SG: {
+ struct binder_transaction_data_sg tr;
+
+ if (copy_from_user(&tr, ptr, sizeof(tr)))
+ return -EFAULT;
+ ptr += sizeof(tr);
+ binder_transaction(proc, thread, &tr.transaction_data,
+ cmd == BC_REPLY_SG, tr.buffers_size);
+ break;
+ }
+ case BC_TRANSACTION:
+ case BC_REPLY: {
+ struct binder_transaction_data tr;
+
+ if (copy_from_user(&tr, ptr, sizeof(tr)))
+ return -EFAULT;
+ ptr += sizeof(tr);
+ binder_transaction(proc, thread, &tr,
+ cmd == BC_REPLY, 0);
+ break;
+ }
+
+ case BC_REGISTER_LOOPER:
+ binder_debug(BINDER_DEBUG_THREADS,
+ "%d:%d BC_REGISTER_LOOPER\n",
+ proc->pid, thread->pid);
+ binder_inner_proc_lock(proc);
+ if (thread->looper & BINDER_LOOPER_STATE_ENTERED) {
+ thread->looper |= BINDER_LOOPER_STATE_INVALID;
+ binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n",
+ proc->pid, thread->pid);
+ } else if (proc->requested_threads == 0) {
+ thread->looper |= BINDER_LOOPER_STATE_INVALID;
+ binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called without request\n",
+ proc->pid, thread->pid);
+ } else {
+ proc->requested_threads--;
+ proc->requested_threads_started++;
+ }
+ thread->looper |= BINDER_LOOPER_STATE_REGISTERED;
+ binder_inner_proc_unlock(proc);
+ break;
+ case BC_ENTER_LOOPER:
+ binder_debug(BINDER_DEBUG_THREADS,
+ "%d:%d BC_ENTER_LOOPER\n",
+ proc->pid, thread->pid);
+ if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) {
+ thread->looper |= BINDER_LOOPER_STATE_INVALID;
+ binder_user_error("%d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n",
+ proc->pid, thread->pid);
+ }
+ thread->looper |= BINDER_LOOPER_STATE_ENTERED;
+ break;
+ case BC_EXIT_LOOPER:
+ binder_debug(BINDER_DEBUG_THREADS,
+ "%d:%d BC_EXIT_LOOPER\n",
+ proc->pid, thread->pid);
+ thread->looper |= BINDER_LOOPER_STATE_EXITED;
+ break;
+
+ case BC_REQUEST_DEATH_NOTIFICATION:
+ case BC_CLEAR_DEATH_NOTIFICATION: {
+ uint32_t target;
+ binder_uintptr_t cookie;
+ struct binder_ref *ref;
+ struct binder_ref_death *death = NULL;
+
+ if (get_user(target, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+ if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
+ /*
+ * Allocate memory for death notification
+ * before taking lock
+ */
+ death = kzalloc(sizeof(*death), GFP_KERNEL);
+ if (death == NULL) {
+ WARN_ON(thread->return_error.cmd !=
+ BR_OK);
+ thread->return_error.cmd = BR_ERROR;
+ binder_enqueue_thread_work(
+ thread,
+ &thread->return_error.work);
+ binder_debug(
+ BINDER_DEBUG_FAILED_TRANSACTION,
+ "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n",
+ proc->pid, thread->pid);
+ break;
+ }
+ }
+ binder_proc_lock(proc);
+ ref = binder_get_ref_olocked(proc, target, false);
+ if (ref == NULL) {
+ binder_user_error("%d:%d %s invalid ref %d\n",
+ proc->pid, thread->pid,
+ cmd == BC_REQUEST_DEATH_NOTIFICATION ?
+ "BC_REQUEST_DEATH_NOTIFICATION" :
+ "BC_CLEAR_DEATH_NOTIFICATION",
+ target);
+ binder_proc_unlock(proc);
+ kfree(death);
+ break;
+ }
+
+ binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
+ "%d:%d %s %016llx ref %d desc %d s %d w %d for node %d\n",
+ proc->pid, thread->pid,
+ cmd == BC_REQUEST_DEATH_NOTIFICATION ?
+ "BC_REQUEST_DEATH_NOTIFICATION" :
+ "BC_CLEAR_DEATH_NOTIFICATION",
+ (u64)cookie, ref->data.debug_id,
+ ref->data.desc, ref->data.strong,
+ ref->data.weak, ref->node->debug_id);
+
+ binder_node_lock(ref->node);
+ if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
+ if (ref->death) {
+ binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n",
+ proc->pid, thread->pid);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ kfree(death);
+ break;
+ }
+ binder_stats_created(BINDER_STAT_DEATH);
+ INIT_LIST_HEAD(&death->work.entry);
+ death->cookie = cookie;
+ ref->death = death;
+ if (ref->node->proc == NULL) {
+ ref->death->work.type = BINDER_WORK_DEAD_BINDER;
+
+ binder_inner_proc_lock(proc);
+ binder_enqueue_work_ilocked(
+ &ref->death->work, &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ binder_inner_proc_unlock(proc);
+ }
+ } else {
+ if (ref->death == NULL) {
+ binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n",
+ proc->pid, thread->pid);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ break;
+ }
+ death = ref->death;
+ if (death->cookie != cookie) {
+ binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %016llx != %016llx\n",
+ proc->pid, thread->pid,
+ (u64)death->cookie,
+ (u64)cookie);
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ break;
+ }
+ ref->death = NULL;
+ binder_inner_proc_lock(proc);
+ if (list_empty(&death->work.entry)) {
+ death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
+ if (thread->looper &
+ (BINDER_LOOPER_STATE_REGISTERED |
+ BINDER_LOOPER_STATE_ENTERED))
+ binder_enqueue_thread_work_ilocked(
+ thread,
+ &death->work);
+ else {
+ binder_enqueue_work_ilocked(
+ &death->work,
+ &proc->todo);
+ binder_wakeup_proc_ilocked(
+ proc);
+ }
+ } else {
+ BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER);
+ death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR;
+ }
+ binder_inner_proc_unlock(proc);
+ }
+ binder_node_unlock(ref->node);
+ binder_proc_unlock(proc);
+ } break;
+ case BC_DEAD_BINDER_DONE: {
+ struct binder_work *w;
+ binder_uintptr_t cookie;
+ struct binder_ref_death *death = NULL;
+
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+
+ ptr += sizeof(cookie);
+ binder_inner_proc_lock(proc);
+ list_for_each_entry(w, &proc->delivered_death,
+ entry) {
+ struct binder_ref_death *tmp_death =
+ container_of(w,
+ struct binder_ref_death,
+ work);
+
+ if (tmp_death->cookie == cookie) {
+ death = tmp_death;
+ break;
+ }
+ }
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
+ proc->pid, thread->pid, (u64)cookie,
+ death);
+ if (death == NULL) {
+ binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n",
+ proc->pid, thread->pid, (u64)cookie);
+ binder_inner_proc_unlock(proc);
+ break;
+ }
+ binder_dequeue_work_ilocked(&death->work);
+ if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) {
+ death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
+ if (thread->looper &
+ (BINDER_LOOPER_STATE_REGISTERED |
+ BINDER_LOOPER_STATE_ENTERED))
+ binder_enqueue_thread_work_ilocked(
+ thread, &death->work);
+ else {
+ binder_enqueue_work_ilocked(
+ &death->work,
+ &proc->todo);
+ binder_wakeup_proc_ilocked(proc);
+ }
+ }
+ binder_inner_proc_unlock(proc);
+ } break;
+
+ case BC_REQUEST_FREEZE_NOTIFICATION: {
+ struct binder_handle_cookie handle_cookie;
+ int error;
+
+ if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie)))
+ return -EFAULT;
+ ptr += sizeof(handle_cookie);
+ error = binder_request_freeze_notification(proc, thread,
+ &handle_cookie);
+ if (error)
+ return error;
+ } break;
+
+ case BC_CLEAR_FREEZE_NOTIFICATION: {
+ struct binder_handle_cookie handle_cookie;
+ int error;
+
+ if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie)))
+ return -EFAULT;
+ ptr += sizeof(handle_cookie);
+ error = binder_clear_freeze_notification(proc, thread, &handle_cookie);
+ if (error)
+ return error;
+ } break;
+
+ case BC_FREEZE_NOTIFICATION_DONE: {
+ binder_uintptr_t cookie;
+ int error;
+
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+
+ ptr += sizeof(cookie);
+ error = binder_freeze_notification_done(proc, thread, cookie);
+ if (error)
+ return error;
+ } break;
+
+ default:
+ pr_err("%d:%d unknown command %u\n",
+ proc->pid, thread->pid, cmd);
+ return -EINVAL;
+ }
+ *consumed = ptr - buffer;
+ }
+ return 0;
+}
+
+static void binder_stat_br(struct binder_proc *proc,
+ struct binder_thread *thread, uint32_t cmd)
+{
+ trace_binder_return(cmd);
+ if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) {
+ atomic_inc(&binder_stats.br[_IOC_NR(cmd)]);
+ atomic_inc(&proc->stats.br[_IOC_NR(cmd)]);
+ atomic_inc(&thread->stats.br[_IOC_NR(cmd)]);
+ }
+}
+
+static int binder_put_node_cmd(struct binder_proc *proc,
+ struct binder_thread *thread,
+ void __user **ptrp,
+ binder_uintptr_t node_ptr,
+ binder_uintptr_t node_cookie,
+ int node_debug_id,
+ uint32_t cmd, const char *cmd_name)
+{
+ void __user *ptr = *ptrp;
+
+ if (put_user(cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+
+ if (put_user(node_ptr, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+
+ if (put_user(node_cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+
+ binder_stat_br(proc, thread, cmd);
+ binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n",
+ proc->pid, thread->pid, cmd_name, node_debug_id,
+ (u64)node_ptr, (u64)node_cookie);
+
+ *ptrp = ptr;
+ return 0;
+}
+
+static int binder_wait_for_work(struct binder_thread *thread,
+ bool do_proc_work)
+{
+ DEFINE_WAIT(wait);
+ struct binder_proc *proc = thread->proc;
+ int ret = 0;
+
+ binder_inner_proc_lock(proc);
+ for (;;) {
+ prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ if (binder_has_work_ilocked(thread, do_proc_work))
+ break;
+ if (do_proc_work)
+ list_add(&thread->waiting_thread_node,
+ &proc->waiting_threads);
+ binder_inner_proc_unlock(proc);
+ schedule();
+ binder_inner_proc_lock(proc);
+ list_del_init(&thread->waiting_thread_node);
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ }
+ finish_wait(&thread->wait, &wait);
+ binder_inner_proc_unlock(proc);
+
+ return ret;
+}
+
+/**
+ * binder_apply_fd_fixups() - finish fd translation
+ * @proc: binder_proc associated @t->buffer
+ * @t: binder transaction with list of fd fixups
+ *
+ * Now that we are in the context of the transaction target
+ * process, we can allocate and install fds. Process the
+ * list of fds to translate and fixup the buffer with the
+ * new fds first and only then install the files.
+ *
+ * If we fail to allocate an fd, skip the install and release
+ * any fds that have already been allocated.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+static int binder_apply_fd_fixups(struct binder_proc *proc,
+ struct binder_transaction *t)
+{
+ struct binder_txn_fd_fixup *fixup, *tmp;
+ int ret = 0;
+
+ list_for_each_entry(fixup, &t->fd_fixups, fixup_entry) {
+ int fd = get_unused_fd_flags(O_CLOEXEC);
+
+ if (fd < 0) {
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "failed fd fixup txn %d fd %d\n",
+ t->debug_id, fd);
+ ret = -ENOMEM;
+ goto err;
+ }
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "fd fixup txn %d fd %d\n",
+ t->debug_id, fd);
+ trace_binder_transaction_fd_recv(t, fd, fixup->offset);
+ fixup->target_fd = fd;
+ if (binder_alloc_copy_to_buffer(&proc->alloc, t->buffer,
+ fixup->offset, &fd,
+ sizeof(u32))) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+ list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
+ fd_install(fixup->target_fd, fixup->file);
+ list_del(&fixup->fixup_entry);
+ kfree(fixup);
+ }
+
+ return ret;
+
+err:
+ binder_free_txn_fixups(t);
+ return ret;
+}
+
+static int binder_thread_read(struct binder_proc *proc,
+ struct binder_thread *thread,
+ binder_uintptr_t binder_buffer, size_t size,
+ binder_size_t *consumed, int non_block)
+{
+ void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
+ void __user *ptr = buffer + *consumed;
+ void __user *end = buffer + size;
+
+ int ret = 0;
+ int wait_for_proc_work;
+
+ if (*consumed == 0) {
+ if (put_user(BR_NOOP, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ }
+
+retry:
+ binder_inner_proc_lock(proc);
+ wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
+ binder_inner_proc_unlock(proc);
+
+ thread->looper |= BINDER_LOOPER_STATE_WAITING;
+
+ trace_binder_wait_for_work(wait_for_proc_work,
+ !!thread->transaction_stack,
+ !binder_worklist_empty(proc, &thread->todo));
+ if (wait_for_proc_work) {
+ if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
+ BINDER_LOOPER_STATE_ENTERED))) {
+ binder_user_error("%d:%d ERROR: Thread waiting for process work before calling BC_REGISTER_LOOPER or BC_ENTER_LOOPER (state %x)\n",
+ proc->pid, thread->pid, thread->looper);
+ wait_event_interruptible(binder_user_error_wait,
+ binder_stop_on_user_error < 2);
+ }
+ binder_set_nice(proc->default_priority);
+ }
+
+ if (non_block) {
+ if (!binder_has_work(thread, wait_for_proc_work))
+ ret = -EAGAIN;
+ } else {
+ ret = binder_wait_for_work(thread, wait_for_proc_work);
+ }
+
+ thread->looper &= ~BINDER_LOOPER_STATE_WAITING;
+
+ if (ret)
+ return ret;
+
+ while (1) {
+ uint32_t cmd;
+ struct binder_transaction_data_secctx tr;
+ struct binder_transaction_data *trd = &tr.transaction_data;
+ struct binder_work *w = NULL;
+ struct list_head *list = NULL;
+ struct binder_transaction *t = NULL;
+ struct binder_thread *t_from;
+ size_t trsize = sizeof(*trd);
+
+ binder_inner_proc_lock(proc);
+ if (!binder_worklist_empty_ilocked(&thread->todo))
+ list = &thread->todo;
+ else if (!binder_worklist_empty_ilocked(&proc->todo) &&
+ wait_for_proc_work)
+ list = &proc->todo;
+ else {
+ binder_inner_proc_unlock(proc);
+
+ /* no data added */
+ if (ptr - buffer == 4 && !thread->looper_need_return)
+ goto retry;
+ break;
+ }
+
+ if (end - ptr < sizeof(tr) + 4) {
+ binder_inner_proc_unlock(proc);
+ break;
+ }
+ w = binder_dequeue_work_head_ilocked(list);
+ if (binder_worklist_empty_ilocked(&thread->todo))
+ thread->process_todo = false;
+
+ switch (w->type) {
+ case BINDER_WORK_TRANSACTION: {
+ binder_inner_proc_unlock(proc);
+ t = container_of(w, struct binder_transaction, work);
+ } break;
+ case BINDER_WORK_RETURN_ERROR: {
+ struct binder_error *e = container_of(
+ w, struct binder_error, work);
+
+ WARN_ON(e->cmd == BR_OK);
+ binder_inner_proc_unlock(proc);
+ if (put_user(e->cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ cmd = e->cmd;
+ e->cmd = BR_OK;
+ ptr += sizeof(uint32_t);
+
+ binder_stat_br(proc, thread, cmd);
+ } break;
+ case BINDER_WORK_TRANSACTION_COMPLETE:
+ case BINDER_WORK_TRANSACTION_PENDING:
+ case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
+ if (proc->oneway_spam_detection_enabled &&
+ w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+ cmd = BR_ONEWAY_SPAM_SUSPECT;
+ else if (w->type == BINDER_WORK_TRANSACTION_PENDING)
+ cmd = BR_TRANSACTION_PENDING_FROZEN;
+ else
+ cmd = BR_TRANSACTION_COMPLETE;
+ binder_inner_proc_unlock(proc);
+ kfree(w);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
+ if (put_user(cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+
+ binder_stat_br(proc, thread, cmd);
+ binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE,
+ "%d:%d BR_TRANSACTION_COMPLETE\n",
+ proc->pid, thread->pid);
+ } break;
+ case BINDER_WORK_NODE: {
+ struct binder_node *node = container_of(w, struct binder_node, work);
+ int strong, weak;
+ binder_uintptr_t node_ptr = node->ptr;
+ binder_uintptr_t node_cookie = node->cookie;
+ int node_debug_id = node->debug_id;
+ int has_weak_ref;
+ int has_strong_ref;
+ void __user *orig_ptr = ptr;
+
+ BUG_ON(proc != node->proc);
+ strong = node->internal_strong_refs ||
+ node->local_strong_refs;
+ weak = !hlist_empty(&node->refs) ||
+ node->local_weak_refs ||
+ node->tmp_refs || strong;
+ has_strong_ref = node->has_strong_ref;
+ has_weak_ref = node->has_weak_ref;
+
+ if (weak && !has_weak_ref) {
+ node->has_weak_ref = 1;
+ node->pending_weak_ref = 1;
+ node->local_weak_refs++;
+ }
+ if (strong && !has_strong_ref) {
+ node->has_strong_ref = 1;
+ node->pending_strong_ref = 1;
+ node->local_strong_refs++;
+ }
+ if (!strong && has_strong_ref)
+ node->has_strong_ref = 0;
+ if (!weak && has_weak_ref)
+ node->has_weak_ref = 0;
+ if (!weak && !strong) {
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "%d:%d node %d u%016llx c%016llx deleted\n",
+ proc->pid, thread->pid,
+ node_debug_id,
+ (u64)node_ptr,
+ (u64)node_cookie);
+ rb_erase(&node->rb_node, &proc->nodes);
+ binder_inner_proc_unlock(proc);
+ binder_node_lock(node);
+ /*
+ * Acquire the node lock before freeing the
+ * node to serialize with other threads that
+ * may have been holding the node lock while
+ * decrementing this node (avoids race where
+ * this thread frees while the other thread
+ * is unlocking the node after the final
+ * decrement)
+ */
+ binder_node_unlock(node);
+ binder_free_node(node);
+ } else
+ binder_inner_proc_unlock(proc);
+
+ if (weak && !has_weak_ref)
+ ret = binder_put_node_cmd(
+ proc, thread, &ptr, node_ptr,
+ node_cookie, node_debug_id,
+ BR_INCREFS, "BR_INCREFS");
+ if (!ret && strong && !has_strong_ref)
+ ret = binder_put_node_cmd(
+ proc, thread, &ptr, node_ptr,
+ node_cookie, node_debug_id,
+ BR_ACQUIRE, "BR_ACQUIRE");
+ if (!ret && !strong && has_strong_ref)
+ ret = binder_put_node_cmd(
+ proc, thread, &ptr, node_ptr,
+ node_cookie, node_debug_id,
+ BR_RELEASE, "BR_RELEASE");
+ if (!ret && !weak && has_weak_ref)
+ ret = binder_put_node_cmd(
+ proc, thread, &ptr, node_ptr,
+ node_cookie, node_debug_id,
+ BR_DECREFS, "BR_DECREFS");
+ if (orig_ptr == ptr)
+ binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+ "%d:%d node %d u%016llx c%016llx state unchanged\n",
+ proc->pid, thread->pid,
+ node_debug_id,
+ (u64)node_ptr,
+ (u64)node_cookie);
+ if (ret)
+ return ret;
+ } break;
+ case BINDER_WORK_DEAD_BINDER:
+ case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
+ case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
+ struct binder_ref_death *death;
+ uint32_t cmd;
+ binder_uintptr_t cookie;
+
+ death = container_of(w, struct binder_ref_death, work);
+ if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
+ cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
+ else
+ cmd = BR_DEAD_BINDER;
+ cookie = death->cookie;
+
+ binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
+ "%d:%d %s %016llx\n",
+ proc->pid, thread->pid,
+ cmd == BR_DEAD_BINDER ?
+ "BR_DEAD_BINDER" :
+ "BR_CLEAR_DEATH_NOTIFICATION_DONE",
+ (u64)cookie);
+ if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {
+ binder_inner_proc_unlock(proc);
+ kfree(death);
+ binder_stats_deleted(BINDER_STAT_DEATH);
+ } else {
+ binder_enqueue_work_ilocked(
+ w, &proc->delivered_death);
+ binder_inner_proc_unlock(proc);
+ }
+ if (put_user(cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ if (put_user(cookie,
+ (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+ binder_stat_br(proc, thread, cmd);
+ if (cmd == BR_DEAD_BINDER)
+ goto done; /* DEAD_BINDER notifications can cause transactions */
+ } break;
+
+ case BINDER_WORK_FROZEN_BINDER: {
+ struct binder_ref_freeze *freeze;
+ struct binder_frozen_state_info info;
+
+ memset(&info, 0, sizeof(info));
+ freeze = container_of(w, struct binder_ref_freeze, work);
+ info.is_frozen = freeze->is_frozen;
+ info.cookie = freeze->cookie;
+ freeze->sent = true;
+ binder_enqueue_work_ilocked(w, &proc->delivered_freeze);
+ binder_inner_proc_unlock(proc);
+
+ if (put_user(BR_FROZEN_BINDER, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ if (copy_to_user(ptr, &info, sizeof(info)))
+ return -EFAULT;
+ ptr += sizeof(info);
+ binder_stat_br(proc, thread, BR_FROZEN_BINDER);
+ goto done; /* BR_FROZEN_BINDER notifications can cause transactions */
+ } break;
+
+ case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: {
+ struct binder_ref_freeze *freeze =
+ container_of(w, struct binder_ref_freeze, work);
+ binder_uintptr_t cookie = freeze->cookie;
+
+ binder_inner_proc_unlock(proc);
+ kfree(freeze);
+ binder_stats_deleted(BINDER_STAT_FREEZE);
+ if (put_user(BR_CLEAR_FREEZE_NOTIFICATION_DONE, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ if (put_user(cookie, (binder_uintptr_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(binder_uintptr_t);
+ binder_stat_br(proc, thread, BR_CLEAR_FREEZE_NOTIFICATION_DONE);
+ } break;
+
+ default:
+ binder_inner_proc_unlock(proc);
+ pr_err("%d:%d: bad work type %d\n",
+ proc->pid, thread->pid, w->type);
+ break;
+ }
+
+ if (!t)
+ continue;
+
+ BUG_ON(t->buffer == NULL);
+ if (t->buffer->target_node) {
+ struct binder_node *target_node = t->buffer->target_node;
+
+ trd->target.ptr = target_node->ptr;
+ trd->cookie = target_node->cookie;
+ t->saved_priority = task_nice(current);
+ if (t->priority < target_node->min_priority &&
+ !(t->flags & TF_ONE_WAY))
+ binder_set_nice(t->priority);
+ else if (!(t->flags & TF_ONE_WAY) ||
+ t->saved_priority > target_node->min_priority)
+ binder_set_nice(target_node->min_priority);
+ cmd = BR_TRANSACTION;
+ } else {
+ trd->target.ptr = 0;
+ trd->cookie = 0;
+ cmd = BR_REPLY;
+ }
+ trd->code = t->code;
+ trd->flags = t->flags;
+ trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid);
+
+ t_from = binder_get_txn_from(t);
+ if (t_from) {
+ struct task_struct *sender = t_from->proc->tsk;
+
+ trd->sender_pid =
+ task_tgid_nr_ns(sender,
+ task_active_pid_ns(current));
+ } else {
+ trd->sender_pid = 0;
+ }
+
+ ret = binder_apply_fd_fixups(proc, t);
+ if (ret) {
+ struct binder_buffer *buffer = t->buffer;
+ bool oneway = !!(t->flags & TF_ONE_WAY);
+ int tid = t->debug_id;
+
+ if (t_from)
+ binder_thread_dec_tmpref(t_from);
+ buffer->transaction = NULL;
+ binder_cleanup_transaction(t, "fd fixups failed",
+ BR_FAILED_REPLY);
+ binder_free_buf(proc, thread, buffer, true);
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n",
+ proc->pid, thread->pid,
+ oneway ? "async " :
+ (cmd == BR_REPLY ? "reply " : ""),
+ tid, BR_FAILED_REPLY, ret, __LINE__);
+ if (cmd == BR_REPLY) {
+ cmd = BR_FAILED_REPLY;
+ if (put_user(cmd, (uint32_t __user *)ptr))
+ return -EFAULT;
+ ptr += sizeof(uint32_t);
+ binder_stat_br(proc, thread, cmd);
+ break;
+ }
+ continue;
+ }
+ trd->data_size = t->buffer->data_size;
+ trd->offsets_size = t->buffer->offsets_size;
+ trd->data.ptr.buffer = t->buffer->user_data;
+ trd->data.ptr.offsets = trd->data.ptr.buffer +
+ ALIGN(t->buffer->data_size,
+ sizeof(void *));
+
+ tr.secctx = t->security_ctx;
+ if (t->security_ctx) {
+ cmd = BR_TRANSACTION_SEC_CTX;
+ trsize = sizeof(tr);
+ }
+ if (put_user(cmd, (uint32_t __user *)ptr)) {
+ if (t_from)
+ binder_thread_dec_tmpref(t_from);
+
+ binder_cleanup_transaction(t, "put_user failed",
+ BR_FAILED_REPLY);
+
+ return -EFAULT;
+ }
+ ptr += sizeof(uint32_t);
+ if (copy_to_user(ptr, &tr, trsize)) {
+ if (t_from)
+ binder_thread_dec_tmpref(t_from);
+
+ binder_cleanup_transaction(t, "copy_to_user failed",
+ BR_FAILED_REPLY);
+
+ return -EFAULT;
+ }
+ ptr += trsize;
+
+ trace_binder_transaction_received(t);
+ binder_stat_br(proc, thread, cmd);
+ binder_debug(BINDER_DEBUG_TRANSACTION,
+ "%d:%d %s %d %d:%d, cmd %u size %zd-%zd\n",
+ proc->pid, thread->pid,
+ (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
+ (cmd == BR_TRANSACTION_SEC_CTX) ?
+ "BR_TRANSACTION_SEC_CTX" : "BR_REPLY",
+ t->debug_id, t_from ? t_from->proc->pid : 0,
+ t_from ? t_from->pid : 0, cmd,
+ t->buffer->data_size, t->buffer->offsets_size);
+
+ if (t_from)
+ binder_thread_dec_tmpref(t_from);
+ t->buffer->allow_user_free = 1;
+ if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) {
+ binder_inner_proc_lock(thread->proc);
+ t->to_parent = thread->transaction_stack;
+ t->to_thread = thread;
+ thread->transaction_stack = t;
+ binder_inner_proc_unlock(thread->proc);
+ } else {
+ binder_free_transaction(t);
+ }
+ break;
+ }
+
+done:
+
+ *consumed = ptr - buffer;
+ binder_inner_proc_lock(proc);
+ if (proc->requested_threads == 0 &&
+ list_empty(&thread->proc->waiting_threads) &&
+ proc->requested_threads_started < proc->max_threads &&
+ (thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
+ BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */
+ /*spawn a new thread if we leave this out */) {
+ proc->requested_threads++;
+ binder_inner_proc_unlock(proc);
+ binder_debug(BINDER_DEBUG_THREADS,
+ "%d:%d BR_SPAWN_LOOPER\n",
+ proc->pid, thread->pid);
+ if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
+ return -EFAULT;
+ binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
+ } else
+ binder_inner_proc_unlock(proc);
+ return 0;
+}
+
+static void binder_release_work(struct binder_proc *proc,
+ struct list_head *list)
+{
+ struct binder_work *w;
+ enum binder_work_type wtype;
+
+ while (1) {
+ binder_inner_proc_lock(proc);
+ w = binder_dequeue_work_head_ilocked(list);
+ wtype = w ? w->type : 0;
+ binder_inner_proc_unlock(proc);
+ if (!w)
+ return;
+
+ switch (wtype) {
+ case BINDER_WORK_TRANSACTION: {
+ struct binder_transaction *t;
+
+ t = container_of(w, struct binder_transaction, work);
+
+ binder_cleanup_transaction(t, "process died.",
+ BR_DEAD_REPLY);
+ } break;
+ case BINDER_WORK_RETURN_ERROR: {
+ struct binder_error *e = container_of(
+ w, struct binder_error, work);
+
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "undelivered TRANSACTION_ERROR: %u\n",
+ e->cmd);
+ } break;
+ case BINDER_WORK_TRANSACTION_PENDING:
+ case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT:
+ case BINDER_WORK_TRANSACTION_COMPLETE: {
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "undelivered TRANSACTION_COMPLETE\n");
+ kfree(w);
+ binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
+ } break;
+ case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
+ case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
+ struct binder_ref_death *death;
+
+ death = container_of(w, struct binder_ref_death, work);
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "undelivered death notification, %016llx\n",
+ (u64)death->cookie);
+ kfree(death);
+ binder_stats_deleted(BINDER_STAT_DEATH);
+ } break;
+ case BINDER_WORK_NODE:
+ break;
+ case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: {
+ struct binder_ref_freeze *freeze;
+
+ freeze = container_of(w, struct binder_ref_freeze, work);
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "undelivered freeze notification, %016llx\n",
+ (u64)freeze->cookie);
+ kfree(freeze);
+ binder_stats_deleted(BINDER_STAT_FREEZE);
+ } break;
+ default:
+ pr_err("unexpected work type, %d, not freed\n",
+ wtype);
+ break;
+ }
+ }
+
+}
+
+static struct binder_thread *binder_get_thread_ilocked(
+ struct binder_proc *proc, struct binder_thread *new_thread)
+{
+ struct binder_thread *thread = NULL;
+ struct rb_node *parent = NULL;
+ struct rb_node **p = &proc->threads.rb_node;
+
+ while (*p) {
+ parent = *p;
+ thread = rb_entry(parent, struct binder_thread, rb_node);
+
+ if (current->pid < thread->pid)
+ p = &(*p)->rb_left;
+ else if (current->pid > thread->pid)
+ p = &(*p)->rb_right;
+ else
+ return thread;
+ }
+ if (!new_thread)
+ return NULL;
+ thread = new_thread;
+ binder_stats_created(BINDER_STAT_THREAD);
+ thread->proc = proc;
+ thread->pid = current->pid;
+ atomic_set(&thread->tmp_ref, 0);
+ init_waitqueue_head(&thread->wait);
+ INIT_LIST_HEAD(&thread->todo);
+ rb_link_node(&thread->rb_node, parent, p);
+ rb_insert_color(&thread->rb_node, &proc->threads);
+ thread->looper_need_return = true;
+ thread->return_error.work.type = BINDER_WORK_RETURN_ERROR;
+ thread->return_error.cmd = BR_OK;
+ thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR;
+ thread->reply_error.cmd = BR_OK;
+ thread->ee.command = BR_OK;
+ INIT_LIST_HEAD(&new_thread->waiting_thread_node);
+ return thread;
+}
+
+static struct binder_thread *binder_get_thread(struct binder_proc *proc)
+{
+ struct binder_thread *thread;
+ struct binder_thread *new_thread;
+
+ binder_inner_proc_lock(proc);
+ thread = binder_get_thread_ilocked(proc, NULL);
+ binder_inner_proc_unlock(proc);
+ if (!thread) {
+ new_thread = kzalloc(sizeof(*thread), GFP_KERNEL);
+ if (new_thread == NULL)
+ return NULL;
+ binder_inner_proc_lock(proc);
+ thread = binder_get_thread_ilocked(proc, new_thread);
+ binder_inner_proc_unlock(proc);
+ if (thread != new_thread)
+ kfree(new_thread);
+ }
+ return thread;
+}
+
+static void binder_free_proc(struct binder_proc *proc)
+{
+ struct binder_device *device;
+
+ BUG_ON(!list_empty(&proc->todo));
+ BUG_ON(!list_empty(&proc->delivered_death));
+ if (proc->outstanding_txns)
+ pr_warn("%s: Unexpected outstanding_txns %d\n",
+ __func__, proc->outstanding_txns);
+ device = container_of(proc->context, struct binder_device, context);
+ if (refcount_dec_and_test(&device->ref)) {
+ binder_remove_device(device);
+ kfree(proc->context->name);
+ kfree(device);
+ }
+ binder_alloc_deferred_release(&proc->alloc);
+ put_task_struct(proc->tsk);
+ put_cred(proc->cred);
+ binder_stats_deleted(BINDER_STAT_PROC);
+ dbitmap_free(&proc->dmap);
+ kfree(proc);
+}
+
+static void binder_free_thread(struct binder_thread *thread)
+{
+ BUG_ON(!list_empty(&thread->todo));
+ binder_stats_deleted(BINDER_STAT_THREAD);
+ binder_proc_dec_tmpref(thread->proc);
+ kfree(thread);
+}
+
+static int binder_thread_release(struct binder_proc *proc,
+ struct binder_thread *thread)
+{
+ struct binder_transaction *t;
+ struct binder_transaction *send_reply = NULL;
+ int active_transactions = 0;
+ struct binder_transaction *last_t = NULL;
+
+ binder_inner_proc_lock(thread->proc);
+ /*
+ * take a ref on the proc so it survives
+ * after we remove this thread from proc->threads.
+ * The corresponding dec is when we actually
+ * free the thread in binder_free_thread()
+ */
+ proc->tmp_ref++;
+ /*
+ * take a ref on this thread to ensure it
+ * survives while we are releasing it
+ */
+ atomic_inc(&thread->tmp_ref);
+ rb_erase(&thread->rb_node, &proc->threads);
+ t = thread->transaction_stack;
+ if (t) {
+ spin_lock(&t->lock);
+ if (t->to_thread == thread)
+ send_reply = t;
+ } else {
+ __acquire(&t->lock);
+ }
+ thread->is_dead = true;
+
+ while (t) {
+ last_t = t;
+ active_transactions++;
+ binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+ "release %d:%d transaction %d %s, still active\n",
+ proc->pid, thread->pid,
+ t->debug_id,
+ (t->to_thread == thread) ? "in" : "out");
+
+ if (t->to_thread == thread) {
+ thread->proc->outstanding_txns--;
+ t->to_proc = NULL;
+ t->to_thread = NULL;
+ if (t->buffer) {
+ t->buffer->transaction = NULL;
+ t->buffer = NULL;
+ }
+ t = t->to_parent;
+ } else if (t->from == thread) {
+ t->from = NULL;
+ t = t->from_parent;
+ } else
+ BUG();
+ spin_unlock(&last_t->lock);
+ if (t)
+ spin_lock(&t->lock);
+ else
+ __acquire(&t->lock);
+ }
+ /* annotation for sparse, lock not acquired in last iteration above */
+ __release(&t->lock);
+
+ /*
+ * If this thread used poll, make sure we remove the waitqueue from any
+ * poll data structures holding it.
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ wake_up_pollfree(&thread->wait);
+
+ binder_inner_proc_unlock(thread->proc);
+
+ /*
+ * This is needed to avoid races between wake_up_pollfree() above and
+ * someone else removing the last entry from the queue for other reasons
+ * (e.g. ep_remove_wait_queue() being called due to an epoll file
+ * descriptor being closed). Such other users hold an RCU read lock, so
+ * we can be sure they're done after we call synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
+
+ if (send_reply)
+ binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
+ binder_release_work(proc, &thread->todo);
+ binder_thread_dec_tmpref(thread);
+ return active_transactions;
+}
+
+static __poll_t binder_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct binder_proc *proc = filp->private_data;
+ struct binder_thread *thread = NULL;
+ bool wait_for_proc_work;
+
+ thread = binder_get_thread(proc);
+ if (!thread)
+ return EPOLLERR;
+
+ binder_inner_proc_lock(thread->proc);
+ thread->looper |= BINDER_LOOPER_STATE_POLL;
+ wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
+
+ binder_inner_proc_unlock(thread->proc);
+
+ poll_wait(filp, &thread->wait, wait);
+
+ if (binder_has_work(thread, wait_for_proc_work))
+ return EPOLLIN;
+
+ return 0;
+}
+
+static int binder_ioctl_write_read(struct file *filp, unsigned long arg,
+ struct binder_thread *thread)
+{
+ int ret = 0;
+ struct binder_proc *proc = filp->private_data;
+ void __user *ubuf = (void __user *)arg;
+ struct binder_write_read bwr;
+
+ if (copy_from_user(&bwr, ubuf, sizeof(bwr)))
+ return -EFAULT;
+
+ binder_debug(BINDER_DEBUG_READ_WRITE,
+ "%d:%d write %lld at %016llx, read %lld at %016llx\n",
+ proc->pid, thread->pid,
+ (u64)bwr.write_size, (u64)bwr.write_buffer,
+ (u64)bwr.read_size, (u64)bwr.read_buffer);
+
+ if (bwr.write_size > 0) {
+ ret = binder_thread_write(proc, thread,
+ bwr.write_buffer,
+ bwr.write_size,
+ &bwr.write_consumed);
+ trace_binder_write_done(ret);
+ if (ret < 0) {
+ bwr.read_consumed = 0;
+ goto out;
+ }
+ }
+ if (bwr.read_size > 0) {
+ ret = binder_thread_read(proc, thread, bwr.read_buffer,
+ bwr.read_size,
+ &bwr.read_consumed,
+ filp->f_flags & O_NONBLOCK);
+ trace_binder_read_done(ret);
+ binder_inner_proc_lock(proc);
+ if (!binder_worklist_empty_ilocked(&proc->todo))
+ binder_wakeup_proc_ilocked(proc);
+ binder_inner_proc_unlock(proc);
+ if (ret < 0)
+ goto out;
+ }
+ binder_debug(BINDER_DEBUG_READ_WRITE,
+ "%d:%d wrote %lld of %lld, read return %lld of %lld\n",
+ proc->pid, thread->pid,
+ (u64)bwr.write_consumed, (u64)bwr.write_size,
+ (u64)bwr.read_consumed, (u64)bwr.read_size);
+out:
+ if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ ret = -EFAULT;
+ return ret;
+}
+
+static int binder_ioctl_set_ctx_mgr(struct file *filp,
+ struct flat_binder_object *fbo)
+{
+ int ret = 0;
+ struct binder_proc *proc = filp->private_data;
+ struct binder_context *context = proc->context;
+ struct binder_node *new_node;
+ kuid_t curr_euid = current_euid();
+
+ guard(mutex)(&context->context_mgr_node_lock);
+ if (context->binder_context_mgr_node) {
+ pr_err("BINDER_SET_CONTEXT_MGR already set\n");
+ return -EBUSY;
+ }
+ ret = security_binder_set_context_mgr(proc->cred);
+ if (ret < 0)
+ return ret;
+ if (uid_valid(context->binder_context_mgr_uid)) {
+ if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) {
+ pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
+ from_kuid(&init_user_ns, curr_euid),
+ from_kuid(&init_user_ns,
+ context->binder_context_mgr_uid));
+ return -EPERM;
+ }
+ } else {
+ context->binder_context_mgr_uid = curr_euid;
+ }
+ new_node = binder_new_node(proc, fbo);
+ if (!new_node)
+ return -ENOMEM;
+ binder_node_lock(new_node);
+ new_node->local_weak_refs++;
+ new_node->local_strong_refs++;
+ new_node->has_strong_ref = 1;
+ new_node->has_weak_ref = 1;
+ context->binder_context_mgr_node = new_node;
+ binder_node_unlock(new_node);
+ binder_put_node(new_node);
+ return ret;
+}
+
+static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc,
+ struct binder_node_info_for_ref *info)
+{
+ struct binder_node *node;
+ struct binder_context *context = proc->context;
+ __u32 handle = info->handle;
+
+ if (info->strong_count || info->weak_count || info->reserved1 ||
+ info->reserved2 || info->reserved3) {
+ binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.",
+ proc->pid);
+ return -EINVAL;
+ }
+
+ /* This ioctl may only be used by the context manager */
+ mutex_lock(&context->context_mgr_node_lock);
+ if (!context->binder_context_mgr_node ||
+ context->binder_context_mgr_node->proc != proc) {
+ mutex_unlock(&context->context_mgr_node_lock);
+ return -EPERM;
+ }
+ mutex_unlock(&context->context_mgr_node_lock);
+
+ node = binder_get_node_from_ref(proc, handle, true, NULL);
+ if (!node)
+ return -EINVAL;
+
+ info->strong_count = node->local_strong_refs +
+ node->internal_strong_refs;
+ info->weak_count = node->local_weak_refs;
+
+ binder_put_node(node);
+
+ return 0;
+}
+
+static int binder_ioctl_get_node_debug_info(struct binder_proc *proc,
+ struct binder_node_debug_info *info)
+{
+ struct rb_node *n;
+ binder_uintptr_t ptr = info->ptr;
+
+ memset(info, 0, sizeof(*info));
+
+ binder_inner_proc_lock(proc);
+ for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) {
+ struct binder_node *node = rb_entry(n, struct binder_node,
+ rb_node);
+ if (node->ptr > ptr) {
+ info->ptr = node->ptr;
+ info->cookie = node->cookie;
+ info->has_strong_ref = node->has_strong_ref;
+ info->has_weak_ref = node->has_weak_ref;
+ break;
+ }
+ }
+ binder_inner_proc_unlock(proc);
+
+ return 0;
+}
+
+static bool binder_txns_pending_ilocked(struct binder_proc *proc)
+{
+ struct rb_node *n;
+ struct binder_thread *thread;
+
+ if (proc->outstanding_txns > 0)
+ return true;
+
+ for (n = rb_first(&proc->threads); n; n = rb_next(n)) {
+ thread = rb_entry(n, struct binder_thread, rb_node);
+ if (thread->transaction_stack)
+ return true;
+ }
+ return false;
+}
+
+static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen)
+{
+ struct binder_node *prev = NULL;
+ struct rb_node *n;
+ struct binder_ref *ref;
+
+ binder_inner_proc_lock(proc);
+ for (n = rb_first(&proc->nodes); n; n = rb_next(n)) {
+ struct binder_node *node;
+
+ node = rb_entry(n, struct binder_node, rb_node);
+ binder_inc_node_tmpref_ilocked(node);
+ binder_inner_proc_unlock(proc);
+ if (prev)
+ binder_put_node(prev);
+ binder_node_lock(node);
+ hlist_for_each_entry(ref, &node->refs, node_entry) {
+ /*
+ * Need the node lock to synchronize
+ * with new notification requests and the
+ * inner lock to synchronize with queued
+ * freeze notifications.
+ */
+ binder_inner_proc_lock(ref->proc);
+ if (!ref->freeze) {
+ binder_inner_proc_unlock(ref->proc);
+ continue;
+ }
+ ref->freeze->work.type = BINDER_WORK_FROZEN_BINDER;
+ if (list_empty(&ref->freeze->work.entry)) {
+ ref->freeze->is_frozen = is_frozen;
+ binder_enqueue_work_ilocked(&ref->freeze->work, &ref->proc->todo);
+ binder_wakeup_proc_ilocked(ref->proc);
+ } else {
+ if (ref->freeze->sent && ref->freeze->is_frozen != is_frozen)
+ ref->freeze->resend = true;
+ ref->freeze->is_frozen = is_frozen;
+ }
+ binder_inner_proc_unlock(ref->proc);
+ }
+ prev = node;
+ binder_node_unlock(node);
+ binder_inner_proc_lock(proc);
+ if (proc->is_dead)
+ break;
+ }
+ binder_inner_proc_unlock(proc);
+ if (prev)
+ binder_put_node(prev);
+}
+
+static int binder_ioctl_freeze(struct binder_freeze_info *info,
+ struct binder_proc *target_proc)
+{
+ int ret = 0;
+
+ if (!info->enable) {
+ binder_inner_proc_lock(target_proc);
+ target_proc->sync_recv = false;
+ target_proc->async_recv = false;
+ target_proc->is_frozen = false;
+ binder_inner_proc_unlock(target_proc);
+ binder_add_freeze_work(target_proc, false);
+ return 0;
+ }
+
+ /*
+ * Freezing the target. Prevent new transactions by
+ * setting frozen state. If timeout specified, wait
+ * for transactions to drain.
+ */
+ binder_inner_proc_lock(target_proc);
+ target_proc->sync_recv = false;
+ target_proc->async_recv = false;
+ target_proc->is_frozen = true;
+ binder_inner_proc_unlock(target_proc);
+
+ if (info->timeout_ms > 0)
+ ret = wait_event_interruptible_timeout(
+ target_proc->freeze_wait,
+ (!target_proc->outstanding_txns),
+ msecs_to_jiffies(info->timeout_ms));
+
+ /* Check pending transactions that wait for reply */
+ if (ret >= 0) {
+ binder_inner_proc_lock(target_proc);
+ if (binder_txns_pending_ilocked(target_proc))
+ ret = -EAGAIN;
+ binder_inner_proc_unlock(target_proc);
+ }
+
+ if (ret < 0) {
+ binder_inner_proc_lock(target_proc);
+ target_proc->is_frozen = false;
+ binder_inner_proc_unlock(target_proc);
+ } else {
+ binder_add_freeze_work(target_proc, true);
+ }
+
+ return ret;
+}
+
+static int binder_ioctl_get_freezer_info(
+ struct binder_frozen_status_info *info)
+{
+ struct binder_proc *target_proc;
+ bool found = false;
+ __u32 txns_pending;
+
+ info->sync_recv = 0;
+ info->async_recv = 0;
+
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(target_proc, &binder_procs, proc_node) {
+ if (target_proc->pid == info->pid) {
+ found = true;
+ binder_inner_proc_lock(target_proc);
+ txns_pending = binder_txns_pending_ilocked(target_proc);
+ info->sync_recv |= target_proc->sync_recv |
+ (txns_pending << 1);
+ info->async_recv |= target_proc->async_recv;
+ binder_inner_proc_unlock(target_proc);
+ }
+ }
+ mutex_unlock(&binder_procs_lock);
+
+ if (!found)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int binder_ioctl_get_extended_error(struct binder_thread *thread,
+ void __user *ubuf)
+{
+ struct binder_extended_error ee;
+
+ binder_inner_proc_lock(thread->proc);
+ ee = thread->ee;
+ binder_set_extended_error(&thread->ee, 0, BR_OK, 0);
+ binder_inner_proc_unlock(thread->proc);
+
+ if (copy_to_user(ubuf, &ee, sizeof(ee)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+ struct binder_proc *proc = filp->private_data;
+ struct binder_thread *thread;
+ void __user *ubuf = (void __user *)arg;
+
+ trace_binder_ioctl(cmd, arg);
+
+ ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
+ if (ret)
+ goto err_unlocked;
+
+ thread = binder_get_thread(proc);
+ if (thread == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ switch (cmd) {
+ case BINDER_WRITE_READ:
+ ret = binder_ioctl_write_read(filp, arg, thread);
+ if (ret)
+ goto err;
+ break;
+ case BINDER_SET_MAX_THREADS: {
+ u32 max_threads;
+
+ if (copy_from_user(&max_threads, ubuf,
+ sizeof(max_threads))) {
+ ret = -EINVAL;
+ goto err;
+ }
+ binder_inner_proc_lock(proc);
+ proc->max_threads = max_threads;
+ binder_inner_proc_unlock(proc);
+ break;
+ }
+ case BINDER_SET_CONTEXT_MGR_EXT: {
+ struct flat_binder_object fbo;
+
+ if (copy_from_user(&fbo, ubuf, sizeof(fbo))) {
+ ret = -EINVAL;
+ goto err;
+ }
+ ret = binder_ioctl_set_ctx_mgr(filp, &fbo);
+ if (ret)
+ goto err;
+ break;
+ }
+ case BINDER_SET_CONTEXT_MGR:
+ ret = binder_ioctl_set_ctx_mgr(filp, NULL);
+ if (ret)
+ goto err;
+ break;
+ case BINDER_THREAD_EXIT:
+ binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
+ proc->pid, thread->pid);
+ binder_thread_release(proc, thread);
+ thread = NULL;
+ break;
+ case BINDER_VERSION: {
+ struct binder_version __user *ver = ubuf;
+
+ if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
+ &ver->protocol_version)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ }
+ case BINDER_GET_NODE_INFO_FOR_REF: {
+ struct binder_node_info_for_ref info;
+
+ if (copy_from_user(&info, ubuf, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ ret = binder_ioctl_get_node_info_for_ref(proc, &info);
+ if (ret < 0)
+ goto err;
+
+ if (copy_to_user(ubuf, &info, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ break;
+ }
+ case BINDER_GET_NODE_DEBUG_INFO: {
+ struct binder_node_debug_info info;
+
+ if (copy_from_user(&info, ubuf, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ ret = binder_ioctl_get_node_debug_info(proc, &info);
+ if (ret < 0)
+ goto err;
+
+ if (copy_to_user(ubuf, &info, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ break;
+ }
+ case BINDER_FREEZE: {
+ struct binder_freeze_info info;
+ struct binder_proc **target_procs = NULL, *target_proc;
+ int target_procs_count = 0, i = 0;
+
+ ret = 0;
+
+ if (copy_from_user(&info, ubuf, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(target_proc, &binder_procs, proc_node) {
+ if (target_proc->pid == info.pid)
+ target_procs_count++;
+ }
+
+ if (target_procs_count == 0) {
+ mutex_unlock(&binder_procs_lock);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ target_procs = kcalloc(target_procs_count,
+ sizeof(struct binder_proc *),
+ GFP_KERNEL);
+
+ if (!target_procs) {
+ mutex_unlock(&binder_procs_lock);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ hlist_for_each_entry(target_proc, &binder_procs, proc_node) {
+ if (target_proc->pid != info.pid)
+ continue;
+
+ binder_inner_proc_lock(target_proc);
+ target_proc->tmp_ref++;
+ binder_inner_proc_unlock(target_proc);
+
+ target_procs[i++] = target_proc;
+ }
+ mutex_unlock(&binder_procs_lock);
+
+ for (i = 0; i < target_procs_count; i++) {
+ if (ret >= 0)
+ ret = binder_ioctl_freeze(&info,
+ target_procs[i]);
+
+ binder_proc_dec_tmpref(target_procs[i]);
+ }
+
+ kfree(target_procs);
+
+ if (ret < 0)
+ goto err;
+ break;
+ }
+ case BINDER_GET_FROZEN_INFO: {
+ struct binder_frozen_status_info info;
+
+ if (copy_from_user(&info, ubuf, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ ret = binder_ioctl_get_freezer_info(&info);
+ if (ret < 0)
+ goto err;
+
+ if (copy_to_user(ubuf, &info, sizeof(info))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ break;
+ }
+ case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+ uint32_t enable;
+
+ if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ binder_inner_proc_lock(proc);
+ proc->oneway_spam_detection_enabled = (bool)enable;
+ binder_inner_proc_unlock(proc);
+ break;
+ }
+ case BINDER_GET_EXTENDED_ERROR:
+ ret = binder_ioctl_get_extended_error(thread, ubuf);
+ if (ret < 0)
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ ret = 0;
+err:
+ if (thread)
+ thread->looper_need_return = false;
+ wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
+ if (ret && ret != -EINTR)
+ pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
+err_unlocked:
+ trace_binder_ioctl_done(ret);
+ return ret;
+}
+
+static void binder_vma_open(struct vm_area_struct *vma)
+{
+ struct binder_proc *proc = vma->vm_private_data;
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "%d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
+ proc->pid, vma->vm_start, vma->vm_end,
+ (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
+ (unsigned long)pgprot_val(vma->vm_page_prot));
+}
+
+static void binder_vma_close(struct vm_area_struct *vma)
+{
+ struct binder_proc *proc = vma->vm_private_data;
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "%d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
+ proc->pid, vma->vm_start, vma->vm_end,
+ (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
+ (unsigned long)pgprot_val(vma->vm_page_prot));
+ binder_alloc_vma_close(&proc->alloc);
+}
+
+VISIBLE_IF_KUNIT vm_fault_t binder_vm_fault(struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_vm_fault);
+
+static const struct vm_operations_struct binder_vm_ops = {
+ .open = binder_vma_open,
+ .close = binder_vma_close,
+ .fault = binder_vm_fault,
+};
+
+static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct binder_proc *proc = filp->private_data;
+
+ if (proc->tsk != current->group_leader)
+ return -EINVAL;
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
+ __func__, proc->pid, vma->vm_start, vma->vm_end,
+ (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
+ (unsigned long)pgprot_val(vma->vm_page_prot));
+
+ if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) {
+ pr_err("%s: %d %lx-%lx %s failed %d\n", __func__,
+ proc->pid, vma->vm_start, vma->vm_end, "bad vm_flags", -EPERM);
+ return -EPERM;
+ }
+ vm_flags_mod(vma, VM_DONTCOPY | VM_MIXEDMAP, VM_MAYWRITE);
+
+ vma->vm_ops = &binder_vm_ops;
+ vma->vm_private_data = proc;
+
+ return binder_alloc_mmap_handler(&proc->alloc, vma);
+}
+
+static int binder_open(struct inode *nodp, struct file *filp)
+{
+ struct binder_proc *proc, *itr;
+ struct binder_device *binder_dev;
+ struct binderfs_info *info;
+ struct dentry *binder_binderfs_dir_entry_proc = NULL;
+ bool existing_pid = false;
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
+ current->group_leader->pid, current->pid);
+
+ proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+ if (proc == NULL)
+ return -ENOMEM;
+
+ dbitmap_init(&proc->dmap);
+ spin_lock_init(&proc->inner_lock);
+ spin_lock_init(&proc->outer_lock);
+ get_task_struct(current->group_leader);
+ proc->tsk = current->group_leader;
+ proc->cred = get_cred(filp->f_cred);
+ INIT_LIST_HEAD(&proc->todo);
+ init_waitqueue_head(&proc->freeze_wait);
+ proc->default_priority = task_nice(current);
+ /* binderfs stashes devices in i_private */
+ if (is_binderfs_device(nodp)) {
+ binder_dev = nodp->i_private;
+ info = nodp->i_sb->s_fs_info;
+ binder_binderfs_dir_entry_proc = info->proc_log_dir;
+ } else {
+ binder_dev = container_of(filp->private_data,
+ struct binder_device, miscdev);
+ }
+ refcount_inc(&binder_dev->ref);
+ proc->context = &binder_dev->context;
+ binder_alloc_init(&proc->alloc);
+
+ binder_stats_created(BINDER_STAT_PROC);
+ proc->pid = current->group_leader->pid;
+ INIT_LIST_HEAD(&proc->delivered_death);
+ INIT_LIST_HEAD(&proc->delivered_freeze);
+ INIT_LIST_HEAD(&proc->waiting_threads);
+ filp->private_data = proc;
+
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(itr, &binder_procs, proc_node) {
+ if (itr->pid == proc->pid) {
+ existing_pid = true;
+ break;
+ }
+ }
+ hlist_add_head(&proc->proc_node, &binder_procs);
+ mutex_unlock(&binder_procs_lock);
+
+ if (binder_debugfs_dir_entry_proc && !existing_pid) {
+ char strbuf[11];
+
+ snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
+ /*
+ * proc debug entries are shared between contexts.
+ * Only create for the first PID to avoid debugfs log spamming
+ * The printing code will anyway print all contexts for a given
+ * PID so this is not a problem.
+ */
+ proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
+ binder_debugfs_dir_entry_proc,
+ (void *)(unsigned long)proc->pid,
+ &proc_fops);
+ }
+
+ if (binder_binderfs_dir_entry_proc && !existing_pid) {
+ char strbuf[11];
+ struct dentry *binderfs_entry;
+
+ snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
+ /*
+ * Similar to debugfs, the process specific log file is shared
+ * between contexts. Only create for the first PID.
+ * This is ok since same as debugfs, the log file will contain
+ * information on all contexts of a given PID.
+ */
+ binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
+ strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
+ if (!IS_ERR(binderfs_entry)) {
+ proc->binderfs_entry = binderfs_entry;
+ } else {
+ int error;
+
+ error = PTR_ERR(binderfs_entry);
+ pr_warn("Unable to create file %s in binderfs (error %d)\n",
+ strbuf, error);
+ }
+ }
+
+ return 0;
+}
+
+static int binder_flush(struct file *filp, fl_owner_t id)
+{
+ struct binder_proc *proc = filp->private_data;
+
+ binder_defer_work(proc, BINDER_DEFERRED_FLUSH);
+
+ return 0;
+}
+
+static void binder_deferred_flush(struct binder_proc *proc)
+{
+ struct rb_node *n;
+ int wake_count = 0;
+
+ binder_inner_proc_lock(proc);
+ for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
+ struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node);
+
+ thread->looper_need_return = true;
+ if (thread->looper & BINDER_LOOPER_STATE_WAITING) {
+ wake_up_interruptible(&thread->wait);
+ wake_count++;
+ }
+ }
+ binder_inner_proc_unlock(proc);
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "binder_flush: %d woke %d threads\n", proc->pid,
+ wake_count);
+}
+
+static int binder_release(struct inode *nodp, struct file *filp)
+{
+ struct binder_proc *proc = filp->private_data;
+
+ debugfs_remove(proc->debugfs_entry);
+
+ if (proc->binderfs_entry) {
+ simple_recursive_removal(proc->binderfs_entry, NULL);
+ proc->binderfs_entry = NULL;
+ }
+
+ binder_defer_work(proc, BINDER_DEFERRED_RELEASE);
+
+ return 0;
+}
+
+static int binder_node_release(struct binder_node *node, int refs)
+{
+ struct binder_ref *ref;
+ int death = 0;
+ struct binder_proc *proc = node->proc;
+
+ binder_release_work(proc, &node->async_todo);
+
+ binder_node_lock(node);
+ binder_inner_proc_lock(proc);
+ binder_dequeue_work_ilocked(&node->work);
+ /*
+ * The caller must have taken a temporary ref on the node,
+ */
+ BUG_ON(!node->tmp_refs);
+ if (hlist_empty(&node->refs) && node->tmp_refs == 1) {
+ binder_inner_proc_unlock(proc);
+ binder_node_unlock(node);
+ binder_free_node(node);
+
+ return refs;
+ }
+
+ node->proc = NULL;
+ node->local_strong_refs = 0;
+ node->local_weak_refs = 0;
+ binder_inner_proc_unlock(proc);
+
+ spin_lock(&binder_dead_nodes_lock);
+ hlist_add_head(&node->dead_node, &binder_dead_nodes);
+ spin_unlock(&binder_dead_nodes_lock);
+
+ hlist_for_each_entry(ref, &node->refs, node_entry) {
+ refs++;
+ /*
+ * Need the node lock to synchronize
+ * with new notification requests and the
+ * inner lock to synchronize with queued
+ * death notifications.
+ */
+ binder_inner_proc_lock(ref->proc);
+ if (!ref->death) {
+ binder_inner_proc_unlock(ref->proc);
+ continue;
+ }
+
+ death++;
+
+ BUG_ON(!list_empty(&ref->death->work.entry));
+ ref->death->work.type = BINDER_WORK_DEAD_BINDER;
+ binder_enqueue_work_ilocked(&ref->death->work,
+ &ref->proc->todo);
+ binder_wakeup_proc_ilocked(ref->proc);
+ binder_inner_proc_unlock(ref->proc);
+ }
+
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "node %d now dead, refs %d, death %d\n",
+ node->debug_id, refs, death);
+ binder_node_unlock(node);
+ binder_put_node(node);
+
+ return refs;
+}
+
+static void binder_deferred_release(struct binder_proc *proc)
+{
+ struct binder_context *context = proc->context;
+ struct rb_node *n;
+ int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
+
+ mutex_lock(&binder_procs_lock);
+ hlist_del(&proc->proc_node);
+ mutex_unlock(&binder_procs_lock);
+
+ mutex_lock(&context->context_mgr_node_lock);
+ if (context->binder_context_mgr_node &&
+ context->binder_context_mgr_node->proc == proc) {
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "%s: %d context_mgr_node gone\n",
+ __func__, proc->pid);
+ context->binder_context_mgr_node = NULL;
+ }
+ mutex_unlock(&context->context_mgr_node_lock);
+ binder_inner_proc_lock(proc);
+ /*
+ * Make sure proc stays alive after we
+ * remove all the threads
+ */
+ proc->tmp_ref++;
+
+ proc->is_dead = true;
+ proc->is_frozen = false;
+ proc->sync_recv = false;
+ proc->async_recv = false;
+ threads = 0;
+ active_transactions = 0;
+ while ((n = rb_first(&proc->threads))) {
+ struct binder_thread *thread;
+
+ thread = rb_entry(n, struct binder_thread, rb_node);
+ binder_inner_proc_unlock(proc);
+ threads++;
+ active_transactions += binder_thread_release(proc, thread);
+ binder_inner_proc_lock(proc);
+ }
+
+ nodes = 0;
+ incoming_refs = 0;
+ while ((n = rb_first(&proc->nodes))) {
+ struct binder_node *node;
+
+ node = rb_entry(n, struct binder_node, rb_node);
+ nodes++;
+ /*
+ * take a temporary ref on the node before
+ * calling binder_node_release() which will either
+ * kfree() the node or call binder_put_node()
+ */
+ binder_inc_node_tmpref_ilocked(node);
+ rb_erase(&node->rb_node, &proc->nodes);
+ binder_inner_proc_unlock(proc);
+ incoming_refs = binder_node_release(node, incoming_refs);
+ binder_inner_proc_lock(proc);
+ }
+ binder_inner_proc_unlock(proc);
+
+ outgoing_refs = 0;
+ binder_proc_lock(proc);
+ while ((n = rb_first(&proc->refs_by_desc))) {
+ struct binder_ref *ref;
+
+ ref = rb_entry(n, struct binder_ref, rb_node_desc);
+ outgoing_refs++;
+ binder_cleanup_ref_olocked(ref);
+ binder_proc_unlock(proc);
+ binder_free_ref(ref);
+ binder_proc_lock(proc);
+ }
+ binder_proc_unlock(proc);
+
+ binder_release_work(proc, &proc->todo);
+ binder_release_work(proc, &proc->delivered_death);
+ binder_release_work(proc, &proc->delivered_freeze);
+
+ binder_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n",
+ __func__, proc->pid, threads, nodes, incoming_refs,
+ outgoing_refs, active_transactions);
+
+ binder_proc_dec_tmpref(proc);
+}
+
+static void binder_deferred_func(struct work_struct *work)
+{
+ struct binder_proc *proc;
+
+ int defer;
+
+ do {
+ mutex_lock(&binder_deferred_lock);
+ if (!hlist_empty(&binder_deferred_list)) {
+ proc = hlist_entry(binder_deferred_list.first,
+ struct binder_proc, deferred_work_node);
+ hlist_del_init(&proc->deferred_work_node);
+ defer = proc->deferred_work;
+ proc->deferred_work = 0;
+ } else {
+ proc = NULL;
+ defer = 0;
+ }
+ mutex_unlock(&binder_deferred_lock);
+
+ if (defer & BINDER_DEFERRED_FLUSH)
+ binder_deferred_flush(proc);
+
+ if (defer & BINDER_DEFERRED_RELEASE)
+ binder_deferred_release(proc); /* frees proc */
+ } while (proc);
+}
+static DECLARE_WORK(binder_deferred_work, binder_deferred_func);
+
+static void
+binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer)
+{
+ guard(mutex)(&binder_deferred_lock);
+ proc->deferred_work |= defer;
+ if (hlist_unhashed(&proc->deferred_work_node)) {
+ hlist_add_head(&proc->deferred_work_node,
+ &binder_deferred_list);
+ schedule_work(&binder_deferred_work);
+ }
+}
+
+static void print_binder_transaction_ilocked(struct seq_file *m,
+ struct binder_proc *proc,
+ const char *prefix,
+ struct binder_transaction *t)
+{
+ struct binder_proc *to_proc;
+ struct binder_buffer *buffer = t->buffer;
+ ktime_t current_time = ktime_get();
+
+ spin_lock(&t->lock);
+ to_proc = t->to_proc;
+ seq_printf(m,
+ "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld a%d r%d elapsed %lldms",
+ prefix, t->debug_id, t,
+ t->from_pid,
+ t->from_tid,
+ to_proc ? to_proc->pid : 0,
+ t->to_thread ? t->to_thread->pid : 0,
+ t->code, t->flags, t->priority, t->is_async, t->is_reply,
+ ktime_ms_delta(current_time, t->start_time));
+ spin_unlock(&t->lock);
+
+ if (proc != to_proc) {
+ /*
+ * Can only safely deref buffer if we are holding the
+ * correct proc inner lock for this node
+ */
+ seq_puts(m, "\n");
+ return;
+ }
+
+ if (buffer == NULL) {
+ seq_puts(m, " buffer free\n");
+ return;
+ }
+ if (buffer->target_node)
+ seq_printf(m, " node %d", buffer->target_node->debug_id);
+ seq_printf(m, " size %zd:%zd offset %lx\n",
+ buffer->data_size, buffer->offsets_size,
+ buffer->user_data - proc->alloc.vm_start);
+}
+
+static void print_binder_work_ilocked(struct seq_file *m,
+ struct binder_proc *proc,
+ const char *prefix,
+ const char *transaction_prefix,
+ struct binder_work *w, bool hash_ptrs)
+{
+ struct binder_node *node;
+ struct binder_transaction *t;
+
+ switch (w->type) {
+ case BINDER_WORK_TRANSACTION:
+ t = container_of(w, struct binder_transaction, work);
+ print_binder_transaction_ilocked(
+ m, proc, transaction_prefix, t);
+ break;
+ case BINDER_WORK_RETURN_ERROR: {
+ struct binder_error *e = container_of(
+ w, struct binder_error, work);
+
+ seq_printf(m, "%stransaction error: %u\n",
+ prefix, e->cmd);
+ } break;
+ case BINDER_WORK_TRANSACTION_COMPLETE:
+ seq_printf(m, "%stransaction complete\n", prefix);
+ break;
+ case BINDER_WORK_NODE:
+ node = container_of(w, struct binder_node, work);
+ if (hash_ptrs)
+ seq_printf(m, "%snode work %d: u%p c%p\n",
+ prefix, node->debug_id,
+ (void *)(long)node->ptr,
+ (void *)(long)node->cookie);
+ else
+ seq_printf(m, "%snode work %d: u%016llx c%016llx\n",
+ prefix, node->debug_id,
+ (u64)node->ptr, (u64)node->cookie);
+ break;
+ case BINDER_WORK_DEAD_BINDER:
+ seq_printf(m, "%shas dead binder\n", prefix);
+ break;
+ case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
+ seq_printf(m, "%shas cleared dead binder\n", prefix);
+ break;
+ case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
+ seq_printf(m, "%shas cleared death notification\n", prefix);
+ break;
+ case BINDER_WORK_FROZEN_BINDER:
+ seq_printf(m, "%shas frozen binder\n", prefix);
+ break;
+ case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION:
+ seq_printf(m, "%shas cleared freeze notification\n", prefix);
+ break;
+ default:
+ seq_printf(m, "%sunknown work: type %d\n", prefix, w->type);
+ break;
+ }
+}
+
+static void print_binder_thread_ilocked(struct seq_file *m,
+ struct binder_thread *thread,
+ bool print_always, bool hash_ptrs)
+{
+ struct binder_transaction *t;
+ struct binder_work *w;
+ size_t start_pos = m->count;
+ size_t header_pos;
+
+ seq_printf(m, " thread %d: l %02x need_return %d tr %d\n",
+ thread->pid, thread->looper,
+ thread->looper_need_return,
+ atomic_read(&thread->tmp_ref));
+ header_pos = m->count;
+ t = thread->transaction_stack;
+ while (t) {
+ if (t->from == thread) {
+ print_binder_transaction_ilocked(m, thread->proc,
+ " outgoing transaction", t);
+ t = t->from_parent;
+ } else if (t->to_thread == thread) {
+ print_binder_transaction_ilocked(m, thread->proc,
+ " incoming transaction", t);
+ t = t->to_parent;
+ } else {
+ print_binder_transaction_ilocked(m, thread->proc,
+ " bad transaction", t);
+ t = NULL;
+ }
+ }
+ list_for_each_entry(w, &thread->todo, entry) {
+ print_binder_work_ilocked(m, thread->proc, " ",
+ " pending transaction",
+ w, hash_ptrs);
+ }
+ if (!print_always && m->count == header_pos)
+ m->count = start_pos;
+}
+
+static void print_binder_node_nilocked(struct seq_file *m,
+ struct binder_node *node,
+ bool hash_ptrs)
+{
+ struct binder_ref *ref;
+ struct binder_work *w;
+ int count;
+
+ count = hlist_count_nodes(&node->refs);
+
+ if (hash_ptrs)
+ seq_printf(m, " node %d: u%p c%p", node->debug_id,
+ (void *)(long)node->ptr, (void *)(long)node->cookie);
+ else
+ seq_printf(m, " node %d: u%016llx c%016llx", node->debug_id,
+ (u64)node->ptr, (u64)node->cookie);
+ seq_printf(m, " hs %d hw %d ls %d lw %d is %d iw %d tr %d",
+ node->has_strong_ref, node->has_weak_ref,
+ node->local_strong_refs, node->local_weak_refs,
+ node->internal_strong_refs, count, node->tmp_refs);
+ if (count) {
+ seq_puts(m, " proc");
+ hlist_for_each_entry(ref, &node->refs, node_entry)
+ seq_printf(m, " %d", ref->proc->pid);
+ }
+ seq_puts(m, "\n");
+ if (node->proc) {
+ list_for_each_entry(w, &node->async_todo, entry)
+ print_binder_work_ilocked(m, node->proc, " ",
+ " pending async transaction",
+ w, hash_ptrs);
+ }
+}
+
+static void print_binder_ref_olocked(struct seq_file *m,
+ struct binder_ref *ref)
+{
+ binder_node_lock(ref->node);
+ seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n",
+ ref->data.debug_id, ref->data.desc,
+ ref->node->proc ? "" : "dead ",
+ ref->node->debug_id, ref->data.strong,
+ ref->data.weak, ref->death);
+ binder_node_unlock(ref->node);
+}
+
+/**
+ * print_next_binder_node_ilocked() - Print binder_node from a locked list
+ * @m: struct seq_file for output via seq_printf()
+ * @proc: struct binder_proc we hold the inner_proc_lock to (if any)
+ * @node: struct binder_node to print fields of
+ * @prev_node: struct binder_node we hold a temporary reference to (if any)
+ * @hash_ptrs: whether to hash @node's binder_uintptr_t fields
+ *
+ * Helper function to handle synchronization around printing a struct
+ * binder_node while iterating through @proc->nodes or the dead nodes list.
+ * Caller must hold either @proc->inner_lock (for live nodes) or
+ * binder_dead_nodes_lock. This lock will be released during the body of this
+ * function, but it will be reacquired before returning to the caller.
+ *
+ * Return: pointer to the struct binder_node we hold a tmpref on
+ */
+static struct binder_node *
+print_next_binder_node_ilocked(struct seq_file *m, struct binder_proc *proc,
+ struct binder_node *node,
+ struct binder_node *prev_node, bool hash_ptrs)
+{
+ /*
+ * Take a temporary reference on the node so that isn't freed while
+ * we print it.
+ */
+ binder_inc_node_tmpref_ilocked(node);
+ /*
+ * Live nodes need to drop the inner proc lock and dead nodes need to
+ * drop the binder_dead_nodes_lock before trying to take the node lock.
+ */
+ if (proc)
+ binder_inner_proc_unlock(proc);
+ else
+ spin_unlock(&binder_dead_nodes_lock);
+ if (prev_node)
+ binder_put_node(prev_node);
+ binder_node_inner_lock(node);
+ print_binder_node_nilocked(m, node, hash_ptrs);
+ binder_node_inner_unlock(node);
+ if (proc)
+ binder_inner_proc_lock(proc);
+ else
+ spin_lock(&binder_dead_nodes_lock);
+ return node;
+}
+
+static void print_binder_proc(struct seq_file *m, struct binder_proc *proc,
+ bool print_all, bool hash_ptrs)
+{
+ struct binder_work *w;
+ struct rb_node *n;
+ size_t start_pos = m->count;
+ size_t header_pos;
+ struct binder_node *last_node = NULL;
+
+ seq_printf(m, "proc %d\n", proc->pid);
+ seq_printf(m, "context %s\n", proc->context->name);
+ header_pos = m->count;
+
+ binder_inner_proc_lock(proc);
+ for (n = rb_first(&proc->threads); n; n = rb_next(n))
+ print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread,
+ rb_node), print_all, hash_ptrs);
+
+ for (n = rb_first(&proc->nodes); n; n = rb_next(n)) {
+ struct binder_node *node = rb_entry(n, struct binder_node,
+ rb_node);
+ if (!print_all && !node->has_async_transaction)
+ continue;
+
+ last_node = print_next_binder_node_ilocked(m, proc, node,
+ last_node,
+ hash_ptrs);
+ }
+ binder_inner_proc_unlock(proc);
+ if (last_node)
+ binder_put_node(last_node);
+
+ if (print_all) {
+ binder_proc_lock(proc);
+ for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n))
+ print_binder_ref_olocked(m, rb_entry(n,
+ struct binder_ref,
+ rb_node_desc));
+ binder_proc_unlock(proc);
+ }
+ binder_alloc_print_allocated(m, &proc->alloc);
+ binder_inner_proc_lock(proc);
+ list_for_each_entry(w, &proc->todo, entry)
+ print_binder_work_ilocked(m, proc, " ",
+ " pending transaction", w,
+ hash_ptrs);
+ list_for_each_entry(w, &proc->delivered_death, entry) {
+ seq_puts(m, " has delivered dead binder\n");
+ break;
+ }
+ list_for_each_entry(w, &proc->delivered_freeze, entry) {
+ seq_puts(m, " has delivered freeze binder\n");
+ break;
+ }
+ binder_inner_proc_unlock(proc);
+ if (!print_all && m->count == header_pos)
+ m->count = start_pos;
+}
+
+static const char * const binder_return_strings[] = {
+ "BR_ERROR",
+ "BR_OK",
+ "BR_TRANSACTION",
+ "BR_REPLY",
+ "BR_ACQUIRE_RESULT",
+ "BR_DEAD_REPLY",
+ "BR_TRANSACTION_COMPLETE",
+ "BR_INCREFS",
+ "BR_ACQUIRE",
+ "BR_RELEASE",
+ "BR_DECREFS",
+ "BR_ATTEMPT_ACQUIRE",
+ "BR_NOOP",
+ "BR_SPAWN_LOOPER",
+ "BR_FINISHED",
+ "BR_DEAD_BINDER",
+ "BR_CLEAR_DEATH_NOTIFICATION_DONE",
+ "BR_FAILED_REPLY",
+ "BR_FROZEN_REPLY",
+ "BR_ONEWAY_SPAM_SUSPECT",
+ "BR_TRANSACTION_PENDING_FROZEN",
+ "BR_FROZEN_BINDER",
+ "BR_CLEAR_FREEZE_NOTIFICATION_DONE",
+};
+
+static const char * const binder_command_strings[] = {
+ "BC_TRANSACTION",
+ "BC_REPLY",
+ "BC_ACQUIRE_RESULT",
+ "BC_FREE_BUFFER",
+ "BC_INCREFS",
+ "BC_ACQUIRE",
+ "BC_RELEASE",
+ "BC_DECREFS",
+ "BC_INCREFS_DONE",
+ "BC_ACQUIRE_DONE",
+ "BC_ATTEMPT_ACQUIRE",
+ "BC_REGISTER_LOOPER",
+ "BC_ENTER_LOOPER",
+ "BC_EXIT_LOOPER",
+ "BC_REQUEST_DEATH_NOTIFICATION",
+ "BC_CLEAR_DEATH_NOTIFICATION",
+ "BC_DEAD_BINDER_DONE",
+ "BC_TRANSACTION_SG",
+ "BC_REPLY_SG",
+ "BC_REQUEST_FREEZE_NOTIFICATION",
+ "BC_CLEAR_FREEZE_NOTIFICATION",
+ "BC_FREEZE_NOTIFICATION_DONE",
+};
+
+static const char * const binder_objstat_strings[] = {
+ "proc",
+ "thread",
+ "node",
+ "ref",
+ "death",
+ "transaction",
+ "transaction_complete",
+ "freeze",
+};
+
+static void print_binder_stats(struct seq_file *m, const char *prefix,
+ struct binder_stats *stats)
+{
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(stats->bc) !=
+ ARRAY_SIZE(binder_command_strings));
+ for (i = 0; i < ARRAY_SIZE(stats->bc); i++) {
+ int temp = atomic_read(&stats->bc[i]);
+
+ if (temp)
+ seq_printf(m, "%s%s: %d\n", prefix,
+ binder_command_strings[i], temp);
+ }
+
+ BUILD_BUG_ON(ARRAY_SIZE(stats->br) !=
+ ARRAY_SIZE(binder_return_strings));
+ for (i = 0; i < ARRAY_SIZE(stats->br); i++) {
+ int temp = atomic_read(&stats->br[i]);
+
+ if (temp)
+ seq_printf(m, "%s%s: %d\n", prefix,
+ binder_return_strings[i], temp);
+ }
+
+ BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
+ ARRAY_SIZE(binder_objstat_strings));
+ BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
+ ARRAY_SIZE(stats->obj_deleted));
+ for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) {
+ int created = atomic_read(&stats->obj_created[i]);
+ int deleted = atomic_read(&stats->obj_deleted[i]);
+
+ if (created || deleted)
+ seq_printf(m, "%s%s: active %d total %d\n",
+ prefix,
+ binder_objstat_strings[i],
+ created - deleted,
+ created);
+ }
+}
+
+static void print_binder_proc_stats(struct seq_file *m,
+ struct binder_proc *proc)
+{
+ struct binder_work *w;
+ struct binder_thread *thread;
+ struct rb_node *n;
+ int count, strong, weak, ready_threads;
+ size_t free_async_space =
+ binder_alloc_get_free_async_space(&proc->alloc);
+
+ seq_printf(m, "proc %d\n", proc->pid);
+ seq_printf(m, "context %s\n", proc->context->name);
+ count = 0;
+ ready_threads = 0;
+ binder_inner_proc_lock(proc);
+ for (n = rb_first(&proc->threads); n; n = rb_next(n))
+ count++;
+
+ list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node)
+ ready_threads++;
+
+ seq_printf(m, " threads: %d\n", count);
+ seq_printf(m, " requested threads: %d+%d/%d\n"
+ " ready threads %d\n"
+ " free async space %zd\n", proc->requested_threads,
+ proc->requested_threads_started, proc->max_threads,
+ ready_threads,
+ free_async_space);
+ count = 0;
+ for (n = rb_first(&proc->nodes); n; n = rb_next(n))
+ count++;
+ binder_inner_proc_unlock(proc);
+ seq_printf(m, " nodes: %d\n", count);
+ count = 0;
+ strong = 0;
+ weak = 0;
+ binder_proc_lock(proc);
+ for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) {
+ struct binder_ref *ref = rb_entry(n, struct binder_ref,
+ rb_node_desc);
+ count++;
+ strong += ref->data.strong;
+ weak += ref->data.weak;
+ }
+ binder_proc_unlock(proc);
+ seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak);
+
+ count = binder_alloc_get_allocated_count(&proc->alloc);
+ seq_printf(m, " buffers: %d\n", count);
+
+ binder_alloc_print_pages(m, &proc->alloc);
+
+ count = 0;
+ binder_inner_proc_lock(proc);
+ list_for_each_entry(w, &proc->todo, entry) {
+ if (w->type == BINDER_WORK_TRANSACTION)
+ count++;
+ }
+ binder_inner_proc_unlock(proc);
+ seq_printf(m, " pending transactions: %d\n", count);
+
+ print_binder_stats(m, " ", &proc->stats);
+}
+
+static void print_binder_state(struct seq_file *m, bool hash_ptrs)
+{
+ struct binder_proc *proc;
+ struct binder_node *node;
+ struct binder_node *last_node = NULL;
+
+ seq_puts(m, "binder state:\n");
+
+ spin_lock(&binder_dead_nodes_lock);
+ if (!hlist_empty(&binder_dead_nodes))
+ seq_puts(m, "dead nodes:\n");
+ hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
+ last_node = print_next_binder_node_ilocked(m, NULL, node,
+ last_node,
+ hash_ptrs);
+ spin_unlock(&binder_dead_nodes_lock);
+ if (last_node)
+ binder_put_node(last_node);
+
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(proc, &binder_procs, proc_node)
+ print_binder_proc(m, proc, true, hash_ptrs);
+ mutex_unlock(&binder_procs_lock);
+}
+
+static void print_binder_transactions(struct seq_file *m, bool hash_ptrs)
+{
+ struct binder_proc *proc;
+
+ seq_puts(m, "binder transactions:\n");
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(proc, &binder_procs, proc_node)
+ print_binder_proc(m, proc, false, hash_ptrs);
+ mutex_unlock(&binder_procs_lock);
+}
+
+static int state_show(struct seq_file *m, void *unused)
+{
+ print_binder_state(m, false);
+ return 0;
+}
+
+static int state_hashed_show(struct seq_file *m, void *unused)
+{
+ print_binder_state(m, true);
+ return 0;
+}
+
+static int stats_show(struct seq_file *m, void *unused)
+{
+ struct binder_proc *proc;
+
+ seq_puts(m, "binder stats:\n");
+
+ print_binder_stats(m, "", &binder_stats);
+
+ mutex_lock(&binder_procs_lock);
+ hlist_for_each_entry(proc, &binder_procs, proc_node)
+ print_binder_proc_stats(m, proc);
+ mutex_unlock(&binder_procs_lock);
+
+ return 0;
+}
+
+static int transactions_show(struct seq_file *m, void *unused)
+{
+ print_binder_transactions(m, false);
+ return 0;
+}
+
+static int transactions_hashed_show(struct seq_file *m, void *unused)
+{
+ print_binder_transactions(m, true);
+ return 0;
+}
+
+static int proc_show(struct seq_file *m, void *unused)
+{
+ struct binder_proc *itr;
+ int pid = (unsigned long)m->private;
+
+ guard(mutex)(&binder_procs_lock);
+ hlist_for_each_entry(itr, &binder_procs, proc_node) {
+ if (itr->pid == pid) {
+ seq_puts(m, "binder proc state:\n");
+ print_binder_proc(m, itr, true, false);
+ }
+ }
+
+ return 0;
+}
+
+static void print_binder_transaction_log_entry(struct seq_file *m,
+ struct binder_transaction_log_entry *e)
+{
+ int debug_id = READ_ONCE(e->debug_id_done);
+ /*
+ * read barrier to guarantee debug_id_done read before
+ * we print the log values
+ */
+ smp_rmb();
+ seq_printf(m,
+ "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
+ e->debug_id, (e->call_type == 2) ? "reply" :
+ ((e->call_type == 1) ? "async" : "call "), e->from_proc,
+ e->from_thread, e->to_proc, e->to_thread, e->context_name,
+ e->to_node, e->target_handle, e->data_size, e->offsets_size,
+ e->return_error, e->return_error_param,
+ e->return_error_line);
+ /*
+ * read-barrier to guarantee read of debug_id_done after
+ * done printing the fields of the entry
+ */
+ smp_rmb();
+ seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ?
+ "\n" : " (incomplete)\n");
+}
+
+static int transaction_log_show(struct seq_file *m, void *unused)
+{
+ struct binder_transaction_log *log = m->private;
+ unsigned int log_cur = atomic_read(&log->cur);
+ unsigned int count;
+ unsigned int cur;
+ int i;
+
+ count = log_cur + 1;
+ cur = count < ARRAY_SIZE(log->entry) && !log->full ?
+ 0 : count % ARRAY_SIZE(log->entry);
+ if (count > ARRAY_SIZE(log->entry) || log->full)
+ count = ARRAY_SIZE(log->entry);
+ for (i = 0; i < count; i++) {
+ unsigned int index = cur++ % ARRAY_SIZE(log->entry);
+
+ print_binder_transaction_log_entry(m, &log->entry[index]);
+ }
+ return 0;
+}
+
+const struct file_operations binder_fops = {
+ .owner = THIS_MODULE,
+ .poll = binder_poll,
+ .unlocked_ioctl = binder_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .mmap = binder_mmap,
+ .open = binder_open,
+ .flush = binder_flush,
+ .release = binder_release,
+};
+
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(state_hashed);
+DEFINE_SHOW_ATTRIBUTE(stats);
+DEFINE_SHOW_ATTRIBUTE(transactions);
+DEFINE_SHOW_ATTRIBUTE(transactions_hashed);
+DEFINE_SHOW_ATTRIBUTE(transaction_log);
+
+const struct binder_debugfs_entry binder_debugfs_entries[] = {
+ {
+ .name = "state",
+ .mode = 0444,
+ .fops = &state_fops,
+ .data = NULL,
+ },
+ {
+ .name = "state_hashed",
+ .mode = 0444,
+ .fops = &state_hashed_fops,
+ .data = NULL,
+ },
+ {
+ .name = "stats",
+ .mode = 0444,
+ .fops = &stats_fops,
+ .data = NULL,
+ },
+ {
+ .name = "transactions",
+ .mode = 0444,
+ .fops = &transactions_fops,
+ .data = NULL,
+ },
+ {
+ .name = "transactions_hashed",
+ .mode = 0444,
+ .fops = &transactions_hashed_fops,
+ .data = NULL,
+ },
+ {
+ .name = "transaction_log",
+ .mode = 0444,
+ .fops = &transaction_log_fops,
+ .data = &binder_transaction_log,
+ },
+ {
+ .name = "failed_transaction_log",
+ .mode = 0444,
+ .fops = &transaction_log_fops,
+ .data = &binder_transaction_log_failed,
+ },
+ {} /* terminator */
+};
+
+void binder_add_device(struct binder_device *device)
+{
+ guard(spinlock)(&binder_devices_lock);
+ hlist_add_head(&device->hlist, &binder_devices);
+}
+
+void binder_remove_device(struct binder_device *device)
+{
+ guard(spinlock)(&binder_devices_lock);
+ hlist_del_init(&device->hlist);
+}
+
+static int __init init_binder_device(const char *name)
+{
+ int ret;
+ struct binder_device *binder_device;
+
+ binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL);
+ if (!binder_device)
+ return -ENOMEM;
+
+ binder_device->miscdev.fops = &binder_fops;
+ binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
+ binder_device->miscdev.name = name;
+
+ refcount_set(&binder_device->ref, 1);
+ binder_device->context.binder_context_mgr_uid = INVALID_UID;
+ binder_device->context.name = name;
+ mutex_init(&binder_device->context.context_mgr_node_lock);
+
+ ret = misc_register(&binder_device->miscdev);
+ if (ret < 0) {
+ kfree(binder_device);
+ return ret;
+ }
+
+ binder_add_device(binder_device);
+
+ return ret;
+}
+
+static int __init binder_init(void)
+{
+ int ret;
+ char *device_name, *device_tmp;
+ struct binder_device *device;
+ struct hlist_node *tmp;
+ char *device_names = NULL;
+ const struct binder_debugfs_entry *db_entry;
+
+ ret = binder_alloc_shrinker_init();
+ if (ret)
+ return ret;
+
+ atomic_set(&binder_transaction_log.cur, ~0U);
+ atomic_set(&binder_transaction_log_failed.cur, ~0U);
+
+ binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);
+
+ binder_for_each_debugfs_entry(db_entry)
+ debugfs_create_file(db_entry->name,
+ db_entry->mode,
+ binder_debugfs_dir_entry_root,
+ db_entry->data,
+ db_entry->fops);
+
+ binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
+ binder_debugfs_dir_entry_root);
+
+ if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) &&
+ strcmp(binder_devices_param, "") != 0) {
+ /*
+ * Copy the module_parameter string, because we don't want to
+ * tokenize it in-place.
+ */
+ device_names = kstrdup(binder_devices_param, GFP_KERNEL);
+ if (!device_names) {
+ ret = -ENOMEM;
+ goto err_alloc_device_names_failed;
+ }
+
+ device_tmp = device_names;
+ while ((device_name = strsep(&device_tmp, ","))) {
+ ret = init_binder_device(device_name);
+ if (ret)
+ goto err_init_binder_device_failed;
+ }
+ }
+
+ ret = genl_register_family(&binder_nl_family);
+ if (ret)
+ goto err_init_binder_device_failed;
+
+ ret = init_binderfs();
+ if (ret)
+ goto err_init_binderfs_failed;
+
+ return ret;
+
+err_init_binderfs_failed:
+ genl_unregister_family(&binder_nl_family);
+
+err_init_binder_device_failed:
+ hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
+ misc_deregister(&device->miscdev);
+ binder_remove_device(device);
+ kfree(device);
+ }
+
+ kfree(device_names);
+
+err_alloc_device_names_failed:
+ debugfs_remove_recursive(binder_debugfs_dir_entry_root);
+ binder_alloc_shrinker_exit();
+
+ return ret;
+}
+
+device_initcall(binder_init);
+
+#define CREATE_TRACE_POINTS
+#include "binder_trace.h"
diff --git a/drivers/android/binder/Makefile b/drivers/android/binder/Makefile
new file mode 100644
index 000000000000..09eabb527fa0
--- /dev/null
+++ b/drivers/android/binder/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y += -I$(src) # needed for trace events
+
+obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += rust_binder.o
+rust_binder-y := \
+ rust_binder_main.o \
+ rust_binderfs.o \
+ rust_binder_events.o \
+ page_range_helper.o
diff --git a/drivers/android/binder/allocation.rs b/drivers/android/binder/allocation.rs
new file mode 100644
index 000000000000..7f65a9c3a0e5
--- /dev/null
+++ b/drivers/android/binder/allocation.rs
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use core::mem::{size_of, size_of_val, MaybeUninit};
+use core::ops::Range;
+
+use kernel::{
+ bindings,
+ fs::file::{File, FileDescriptorReservation},
+ prelude::*,
+ sync::{aref::ARef, Arc},
+ transmute::{AsBytes, FromBytes},
+ uaccess::UserSliceReader,
+ uapi,
+};
+
+use crate::{
+ deferred_close::DeferredFdCloser,
+ defs::*,
+ node::{Node, NodeRef},
+ process::Process,
+ DArc,
+};
+
+#[derive(Default)]
+pub(crate) struct AllocationInfo {
+ /// Range within the allocation where we can find the offsets to the object descriptors.
+ pub(crate) offsets: Option<Range<usize>>,
+ /// The target node of the transaction this allocation is associated to.
+ /// Not set for replies.
+ pub(crate) target_node: Option<NodeRef>,
+ /// When this allocation is dropped, call `pending_oneway_finished` on the node.
+ ///
+ /// This is used to serialize oneway transaction on the same node. Binder guarantees that
+ /// oneway transactions to the same node are delivered sequentially in the order they are sent.
+ pub(crate) oneway_node: Option<DArc<Node>>,
+ /// Zero the data in the buffer on free.
+ pub(crate) clear_on_free: bool,
+ /// List of files embedded in this transaction.
+ file_list: FileList,
+}
+
+/// Represents an allocation that the kernel is currently using.
+///
+/// When allocations are idle, the range allocator holds the data related to them.
+///
+/// # Invariants
+///
+/// This allocation corresponds to an allocation in the range allocator, so the relevant pages are
+/// marked in use in the page range.
+pub(crate) struct Allocation {
+ pub(crate) offset: usize,
+ size: usize,
+ pub(crate) ptr: usize,
+ pub(crate) process: Arc<Process>,
+ allocation_info: Option<AllocationInfo>,
+ free_on_drop: bool,
+ pub(crate) oneway_spam_detected: bool,
+ #[allow(dead_code)]
+ pub(crate) debug_id: usize,
+}
+
+impl Allocation {
+ pub(crate) fn new(
+ process: Arc<Process>,
+ debug_id: usize,
+ offset: usize,
+ size: usize,
+ ptr: usize,
+ oneway_spam_detected: bool,
+ ) -> Self {
+ Self {
+ process,
+ offset,
+ size,
+ ptr,
+ debug_id,
+ oneway_spam_detected,
+ allocation_info: None,
+ free_on_drop: true,
+ }
+ }
+
+ fn size_check(&self, offset: usize, size: usize) -> Result {
+ let overflow_fail = offset.checked_add(size).is_none();
+ let cmp_size_fail = offset.wrapping_add(size) > self.size;
+ if overflow_fail || cmp_size_fail {
+ return Err(EFAULT);
+ }
+ Ok(())
+ }
+
+ pub(crate) fn copy_into(
+ &self,
+ reader: &mut UserSliceReader,
+ offset: usize,
+ size: usize,
+ ) -> Result {
+ self.size_check(offset, size)?;
+
+ // SAFETY: While this object exists, the range allocator will keep the range allocated, and
+ // in turn, the pages will be marked as in use.
+ unsafe {
+ self.process
+ .pages
+ .copy_from_user_slice(reader, self.offset + offset, size)
+ }
+ }
+
+ pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
+ self.size_check(offset, size_of::<T>())?;
+
+ // SAFETY: While this object exists, the range allocator will keep the range allocated, and
+ // in turn, the pages will be marked as in use.
+ unsafe { self.process.pages.read(self.offset + offset) }
+ }
+
+ pub(crate) fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
+ self.size_check(offset, size_of_val::<T>(obj))?;
+
+ // SAFETY: While this object exists, the range allocator will keep the range allocated, and
+ // in turn, the pages will be marked as in use.
+ unsafe { self.process.pages.write(self.offset + offset, obj) }
+ }
+
+ pub(crate) fn fill_zero(&self) -> Result {
+ // SAFETY: While this object exists, the range allocator will keep the range allocated, and
+ // in turn, the pages will be marked as in use.
+ unsafe { self.process.pages.fill_zero(self.offset, self.size) }
+ }
+
+ pub(crate) fn keep_alive(mut self) {
+ self.process
+ .buffer_make_freeable(self.offset, self.allocation_info.take());
+ self.free_on_drop = false;
+ }
+
+ pub(crate) fn set_info(&mut self, info: AllocationInfo) {
+ self.allocation_info = Some(info);
+ }
+
+ pub(crate) fn get_or_init_info(&mut self) -> &mut AllocationInfo {
+ self.allocation_info.get_or_insert_with(Default::default)
+ }
+
+ pub(crate) fn set_info_offsets(&mut self, offsets: Range<usize>) {
+ self.get_or_init_info().offsets = Some(offsets);
+ }
+
+ pub(crate) fn set_info_oneway_node(&mut self, oneway_node: DArc<Node>) {
+ self.get_or_init_info().oneway_node = Some(oneway_node);
+ }
+
+ pub(crate) fn set_info_clear_on_drop(&mut self) {
+ self.get_or_init_info().clear_on_free = true;
+ }
+
+ pub(crate) fn set_info_target_node(&mut self, target_node: NodeRef) {
+ self.get_or_init_info().target_node = Some(target_node);
+ }
+
+ /// Reserve enough space to push at least `num_fds` fds.
+ pub(crate) fn info_add_fd_reserve(&mut self, num_fds: usize) -> Result {
+ self.get_or_init_info()
+ .file_list
+ .files_to_translate
+ .reserve(num_fds, GFP_KERNEL)?;
+
+ Ok(())
+ }
+
+ pub(crate) fn info_add_fd(
+ &mut self,
+ file: ARef<File>,
+ buffer_offset: usize,
+ close_on_free: bool,
+ ) -> Result {
+ self.get_or_init_info().file_list.files_to_translate.push(
+ FileEntry {
+ file,
+ buffer_offset,
+ close_on_free,
+ },
+ GFP_KERNEL,
+ )?;
+
+ Ok(())
+ }
+
+ pub(crate) fn set_info_close_on_free(&mut self, cof: FdsCloseOnFree) {
+ self.get_or_init_info().file_list.close_on_free = cof.0;
+ }
+
+ pub(crate) fn translate_fds(&mut self) -> Result<TranslatedFds> {
+ let file_list = match self.allocation_info.as_mut() {
+ Some(info) => &mut info.file_list,
+ None => return Ok(TranslatedFds::new()),
+ };
+
+ let files = core::mem::take(&mut file_list.files_to_translate);
+
+ let num_close_on_free = files.iter().filter(|entry| entry.close_on_free).count();
+ let mut close_on_free = KVec::with_capacity(num_close_on_free, GFP_KERNEL)?;
+
+ let mut reservations = KVec::with_capacity(files.len(), GFP_KERNEL)?;
+ for file_info in files {
+ let res = FileDescriptorReservation::get_unused_fd_flags(bindings::O_CLOEXEC)?;
+ let fd = res.reserved_fd();
+ self.write::<u32>(file_info.buffer_offset, &fd)?;
+
+ reservations.push(
+ Reservation {
+ res,
+ file: file_info.file,
+ },
+ GFP_KERNEL,
+ )?;
+ if file_info.close_on_free {
+ close_on_free.push(fd, GFP_KERNEL)?;
+ }
+ }
+
+ Ok(TranslatedFds {
+ reservations,
+ close_on_free: FdsCloseOnFree(close_on_free),
+ })
+ }
+
+ /// Should the looper return to userspace when freeing this allocation?
+ pub(crate) fn looper_need_return_on_free(&self) -> bool {
+ // Closing fds involves pushing task_work for execution when we return to userspace. Hence,
+ // we should return to userspace asap if we are closing fds.
+ match self.allocation_info {
+ Some(ref info) => !info.file_list.close_on_free.is_empty(),
+ None => false,
+ }
+ }
+}
+
+impl Drop for Allocation {
+ fn drop(&mut self) {
+ if !self.free_on_drop {
+ return;
+ }
+
+ if let Some(mut info) = self.allocation_info.take() {
+ if let Some(oneway_node) = info.oneway_node.as_ref() {
+ oneway_node.pending_oneway_finished();
+ }
+
+ info.target_node = None;
+
+ if let Some(offsets) = info.offsets.clone() {
+ let view = AllocationView::new(self, offsets.start);
+ for i in offsets.step_by(size_of::<usize>()) {
+ if view.cleanup_object(i).is_err() {
+ pr_warn!("Error cleaning up object at offset {}\n", i)
+ }
+ }
+ }
+
+ for &fd in &info.file_list.close_on_free {
+ let closer = match DeferredFdCloser::new(GFP_KERNEL) {
+ Ok(closer) => closer,
+ Err(kernel::alloc::AllocError) => {
+ // Ignore allocation failures.
+ break;
+ }
+ };
+
+ // Here, we ignore errors. The operation can fail if the fd is not valid, or if the
+ // method is called from a kthread. However, this is always called from a syscall,
+ // so the latter case cannot happen, and we don't care about the first case.
+ let _ = closer.close_fd(fd);
+ }
+
+ if info.clear_on_free {
+ if let Err(e) = self.fill_zero() {
+ pr_warn!("Failed to clear data on free: {:?}", e);
+ }
+ }
+ }
+
+ self.process.buffer_raw_free(self.ptr);
+ }
+}
+
+/// A wrapper around `Allocation` that is being created.
+///
+/// If the allocation is destroyed while wrapped in this wrapper, then the allocation will be
+/// considered to be part of a failed transaction. Successful transactions avoid that by calling
+/// `success`, which skips the destructor.
+#[repr(transparent)]
+pub(crate) struct NewAllocation(pub(crate) Allocation);
+
+impl NewAllocation {
+ pub(crate) fn success(self) -> Allocation {
+ // This skips the destructor.
+ //
+ // SAFETY: This type is `#[repr(transparent)]`, so the layout matches.
+ unsafe { core::mem::transmute(self) }
+ }
+}
+
+impl core::ops::Deref for NewAllocation {
+ type Target = Allocation;
+ fn deref(&self) -> &Allocation {
+ &self.0
+ }
+}
+
+impl core::ops::DerefMut for NewAllocation {
+ fn deref_mut(&mut self) -> &mut Allocation {
+ &mut self.0
+ }
+}
+
+/// A view into the beginning of an allocation.
+///
+/// All attempts to read or write outside of the view will fail. To intentionally access outside of
+/// this view, use the `alloc` field of this struct directly.
+pub(crate) struct AllocationView<'a> {
+ pub(crate) alloc: &'a mut Allocation,
+ limit: usize,
+}
+
+impl<'a> AllocationView<'a> {
+ pub(crate) fn new(alloc: &'a mut Allocation, limit: usize) -> Self {
+ AllocationView { alloc, limit }
+ }
+
+ pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
+ if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
+ return Err(EINVAL);
+ }
+ self.alloc.read(offset)
+ }
+
+ pub(crate) fn write<T: AsBytes>(&self, offset: usize, obj: &T) -> Result {
+ if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
+ return Err(EINVAL);
+ }
+ self.alloc.write(offset, obj)
+ }
+
+ pub(crate) fn copy_into(
+ &self,
+ reader: &mut UserSliceReader,
+ offset: usize,
+ size: usize,
+ ) -> Result {
+ if offset.checked_add(size).ok_or(EINVAL)? > self.limit {
+ return Err(EINVAL);
+ }
+ self.alloc.copy_into(reader, offset, size)
+ }
+
+ pub(crate) fn transfer_binder_object(
+ &self,
+ offset: usize,
+ obj: &uapi::flat_binder_object,
+ strong: bool,
+ node_ref: NodeRef,
+ ) -> Result {
+ let mut newobj = FlatBinderObject::default();
+ let node = node_ref.node.clone();
+ if Arc::ptr_eq(&node_ref.node.owner, &self.alloc.process) {
+ // The receiving process is the owner of the node, so send it a binder object (instead
+ // of a handle).
+ let (ptr, cookie) = node.get_id();
+ newobj.hdr.type_ = if strong {
+ BINDER_TYPE_BINDER
+ } else {
+ BINDER_TYPE_WEAK_BINDER
+ };
+ newobj.flags = obj.flags;
+ newobj.__bindgen_anon_1.binder = ptr as _;
+ newobj.cookie = cookie as _;
+ self.write(offset, &newobj)?;
+ // Increment the user ref count on the node. It will be decremented as part of the
+ // destruction of the buffer, when we see a binder or weak-binder object.
+ node.update_refcount(true, 1, strong);
+ } else {
+ // The receiving process is different from the owner, so we need to insert a handle to
+ // the binder object.
+ let handle = self
+ .alloc
+ .process
+ .as_arc_borrow()
+ .insert_or_update_handle(node_ref, false)?;
+ newobj.hdr.type_ = if strong {
+ BINDER_TYPE_HANDLE
+ } else {
+ BINDER_TYPE_WEAK_HANDLE
+ };
+ newobj.flags = obj.flags;
+ newobj.__bindgen_anon_1.handle = handle;
+ if self.write(offset, &newobj).is_err() {
+ // Decrement ref count on the handle we just created.
+ let _ = self
+ .alloc
+ .process
+ .as_arc_borrow()
+ .update_ref(handle, false, strong);
+ return Err(EINVAL);
+ }
+ }
+
+ Ok(())
+ }
+
+ fn cleanup_object(&self, index_offset: usize) -> Result {
+ let offset = self.alloc.read(index_offset)?;
+ let header = self.read::<BinderObjectHeader>(offset)?;
+ match header.type_ {
+ BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => {
+ let obj = self.read::<FlatBinderObject>(offset)?;
+ let strong = header.type_ == BINDER_TYPE_BINDER;
+ // SAFETY: The type is `BINDER_TYPE_{WEAK_}BINDER`, so the `binder` field is
+ // populated.
+ let ptr = unsafe { obj.__bindgen_anon_1.binder };
+ let cookie = obj.cookie;
+ self.alloc.process.update_node(ptr, cookie, strong);
+ Ok(())
+ }
+ BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => {
+ let obj = self.read::<FlatBinderObject>(offset)?;
+ let strong = header.type_ == BINDER_TYPE_HANDLE;
+ // SAFETY: The type is `BINDER_TYPE_{WEAK_}HANDLE`, so the `handle` field is
+ // populated.
+ let handle = unsafe { obj.__bindgen_anon_1.handle };
+ self.alloc
+ .process
+ .as_arc_borrow()
+ .update_ref(handle, false, strong)
+ }
+ _ => Ok(()),
+ }
+ }
+}
+
+/// A binder object as it is serialized.
+///
+/// # Invariants
+///
+/// All bytes must be initialized, and the value of `self.hdr.type_` must be one of the allowed
+/// types.
+#[repr(C)]
+pub(crate) union BinderObject {
+ hdr: uapi::binder_object_header,
+ fbo: uapi::flat_binder_object,
+ fdo: uapi::binder_fd_object,
+ bbo: uapi::binder_buffer_object,
+ fdao: uapi::binder_fd_array_object,
+}
+
+/// A view into a `BinderObject` that can be used in a match statement.
+pub(crate) enum BinderObjectRef<'a> {
+ Binder(&'a mut uapi::flat_binder_object),
+ Handle(&'a mut uapi::flat_binder_object),
+ Fd(&'a mut uapi::binder_fd_object),
+ Ptr(&'a mut uapi::binder_buffer_object),
+ Fda(&'a mut uapi::binder_fd_array_object),
+}
+
+impl BinderObject {
+ pub(crate) fn read_from(reader: &mut UserSliceReader) -> Result<BinderObject> {
+ let object = Self::read_from_inner(|slice| {
+ let read_len = usize::min(slice.len(), reader.len());
+ reader.clone_reader().read_slice(&mut slice[..read_len])?;
+ Ok(())
+ })?;
+
+ // If we used a object type smaller than the largest object size, then we've read more
+ // bytes than we needed to. However, we used `.clone_reader()` to avoid advancing the
+ // original reader. Now, we call `skip` so that the caller's reader is advanced by the
+ // right amount.
+ //
+ // The `skip` call fails if the reader doesn't have `size` bytes available. This could
+ // happen if the type header corresponds to an object type that is larger than the rest of
+ // the reader.
+ //
+ // Any extra bytes beyond the size of the object are inaccessible after this call, so
+ // reading them again from the `reader` later does not result in TOCTOU bugs.
+ reader.skip(object.size())?;
+
+ Ok(object)
+ }
+
+ /// Use the provided reader closure to construct a `BinderObject`.
+ ///
+ /// The closure should write the bytes for the object into the provided slice.
+ pub(crate) fn read_from_inner<R>(reader: R) -> Result<BinderObject>
+ where
+ R: FnOnce(&mut [u8; size_of::<BinderObject>()]) -> Result<()>,
+ {
+ let mut obj = MaybeUninit::<BinderObject>::zeroed();
+
+ // SAFETY: The lengths of `BinderObject` and `[u8; size_of::<BinderObject>()]` are equal,
+ // and the byte array has an alignment requirement of one, so the pointer cast is okay.
+ // Additionally, `obj` was initialized to zeros, so the byte array will not be
+ // uninitialized.
+ (reader)(unsafe { &mut *obj.as_mut_ptr().cast() })?;
+
+ // SAFETY: The entire object is initialized, so accessing this field is safe.
+ let type_ = unsafe { obj.assume_init_ref().hdr.type_ };
+ if Self::type_to_size(type_).is_none() {
+ // The value of `obj.hdr_type_` was invalid.
+ return Err(EINVAL);
+ }
+
+ // SAFETY: All bytes are initialized (since we zeroed them at the start) and we checked
+ // that `self.hdr.type_` is one of the allowed types, so the type invariants are satisfied.
+ unsafe { Ok(obj.assume_init()) }
+ }
+
+ pub(crate) fn as_ref(&mut self) -> BinderObjectRef<'_> {
+ use BinderObjectRef::*;
+ // SAFETY: The constructor ensures that all bytes of `self` are initialized, and all
+ // variants of this union accept all initialized bit patterns.
+ unsafe {
+ match self.hdr.type_ {
+ BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => Binder(&mut self.fbo),
+ BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => Handle(&mut self.fbo),
+ BINDER_TYPE_FD => Fd(&mut self.fdo),
+ BINDER_TYPE_PTR => Ptr(&mut self.bbo),
+ BINDER_TYPE_FDA => Fda(&mut self.fdao),
+ // SAFETY: By the type invariant, the value of `self.hdr.type_` cannot have any
+ // other value than the ones checked above.
+ _ => core::hint::unreachable_unchecked(),
+ }
+ }
+ }
+
+ pub(crate) fn size(&self) -> usize {
+ // SAFETY: The entire object is initialized, so accessing this field is safe.
+ let type_ = unsafe { self.hdr.type_ };
+
+ // SAFETY: The type invariants guarantee that the type field is correct.
+ unsafe { Self::type_to_size(type_).unwrap_unchecked() }
+ }
+
+ fn type_to_size(type_: u32) -> Option<usize> {
+ match type_ {
+ BINDER_TYPE_WEAK_BINDER => Some(size_of::<uapi::flat_binder_object>()),
+ BINDER_TYPE_BINDER => Some(size_of::<uapi::flat_binder_object>()),
+ BINDER_TYPE_WEAK_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
+ BINDER_TYPE_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
+ BINDER_TYPE_FD => Some(size_of::<uapi::binder_fd_object>()),
+ BINDER_TYPE_PTR => Some(size_of::<uapi::binder_buffer_object>()),
+ BINDER_TYPE_FDA => Some(size_of::<uapi::binder_fd_array_object>()),
+ _ => None,
+ }
+ }
+}
+
+#[derive(Default)]
+struct FileList {
+ files_to_translate: KVec<FileEntry>,
+ close_on_free: KVec<u32>,
+}
+
+struct FileEntry {
+ /// The file for which a descriptor will be created in the recipient process.
+ file: ARef<File>,
+ /// The offset in the buffer where the file descriptor is stored.
+ buffer_offset: usize,
+ /// Whether this fd should be closed when the allocation is freed.
+ close_on_free: bool,
+}
+
+pub(crate) struct TranslatedFds {
+ reservations: KVec<Reservation>,
+ /// If commit is called, then these fds should be closed. (If commit is not called, then they
+ /// shouldn't be closed.)
+ close_on_free: FdsCloseOnFree,
+}
+
+struct Reservation {
+ res: FileDescriptorReservation,
+ file: ARef<File>,
+}
+
+impl TranslatedFds {
+ pub(crate) fn new() -> Self {
+ Self {
+ reservations: KVec::new(),
+ close_on_free: FdsCloseOnFree(KVec::new()),
+ }
+ }
+
+ pub(crate) fn commit(self) -> FdsCloseOnFree {
+ for entry in self.reservations {
+ entry.res.fd_install(entry.file);
+ }
+
+ self.close_on_free
+ }
+}
+
+pub(crate) struct FdsCloseOnFree(KVec<u32>);
diff --git a/drivers/android/binder/context.rs b/drivers/android/binder/context.rs
new file mode 100644
index 000000000000..3d135ec03ca7
--- /dev/null
+++ b/drivers/android/binder/context.rs
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{
+ error::Error,
+ list::{List, ListArc, ListLinks},
+ prelude::*,
+ security,
+ str::{CStr, CString},
+ sync::{Arc, Mutex},
+ task::Kuid,
+};
+
+use crate::{error::BinderError, node::NodeRef, process::Process};
+
+kernel::sync::global_lock! {
+ // SAFETY: We call `init` in the module initializer, so it's initialized before first use.
+ pub(crate) unsafe(uninit) static CONTEXTS: Mutex<ContextList> = ContextList {
+ list: List::new(),
+ };
+}
+
+pub(crate) struct ContextList {
+ list: List<Context>,
+}
+
+pub(crate) fn get_all_contexts() -> Result<KVec<Arc<Context>>> {
+ let lock = CONTEXTS.lock();
+
+ let count = lock.list.iter().count();
+
+ let mut ctxs = KVec::with_capacity(count, GFP_KERNEL)?;
+ for ctx in &lock.list {
+ ctxs.push(Arc::from(ctx), GFP_KERNEL)?;
+ }
+ Ok(ctxs)
+}
+
+/// This struct keeps track of the processes using this context, and which process is the context
+/// manager.
+struct Manager {
+ node: Option<NodeRef>,
+ uid: Option<Kuid>,
+ all_procs: List<Process>,
+}
+
+/// There is one context per binder file (/dev/binder, /dev/hwbinder, etc)
+#[pin_data]
+pub(crate) struct Context {
+ #[pin]
+ manager: Mutex<Manager>,
+ pub(crate) name: CString,
+ #[pin]
+ links: ListLinks,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for Context { untracked; }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<0> for Context {
+ using ListLinks { self.links };
+ }
+}
+
+impl Context {
+ pub(crate) fn new(name: &CStr) -> Result<Arc<Self>> {
+ let name = CString::try_from(name)?;
+ let list_ctx = ListArc::pin_init::<Error>(
+ try_pin_init!(Context {
+ name,
+ links <- ListLinks::new(),
+ manager <- kernel::new_mutex!(Manager {
+ all_procs: List::new(),
+ node: None,
+ uid: None,
+ }, "Context::manager"),
+ }),
+ GFP_KERNEL,
+ )?;
+
+ let ctx = list_ctx.clone_arc();
+ CONTEXTS.lock().list.push_back(list_ctx);
+
+ Ok(ctx)
+ }
+
+ /// Called when the file for this context is unlinked.
+ ///
+ /// No-op if called twice.
+ pub(crate) fn deregister(&self) {
+ // SAFETY: We never add the context to any other linked list than this one, so it is either
+ // in this list, or not in any list.
+ unsafe { CONTEXTS.lock().list.remove(self) };
+ }
+
+ pub(crate) fn register_process(self: &Arc<Self>, proc: ListArc<Process>) {
+ if !Arc::ptr_eq(self, &proc.ctx) {
+ pr_err!("Context::register_process called on the wrong context.");
+ return;
+ }
+ self.manager.lock().all_procs.push_back(proc);
+ }
+
+ pub(crate) fn deregister_process(self: &Arc<Self>, proc: &Process) {
+ if !Arc::ptr_eq(self, &proc.ctx) {
+ pr_err!("Context::deregister_process called on the wrong context.");
+ return;
+ }
+ // SAFETY: We just checked that this is the right list.
+ unsafe { self.manager.lock().all_procs.remove(proc) };
+ }
+
+ pub(crate) fn set_manager_node(&self, node_ref: NodeRef) -> Result {
+ let mut manager = self.manager.lock();
+ if manager.node.is_some() {
+ pr_warn!("BINDER_SET_CONTEXT_MGR already set");
+ return Err(EBUSY);
+ }
+ security::binder_set_context_mgr(&node_ref.node.owner.cred)?;
+
+ // If the context manager has been set before, ensure that we use the same euid.
+ let caller_uid = Kuid::current_euid();
+ if let Some(ref uid) = manager.uid {
+ if *uid != caller_uid {
+ return Err(EPERM);
+ }
+ }
+
+ manager.node = Some(node_ref);
+ manager.uid = Some(caller_uid);
+ Ok(())
+ }
+
+ pub(crate) fn unset_manager_node(&self) {
+ let node_ref = self.manager.lock().node.take();
+ drop(node_ref);
+ }
+
+ pub(crate) fn get_manager_node(&self, strong: bool) -> Result<NodeRef, BinderError> {
+ self.manager
+ .lock()
+ .node
+ .as_ref()
+ .ok_or_else(BinderError::new_dead)?
+ .clone(strong)
+ .map_err(BinderError::from)
+ }
+
+ pub(crate) fn for_each_proc<F>(&self, mut func: F)
+ where
+ F: FnMut(&Process),
+ {
+ let lock = self.manager.lock();
+ for proc in &lock.all_procs {
+ func(&proc);
+ }
+ }
+
+ pub(crate) fn get_all_procs(&self) -> Result<KVec<Arc<Process>>> {
+ let lock = self.manager.lock();
+ let count = lock.all_procs.iter().count();
+
+ let mut procs = KVec::with_capacity(count, GFP_KERNEL)?;
+ for proc in &lock.all_procs {
+ procs.push(Arc::from(proc), GFP_KERNEL)?;
+ }
+ Ok(procs)
+ }
+
+ pub(crate) fn get_procs_with_pid(&self, pid: i32) -> Result<KVec<Arc<Process>>> {
+ let orig = self.get_all_procs()?;
+ let mut backing = KVec::with_capacity(orig.len(), GFP_KERNEL)?;
+ for proc in orig.into_iter().filter(|proc| proc.task.pid() == pid) {
+ backing.push(proc, GFP_KERNEL)?;
+ }
+ Ok(backing)
+ }
+}
diff --git a/drivers/android/binder/deferred_close.rs b/drivers/android/binder/deferred_close.rs
new file mode 100644
index 000000000000..ac895c04d0cb
--- /dev/null
+++ b/drivers/android/binder/deferred_close.rs
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Logic for closing files in a deferred manner.
+//!
+//! This file could make sense to have in `kernel::fs`, but it was rejected for being too
+//! Binder-specific.
+
+use core::mem::MaybeUninit;
+use kernel::{
+ alloc::{AllocError, Flags},
+ bindings,
+ prelude::*,
+};
+
+/// Helper used for closing file descriptors in a way that is safe even if the file is currently
+/// held using `fdget`.
+///
+/// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to
+/// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`.
+pub(crate) struct DeferredFdCloser {
+ inner: KBox<DeferredFdCloserInner>,
+}
+
+/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with
+/// moving it across threads.
+unsafe impl Send for DeferredFdCloser {}
+/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with
+/// moving it across threads.
+unsafe impl Sync for DeferredFdCloser {}
+
+/// # Invariants
+///
+/// If the `file` pointer is non-null, then it points at a `struct file` and owns a refcount to
+/// that file.
+#[repr(C)]
+struct DeferredFdCloserInner {
+ twork: MaybeUninit<bindings::callback_head>,
+ file: *mut bindings::file,
+}
+
+impl DeferredFdCloser {
+ /// Create a new [`DeferredFdCloser`].
+ pub(crate) fn new(flags: Flags) -> Result<Self, AllocError> {
+ Ok(Self {
+ // INVARIANT: The `file` pointer is null, so the type invariant does not apply.
+ inner: KBox::new(
+ DeferredFdCloserInner {
+ twork: MaybeUninit::uninit(),
+ file: core::ptr::null_mut(),
+ },
+ flags,
+ )?,
+ })
+ }
+
+ /// Schedule a task work that closes the file descriptor when this task returns to userspace.
+ ///
+ /// Fails if this is called from a context where we cannot run work when returning to
+ /// userspace. (E.g., from a kthread.)
+ pub(crate) fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> {
+ use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME;
+
+ // In this method, we schedule the task work before closing the file. This is because
+ // scheduling a task work is fallible, and we need to know whether it will fail before we
+ // attempt to close the file.
+
+ // Task works are not available on kthreads.
+ let current = kernel::current!();
+
+ // Check if this is a kthread.
+ // SAFETY: Reading `flags` from a task is always okay.
+ if unsafe { ((*current.as_ptr()).flags & bindings::PF_KTHREAD) != 0 } {
+ return Err(DeferredFdCloseError::TaskWorkUnavailable);
+ }
+
+ // Transfer ownership of the box's allocation to a raw pointer. This disables the
+ // destructor, so we must manually convert it back to a KBox to drop it.
+ //
+ // Until we convert it back to a `KBox`, there are no aliasing requirements on this
+ // pointer.
+ let inner = KBox::into_raw(self.inner);
+
+ // The `callback_head` field is first in the struct, so this cast correctly gives us a
+ // pointer to the field.
+ let callback_head = inner.cast::<bindings::callback_head>();
+ // SAFETY: This pointer offset operation does not go out-of-bounds.
+ let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) };
+
+ let current = current.as_ptr();
+
+ // SAFETY: This function currently has exclusive access to the `DeferredFdCloserInner`, so
+ // it is okay for us to perform unsynchronized writes to its `callback_head` field.
+ unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) };
+
+ // SAFETY: This inserts the `DeferredFdCloserInner` into the task workqueue for the current
+ // task. If this operation is successful, then this transfers exclusive ownership of the
+ // `callback_head` field to the C side until it calls `do_close_fd`, and we don't touch or
+ // invalidate the field during that time.
+ //
+ // When the C side calls `do_close_fd`, the safety requirements of that method are
+ // satisfied because when a task work is executed, the callback is given ownership of the
+ // pointer.
+ //
+ // The file pointer is currently null. If it is changed to be non-null before `do_close_fd`
+ // is called, then that change happens due to the write at the end of this function, and
+ // that write has a safety comment that explains why the refcount can be dropped when
+ // `do_close_fd` runs.
+ let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) };
+
+ if res != 0 {
+ // SAFETY: Scheduling the task work failed, so we still have ownership of the box, so
+ // we may destroy it.
+ unsafe { drop(KBox::from_raw(inner)) };
+
+ return Err(DeferredFdCloseError::TaskWorkUnavailable);
+ }
+
+ // This removes the fd from the fd table in `current`. The file is not fully closed until
+ // `filp_close` is called. We are given ownership of one refcount to the file.
+ //
+ // SAFETY: This is safe no matter what `fd` is. If the `fd` is valid (that is, if the
+ // pointer is non-null), then we call `filp_close` on the returned pointer as required by
+ // `file_close_fd`.
+ let file = unsafe { bindings::file_close_fd(fd) };
+ if file.is_null() {
+ // We don't clean up the task work since that might be expensive if the task work queue
+ // is long. Just let it execute and let it clean up for itself.
+ return Err(DeferredFdCloseError::BadFd);
+ }
+
+ // Acquire a second refcount to the file.
+ //
+ // SAFETY: The `file` pointer points at a file with a non-zero refcount.
+ unsafe { bindings::get_file(file) };
+
+ // This method closes the fd, consuming one of our two refcounts. There could be active
+ // light refcounts created from that fd, so we must ensure that the file has a positive
+ // refcount for the duration of those active light refcounts. We do that by holding on to
+ // the second refcount until the current task returns to userspace.
+ //
+ // SAFETY: The `file` pointer is valid. Passing `current->files` as the file table to close
+ // it in is correct, since we just got the `fd` from `file_close_fd` which also uses
+ // `current->files`.
+ //
+ // Note: fl_owner_t is currently a void pointer.
+ unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) };
+
+ // We update the file pointer that the task work is supposed to fput. This transfers
+ // ownership of our last refcount.
+ //
+ // INVARIANT: This changes the `file` field of a `DeferredFdCloserInner` from null to
+ // non-null. This doesn't break the type invariant for `DeferredFdCloserInner` because we
+ // still own a refcount to the file, so we can pass ownership of that refcount to the
+ // `DeferredFdCloserInner`.
+ //
+ // When `do_close_fd` runs, it must be safe for it to `fput` the refcount. However, this is
+ // the case because all light refcounts that are associated with the fd we closed
+ // previously must be dropped when `do_close_fd`, since light refcounts must be dropped
+ // before returning to userspace.
+ //
+ // SAFETY: Task works are executed on the current thread right before we return to
+ // userspace, so this write is guaranteed to happen before `do_close_fd` is called, which
+ // means that a race is not possible here.
+ unsafe { *file_field = file };
+
+ Ok(())
+ }
+
+ /// # Safety
+ ///
+ /// The provided pointer must point at the `twork` field of a `DeferredFdCloserInner` stored in
+ /// a `KBox`, and the caller must pass exclusive ownership of that `KBox`. Furthermore, if the
+ /// file pointer is non-null, then it must be okay to release the refcount by calling `fput`.
+ unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) {
+ // SAFETY: The caller just passed us ownership of this box.
+ let inner = unsafe { KBox::from_raw(inner.cast::<DeferredFdCloserInner>()) };
+ if !inner.file.is_null() {
+ // SAFETY: By the type invariants, we own a refcount to this file, and the caller
+ // guarantees that dropping the refcount now is okay.
+ unsafe { bindings::fput(inner.file) };
+ }
+ // The allocation is freed when `inner` goes out of scope.
+ }
+}
+
+/// Represents a failure to close an fd in a deferred manner.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub(crate) enum DeferredFdCloseError {
+ /// Closing the fd failed because we were unable to schedule a task work.
+ TaskWorkUnavailable,
+ /// Closing the fd failed because the fd does not exist.
+ BadFd,
+}
+
+impl From<DeferredFdCloseError> for Error {
+ fn from(err: DeferredFdCloseError) -> Error {
+ match err {
+ DeferredFdCloseError::TaskWorkUnavailable => ESRCH,
+ DeferredFdCloseError::BadFd => EBADF,
+ }
+ }
+}
diff --git a/drivers/android/binder/defs.rs b/drivers/android/binder/defs.rs
new file mode 100644
index 000000000000..33f51b4139c7
--- /dev/null
+++ b/drivers/android/binder/defs.rs
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use core::mem::MaybeUninit;
+use core::ops::{Deref, DerefMut};
+use kernel::{
+ transmute::{AsBytes, FromBytes},
+ uapi::{self, *},
+};
+
+macro_rules! pub_no_prefix {
+ ($prefix:ident, $($newname:ident),+ $(,)?) => {
+ $(pub(crate) const $newname: u32 = kernel::macros::concat_idents!($prefix, $newname);)+
+ };
+}
+
+pub_no_prefix!(
+ binder_driver_return_protocol_,
+ BR_TRANSACTION,
+ BR_TRANSACTION_SEC_CTX,
+ BR_REPLY,
+ BR_DEAD_REPLY,
+ BR_FAILED_REPLY,
+ BR_FROZEN_REPLY,
+ BR_NOOP,
+ BR_SPAWN_LOOPER,
+ BR_TRANSACTION_COMPLETE,
+ BR_TRANSACTION_PENDING_FROZEN,
+ BR_ONEWAY_SPAM_SUSPECT,
+ BR_OK,
+ BR_ERROR,
+ BR_INCREFS,
+ BR_ACQUIRE,
+ BR_RELEASE,
+ BR_DECREFS,
+ BR_DEAD_BINDER,
+ BR_CLEAR_DEATH_NOTIFICATION_DONE,
+ BR_FROZEN_BINDER,
+ BR_CLEAR_FREEZE_NOTIFICATION_DONE,
+);
+
+pub_no_prefix!(
+ binder_driver_command_protocol_,
+ BC_TRANSACTION,
+ BC_TRANSACTION_SG,
+ BC_REPLY,
+ BC_REPLY_SG,
+ BC_FREE_BUFFER,
+ BC_ENTER_LOOPER,
+ BC_EXIT_LOOPER,
+ BC_REGISTER_LOOPER,
+ BC_INCREFS,
+ BC_ACQUIRE,
+ BC_RELEASE,
+ BC_DECREFS,
+ BC_INCREFS_DONE,
+ BC_ACQUIRE_DONE,
+ BC_REQUEST_DEATH_NOTIFICATION,
+ BC_CLEAR_DEATH_NOTIFICATION,
+ BC_DEAD_BINDER_DONE,
+ BC_REQUEST_FREEZE_NOTIFICATION,
+ BC_CLEAR_FREEZE_NOTIFICATION,
+ BC_FREEZE_NOTIFICATION_DONE,
+);
+
+pub_no_prefix!(
+ flat_binder_object_flags_,
+ FLAT_BINDER_FLAG_ACCEPTS_FDS,
+ FLAT_BINDER_FLAG_TXN_SECURITY_CTX
+);
+
+pub_no_prefix!(
+ transaction_flags_,
+ TF_ONE_WAY,
+ TF_ACCEPT_FDS,
+ TF_CLEAR_BUF,
+ TF_UPDATE_TXN
+);
+
+pub(crate) use uapi::{
+ BINDER_TYPE_BINDER, BINDER_TYPE_FD, BINDER_TYPE_FDA, BINDER_TYPE_HANDLE, BINDER_TYPE_PTR,
+ BINDER_TYPE_WEAK_BINDER, BINDER_TYPE_WEAK_HANDLE,
+};
+
+macro_rules! decl_wrapper {
+ ($newname:ident, $wrapped:ty) => {
+ // Define a wrapper around the C type. Use `MaybeUninit` to enforce that the value of
+ // padding bytes must be preserved.
+ #[derive(Copy, Clone)]
+ #[repr(transparent)]
+ pub(crate) struct $newname(MaybeUninit<$wrapped>);
+
+ // SAFETY: This macro is only used with types where this is ok.
+ unsafe impl FromBytes for $newname {}
+ // SAFETY: This macro is only used with types where this is ok.
+ unsafe impl AsBytes for $newname {}
+
+ impl Deref for $newname {
+ type Target = $wrapped;
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: We use `MaybeUninit` only to preserve padding. The value must still
+ // always be valid.
+ unsafe { self.0.assume_init_ref() }
+ }
+ }
+
+ impl DerefMut for $newname {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ // SAFETY: We use `MaybeUninit` only to preserve padding. The value must still
+ // always be valid.
+ unsafe { self.0.assume_init_mut() }
+ }
+ }
+
+ impl Default for $newname {
+ fn default() -> Self {
+ // Create a new value of this type where all bytes (including padding) are zeroed.
+ Self(MaybeUninit::zeroed())
+ }
+ }
+ };
+}
+
+decl_wrapper!(BinderNodeDebugInfo, uapi::binder_node_debug_info);
+decl_wrapper!(BinderNodeInfoForRef, uapi::binder_node_info_for_ref);
+decl_wrapper!(FlatBinderObject, uapi::flat_binder_object);
+decl_wrapper!(BinderFdObject, uapi::binder_fd_object);
+decl_wrapper!(BinderFdArrayObject, uapi::binder_fd_array_object);
+decl_wrapper!(BinderObjectHeader, uapi::binder_object_header);
+decl_wrapper!(BinderBufferObject, uapi::binder_buffer_object);
+decl_wrapper!(BinderTransactionData, uapi::binder_transaction_data);
+decl_wrapper!(
+ BinderTransactionDataSecctx,
+ uapi::binder_transaction_data_secctx
+);
+decl_wrapper!(BinderTransactionDataSg, uapi::binder_transaction_data_sg);
+decl_wrapper!(BinderWriteRead, uapi::binder_write_read);
+decl_wrapper!(BinderVersion, uapi::binder_version);
+decl_wrapper!(BinderFrozenStatusInfo, uapi::binder_frozen_status_info);
+decl_wrapper!(BinderFreezeInfo, uapi::binder_freeze_info);
+decl_wrapper!(BinderFrozenStateInfo, uapi::binder_frozen_state_info);
+decl_wrapper!(BinderHandleCookie, uapi::binder_handle_cookie);
+decl_wrapper!(ExtendedError, uapi::binder_extended_error);
+
+impl BinderVersion {
+ pub(crate) fn current() -> Self {
+ Self(MaybeUninit::new(uapi::binder_version {
+ protocol_version: BINDER_CURRENT_PROTOCOL_VERSION as _,
+ }))
+ }
+}
+
+impl BinderTransactionData {
+ pub(crate) fn with_buffers_size(self, buffers_size: u64) -> BinderTransactionDataSg {
+ BinderTransactionDataSg(MaybeUninit::new(uapi::binder_transaction_data_sg {
+ transaction_data: *self,
+ buffers_size,
+ }))
+ }
+}
+
+impl BinderTransactionDataSecctx {
+ /// View the inner data as wrapped in `BinderTransactionData`.
+ pub(crate) fn tr_data(&mut self) -> &mut BinderTransactionData {
+ // SAFETY: Transparent wrapper is safe to transmute.
+ unsafe {
+ &mut *(&mut self.transaction_data as *mut uapi::binder_transaction_data
+ as *mut BinderTransactionData)
+ }
+ }
+}
+
+impl ExtendedError {
+ pub(crate) fn new(id: u32, command: u32, param: i32) -> Self {
+ Self(MaybeUninit::new(uapi::binder_extended_error {
+ id,
+ command,
+ param,
+ }))
+ }
+}
diff --git a/drivers/android/binder/error.rs b/drivers/android/binder/error.rs
new file mode 100644
index 000000000000..b24497cfa292
--- /dev/null
+++ b/drivers/android/binder/error.rs
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::fmt;
+use kernel::prelude::*;
+
+use crate::defs::*;
+
+pub(crate) type BinderResult<T = ()> = core::result::Result<T, BinderError>;
+
+/// An error that will be returned to userspace via the `BINDER_WRITE_READ` ioctl rather than via
+/// errno.
+pub(crate) struct BinderError {
+ pub(crate) reply: u32,
+ source: Option<Error>,
+}
+
+impl BinderError {
+ pub(crate) fn new_dead() -> Self {
+ Self {
+ reply: BR_DEAD_REPLY,
+ source: None,
+ }
+ }
+
+ pub(crate) fn new_frozen() -> Self {
+ Self {
+ reply: BR_FROZEN_REPLY,
+ source: None,
+ }
+ }
+
+ pub(crate) fn new_frozen_oneway() -> Self {
+ Self {
+ reply: BR_TRANSACTION_PENDING_FROZEN,
+ source: None,
+ }
+ }
+
+ pub(crate) fn is_dead(&self) -> bool {
+ self.reply == BR_DEAD_REPLY
+ }
+
+ pub(crate) fn as_errno(&self) -> kernel::ffi::c_int {
+ self.source.unwrap_or(EINVAL).to_errno()
+ }
+
+ pub(crate) fn should_pr_warn(&self) -> bool {
+ self.source.is_some()
+ }
+}
+
+/// Convert an errno into a `BinderError` and store the errno used to construct it. The errno
+/// should be stored as the thread's extended error when given to userspace.
+impl From<Error> for BinderError {
+ fn from(source: Error) -> Self {
+ Self {
+ reply: BR_FAILED_REPLY,
+ source: Some(source),
+ }
+ }
+}
+
+impl From<kernel::fs::file::BadFdError> for BinderError {
+ fn from(source: kernel::fs::file::BadFdError) -> Self {
+ BinderError::from(Error::from(source))
+ }
+}
+
+impl From<kernel::alloc::AllocError> for BinderError {
+ fn from(_: kernel::alloc::AllocError) -> Self {
+ Self {
+ reply: BR_FAILED_REPLY,
+ source: Some(ENOMEM),
+ }
+ }
+}
+
+impl fmt::Debug for BinderError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.reply {
+ BR_FAILED_REPLY => match self.source.as_ref() {
+ Some(source) => f
+ .debug_struct("BR_FAILED_REPLY")
+ .field("source", source)
+ .finish(),
+ None => f.pad("BR_FAILED_REPLY"),
+ },
+ BR_DEAD_REPLY => f.pad("BR_DEAD_REPLY"),
+ BR_FROZEN_REPLY => f.pad("BR_FROZEN_REPLY"),
+ BR_TRANSACTION_PENDING_FROZEN => f.pad("BR_TRANSACTION_PENDING_FROZEN"),
+ BR_TRANSACTION_COMPLETE => f.pad("BR_TRANSACTION_COMPLETE"),
+ _ => f
+ .debug_struct("BinderError")
+ .field("reply", &self.reply)
+ .finish(),
+ }
+ }
+}
diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs
new file mode 100644
index 000000000000..53b60035639a
--- /dev/null
+++ b/drivers/android/binder/freeze.rs
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{
+ alloc::AllocError,
+ list::ListArc,
+ prelude::*,
+ rbtree::{self, RBTreeNodeReservation},
+ seq_file::SeqFile,
+ seq_print,
+ sync::{Arc, UniqueArc},
+ uaccess::UserSliceReader,
+};
+
+use crate::{
+ defs::*, node::Node, process::Process, thread::Thread, BinderReturnWriter, DArc, DLArc,
+ DTRWrap, DeliverToRead,
+};
+
+#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
+pub(crate) struct FreezeCookie(u64);
+
+/// Represents a listener for changes to the frozen state of a process.
+pub(crate) struct FreezeListener {
+ /// The node we are listening for.
+ pub(crate) node: DArc<Node>,
+ /// The cookie of this freeze listener.
+ cookie: FreezeCookie,
+ /// What value of `is_frozen` did we most recently tell userspace about?
+ last_is_frozen: Option<bool>,
+ /// We sent a `BR_FROZEN_BINDER` and we are waiting for `BC_FREEZE_NOTIFICATION_DONE` before
+ /// sending any other commands.
+ is_pending: bool,
+ /// Userspace sent `BC_CLEAR_FREEZE_NOTIFICATION` and we need to reply with
+ /// `BR_CLEAR_FREEZE_NOTIFICATION_DONE` as soon as possible. If `is_pending` is set, then we
+ /// must wait for it to be unset before we can reply.
+ is_clearing: bool,
+ /// Number of cleared duplicates that can't be deleted until userspace sends
+ /// `BC_FREEZE_NOTIFICATION_DONE`.
+ num_pending_duplicates: u64,
+ /// Number of cleared duplicates that can be deleted.
+ num_cleared_duplicates: u64,
+}
+
+impl FreezeListener {
+ /// Is it okay to create a new listener with the same cookie as this one for the provided node?
+ ///
+ /// Under some scenarios, userspace may delete a freeze listener and immediately recreate it
+ /// with the same cookie. This results in duplicate listeners. To avoid issues with ambiguity,
+ /// we allow this only if the new listener is for the same node, and we also require that the
+ /// old listener has already been cleared.
+ fn allow_duplicate(&self, node: &DArc<Node>) -> bool {
+ Arc::ptr_eq(&self.node, node) && self.is_clearing
+ }
+}
+
+type UninitFM = UniqueArc<core::mem::MaybeUninit<DTRWrap<FreezeMessage>>>;
+
+/// Represents a notification that the freeze state has changed.
+pub(crate) struct FreezeMessage {
+ cookie: FreezeCookie,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for FreezeMessage {
+ untracked;
+ }
+}
+
+impl FreezeMessage {
+ fn new(flags: kernel::alloc::Flags) -> Result<UninitFM, AllocError> {
+ UniqueArc::new_uninit(flags)
+ }
+
+ fn init(ua: UninitFM, cookie: FreezeCookie) -> DLArc<FreezeMessage> {
+ match ua.pin_init_with(DTRWrap::new(FreezeMessage { cookie })) {
+ Ok(msg) => ListArc::from(msg),
+ Err(err) => match err {},
+ }
+ }
+}
+
+impl DeliverToRead for FreezeMessage {
+ fn do_work(
+ self: DArc<Self>,
+ thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let _removed_listener;
+ let mut node_refs = thread.process.node_refs.lock();
+ let Some(mut freeze_entry) = node_refs.freeze_listeners.find_mut(&self.cookie) else {
+ return Ok(true);
+ };
+ let freeze = freeze_entry.get_mut();
+
+ if freeze.num_cleared_duplicates > 0 {
+ freeze.num_cleared_duplicates -= 1;
+ drop(node_refs);
+ writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?;
+ writer.write_payload(&self.cookie.0)?;
+ return Ok(true);
+ }
+
+ if freeze.is_pending {
+ return Ok(true);
+ }
+ if freeze.is_clearing {
+ kernel::warn_on!(freeze.num_cleared_duplicates != 0);
+ if freeze.num_pending_duplicates > 0 {
+ // The primary freeze listener was deleted, so convert a pending duplicate back
+ // into the primary one.
+ freeze.num_pending_duplicates -= 1;
+ freeze.is_pending = true;
+ freeze.is_clearing = true;
+ } else {
+ _removed_listener = freeze_entry.remove_node();
+ }
+ drop(node_refs);
+ writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?;
+ writer.write_payload(&self.cookie.0)?;
+ Ok(true)
+ } else {
+ let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen();
+ if freeze.last_is_frozen == Some(is_frozen) {
+ return Ok(true);
+ }
+
+ let mut state_info = BinderFrozenStateInfo::default();
+ state_info.is_frozen = is_frozen as u32;
+ state_info.cookie = freeze.cookie.0;
+ freeze.is_pending = true;
+ freeze.last_is_frozen = Some(is_frozen);
+ drop(node_refs);
+
+ writer.write_code(BR_FROZEN_BINDER)?;
+ writer.write_payload(&state_info)?;
+ // BR_FROZEN_BINDER notifications can cause transactions
+ Ok(false)
+ }
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ #[inline(never)]
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ seq_print!(m, "{}has frozen binder\n", prefix);
+ Ok(())
+ }
+}
+
+impl FreezeListener {
+ pub(crate) fn on_process_exit(&self, proc: &Arc<Process>) {
+ if !self.is_clearing {
+ self.node.remove_freeze_listener(proc);
+ }
+ }
+}
+
+impl Process {
+ pub(crate) fn request_freeze_notif(
+ self: &Arc<Self>,
+ reader: &mut UserSliceReader,
+ ) -> Result<()> {
+ let hc = reader.read::<BinderHandleCookie>()?;
+ let handle = hc.handle;
+ let cookie = FreezeCookie(hc.cookie);
+
+ let msg = FreezeMessage::new(GFP_KERNEL)?;
+ let alloc = RBTreeNodeReservation::new(GFP_KERNEL)?;
+
+ let mut node_refs_guard = self.node_refs.lock();
+ let node_refs = &mut *node_refs_guard;
+ let Some(info) = node_refs.by_handle.get_mut(&handle) else {
+ pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION invalid ref {}\n", handle);
+ return Err(EINVAL);
+ };
+ if info.freeze().is_some() {
+ pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION already set\n");
+ return Err(EINVAL);
+ }
+ let node_ref = info.node_ref();
+ let freeze_entry = node_refs.freeze_listeners.entry(cookie);
+
+ if let rbtree::Entry::Occupied(ref dupe) = freeze_entry {
+ if !dupe.get().allow_duplicate(&node_ref.node) {
+ pr_warn!("BC_REQUEST_FREEZE_NOTIFICATION duplicate cookie\n");
+ return Err(EINVAL);
+ }
+ }
+
+ // All failure paths must come before this call, and all modifications must come after this
+ // call.
+ node_ref.node.add_freeze_listener(self, GFP_KERNEL)?;
+
+ match freeze_entry {
+ rbtree::Entry::Vacant(entry) => {
+ entry.insert(
+ FreezeListener {
+ cookie,
+ node: node_ref.node.clone(),
+ last_is_frozen: None,
+ is_pending: false,
+ is_clearing: false,
+ num_pending_duplicates: 0,
+ num_cleared_duplicates: 0,
+ },
+ alloc,
+ );
+ }
+ rbtree::Entry::Occupied(mut dupe) => {
+ let dupe = dupe.get_mut();
+ if dupe.is_pending {
+ dupe.num_pending_duplicates += 1;
+ } else {
+ dupe.num_cleared_duplicates += 1;
+ }
+ dupe.last_is_frozen = None;
+ dupe.is_pending = false;
+ dupe.is_clearing = false;
+ }
+ }
+
+ *info.freeze() = Some(cookie);
+ let msg = FreezeMessage::init(msg, cookie);
+ drop(node_refs_guard);
+ let _ = self.push_work(msg);
+ Ok(())
+ }
+
+ pub(crate) fn freeze_notif_done(self: &Arc<Self>, reader: &mut UserSliceReader) -> Result<()> {
+ let cookie = FreezeCookie(reader.read()?);
+ let alloc = FreezeMessage::new(GFP_KERNEL)?;
+ let mut node_refs_guard = self.node_refs.lock();
+ let node_refs = &mut *node_refs_guard;
+ let Some(freeze) = node_refs.freeze_listeners.get_mut(&cookie) else {
+ pr_warn!("BC_FREEZE_NOTIFICATION_DONE {:016x} not found\n", cookie.0);
+ return Err(EINVAL);
+ };
+ let mut clear_msg = None;
+ if freeze.num_pending_duplicates > 0 {
+ clear_msg = Some(FreezeMessage::init(alloc, cookie));
+ freeze.num_pending_duplicates -= 1;
+ freeze.num_cleared_duplicates += 1;
+ } else {
+ if !freeze.is_pending {
+ pr_warn!(
+ "BC_FREEZE_NOTIFICATION_DONE {:016x} not pending\n",
+ cookie.0
+ );
+ return Err(EINVAL);
+ }
+ let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen();
+ if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) {
+ // Immediately send another FreezeMessage.
+ clear_msg = Some(FreezeMessage::init(alloc, cookie));
+ }
+ freeze.is_pending = false;
+ }
+ drop(node_refs_guard);
+ if let Some(clear_msg) = clear_msg {
+ let _ = self.push_work(clear_msg);
+ }
+ Ok(())
+ }
+
+ pub(crate) fn clear_freeze_notif(self: &Arc<Self>, reader: &mut UserSliceReader) -> Result<()> {
+ let hc = reader.read::<BinderHandleCookie>()?;
+ let handle = hc.handle;
+ let cookie = FreezeCookie(hc.cookie);
+
+ let alloc = FreezeMessage::new(GFP_KERNEL)?;
+ let mut node_refs_guard = self.node_refs.lock();
+ let node_refs = &mut *node_refs_guard;
+ let Some(info) = node_refs.by_handle.get_mut(&handle) else {
+ pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION invalid ref {}\n", handle);
+ return Err(EINVAL);
+ };
+ let Some(info_cookie) = info.freeze() else {
+ pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION freeze notification not active\n");
+ return Err(EINVAL);
+ };
+ if *info_cookie != cookie {
+ pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION freeze notification cookie mismatch\n");
+ return Err(EINVAL);
+ }
+ let Some(listener) = node_refs.freeze_listeners.get_mut(&cookie) else {
+ pr_warn!("BC_CLEAR_FREEZE_NOTIFICATION invalid cookie {}\n", handle);
+ return Err(EINVAL);
+ };
+ listener.is_clearing = true;
+ listener.node.remove_freeze_listener(self);
+ *info.freeze() = None;
+ let mut msg = None;
+ if !listener.is_pending {
+ msg = Some(FreezeMessage::init(alloc, cookie));
+ }
+ drop(node_refs_guard);
+
+ if let Some(msg) = msg {
+ let _ = self.push_work(msg);
+ }
+ Ok(())
+ }
+
+ fn get_freeze_cookie(&self, node: &DArc<Node>) -> Option<FreezeCookie> {
+ let node_refs = &mut *self.node_refs.lock();
+ let handle = node_refs.by_node.get(&node.global_id())?;
+ let node_ref = node_refs.by_handle.get_mut(handle)?;
+ *node_ref.freeze()
+ }
+
+ /// Creates a vector of every freeze listener on this process.
+ ///
+ /// Returns pairs of the remote process listening for notifications and the local node it is
+ /// listening on.
+ #[expect(clippy::type_complexity)]
+ fn find_freeze_recipients(&self) -> Result<KVVec<(DArc<Node>, Arc<Process>)>, AllocError> {
+ // Defined before `inner` to drop after releasing spinlock if `push_within_capacity` fails.
+ let mut node_proc_pair;
+
+ // We pre-allocate space for up to 8 recipients before we take the spinlock. However, if
+ // the allocation fails, use a vector with a capacity of zero instead of failing. After
+ // all, there might not be any freeze listeners, in which case this operation could still
+ // succeed.
+ let mut recipients =
+ KVVec::with_capacity(8, GFP_KERNEL).unwrap_or_else(|_err| KVVec::new());
+
+ let mut inner = self.lock_with_nodes();
+ let mut curr = inner.nodes.cursor_front_mut();
+ while let Some(cursor) = curr {
+ let (key, node) = cursor.current();
+ let key = *key;
+ let list = node.freeze_list(&inner.inner);
+ let len = list.len();
+
+ if recipients.spare_capacity_mut().len() < len {
+ drop(inner);
+ recipients.reserve(len, GFP_KERNEL)?;
+ inner = self.lock_with_nodes();
+ // Find the node we were looking at and try again. If the set of nodes was changed,
+ // then just proceed to the next node. This is ok because we don't guarantee the
+ // inclusion of nodes that are added or removed in parallel with this operation.
+ curr = inner.nodes.cursor_lower_bound_mut(&key);
+ continue;
+ }
+
+ for proc in list {
+ node_proc_pair = (node.clone(), proc.clone());
+ recipients
+ .push_within_capacity(node_proc_pair)
+ .map_err(|_| {
+ pr_err!(
+ "push_within_capacity failed even though we checked the capacity\n"
+ );
+ AllocError
+ })?;
+ }
+
+ curr = cursor.move_next();
+ }
+ Ok(recipients)
+ }
+
+ /// Prepare allocations for sending freeze messages.
+ pub(crate) fn prepare_freeze_messages(&self) -> Result<FreezeMessages, AllocError> {
+ let recipients = self.find_freeze_recipients()?;
+ let mut batch = KVVec::with_capacity(recipients.len(), GFP_KERNEL)?;
+ for (node, proc) in recipients {
+ let Some(cookie) = proc.get_freeze_cookie(&node) else {
+ // If the freeze listener was removed in the meantime, just discard the
+ // notification.
+ continue;
+ };
+ let msg_alloc = FreezeMessage::new(GFP_KERNEL)?;
+ let msg = FreezeMessage::init(msg_alloc, cookie);
+ batch.push((proc, msg), GFP_KERNEL)?;
+ }
+
+ Ok(FreezeMessages { batch })
+ }
+}
+
+pub(crate) struct FreezeMessages {
+ batch: KVVec<(Arc<Process>, DLArc<FreezeMessage>)>,
+}
+
+impl FreezeMessages {
+ pub(crate) fn send_messages(self) {
+ for (proc, msg) in self.batch {
+ let _ = proc.push_work(msg);
+ }
+ }
+}
diff --git a/drivers/android/binder/node.rs b/drivers/android/binder/node.rs
new file mode 100644
index 000000000000..c26d113ede96
--- /dev/null
+++ b/drivers/android/binder/node.rs
@@ -0,0 +1,1131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{
+ list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc},
+ prelude::*,
+ seq_file::SeqFile,
+ seq_print,
+ sync::lock::{spinlock::SpinLockBackend, Guard},
+ sync::{Arc, LockedBy, SpinLock},
+};
+
+use crate::{
+ defs::*,
+ error::BinderError,
+ process::{NodeRefInfo, Process, ProcessInner},
+ thread::Thread,
+ transaction::Transaction,
+ BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead,
+};
+
+use core::mem;
+
+mod wrapper;
+pub(crate) use self::wrapper::CritIncrWrapper;
+
+#[derive(Debug)]
+pub(crate) struct CouldNotDeliverCriticalIncrement;
+
+/// Keeps track of how this node is scheduled.
+///
+/// There are two ways to schedule a node to a work list. Just schedule the node itself, or
+/// allocate a wrapper that references the node and schedule the wrapper. These wrappers exists to
+/// make it possible to "move" a node from one list to another - when `do_work` is called directly
+/// on the `Node`, then it's a no-op if there's also a pending wrapper.
+///
+/// Wrappers are generally only needed for zero-to-one refcount increments, and there are two cases
+/// of this: weak increments and strong increments. We call such increments "critical" because it
+/// is critical that they are delivered to the thread doing the increment. Some examples:
+///
+/// * One thread makes a zero-to-one strong increment, and another thread makes a zero-to-one weak
+/// increment. Delivering the node to the thread doing the weak increment is wrong, since the
+/// thread doing the strong increment may have ended a long time ago when the command is actually
+/// processed by userspace.
+///
+/// * We have a weak reference and are about to drop it on one thread. But then another thread does
+/// a zero-to-one strong increment. If the strong increment gets sent to the thread that was
+/// about to drop the weak reference, then the strong increment could be processed after the
+/// other thread has already exited, which would be too late.
+///
+/// Note that trying to create a `ListArc` to the node can succeed even if `has_normal_push` is
+/// set. This is because another thread might just have popped the node from a todo list, but not
+/// yet called `do_work`. However, if `has_normal_push` is false, then creating a `ListArc` should
+/// always succeed.
+///
+/// Like the other fields in `NodeInner`, the delivery state is protected by the process lock.
+struct DeliveryState {
+ /// Is the `Node` currently scheduled?
+ has_pushed_node: bool,
+
+ /// Is a wrapper currently scheduled?
+ ///
+ /// The wrapper is used only for strong zero2one increments.
+ has_pushed_wrapper: bool,
+
+ /// Is the currently scheduled `Node` scheduled due to a weak zero2one increment?
+ ///
+ /// Weak zero2one operations are always scheduled using the `Node`.
+ has_weak_zero2one: bool,
+
+ /// Is the currently scheduled wrapper/`Node` scheduled due to a strong zero2one increment?
+ ///
+ /// If `has_pushed_wrapper` is set, then the strong zero2one increment was scheduled using the
+ /// wrapper. Otherwise, `has_pushed_node` must be set and it was scheduled using the `Node`.
+ has_strong_zero2one: bool,
+}
+
+impl DeliveryState {
+ fn should_normal_push(&self) -> bool {
+ !self.has_pushed_node && !self.has_pushed_wrapper
+ }
+
+ fn did_normal_push(&mut self) {
+ assert!(self.should_normal_push());
+ self.has_pushed_node = true;
+ }
+
+ fn should_push_weak_zero2one(&self) -> bool {
+ !self.has_weak_zero2one && !self.has_strong_zero2one
+ }
+
+ fn can_push_weak_zero2one_normally(&self) -> bool {
+ !self.has_pushed_node
+ }
+
+ fn did_push_weak_zero2one(&mut self) {
+ assert!(self.should_push_weak_zero2one());
+ assert!(self.can_push_weak_zero2one_normally());
+ self.has_pushed_node = true;
+ self.has_weak_zero2one = true;
+ }
+
+ fn should_push_strong_zero2one(&self) -> bool {
+ !self.has_strong_zero2one
+ }
+
+ fn can_push_strong_zero2one_normally(&self) -> bool {
+ !self.has_pushed_node
+ }
+
+ fn did_push_strong_zero2one(&mut self) {
+ assert!(self.should_push_strong_zero2one());
+ assert!(self.can_push_strong_zero2one_normally());
+ self.has_pushed_node = true;
+ self.has_strong_zero2one = true;
+ }
+
+ fn did_push_strong_zero2one_wrapper(&mut self) {
+ assert!(self.should_push_strong_zero2one());
+ assert!(!self.can_push_strong_zero2one_normally());
+ self.has_pushed_wrapper = true;
+ self.has_strong_zero2one = true;
+ }
+}
+
+struct CountState {
+ /// The reference count.
+ count: usize,
+ /// Whether the process that owns this node thinks that we hold a refcount on it. (Note that
+ /// even if count is greater than one, we only increment it once in the owning process.)
+ has_count: bool,
+}
+
+impl CountState {
+ fn new() -> Self {
+ Self {
+ count: 0,
+ has_count: false,
+ }
+ }
+}
+
+struct NodeInner {
+ /// Strong refcounts held on this node by `NodeRef` objects.
+ strong: CountState,
+ /// Weak refcounts held on this node by `NodeRef` objects.
+ weak: CountState,
+ delivery_state: DeliveryState,
+ /// The binder driver guarantees that oneway transactions sent to the same node are serialized,
+ /// that is, userspace will not be given the next one until it has finished processing the
+ /// previous oneway transaction. This is done to avoid the case where two oneway transactions
+ /// arrive in opposite order from the order in which they were sent. (E.g., they could be
+ /// delivered to two different threads, which could appear as-if they were sent in opposite
+ /// order.)
+ ///
+ /// To fix that, we store pending oneway transactions in a separate list in the node, and don't
+ /// deliver the next oneway transaction until userspace signals that it has finished processing
+ /// the previous oneway transaction by calling the `BC_FREE_BUFFER` ioctl.
+ oneway_todo: List<DTRWrap<Transaction>>,
+ /// Keeps track of whether this node has a pending oneway transaction.
+ ///
+ /// When this is true, incoming oneway transactions are stored in `oneway_todo`, instead of
+ /// being delivered directly to the process.
+ has_oneway_transaction: bool,
+ /// List of processes to deliver a notification to when this node is destroyed (usually due to
+ /// the process dying).
+ death_list: List<DTRWrap<NodeDeath>, 1>,
+ /// List of processes to deliver freeze notifications to.
+ freeze_list: KVVec<Arc<Process>>,
+ /// The number of active BR_INCREFS or BR_ACQUIRE operations. (should be maximum two)
+ ///
+ /// If this is non-zero, then we postpone any BR_RELEASE or BR_DECREFS notifications until the
+ /// active operations have ended. This avoids the situation an increment and decrement get
+ /// reordered from userspace's perspective.
+ active_inc_refs: u8,
+ /// List of `NodeRefInfo` objects that reference this node.
+ refs: List<NodeRefInfo, { NodeRefInfo::LIST_NODE }>,
+}
+
+#[pin_data]
+pub(crate) struct Node {
+ pub(crate) debug_id: usize,
+ ptr: u64,
+ pub(crate) cookie: u64,
+ pub(crate) flags: u32,
+ pub(crate) owner: Arc<Process>,
+ inner: LockedBy<NodeInner, ProcessInner>,
+ #[pin]
+ links_track: AtomicTracker,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for Node {
+ tracked_by links_track: AtomicTracker;
+ }
+}
+
+// Make `oneway_todo` work.
+kernel::list::impl_list_item! {
+ impl ListItem<0> for DTRWrap<Transaction> {
+ using ListLinks { self.links.inner };
+ }
+}
+
+impl Node {
+ pub(crate) fn new(
+ ptr: u64,
+ cookie: u64,
+ flags: u32,
+ owner: Arc<Process>,
+ ) -> impl PinInit<Self> {
+ pin_init!(Self {
+ inner: LockedBy::new(
+ &owner.inner,
+ NodeInner {
+ strong: CountState::new(),
+ weak: CountState::new(),
+ delivery_state: DeliveryState {
+ has_pushed_node: false,
+ has_pushed_wrapper: false,
+ has_weak_zero2one: false,
+ has_strong_zero2one: false,
+ },
+ death_list: List::new(),
+ oneway_todo: List::new(),
+ freeze_list: KVVec::new(),
+ has_oneway_transaction: false,
+ active_inc_refs: 0,
+ refs: List::new(),
+ },
+ ),
+ debug_id: super::next_debug_id(),
+ ptr,
+ cookie,
+ flags,
+ owner,
+ links_track <- AtomicTracker::new(),
+ })
+ }
+
+ pub(crate) fn has_oneway_transaction(&self, owner_inner: &mut ProcessInner) -> bool {
+ let inner = self.inner.access_mut(owner_inner);
+ inner.has_oneway_transaction
+ }
+
+ #[inline(never)]
+ pub(crate) fn full_debug_print(
+ &self,
+ m: &SeqFile,
+ owner_inner: &mut ProcessInner,
+ ) -> Result<()> {
+ let inner = self.inner.access_mut(owner_inner);
+ seq_print!(
+ m,
+ " node {}: u{:016x} c{:016x} hs {} hw {} cs {} cw {}",
+ self.debug_id,
+ self.ptr,
+ self.cookie,
+ inner.strong.has_count,
+ inner.weak.has_count,
+ inner.strong.count,
+ inner.weak.count,
+ );
+ if !inner.refs.is_empty() {
+ seq_print!(m, " proc");
+ for node_ref in &inner.refs {
+ seq_print!(m, " {}", node_ref.process.task.pid());
+ }
+ }
+ seq_print!(m, "\n");
+ for t in &inner.oneway_todo {
+ t.debug_print_inner(m, " pending async transaction ");
+ }
+ Ok(())
+ }
+
+ /// Insert the `NodeRef` into this `refs` list.
+ ///
+ /// # Safety
+ ///
+ /// It must be the case that `info.node_ref.node` is this node.
+ pub(crate) unsafe fn insert_node_info(
+ &self,
+ info: ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>,
+ ) {
+ self.inner
+ .access_mut(&mut self.owner.inner.lock())
+ .refs
+ .push_front(info);
+ }
+
+ /// Insert the `NodeRef` into this `refs` list.
+ ///
+ /// # Safety
+ ///
+ /// It must be the case that `info.node_ref.node` is this node.
+ pub(crate) unsafe fn remove_node_info(
+ &self,
+ info: &NodeRefInfo,
+ ) -> Option<ListArc<NodeRefInfo, { NodeRefInfo::LIST_NODE }>> {
+ // SAFETY: We always insert `NodeRefInfo` objects into the `refs` list of the node that it
+ // references in `info.node_ref.node`. That is this node, so `info` cannot possibly be in
+ // the `refs` list of another node.
+ unsafe {
+ self.inner
+ .access_mut(&mut self.owner.inner.lock())
+ .refs
+ .remove(info)
+ }
+ }
+
+ /// An id that is unique across all binder nodes on the system. Used as the key in the
+ /// `by_node` map.
+ pub(crate) fn global_id(&self) -> usize {
+ self as *const Node as usize
+ }
+
+ pub(crate) fn get_id(&self) -> (u64, u64) {
+ (self.ptr, self.cookie)
+ }
+
+ pub(crate) fn add_death(
+ &self,
+ death: ListArc<DTRWrap<NodeDeath>, 1>,
+ guard: &mut Guard<'_, ProcessInner, SpinLockBackend>,
+ ) {
+ self.inner.access_mut(guard).death_list.push_back(death);
+ }
+
+ pub(crate) fn inc_ref_done_locked(
+ self: &DArc<Node>,
+ _strong: bool,
+ owner_inner: &mut ProcessInner,
+ ) -> Option<DLArc<Node>> {
+ let inner = self.inner.access_mut(owner_inner);
+ if inner.active_inc_refs == 0 {
+ pr_err!("inc_ref_done called when no active inc_refs");
+ return None;
+ }
+
+ inner.active_inc_refs -= 1;
+ if inner.active_inc_refs == 0 {
+ // Having active inc_refs can inhibit dropping of ref-counts. Calculate whether we
+ // would send a refcount decrement, and if so, tell the caller to schedule us.
+ let strong = inner.strong.count > 0;
+ let has_strong = inner.strong.has_count;
+ let weak = strong || inner.weak.count > 0;
+ let has_weak = inner.weak.has_count;
+
+ let should_drop_weak = !weak && has_weak;
+ let should_drop_strong = !strong && has_strong;
+
+ // If we want to drop the ref-count again, tell the caller to schedule a work node for
+ // that.
+ let need_push = should_drop_weak || should_drop_strong;
+
+ if need_push && inner.delivery_state.should_normal_push() {
+ let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap();
+ inner.delivery_state.did_normal_push();
+ Some(list_arc)
+ } else {
+ None
+ }
+ } else {
+ None
+ }
+ }
+
+ pub(crate) fn update_refcount_locked(
+ self: &DArc<Node>,
+ inc: bool,
+ strong: bool,
+ count: usize,
+ owner_inner: &mut ProcessInner,
+ ) -> Option<DLArc<Node>> {
+ let is_dead = owner_inner.is_dead;
+ let inner = self.inner.access_mut(owner_inner);
+
+ // Get a reference to the state we'll update.
+ let state = if strong {
+ &mut inner.strong
+ } else {
+ &mut inner.weak
+ };
+
+ // Update the count and determine whether we need to push work.
+ let need_push = if inc {
+ state.count += count;
+ // TODO: This method shouldn't be used for zero-to-one increments.
+ !is_dead && !state.has_count
+ } else {
+ if state.count < count {
+ pr_err!("Failure: refcount underflow!");
+ return None;
+ }
+ state.count -= count;
+ !is_dead && state.count == 0 && state.has_count
+ };
+
+ if need_push && inner.delivery_state.should_normal_push() {
+ let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap();
+ inner.delivery_state.did_normal_push();
+ Some(list_arc)
+ } else {
+ None
+ }
+ }
+
+ pub(crate) fn incr_refcount_allow_zero2one(
+ self: &DArc<Self>,
+ strong: bool,
+ owner_inner: &mut ProcessInner,
+ ) -> Result<Option<DLArc<Node>>, CouldNotDeliverCriticalIncrement> {
+ let is_dead = owner_inner.is_dead;
+ let inner = self.inner.access_mut(owner_inner);
+
+ // Get a reference to the state we'll update.
+ let state = if strong {
+ &mut inner.strong
+ } else {
+ &mut inner.weak
+ };
+
+ // Update the count and determine whether we need to push work.
+ state.count += 1;
+ if is_dead || state.has_count {
+ return Ok(None);
+ }
+
+ // Userspace needs to be notified of this.
+ if !strong && inner.delivery_state.should_push_weak_zero2one() {
+ assert!(inner.delivery_state.can_push_weak_zero2one_normally());
+ let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap();
+ inner.delivery_state.did_push_weak_zero2one();
+ Ok(Some(list_arc))
+ } else if strong && inner.delivery_state.should_push_strong_zero2one() {
+ if inner.delivery_state.can_push_strong_zero2one_normally() {
+ let list_arc = ListArc::try_from_arc(self.clone()).ok().unwrap();
+ inner.delivery_state.did_push_strong_zero2one();
+ Ok(Some(list_arc))
+ } else {
+ state.count -= 1;
+ Err(CouldNotDeliverCriticalIncrement)
+ }
+ } else {
+ // Work is already pushed, and we don't need to push again.
+ Ok(None)
+ }
+ }
+
+ pub(crate) fn incr_refcount_allow_zero2one_with_wrapper(
+ self: &DArc<Self>,
+ strong: bool,
+ wrapper: CritIncrWrapper,
+ owner_inner: &mut ProcessInner,
+ ) -> Option<DLArc<dyn DeliverToRead>> {
+ match self.incr_refcount_allow_zero2one(strong, owner_inner) {
+ Ok(Some(node)) => Some(node as _),
+ Ok(None) => None,
+ Err(CouldNotDeliverCriticalIncrement) => {
+ assert!(strong);
+ let inner = self.inner.access_mut(owner_inner);
+ inner.strong.count += 1;
+ inner.delivery_state.did_push_strong_zero2one_wrapper();
+ Some(wrapper.init(self.clone()))
+ }
+ }
+ }
+
+ pub(crate) fn update_refcount(self: &DArc<Self>, inc: bool, count: usize, strong: bool) {
+ self.owner
+ .inner
+ .lock()
+ .update_node_refcount(self, inc, strong, count, None);
+ }
+
+ pub(crate) fn populate_counts(
+ &self,
+ out: &mut BinderNodeInfoForRef,
+ guard: &Guard<'_, ProcessInner, SpinLockBackend>,
+ ) {
+ let inner = self.inner.access(guard);
+ out.strong_count = inner.strong.count as _;
+ out.weak_count = inner.weak.count as _;
+ }
+
+ pub(crate) fn populate_debug_info(
+ &self,
+ out: &mut BinderNodeDebugInfo,
+ guard: &Guard<'_, ProcessInner, SpinLockBackend>,
+ ) {
+ out.ptr = self.ptr as _;
+ out.cookie = self.cookie as _;
+ let inner = self.inner.access(guard);
+ if inner.strong.has_count {
+ out.has_strong_ref = 1;
+ }
+ if inner.weak.has_count {
+ out.has_weak_ref = 1;
+ }
+ }
+
+ pub(crate) fn force_has_count(&self, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) {
+ let inner = self.inner.access_mut(guard);
+ inner.strong.has_count = true;
+ inner.weak.has_count = true;
+ }
+
+ fn write(&self, writer: &mut BinderReturnWriter<'_>, code: u32) -> Result {
+ writer.write_code(code)?;
+ writer.write_payload(&self.ptr)?;
+ writer.write_payload(&self.cookie)?;
+ Ok(())
+ }
+
+ pub(crate) fn submit_oneway(
+ &self,
+ transaction: DLArc<Transaction>,
+ guard: &mut Guard<'_, ProcessInner, SpinLockBackend>,
+ ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> {
+ if guard.is_dead {
+ return Err((BinderError::new_dead(), transaction));
+ }
+
+ let inner = self.inner.access_mut(guard);
+ if inner.has_oneway_transaction {
+ inner.oneway_todo.push_back(transaction);
+ } else {
+ inner.has_oneway_transaction = true;
+ guard.push_work(transaction)?;
+ }
+ Ok(())
+ }
+
+ pub(crate) fn release(&self) {
+ let mut guard = self.owner.inner.lock();
+ while let Some(work) = self.inner.access_mut(&mut guard).oneway_todo.pop_front() {
+ drop(guard);
+ work.into_arc().cancel();
+ guard = self.owner.inner.lock();
+ }
+
+ while let Some(death) = self.inner.access_mut(&mut guard).death_list.pop_front() {
+ drop(guard);
+ death.into_arc().set_dead();
+ guard = self.owner.inner.lock();
+ }
+ }
+
+ pub(crate) fn pending_oneway_finished(&self) {
+ let mut guard = self.owner.inner.lock();
+ if guard.is_dead {
+ // Cleanup will happen in `Process::deferred_release`.
+ return;
+ }
+
+ let inner = self.inner.access_mut(&mut guard);
+
+ let transaction = inner.oneway_todo.pop_front();
+ inner.has_oneway_transaction = transaction.is_some();
+ if let Some(transaction) = transaction {
+ match guard.push_work(transaction) {
+ Ok(()) => {}
+ Err((_err, work)) => {
+ // Process is dead.
+ // This shouldn't happen due to the `is_dead` check, but if it does, just drop
+ // the transaction and return.
+ drop(guard);
+ drop(work);
+ }
+ }
+ }
+ }
+
+ /// Finds an outdated transaction that the given transaction can replace.
+ ///
+ /// If one is found, it is removed from the list and returned.
+ pub(crate) fn take_outdated_transaction(
+ &self,
+ new: &Transaction,
+ guard: &mut Guard<'_, ProcessInner, SpinLockBackend>,
+ ) -> Option<DLArc<Transaction>> {
+ let inner = self.inner.access_mut(guard);
+ let mut cursor = inner.oneway_todo.cursor_front();
+ while let Some(next) = cursor.peek_next() {
+ if new.can_replace(&next) {
+ return Some(next.remove());
+ }
+ cursor.move_next();
+ }
+ None
+ }
+
+ /// This is split into a separate function since it's called by both `Node::do_work` and
+ /// `NodeWrapper::do_work`.
+ fn do_work_locked(
+ &self,
+ writer: &mut BinderReturnWriter<'_>,
+ mut guard: Guard<'_, ProcessInner, SpinLockBackend>,
+ ) -> Result<bool> {
+ let inner = self.inner.access_mut(&mut guard);
+ let strong = inner.strong.count > 0;
+ let has_strong = inner.strong.has_count;
+ let weak = strong || inner.weak.count > 0;
+ let has_weak = inner.weak.has_count;
+
+ if weak && !has_weak {
+ inner.weak.has_count = true;
+ inner.active_inc_refs += 1;
+ }
+
+ if strong && !has_strong {
+ inner.strong.has_count = true;
+ inner.active_inc_refs += 1;
+ }
+
+ let no_active_inc_refs = inner.active_inc_refs == 0;
+ let should_drop_weak = no_active_inc_refs && (!weak && has_weak);
+ let should_drop_strong = no_active_inc_refs && (!strong && has_strong);
+ if should_drop_weak {
+ inner.weak.has_count = false;
+ }
+ if should_drop_strong {
+ inner.strong.has_count = false;
+ }
+ if no_active_inc_refs && !weak {
+ // Remove the node if there are no references to it.
+ guard.remove_node(self.ptr);
+ }
+ drop(guard);
+
+ if weak && !has_weak {
+ self.write(writer, BR_INCREFS)?;
+ }
+ if strong && !has_strong {
+ self.write(writer, BR_ACQUIRE)?;
+ }
+ if should_drop_strong {
+ self.write(writer, BR_RELEASE)?;
+ }
+ if should_drop_weak {
+ self.write(writer, BR_DECREFS)?;
+ }
+
+ Ok(true)
+ }
+
+ pub(crate) fn add_freeze_listener(
+ &self,
+ process: &Arc<Process>,
+ flags: kernel::alloc::Flags,
+ ) -> Result {
+ let mut vec_alloc = KVVec::<Arc<Process>>::new();
+ loop {
+ let mut guard = self.owner.inner.lock();
+ // Do not check for `guard.dead`. The `dead` flag that matters here is the owner of the
+ // listener, no the target.
+ let inner = self.inner.access_mut(&mut guard);
+ let len = inner.freeze_list.len();
+ if len >= inner.freeze_list.capacity() {
+ if len >= vec_alloc.capacity() {
+ drop(guard);
+ vec_alloc = KVVec::with_capacity((1 + len).next_power_of_two(), flags)?;
+ continue;
+ }
+ mem::swap(&mut inner.freeze_list, &mut vec_alloc);
+ for elem in vec_alloc.drain_all() {
+ inner.freeze_list.push_within_capacity(elem)?;
+ }
+ }
+ inner.freeze_list.push_within_capacity(process.clone())?;
+ return Ok(());
+ }
+ }
+
+ pub(crate) fn remove_freeze_listener(&self, p: &Arc<Process>) {
+ let _unused_capacity;
+ let mut guard = self.owner.inner.lock();
+ let inner = self.inner.access_mut(&mut guard);
+ let len = inner.freeze_list.len();
+ inner.freeze_list.retain(|proc| !Arc::ptr_eq(proc, p));
+ if len == inner.freeze_list.len() {
+ pr_warn!(
+ "Could not remove freeze listener for {}\n",
+ p.pid_in_current_ns()
+ );
+ }
+ if inner.freeze_list.is_empty() {
+ _unused_capacity = mem::take(&mut inner.freeze_list);
+ }
+ }
+
+ pub(crate) fn freeze_list<'a>(&'a self, guard: &'a ProcessInner) -> &'a [Arc<Process>] {
+ &self.inner.access(guard).freeze_list
+ }
+}
+
+impl DeliverToRead for Node {
+ fn do_work(
+ self: DArc<Self>,
+ _thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let mut owner_inner = self.owner.inner.lock();
+ let inner = self.inner.access_mut(&mut owner_inner);
+
+ assert!(inner.delivery_state.has_pushed_node);
+ if inner.delivery_state.has_pushed_wrapper {
+ // If the wrapper is scheduled, then we are either a normal push or weak zero2one
+ // increment, and the wrapper is a strong zero2one increment, so the wrapper always
+ // takes precedence over us.
+ assert!(inner.delivery_state.has_strong_zero2one);
+ inner.delivery_state.has_pushed_node = false;
+ inner.delivery_state.has_weak_zero2one = false;
+ return Ok(true);
+ }
+
+ inner.delivery_state.has_pushed_node = false;
+ inner.delivery_state.has_weak_zero2one = false;
+ inner.delivery_state.has_strong_zero2one = false;
+
+ self.do_work_locked(writer, owner_inner)
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ #[inline(never)]
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ seq_print!(
+ m,
+ "{}node work {}: u{:016x} c{:016x}\n",
+ prefix,
+ self.debug_id,
+ self.ptr,
+ self.cookie,
+ );
+ Ok(())
+ }
+}
+
+/// Represents something that holds one or more ref-counts to a `Node`.
+///
+/// Whenever process A holds a refcount to a node owned by a different process B, then process A
+/// will store a `NodeRef` that refers to the `Node` in process B. When process A releases the
+/// refcount, we destroy the NodeRef, which decrements the ref-count in process A.
+///
+/// This type is also used for some other cases. For example, a transaction allocation holds a
+/// refcount on the target node, and this is implemented by storing a `NodeRef` in the allocation
+/// so that the destructor of the allocation will drop a refcount of the `Node`.
+pub(crate) struct NodeRef {
+ pub(crate) node: DArc<Node>,
+ /// How many times does this NodeRef hold a refcount on the Node?
+ strong_node_count: usize,
+ weak_node_count: usize,
+ /// How many times does userspace hold a refcount on this NodeRef?
+ strong_count: usize,
+ weak_count: usize,
+}
+
+impl NodeRef {
+ pub(crate) fn new(node: DArc<Node>, strong_count: usize, weak_count: usize) -> Self {
+ Self {
+ node,
+ strong_node_count: strong_count,
+ weak_node_count: weak_count,
+ strong_count,
+ weak_count,
+ }
+ }
+
+ pub(crate) fn absorb(&mut self, mut other: Self) {
+ assert!(
+ Arc::ptr_eq(&self.node, &other.node),
+ "absorb called with differing nodes"
+ );
+ self.strong_node_count += other.strong_node_count;
+ self.weak_node_count += other.weak_node_count;
+ self.strong_count += other.strong_count;
+ self.weak_count += other.weak_count;
+ other.strong_count = 0;
+ other.weak_count = 0;
+ other.strong_node_count = 0;
+ other.weak_node_count = 0;
+
+ if self.strong_node_count >= 2 || self.weak_node_count >= 2 {
+ let mut guard = self.node.owner.inner.lock();
+ let inner = self.node.inner.access_mut(&mut guard);
+
+ if self.strong_node_count >= 2 {
+ inner.strong.count -= self.strong_node_count - 1;
+ self.strong_node_count = 1;
+ assert_ne!(inner.strong.count, 0);
+ }
+ if self.weak_node_count >= 2 {
+ inner.weak.count -= self.weak_node_count - 1;
+ self.weak_node_count = 1;
+ assert_ne!(inner.weak.count, 0);
+ }
+ }
+ }
+
+ pub(crate) fn get_count(&self) -> (usize, usize) {
+ (self.strong_count, self.weak_count)
+ }
+
+ pub(crate) fn clone(&self, strong: bool) -> Result<NodeRef> {
+ if strong && self.strong_count == 0 {
+ return Err(EINVAL);
+ }
+ Ok(self
+ .node
+ .owner
+ .inner
+ .lock()
+ .new_node_ref(self.node.clone(), strong, None))
+ }
+
+ /// Updates (increments or decrements) the number of references held against the node. If the
+ /// count being updated transitions from 0 to 1 or from 1 to 0, the node is notified by having
+ /// its `update_refcount` function called.
+ ///
+ /// Returns whether `self` should be removed (when both counts are zero).
+ pub(crate) fn update(&mut self, inc: bool, strong: bool) -> bool {
+ if strong && self.strong_count == 0 {
+ return false;
+ }
+ let (count, node_count, other_count) = if strong {
+ (
+ &mut self.strong_count,
+ &mut self.strong_node_count,
+ self.weak_count,
+ )
+ } else {
+ (
+ &mut self.weak_count,
+ &mut self.weak_node_count,
+ self.strong_count,
+ )
+ };
+ if inc {
+ if *count == 0 {
+ *node_count = 1;
+ self.node.update_refcount(true, 1, strong);
+ }
+ *count += 1;
+ } else {
+ if *count == 0 {
+ pr_warn!(
+ "pid {} performed invalid decrement on ref\n",
+ kernel::current!().pid()
+ );
+ return false;
+ }
+ *count -= 1;
+ if *count == 0 {
+ self.node.update_refcount(false, *node_count, strong);
+ *node_count = 0;
+ return other_count == 0;
+ }
+ }
+ false
+ }
+}
+
+impl Drop for NodeRef {
+ // This destructor is called conditionally from `Allocation::drop`. That branch is often
+ // mispredicted. Inlining this method call reduces the cost of those branch mispredictions.
+ #[inline(always)]
+ fn drop(&mut self) {
+ if self.strong_node_count > 0 {
+ self.node
+ .update_refcount(false, self.strong_node_count, true);
+ }
+ if self.weak_node_count > 0 {
+ self.node
+ .update_refcount(false, self.weak_node_count, false);
+ }
+ }
+}
+
+struct NodeDeathInner {
+ dead: bool,
+ cleared: bool,
+ notification_done: bool,
+ /// Indicates whether the normal flow was interrupted by removing the handle. In this case, we
+ /// need behave as if the death notification didn't exist (i.e., we don't deliver anything to
+ /// the user.
+ aborted: bool,
+}
+
+/// Used to deliver notifications when a process dies.
+///
+/// A process can request to be notified when a process dies using `BC_REQUEST_DEATH_NOTIFICATION`.
+/// This will make the driver send a `BR_DEAD_BINDER` to userspace when the process dies (or
+/// immediately if it is already dead). Userspace is supposed to respond with `BC_DEAD_BINDER_DONE`
+/// once it has processed the notification.
+///
+/// Userspace can unregister from death notifications using the `BC_CLEAR_DEATH_NOTIFICATION`
+/// command. In this case, the kernel will respond with `BR_CLEAR_DEATH_NOTIFICATION_DONE` once the
+/// notification has been removed. Note that if the remote process dies before the kernel has
+/// responded with `BR_CLEAR_DEATH_NOTIFICATION_DONE`, then the kernel will still send a
+/// `BR_DEAD_BINDER`, which userspace must be able to process. In this case, the kernel will wait
+/// for the `BC_DEAD_BINDER_DONE` command before it sends `BR_CLEAR_DEATH_NOTIFICATION_DONE`.
+///
+/// Note that even if the kernel sends a `BR_DEAD_BINDER`, this does not remove the death
+/// notification. Userspace must still remove it manually using `BC_CLEAR_DEATH_NOTIFICATION`.
+///
+/// If a process uses `BC_RELEASE` to destroy its last refcount on a node that has an active death
+/// registration, then the death registration is immediately deleted (we implement this using the
+/// `aborted` field). However, userspace is not supposed to delete a `NodeRef` without first
+/// deregistering death notifications, so this codepath is not executed under normal circumstances.
+#[pin_data]
+pub(crate) struct NodeDeath {
+ node: DArc<Node>,
+ process: Arc<Process>,
+ pub(crate) cookie: u64,
+ #[pin]
+ links_track: AtomicTracker<0>,
+ /// Used by the owner `Node` to store a list of registered death notifications.
+ ///
+ /// # Invariants
+ ///
+ /// Only ever used with the `death_list` list of `self.node`.
+ #[pin]
+ death_links: ListLinks<1>,
+ /// Used by the process to keep track of the death notifications for which we have sent a
+ /// `BR_DEAD_BINDER` but not yet received a `BC_DEAD_BINDER_DONE`.
+ ///
+ /// # Invariants
+ ///
+ /// Only ever used with the `delivered_deaths` list of `self.process`.
+ #[pin]
+ delivered_links: ListLinks<2>,
+ #[pin]
+ delivered_links_track: AtomicTracker<2>,
+ #[pin]
+ inner: SpinLock<NodeDeathInner>,
+}
+
+impl NodeDeath {
+ /// Constructs a new node death notification object.
+ pub(crate) fn new(
+ node: DArc<Node>,
+ process: Arc<Process>,
+ cookie: u64,
+ ) -> impl PinInit<DTRWrap<Self>> {
+ DTRWrap::new(pin_init!(
+ Self {
+ node,
+ process,
+ cookie,
+ links_track <- AtomicTracker::new(),
+ death_links <- ListLinks::new(),
+ delivered_links <- ListLinks::new(),
+ delivered_links_track <- AtomicTracker::new(),
+ inner <- kernel::new_spinlock!(NodeDeathInner {
+ dead: false,
+ cleared: false,
+ notification_done: false,
+ aborted: false,
+ }, "NodeDeath::inner"),
+ }
+ ))
+ }
+
+ /// Sets the cleared flag to `true`.
+ ///
+ /// It removes `self` from the node's death notification list if needed.
+ ///
+ /// Returns whether it needs to be queued.
+ pub(crate) fn set_cleared(self: &DArc<Self>, abort: bool) -> bool {
+ let (needs_removal, needs_queueing) = {
+ // Update state and determine if we need to queue a work item. We only need to do it
+ // when the node is not dead or if the user already completed the death notification.
+ let mut inner = self.inner.lock();
+ if abort {
+ inner.aborted = true;
+ }
+ if inner.cleared {
+ // Already cleared.
+ return false;
+ }
+ inner.cleared = true;
+ (!inner.dead, !inner.dead || inner.notification_done)
+ };
+
+ // Remove death notification from node.
+ if needs_removal {
+ let mut owner_inner = self.node.owner.inner.lock();
+ let node_inner = self.node.inner.access_mut(&mut owner_inner);
+ // SAFETY: A `NodeDeath` is never inserted into the death list of any node other than
+ // its owner, so it is either in this death list or in no death list.
+ unsafe { node_inner.death_list.remove(self) };
+ }
+ needs_queueing
+ }
+
+ /// Sets the 'notification done' flag to `true`.
+ pub(crate) fn set_notification_done(self: DArc<Self>, thread: &Thread) {
+ let needs_queueing = {
+ let mut inner = self.inner.lock();
+ inner.notification_done = true;
+ inner.cleared
+ };
+ if needs_queueing {
+ if let Some(death) = ListArc::try_from_arc_or_drop(self) {
+ let _ = thread.push_work_if_looper(death);
+ }
+ }
+ }
+
+ /// Sets the 'dead' flag to `true` and queues work item if needed.
+ pub(crate) fn set_dead(self: DArc<Self>) {
+ let needs_queueing = {
+ let mut inner = self.inner.lock();
+ if inner.cleared {
+ false
+ } else {
+ inner.dead = true;
+ true
+ }
+ };
+ if needs_queueing {
+ // Push the death notification to the target process. There is nothing else to do if
+ // it's already dead.
+ if let Some(death) = ListArc::try_from_arc_or_drop(self) {
+ let process = death.process.clone();
+ let _ = process.push_work(death);
+ }
+ }
+ }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for NodeDeath {
+ tracked_by links_track: AtomicTracker;
+ }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<1> for DTRWrap<NodeDeath> { untracked; }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<1> for DTRWrap<NodeDeath> {
+ using ListLinks { self.wrapped.death_links };
+ }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<2> for DTRWrap<NodeDeath> {
+ tracked_by wrapped: NodeDeath;
+ }
+}
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<2> for NodeDeath {
+ tracked_by delivered_links_track: AtomicTracker<2>;
+ }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<2> for DTRWrap<NodeDeath> {
+ using ListLinks { self.wrapped.delivered_links };
+ }
+}
+
+impl DeliverToRead for NodeDeath {
+ fn do_work(
+ self: DArc<Self>,
+ _thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let done = {
+ let inner = self.inner.lock();
+ if inner.aborted {
+ return Ok(true);
+ }
+ inner.cleared && (!inner.dead || inner.notification_done)
+ };
+
+ let cookie = self.cookie;
+ let cmd = if done {
+ BR_CLEAR_DEATH_NOTIFICATION_DONE
+ } else {
+ let process = self.process.clone();
+ let mut process_inner = process.inner.lock();
+ let inner = self.inner.lock();
+ if inner.aborted {
+ return Ok(true);
+ }
+ // We're still holding the inner lock, so it cannot be aborted while we insert it into
+ // the delivered list.
+ process_inner.death_delivered(self.clone());
+ BR_DEAD_BINDER
+ };
+
+ writer.write_code(cmd)?;
+ writer.write_payload(&cookie)?;
+ // DEAD_BINDER notifications can cause transactions, so stop processing work items when we
+ // get to a death notification.
+ Ok(cmd != BR_DEAD_BINDER)
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ #[inline(never)]
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ let inner = self.inner.lock();
+
+ let dead_binder = inner.dead && !inner.notification_done;
+
+ if dead_binder {
+ if inner.cleared {
+ seq_print!(m, "{}has cleared dead binder\n", prefix);
+ } else {
+ seq_print!(m, "{}has dead binder\n", prefix);
+ }
+ } else {
+ seq_print!(m, "{}has cleared death notification\n", prefix);
+ }
+
+ Ok(())
+ }
+}
diff --git a/drivers/android/binder/node/wrapper.rs b/drivers/android/binder/node/wrapper.rs
new file mode 100644
index 000000000000..43294c050502
--- /dev/null
+++ b/drivers/android/binder/node/wrapper.rs
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{list::ListArc, prelude::*, seq_file::SeqFile, seq_print, sync::UniqueArc};
+
+use crate::{node::Node, thread::Thread, BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead};
+
+use core::mem::MaybeUninit;
+
+pub(crate) struct CritIncrWrapper {
+ inner: UniqueArc<MaybeUninit<DTRWrap<NodeWrapper>>>,
+}
+
+impl CritIncrWrapper {
+ pub(crate) fn new() -> Result<Self> {
+ Ok(CritIncrWrapper {
+ inner: UniqueArc::new_uninit(GFP_KERNEL)?,
+ })
+ }
+
+ pub(super) fn init(self, node: DArc<Node>) -> DLArc<dyn DeliverToRead> {
+ match self.inner.pin_init_with(DTRWrap::new(NodeWrapper { node })) {
+ Ok(initialized) => ListArc::from(initialized) as _,
+ Err(err) => match err {},
+ }
+ }
+}
+
+struct NodeWrapper {
+ node: DArc<Node>,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for NodeWrapper {
+ untracked;
+ }
+}
+
+impl DeliverToRead for NodeWrapper {
+ fn do_work(
+ self: DArc<Self>,
+ _thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let node = &self.node;
+ let mut owner_inner = node.owner.inner.lock();
+ let inner = node.inner.access_mut(&mut owner_inner);
+
+ let ds = &mut inner.delivery_state;
+
+ assert!(ds.has_pushed_wrapper);
+ assert!(ds.has_strong_zero2one);
+ ds.has_pushed_wrapper = false;
+ ds.has_strong_zero2one = false;
+
+ node.do_work_locked(writer, owner_inner)
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ #[inline(never)]
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ seq_print!(
+ m,
+ "{}node work {}: u{:016x} c{:016x}\n",
+ prefix,
+ self.node.debug_id,
+ self.node.ptr,
+ self.node.cookie,
+ );
+ Ok(())
+ }
+}
diff --git a/drivers/android/binder/page_range.rs b/drivers/android/binder/page_range.rs
new file mode 100644
index 000000000000..9379038f61f5
--- /dev/null
+++ b/drivers/android/binder/page_range.rs
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! This module has utilities for managing a page range where unused pages may be reclaimed by a
+//! vma shrinker.
+
+// To avoid deadlocks, locks are taken in the order:
+//
+// 1. mmap lock
+// 2. spinlock
+// 3. lru spinlock
+//
+// The shrinker will use trylock methods because it locks them in a different order.
+
+use core::{
+ marker::PhantomPinned,
+ mem::{size_of, size_of_val, MaybeUninit},
+ ptr,
+};
+
+use kernel::{
+ bindings,
+ error::Result,
+ ffi::{c_ulong, c_void},
+ mm::{virt, Mm, MmWithUser},
+ new_mutex, new_spinlock,
+ page::{Page, PAGE_SHIFT, PAGE_SIZE},
+ prelude::*,
+ str::CStr,
+ sync::{aref::ARef, Mutex, SpinLock},
+ task::Pid,
+ transmute::FromBytes,
+ types::Opaque,
+ uaccess::UserSliceReader,
+};
+
+/// Represents a shrinker that can be registered with the kernel.
+///
+/// Each shrinker can be used by many `ShrinkablePageRange` objects.
+#[repr(C)]
+pub(crate) struct Shrinker {
+ inner: Opaque<*mut bindings::shrinker>,
+ list_lru: Opaque<bindings::list_lru>,
+}
+
+// SAFETY: The shrinker and list_lru are thread safe.
+unsafe impl Send for Shrinker {}
+// SAFETY: The shrinker and list_lru are thread safe.
+unsafe impl Sync for Shrinker {}
+
+impl Shrinker {
+ /// Create a new shrinker.
+ ///
+ /// # Safety
+ ///
+ /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
+ /// been called exactly once, and it must not have returned an error.
+ pub(crate) const unsafe fn new() -> Self {
+ Self {
+ inner: Opaque::uninit(),
+ list_lru: Opaque::uninit(),
+ }
+ }
+
+ /// Register this shrinker with the kernel.
+ pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
+ // SAFETY: These fields are not yet used, so it's okay to zero them.
+ unsafe {
+ self.inner.get().write(ptr::null_mut());
+ self.list_lru.get().write_bytes(0, 1);
+ }
+
+ // SAFETY: The field is not yet used, so we can initialize it.
+ let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
+ if ret != 0 {
+ return Err(Error::from_errno(ret));
+ }
+
+ // SAFETY: The `name` points at a valid c string.
+ let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
+ if shrinker.is_null() {
+ // SAFETY: We initialized it, so its okay to destroy it.
+ unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
+ return Err(Error::from_errno(ret));
+ }
+
+ // SAFETY: We're about to register the shrinker, and these are the fields we need to
+ // initialize. (All other fields are already zeroed.)
+ unsafe {
+ (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
+ (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
+ (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
+ }
+
+ // SAFETY: The new shrinker has been fully initialized, so we can register it.
+ unsafe { bindings::shrinker_register(shrinker) };
+
+ // SAFETY: This initializes the pointer to the shrinker so that we can use it.
+ unsafe { self.inner.get().write(shrinker) };
+
+ Ok(())
+ }
+}
+
+/// A container that manages a page range in a vma.
+///
+/// The pages can be thought of as an array of booleans of whether the pages are usable. The
+/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
+/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
+/// immediately. Instead, it is made available to the memory shrinker to free it if the device is
+/// under memory pressure.
+///
+/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
+/// way to know whether an index ends up with true or false if a call to `use_range` races with
+/// another call to `stop_using_range` on a given index.
+///
+/// It's also okay for the two methods to race with themselves, e.g. if two threads call
+/// `use_range` on the same index, then that's fine and neither call will return until the page is
+/// allocated and mapped.
+///
+/// The methods that read or write to a range require that the page is marked as in use. So it is
+/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
+/// write to the page.
+#[pin_data(PinnedDrop)]
+pub(crate) struct ShrinkablePageRange {
+ /// Shrinker object registered with the kernel.
+ shrinker: &'static Shrinker,
+ /// Pid using this page range. Only used as debugging information.
+ pid: Pid,
+ /// The mm for the relevant process.
+ mm: ARef<Mm>,
+ /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
+ #[pin]
+ mm_lock: Mutex<()>,
+ /// Spinlock protecting changes to pages.
+ #[pin]
+ lock: SpinLock<Inner>,
+
+ /// Must not move, since page info has pointers back.
+ #[pin]
+ _pin: PhantomPinned,
+}
+
+struct Inner {
+ /// Array of pages.
+ ///
+ /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
+ /// ownership. To deal with that, we manage it using raw pointers.
+ pages: *mut PageInfo,
+ /// Length of the `pages` array.
+ size: usize,
+ /// The address of the vma to insert the pages into.
+ vma_addr: usize,
+}
+
+// SAFETY: proper locking is in place for `Inner`
+unsafe impl Send for Inner {}
+
+type StableMmGuard =
+ kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;
+
+/// An array element that describes the current state of a page.
+///
+/// There are three states:
+///
+/// * Free. The page is None. The `lru` element is not queued.
+/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
+/// * Used. The page is Some. The `lru` element is not queued.
+///
+/// When an element is available, the shrinker is able to free the page.
+#[repr(C)]
+struct PageInfo {
+ lru: bindings::list_head,
+ page: Option<Page>,
+ range: *const ShrinkablePageRange,
+}
+
+impl PageInfo {
+ /// # Safety
+ ///
+ /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
+ unsafe fn set_page(me: *mut PageInfo, page: Page) {
+ // SAFETY: This pointer offset is in bounds.
+ let ptr = unsafe { &raw mut (*me).page };
+
+ // SAFETY: The pointer is valid for writing, so also valid for reading.
+ if unsafe { (*ptr).is_some() } {
+ pr_err!("set_page called when there is already a page");
+ // SAFETY: We will initialize the page again below.
+ unsafe { ptr::drop_in_place(ptr) };
+ }
+
+ // SAFETY: The pointer is valid for writing.
+ unsafe { ptr::write(ptr, Some(page)) };
+ }
+
+ /// # Safety
+ ///
+ /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
+ unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
+ // SAFETY: This pointer offset is in bounds.
+ let ptr = unsafe { &raw const (*me).page };
+
+ // SAFETY: The pointer is valid for reading.
+ unsafe { (*ptr).as_ref() }
+ }
+
+ /// # Safety
+ ///
+ /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
+ unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
+ // SAFETY: This pointer offset is in bounds.
+ let ptr = unsafe { &raw mut (*me).page };
+
+ // SAFETY: The pointer is valid for reading.
+ unsafe { (*ptr).take() }
+ }
+
+ /// Add this page to the lru list, if not already in the list.
+ ///
+ /// # Safety
+ ///
+ /// The pointer must be valid, and it must be the right shrinker and nid.
+ unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
+ // SAFETY: This pointer offset is in bounds.
+ let lru_ptr = unsafe { &raw mut (*me).lru };
+ // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
+ unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
+ }
+
+ /// Remove this page from the lru list, if it is in the list.
+ ///
+ /// # Safety
+ ///
+ /// The pointer must be valid, and it must be the right shrinker and nid.
+ unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
+ // SAFETY: This pointer offset is in bounds.
+ let lru_ptr = unsafe { &raw mut (*me).lru };
+ // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
+ unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
+ }
+}
+
+impl ShrinkablePageRange {
+ /// Create a new `ShrinkablePageRange` using the given shrinker.
+ pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ shrinker,
+ pid: kernel::current!().pid(),
+ mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
+ mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
+ lock <- new_spinlock!(Inner {
+ pages: ptr::null_mut(),
+ size: 0,
+ vma_addr: 0,
+ }, "ShrinkablePageRange"),
+ _pin: PhantomPinned,
+ })
+ }
+
+ pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
+ // SAFETY: This extends the duration of the reference. Since this call happens before
+ // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
+ // until the returned guard is dropped. This ensures that the guard is valid until dropped.
+ let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };
+
+ mm_lock.try_lock()
+ }
+
+ /// Register a vma with this page range. Returns the size of the region.
+ pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
+ let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
+ let num_pages = num_bytes >> PAGE_SHIFT;
+
+ if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
+ pr_debug!("Failed to register with vma: invalid vma->vm_mm");
+ return Err(EINVAL);
+ }
+ if num_pages == 0 {
+ pr_debug!("Failed to register with vma: size zero");
+ return Err(EINVAL);
+ }
+
+ let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;
+
+ // SAFETY: This just initializes the pages array.
+ unsafe {
+ let self_ptr = self as *const ShrinkablePageRange;
+ for i in 0..num_pages {
+ let info = pages.as_mut_ptr().add(i);
+ (&raw mut (*info).range).write(self_ptr);
+ (&raw mut (*info).page).write(None);
+ let lru = &raw mut (*info).lru;
+ (&raw mut (*lru).next).write(lru);
+ (&raw mut (*lru).prev).write(lru);
+ }
+ }
+
+ let mut inner = self.lock.lock();
+ if inner.size > 0 {
+ pr_debug!("Failed to register with vma: already registered");
+ drop(inner);
+ return Err(EBUSY);
+ }
+
+ inner.pages = pages.into_raw_parts().0;
+ inner.size = num_pages;
+ inner.vma_addr = vma.start();
+
+ Ok(num_pages)
+ }
+
+ /// Make sure that the given pages are allocated and mapped.
+ ///
+ /// Must not be called from an atomic context.
+ pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
+ if start >= end {
+ return Ok(());
+ }
+ let mut inner = self.lock.lock();
+ assert!(end <= inner.size);
+
+ for i in start..end {
+ // SAFETY: This pointer offset is in bounds.
+ let page_info = unsafe { inner.pages.add(i) };
+
+ // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
+ if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
+ // Since we're going to use the page, we should remove it from the lru list so that
+ // the shrinker will not free it.
+ //
+ // SAFETY: The pointer is valid, and this is the right shrinker.
+ //
+ // The shrinker can't free the page between the check and this call to
+ // `list_lru_del` because we hold the lock.
+ unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
+ } else {
+ // We have to allocate a new page. Use the slow path.
+ drop(inner);
+ // SAFETY: `i < end <= inner.size` so `i` is in bounds.
+ match unsafe { self.use_page_slow(i) } {
+ Ok(()) => {}
+ Err(err) => {
+ pr_warn!("Error in use_page_slow: {:?}", err);
+ return Err(err);
+ }
+ }
+ inner = self.lock.lock();
+ }
+ }
+ Ok(())
+ }
+
+ /// Mark the given page as in use, slow path.
+ ///
+ /// Must not be called from an atomic context.
+ ///
+ /// # Safety
+ ///
+ /// Assumes that `i` is in bounds.
+ #[cold]
+ unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
+ let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;
+
+ let mm_mutex = self.mm_lock.lock();
+ let inner = self.lock.lock();
+
+ // SAFETY: This pointer offset is in bounds.
+ let page_info = unsafe { inner.pages.add(i) };
+
+ // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
+ if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
+ // The page was already there, or someone else added the page while we didn't hold the
+ // spinlock.
+ //
+ // SAFETY: The pointer is valid, and this is the right shrinker.
+ //
+ // The shrinker can't free the page between the check and this call to
+ // `list_lru_del` because we hold the lock.
+ unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
+ return Ok(());
+ }
+
+ let vma_addr = inner.vma_addr;
+ // Release the spinlock while we insert the page into the vma.
+ drop(inner);
+
+ // No overflow since we stay in bounds of the vma.
+ let user_page_addr = vma_addr + (i << PAGE_SHIFT);
+
+ // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
+ // a remote process. If the call to `mmput` races with the process shutting down, then the
+ // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
+ // happen until it returns to userspace. However, the caller might instead go to sleep and
+ // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
+ // middle of a shutdown process that won't complete until the `mm` is dropped. This can
+ // amount to a deadlock.
+ //
+ // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
+ // workqueue.
+ MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
+ .mmap_read_lock()
+ .vma_lookup(vma_addr)
+ .ok_or(ESRCH)?
+ .as_mixedmap_vma()
+ .ok_or(ESRCH)?
+ .vm_insert_page(user_page_addr, &new_page)
+ .inspect_err(|err| {
+ pr_warn!(
+ "Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
+ user_page_addr,
+ vma_addr,
+ i,
+ err
+ )
+ })?;
+
+ let inner = self.lock.lock();
+
+ // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
+ // can be written to since we hold the lock.
+ //
+ // We released and reacquired the spinlock since we checked that the page is null, but we
+ // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
+ // possible for someone else to have changed it since our check.
+ unsafe { PageInfo::set_page(page_info, new_page) };
+
+ drop(inner);
+ drop(mm_mutex);
+
+ Ok(())
+ }
+
+ /// If the given page is in use, then mark it as available so that the shrinker can free it.
+ ///
+ /// May be called from an atomic context.
+ pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
+ if start >= end {
+ return;
+ }
+ let inner = self.lock.lock();
+ assert!(end <= inner.size);
+
+ for i in (start..end).rev() {
+ // SAFETY: The pointer is in bounds.
+ let page_info = unsafe { inner.pages.add(i) };
+
+ // SAFETY: Okay for reading since we have the lock.
+ if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
+ // SAFETY: The pointer is valid, and it's the right shrinker.
+ unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
+ }
+ }
+ }
+
+ /// Helper for reading or writing to a range of bytes that may overlap with several pages.
+ ///
+ /// # Safety
+ ///
+ /// All pages touched by this operation must be in use for the duration of this call.
+ unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
+ where
+ T: FnMut(&Page, usize, usize) -> Result,
+ {
+ if size == 0 {
+ return Ok(());
+ }
+
+ let (pages, num_pages) = {
+ let inner = self.lock.lock();
+ (inner.pages, inner.size)
+ };
+ let num_bytes = num_pages << PAGE_SHIFT;
+
+ // Check that the request is within the buffer.
+ if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
+ return Err(EFAULT);
+ }
+
+ let mut page_index = offset >> PAGE_SHIFT;
+ offset &= PAGE_SIZE - 1;
+ while size > 0 {
+ let available = usize::min(size, PAGE_SIZE - offset);
+ // SAFETY: The pointer is in bounds.
+ let page_info = unsafe { pages.add(page_index) };
+ // SAFETY: The caller guarantees that this page is in the "in use" state for the
+ // duration of this call to `iterate`, so nobody will change the page.
+ let page = unsafe { PageInfo::get_page(page_info) };
+ if page.is_none() {
+ pr_warn!("Page is null!");
+ }
+ let page = page.ok_or(EFAULT)?;
+ cb(page, offset, available)?;
+ size -= available;
+ page_index += 1;
+ offset = 0;
+ }
+ Ok(())
+ }
+
+ /// Copy from userspace into this page range.
+ ///
+ /// # Safety
+ ///
+ /// All pages touched by this operation must be in use for the duration of this call.
+ pub(crate) unsafe fn copy_from_user_slice(
+ &self,
+ reader: &mut UserSliceReader,
+ offset: usize,
+ size: usize,
+ ) -> Result {
+ // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
+ unsafe {
+ self.iterate(offset, size, |page, offset, to_copy| {
+ page.copy_from_user_slice_raw(reader, offset, to_copy)
+ })
+ }
+ }
+
+ /// Copy from this page range into kernel space.
+ ///
+ /// # Safety
+ ///
+ /// All pages touched by this operation must be in use for the duration of this call.
+ pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
+ let mut out = MaybeUninit::<T>::uninit();
+ let mut out_offset = 0;
+ // SAFETY: `self.iterate` has the same safety requirements as `read`.
+ unsafe {
+ self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
+ // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
+ let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
+ // SAFETY: The pointer points is in-bounds of the `out` variable, so it is valid.
+ page.read_raw(obj_ptr, offset, to_copy)?;
+ out_offset += to_copy;
+ Ok(())
+ })?;
+ }
+ // SAFETY: We just initialised the data.
+ Ok(unsafe { out.assume_init() })
+ }
+
+ /// Copy from kernel space into this page range.
+ ///
+ /// # Safety
+ ///
+ /// All pages touched by this operation must be in use for the duration of this call.
+ pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
+ let mut obj_offset = 0;
+ // SAFETY: `self.iterate` has the same safety requirements as `write`.
+ unsafe {
+ self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
+ // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
+ let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
+ // SAFETY: We have a reference to the object, so the pointer is valid.
+ page.write_raw(obj_ptr, offset, to_copy)?;
+ obj_offset += to_copy;
+ Ok(())
+ })
+ }
+ }
+
+ /// Write zeroes to the given range.
+ ///
+ /// # Safety
+ ///
+ /// All pages touched by this operation must be in use for the duration of this call.
+ pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
+ // SAFETY: `self.iterate` has the same safety requirements as `copy_into`.
+ unsafe {
+ self.iterate(offset, size, |page, offset, len| {
+ page.fill_zero_raw(offset, len)
+ })
+ }
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for ShrinkablePageRange {
+ fn drop(self: Pin<&mut Self>) {
+ let (pages, size) = {
+ let lock = self.lock.lock();
+ (lock.pages, lock.size)
+ };
+
+ if size == 0 {
+ return;
+ }
+
+ // Note: This call is also necessary for the safety of `stable_trylock_mm`.
+ let mm_lock = self.mm_lock.lock();
+
+ // This is the destructor, so unlike the other methods, we only need to worry about races
+ // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
+ // shrinker, and after this loop, the shrinker will not access any of our pages since we
+ // removed them from the lru list.
+ for i in 0..size {
+ // SAFETY: Loop is in-bounds of the size.
+ let p_ptr = unsafe { pages.add(i) };
+ // SAFETY: No other readers, so we can read.
+ if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
+ // SAFETY: The pointer is valid and it's the right shrinker.
+ unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
+ }
+ }
+
+ drop(mm_lock);
+
+ // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore,
+ // all `size` elements are initialized. Also, the array is no longer shared with the
+ // shrinker due to the above loop.
+ drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
+ }
+}
+
+/// # Safety
+/// Called by the shrinker.
+#[no_mangle]
+unsafe extern "C" fn rust_shrink_count(
+ shrink: *mut bindings::shrinker,
+ _sc: *mut bindings::shrink_control,
+) -> c_ulong {
+ // SAFETY: We can access our own private data.
+ let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
+ // SAFETY: Accessing the lru list is okay. Just an FFI call.
+ unsafe { bindings::list_lru_count(list_lru) }
+}
+
+/// # Safety
+/// Called by the shrinker.
+#[no_mangle]
+unsafe extern "C" fn rust_shrink_scan(
+ shrink: *mut bindings::shrinker,
+ sc: *mut bindings::shrink_control,
+) -> c_ulong {
+ // SAFETY: We can access our own private data.
+ let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
+ // SAFETY: Caller guarantees that it is safe to read this field.
+ let nr_to_scan = unsafe { (*sc).nr_to_scan };
+ // SAFETY: Accessing the lru list is okay. Just an FFI call.
+ unsafe {
+ bindings::list_lru_walk(
+ list_lru,
+ Some(bindings::rust_shrink_free_page_wrap),
+ ptr::null_mut(),
+ nr_to_scan,
+ )
+ }
+}
+
+const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
+const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
+
+/// # Safety
+/// Called by the shrinker.
+#[no_mangle]
+unsafe extern "C" fn rust_shrink_free_page(
+ item: *mut bindings::list_head,
+ lru: *mut bindings::list_lru_one,
+ _cb_arg: *mut c_void,
+) -> bindings::lru_status {
+ // Fields that should survive after unlocking the lru lock.
+ let page;
+ let page_index;
+ let mm;
+ let mmap_read;
+ let mm_mutex;
+ let vma_addr;
+
+ {
+ // CAST: The `list_head` field is first in `PageInfo`.
+ let info = item as *mut PageInfo;
+ // SAFETY: The `range` field of `PageInfo` is immutable.
+ let range = unsafe { &*((*info).range) };
+
+ mm = match range.mm.mmget_not_zero() {
+ Some(mm) => MmWithUser::into_mmput_async(mm),
+ None => return LRU_SKIP,
+ };
+
+ mm_mutex = match range.stable_trylock_mm() {
+ Some(guard) => guard,
+ None => return LRU_SKIP,
+ };
+
+ mmap_read = match mm.mmap_read_trylock() {
+ Some(guard) => guard,
+ None => return LRU_SKIP,
+ };
+
+ // We can't lock it normally here, since we hold the lru lock.
+ let inner = match range.lock.try_lock() {
+ Some(inner) => inner,
+ None => return LRU_SKIP,
+ };
+
+ // SAFETY: The item is in this lru list, so it's okay to remove it.
+ unsafe { bindings::list_lru_isolate(lru, item) };
+
+ // SAFETY: Both pointers are in bounds of the same allocation.
+ page_index = unsafe { info.offset_from(inner.pages) } as usize;
+
+ // SAFETY: We hold the spinlock, so we can take the page.
+ //
+ // This sets the page pointer to zero before we unmap it from the vma. However, we call
+ // `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
+ // insert a new page until after our call to `zap_page_range`.
+ page = unsafe { PageInfo::take_page(info) };
+ vma_addr = inner.vma_addr;
+
+ // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
+ // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
+ // `mm_mutex` which is kept alive by holding the lock.
+ }
+
+ // SAFETY: The lru lock is locked when this method is called.
+ unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };
+
+ if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
+ let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
+ vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
+ }
+
+ drop(mmap_read);
+ drop(mm_mutex);
+ drop(mm);
+ drop(page);
+
+ // SAFETY: We just unlocked the lru lock, but it should be locked when we return.
+ unsafe { bindings::spin_lock(&raw mut (*lru).lock) };
+
+ LRU_REMOVED_ENTRY
+}
diff --git a/drivers/android/binder/page_range_helper.c b/drivers/android/binder/page_range_helper.c
new file mode 100644
index 000000000000..496887723ee0
--- /dev/null
+++ b/drivers/android/binder/page_range_helper.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* C helper for page_range.rs to work around a CFI violation.
+ *
+ * Bindgen currently pretends that `enum lru_status` is the same as an integer.
+ * This assumption is fine ABI-wise, but once you add CFI to the mix, it
+ * triggers a CFI violation because `enum lru_status` gets a different CFI tag.
+ *
+ * This file contains a workaround until bindgen can be fixed.
+ *
+ * Copyright (C) 2025 Google LLC.
+ */
+#include "page_range_helper.h"
+
+unsigned int rust_shrink_free_page(struct list_head *item,
+ struct list_lru_one *list,
+ void *cb_arg);
+
+enum lru_status
+rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list,
+ void *cb_arg)
+{
+ return rust_shrink_free_page(item, list, cb_arg);
+}
diff --git a/drivers/android/binder/page_range_helper.h b/drivers/android/binder/page_range_helper.h
new file mode 100644
index 000000000000..18dd2dd117b2
--- /dev/null
+++ b/drivers/android/binder/page_range_helper.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Google, Inc.
+ */
+
+#ifndef _LINUX_PAGE_RANGE_HELPER_H
+#define _LINUX_PAGE_RANGE_HELPER_H
+
+#include <linux/list_lru.h>
+
+enum lru_status
+rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list,
+ void *cb_arg);
+
+#endif /* _LINUX_PAGE_RANGE_HELPER_H */
diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs
new file mode 100644
index 000000000000..132055b4790f
--- /dev/null
+++ b/drivers/android/binder/process.rs
@@ -0,0 +1,1745 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! This module defines the `Process` type, which represents a process using a particular binder
+//! context.
+//!
+//! The `Process` object keeps track of all of the resources that this process owns in the binder
+//! context.
+//!
+//! There is one `Process` object for each binder fd that a process has opened, so processes using
+//! several binder contexts have several `Process` objects. This ensures that the contexts are
+//! fully separated.
+
+use core::mem::take;
+
+use kernel::{
+ bindings,
+ cred::Credential,
+ error::Error,
+ fs::file::{self, File},
+ id_pool::IdPool,
+ list::{List, ListArc, ListArcField, ListLinks},
+ mm,
+ prelude::*,
+ rbtree::{self, RBTree, RBTreeNode, RBTreeNodeReservation},
+ seq_file::SeqFile,
+ seq_print,
+ sync::poll::PollTable,
+ sync::{
+ lock::{spinlock::SpinLockBackend, Guard},
+ Arc, ArcBorrow, CondVar, CondVarTimeoutResult, Mutex, SpinLock, UniqueArc,
+ },
+ task::Task,
+ types::ARef,
+ uaccess::{UserSlice, UserSliceReader},
+ uapi,
+ workqueue::{self, Work},
+};
+
+use crate::{
+ allocation::{Allocation, AllocationInfo, NewAllocation},
+ context::Context,
+ defs::*,
+ error::{BinderError, BinderResult},
+ node::{CouldNotDeliverCriticalIncrement, CritIncrWrapper, Node, NodeDeath, NodeRef},
+ page_range::ShrinkablePageRange,
+ range_alloc::{RangeAllocator, ReserveNew, ReserveNewArgs},
+ stats::BinderStats,
+ thread::{PushWorkRes, Thread},
+ BinderfsProcFile, DArc, DLArc, DTRWrap, DeliverToRead,
+};
+
+#[path = "freeze.rs"]
+mod freeze;
+use self::freeze::{FreezeCookie, FreezeListener};
+
+struct Mapping {
+ address: usize,
+ alloc: RangeAllocator<AllocationInfo>,
+}
+
+impl Mapping {
+ fn new(address: usize, size: usize) -> Self {
+ Self {
+ address,
+ alloc: RangeAllocator::new(size),
+ }
+ }
+}
+
+// bitflags for defer_work.
+const PROC_DEFER_FLUSH: u8 = 1;
+const PROC_DEFER_RELEASE: u8 = 2;
+
+#[derive(Copy, Clone)]
+pub(crate) enum IsFrozen {
+ Yes,
+ No,
+ InProgress,
+}
+
+impl IsFrozen {
+ /// Whether incoming transactions should be rejected due to freeze.
+ pub(crate) fn is_frozen(self) -> bool {
+ match self {
+ IsFrozen::Yes => true,
+ IsFrozen::No => false,
+ IsFrozen::InProgress => true,
+ }
+ }
+
+ /// Whether freeze notifications consider this process frozen.
+ pub(crate) fn is_fully_frozen(self) -> bool {
+ match self {
+ IsFrozen::Yes => true,
+ IsFrozen::No => false,
+ IsFrozen::InProgress => false,
+ }
+ }
+}
+
+/// The fields of `Process` protected by the spinlock.
+pub(crate) struct ProcessInner {
+ is_manager: bool,
+ pub(crate) is_dead: bool,
+ threads: RBTree<i32, Arc<Thread>>,
+ /// INVARIANT: Threads pushed to this list must be owned by this process.
+ ready_threads: List<Thread>,
+ nodes: RBTree<u64, DArc<Node>>,
+ mapping: Option<Mapping>,
+ work: List<DTRWrap<dyn DeliverToRead>>,
+ delivered_deaths: List<DTRWrap<NodeDeath>, 2>,
+
+ /// The number of requested threads that haven't registered yet.
+ requested_thread_count: u32,
+ /// The maximum number of threads used by the process thread pool.
+ max_threads: u32,
+ /// The number of threads the started and registered with the thread pool.
+ started_thread_count: u32,
+
+ /// Bitmap of deferred work to do.
+ defer_work: u8,
+
+ /// Number of transactions to be transmitted before processes in freeze_wait
+ /// are woken up.
+ outstanding_txns: u32,
+ /// Process is frozen and unable to service binder transactions.
+ pub(crate) is_frozen: IsFrozen,
+ /// Process received sync transactions since last frozen.
+ pub(crate) sync_recv: bool,
+ /// Process received async transactions since last frozen.
+ pub(crate) async_recv: bool,
+ pub(crate) binderfs_file: Option<BinderfsProcFile>,
+ /// Check for oneway spam
+ oneway_spam_detection_enabled: bool,
+}
+
+impl ProcessInner {
+ fn new() -> Self {
+ Self {
+ is_manager: false,
+ is_dead: false,
+ threads: RBTree::new(),
+ ready_threads: List::new(),
+ mapping: None,
+ nodes: RBTree::new(),
+ work: List::new(),
+ delivered_deaths: List::new(),
+ requested_thread_count: 0,
+ max_threads: 0,
+ started_thread_count: 0,
+ defer_work: 0,
+ outstanding_txns: 0,
+ is_frozen: IsFrozen::No,
+ sync_recv: false,
+ async_recv: false,
+ binderfs_file: None,
+ oneway_spam_detection_enabled: false,
+ }
+ }
+
+ /// Schedule the work item for execution on this process.
+ ///
+ /// If any threads are ready for work, then the work item is given directly to that thread and
+ /// it is woken up. Otherwise, it is pushed to the process work list.
+ ///
+ /// This call can fail only if the process is dead. In this case, the work item is returned to
+ /// the caller so that the caller can drop it after releasing the inner process lock. This is
+ /// necessary since the destructor of `Transaction` will take locks that can't necessarily be
+ /// taken while holding the inner process lock.
+ pub(crate) fn push_work(
+ &mut self,
+ work: DLArc<dyn DeliverToRead>,
+ ) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> {
+ // Try to find a ready thread to which to push the work.
+ if let Some(thread) = self.ready_threads.pop_front() {
+ // Push to thread while holding state lock. This prevents the thread from giving up
+ // (for example, because of a signal) when we're about to deliver work.
+ match thread.push_work(work) {
+ PushWorkRes::Ok => Ok(()),
+ PushWorkRes::FailedDead(work) => Err((BinderError::new_dead(), work)),
+ }
+ } else if self.is_dead {
+ Err((BinderError::new_dead(), work))
+ } else {
+ let sync = work.should_sync_wakeup();
+
+ // Didn't find a thread waiting for proc work; this can happen
+ // in two scenarios:
+ // 1. All threads are busy handling transactions
+ // In that case, one of those threads should call back into
+ // the kernel driver soon and pick up this work.
+ // 2. Threads are using the (e)poll interface, in which case
+ // they may be blocked on the waitqueue without having been
+ // added to waiting_threads. For this case, we just iterate
+ // over all threads not handling transaction work, and
+ // wake them all up. We wake all because we don't know whether
+ // a thread that called into (e)poll is handling non-binder
+ // work currently.
+ self.work.push_back(work);
+
+ // Wake up polling threads, if any.
+ for thread in self.threads.values() {
+ thread.notify_if_poll_ready(sync);
+ }
+
+ Ok(())
+ }
+ }
+
+ pub(crate) fn remove_node(&mut self, ptr: u64) {
+ self.nodes.remove(&ptr);
+ }
+
+ /// Updates the reference count on the given node.
+ pub(crate) fn update_node_refcount(
+ &mut self,
+ node: &DArc<Node>,
+ inc: bool,
+ strong: bool,
+ count: usize,
+ othread: Option<&Thread>,
+ ) {
+ let push = node.update_refcount_locked(inc, strong, count, self);
+
+ // If we decided that we need to push work, push either to the process or to a thread if
+ // one is specified.
+ if let Some(node) = push {
+ if let Some(thread) = othread {
+ thread.push_work_deferred(node);
+ } else {
+ let _ = self.push_work(node);
+ // Nothing to do: `push_work` may fail if the process is dead, but that's ok as in
+ // that case, it doesn't care about the notification.
+ }
+ }
+ }
+
+ pub(crate) fn new_node_ref(
+ &mut self,
+ node: DArc<Node>,
+ strong: bool,
+ thread: Option<&Thread>,
+ ) -> NodeRef {
+ self.update_node_refcount(&node, true, strong, 1, thread);
+ let strong_count = if strong { 1 } else { 0 };
+ NodeRef::new(node, strong_count, 1 - strong_count)
+ }
+
+ pub(crate) fn new_node_ref_with_thread(
+ &mut self,
+ node: DArc<Node>,
+ strong: bool,
+ thread: &Thread,
+ wrapper: Option<CritIncrWrapper>,
+ ) -> Result<NodeRef, CouldNotDeliverCriticalIncrement> {
+ let push = match wrapper {
+ None => node
+ .incr_refcount_allow_zero2one(strong, self)?
+ .map(|node| node as _),
+ Some(wrapper) => node.incr_refcount_allow_zero2one_with_wrapper(strong, wrapper, self),
+ };
+ if let Some(node) = push {
+ thread.push_work_deferred(node);
+ }
+ let strong_count = if strong { 1 } else { 0 };
+ Ok(NodeRef::new(node, strong_count, 1 - strong_count))
+ }
+
+ /// Returns an existing node with the given pointer and cookie, if one exists.
+ ///
+ /// Returns an error if a node with the given pointer but a different cookie exists.
+ fn get_existing_node(&self, ptr: u64, cookie: u64) -> Result<Option<DArc<Node>>> {
+ match self.nodes.get(&ptr) {
+ None => Ok(None),
+ Some(node) => {
+ let (_, node_cookie) = node.get_id();
+ if node_cookie == cookie {
+ Ok(Some(node.clone()))
+ } else {
+ Err(EINVAL)
+ }
+ }
+ }
+ }
+
+ fn register_thread(&mut self) -> bool {
+ if self.requested_thread_count == 0 {
+ return false;
+ }
+
+ self.requested_thread_count -= 1;
+ self.started_thread_count += 1;
+ true
+ }
+
+ /// Finds a delivered death notification with the given cookie, removes it from the thread's
+ /// delivered list, and returns it.
+ fn pull_delivered_death(&mut self, cookie: u64) -> Option<DArc<NodeDeath>> {
+ let mut cursor = self.delivered_deaths.cursor_front();
+ while let Some(next) = cursor.peek_next() {
+ if next.cookie == cookie {
+ return Some(next.remove().into_arc());
+ }
+ cursor.move_next();
+ }
+ None
+ }
+
+ pub(crate) fn death_delivered(&mut self, death: DArc<NodeDeath>) {
+ if let Some(death) = ListArc::try_from_arc_or_drop(death) {
+ self.delivered_deaths.push_back(death);
+ } else {
+ pr_warn!("Notification added to `delivered_deaths` twice.");
+ }
+ }
+
+ pub(crate) fn add_outstanding_txn(&mut self) {
+ self.outstanding_txns += 1;
+ }
+
+ fn txns_pending_locked(&self) -> bool {
+ if self.outstanding_txns > 0 {
+ return true;
+ }
+ for thread in self.threads.values() {
+ if thread.has_current_transaction() {
+ return true;
+ }
+ }
+ false
+ }
+}
+
+/// Used to keep track of a node that this process has a handle to.
+#[pin_data]
+pub(crate) struct NodeRefInfo {
+ debug_id: usize,
+ /// The refcount that this process owns to the node.
+ node_ref: ListArcField<NodeRef, { Self::LIST_PROC }>,
+ death: ListArcField<Option<DArc<NodeDeath>>, { Self::LIST_PROC }>,
+ /// Cookie of the active freeze listener for this node.
+ freeze: ListArcField<Option<FreezeCookie>, { Self::LIST_PROC }>,
+ /// Used to store this `NodeRefInfo` in the node's `refs` list.
+ #[pin]
+ links: ListLinks<{ Self::LIST_NODE }>,
+ /// The handle for this `NodeRefInfo`.
+ handle: u32,
+ /// The process that has a handle to the node.
+ pub(crate) process: Arc<Process>,
+}
+
+impl NodeRefInfo {
+ /// The id used for the `Node::refs` list.
+ pub(crate) const LIST_NODE: u64 = 0x2da16350fb724a10;
+ /// The id used for the `ListArc` in `ProcessNodeRefs`.
+ const LIST_PROC: u64 = 0xd703a5263dcc8650;
+
+ fn new(node_ref: NodeRef, handle: u32, process: Arc<Process>) -> impl PinInit<Self> {
+ pin_init!(Self {
+ debug_id: super::next_debug_id(),
+ node_ref: ListArcField::new(node_ref),
+ death: ListArcField::new(None),
+ freeze: ListArcField::new(None),
+ links <- ListLinks::new(),
+ handle,
+ process,
+ })
+ }
+
+ kernel::list::define_list_arc_field_getter! {
+ pub(crate) fn death(&mut self<{Self::LIST_PROC}>) -> &mut Option<DArc<NodeDeath>> { death }
+ pub(crate) fn freeze(&mut self<{Self::LIST_PROC}>) -> &mut Option<FreezeCookie> { freeze }
+ pub(crate) fn node_ref(&mut self<{Self::LIST_PROC}>) -> &mut NodeRef { node_ref }
+ pub(crate) fn node_ref2(&self<{Self::LIST_PROC}>) -> &NodeRef { node_ref }
+ }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<{Self::LIST_NODE}> for NodeRefInfo { untracked; }
+ impl ListArcSafe<{Self::LIST_PROC}> for NodeRefInfo { untracked; }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<{Self::LIST_NODE}> for NodeRefInfo {
+ using ListLinks { self.links };
+ }
+}
+
+/// Keeps track of references this process has to nodes owned by other processes.
+///
+/// TODO: Currently, the rbtree requires two allocations per node reference, and two tree
+/// traversals to look up a node by `Node::global_id`. Once the rbtree is more powerful, these
+/// extra costs should be eliminated.
+struct ProcessNodeRefs {
+ /// Used to look up nodes using the 32-bit id that this process knows it by.
+ by_handle: RBTree<u32, ListArc<NodeRefInfo, { NodeRefInfo::LIST_PROC }>>,
+ /// Used to quickly find unused ids in `by_handle`.
+ handle_is_present: IdPool,
+ /// Used to look up nodes without knowing their local 32-bit id. The usize is the address of
+ /// the underlying `Node` struct as returned by `Node::global_id`.
+ by_node: RBTree<usize, u32>,
+ /// Used to look up a `FreezeListener` by cookie.
+ ///
+ /// There might be multiple freeze listeners for the same node, but at most one of them is
+ /// active.
+ freeze_listeners: RBTree<FreezeCookie, FreezeListener>,
+}
+
+impl ProcessNodeRefs {
+ fn new() -> Self {
+ Self {
+ by_handle: RBTree::new(),
+ handle_is_present: IdPool::new(),
+ by_node: RBTree::new(),
+ freeze_listeners: RBTree::new(),
+ }
+ }
+}
+
+/// A process using binder.
+///
+/// Strictly speaking, there can be multiple of these per process. There is one for each binder fd
+/// that a process has opened, so processes using several binder contexts have several `Process`
+/// objects. This ensures that the contexts are fully separated.
+#[pin_data]
+pub(crate) struct Process {
+ pub(crate) ctx: Arc<Context>,
+
+ // The task leader (process).
+ pub(crate) task: ARef<Task>,
+
+ // Credential associated with file when `Process` is created.
+ pub(crate) cred: ARef<Credential>,
+
+ #[pin]
+ pub(crate) inner: SpinLock<ProcessInner>,
+
+ #[pin]
+ pub(crate) pages: ShrinkablePageRange,
+
+ // Waitqueue of processes waiting for all outstanding transactions to be
+ // processed.
+ #[pin]
+ freeze_wait: CondVar,
+
+ // Node references are in a different lock to avoid recursive acquisition when
+ // incrementing/decrementing a node in another process.
+ #[pin]
+ node_refs: Mutex<ProcessNodeRefs>,
+
+ // Work node for deferred work item.
+ #[pin]
+ defer_work: Work<Process>,
+
+ // Links for process list in Context.
+ #[pin]
+ links: ListLinks,
+
+ pub(crate) stats: BinderStats,
+}
+
+kernel::impl_has_work! {
+ impl HasWork<Process> for Process { self.defer_work }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for Process { untracked; }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<0> for Process {
+ using ListLinks { self.links };
+ }
+}
+
+impl workqueue::WorkItem for Process {
+ type Pointer = Arc<Process>;
+
+ fn run(me: Arc<Self>) {
+ let defer;
+ {
+ let mut inner = me.inner.lock();
+ defer = inner.defer_work;
+ inner.defer_work = 0;
+ }
+
+ if defer & PROC_DEFER_FLUSH != 0 {
+ me.deferred_flush();
+ }
+ if defer & PROC_DEFER_RELEASE != 0 {
+ me.deferred_release();
+ }
+ }
+}
+
+impl Process {
+ fn new(ctx: Arc<Context>, cred: ARef<Credential>) -> Result<Arc<Self>> {
+ let current = kernel::current!();
+ let list_process = ListArc::pin_init::<Error>(
+ try_pin_init!(Process {
+ ctx,
+ cred,
+ inner <- kernel::new_spinlock!(ProcessInner::new(), "Process::inner"),
+ pages <- ShrinkablePageRange::new(&super::BINDER_SHRINKER),
+ node_refs <- kernel::new_mutex!(ProcessNodeRefs::new(), "Process::node_refs"),
+ freeze_wait <- kernel::new_condvar!("Process::freeze_wait"),
+ task: current.group_leader().into(),
+ defer_work <- kernel::new_work!("Process::defer_work"),
+ links <- ListLinks::new(),
+ stats: BinderStats::new(),
+ }),
+ GFP_KERNEL,
+ )?;
+
+ let process = list_process.clone_arc();
+ process.ctx.register_process(list_process);
+
+ Ok(process)
+ }
+
+ pub(crate) fn pid_in_current_ns(&self) -> kernel::task::Pid {
+ self.task.tgid_nr_ns(None)
+ }
+
+ #[inline(never)]
+ pub(crate) fn debug_print_stats(&self, m: &SeqFile, ctx: &Context) -> Result<()> {
+ seq_print!(m, "proc {}\n", self.pid_in_current_ns());
+ seq_print!(m, "context {}\n", &*ctx.name);
+
+ let inner = self.inner.lock();
+ seq_print!(m, " threads: {}\n", inner.threads.iter().count());
+ seq_print!(
+ m,
+ " requested threads: {}+{}/{}\n",
+ inner.requested_thread_count,
+ inner.started_thread_count,
+ inner.max_threads,
+ );
+ if let Some(mapping) = &inner.mapping {
+ seq_print!(
+ m,
+ " free oneway space: {}\n",
+ mapping.alloc.free_oneway_space()
+ );
+ seq_print!(m, " buffers: {}\n", mapping.alloc.count_buffers());
+ }
+ seq_print!(
+ m,
+ " outstanding transactions: {}\n",
+ inner.outstanding_txns
+ );
+ seq_print!(m, " nodes: {}\n", inner.nodes.iter().count());
+ drop(inner);
+
+ {
+ let mut refs = self.node_refs.lock();
+ let (mut count, mut weak, mut strong) = (0, 0, 0);
+ for r in refs.by_handle.values_mut() {
+ let node_ref = r.node_ref();
+ let (nstrong, nweak) = node_ref.get_count();
+ count += 1;
+ weak += nweak;
+ strong += nstrong;
+ }
+ seq_print!(m, " refs: {count} s {strong} w {weak}\n");
+ }
+
+ self.stats.debug_print(" ", m);
+
+ Ok(())
+ }
+
+ #[inline(never)]
+ pub(crate) fn debug_print(&self, m: &SeqFile, ctx: &Context, print_all: bool) -> Result<()> {
+ seq_print!(m, "proc {}\n", self.pid_in_current_ns());
+ seq_print!(m, "context {}\n", &*ctx.name);
+
+ let mut all_threads = KVec::new();
+ let mut all_nodes = KVec::new();
+ loop {
+ let inner = self.inner.lock();
+ let num_threads = inner.threads.iter().count();
+ let num_nodes = inner.nodes.iter().count();
+
+ if all_threads.capacity() < num_threads || all_nodes.capacity() < num_nodes {
+ drop(inner);
+ all_threads.reserve(num_threads, GFP_KERNEL)?;
+ all_nodes.reserve(num_nodes, GFP_KERNEL)?;
+ continue;
+ }
+
+ for thread in inner.threads.values() {
+ assert!(all_threads.len() < all_threads.capacity());
+ let _ = all_threads.push(thread.clone(), GFP_ATOMIC);
+ }
+
+ for node in inner.nodes.values() {
+ assert!(all_nodes.len() < all_nodes.capacity());
+ let _ = all_nodes.push(node.clone(), GFP_ATOMIC);
+ }
+
+ break;
+ }
+
+ for thread in all_threads {
+ thread.debug_print(m, print_all)?;
+ }
+
+ let mut inner = self.inner.lock();
+ for node in all_nodes {
+ if print_all || node.has_oneway_transaction(&mut inner) {
+ node.full_debug_print(m, &mut inner)?;
+ }
+ }
+ drop(inner);
+
+ if print_all {
+ let mut refs = self.node_refs.lock();
+ for r in refs.by_handle.values_mut() {
+ let node_ref = r.node_ref();
+ let dead = node_ref.node.owner.inner.lock().is_dead;
+ let (strong, weak) = node_ref.get_count();
+ let debug_id = node_ref.node.debug_id;
+
+ seq_print!(
+ m,
+ " ref {}: desc {} {}node {debug_id} s {strong} w {weak}",
+ r.debug_id,
+ r.handle,
+ if dead { "dead " } else { "" }
+ );
+ }
+ }
+
+ let inner = self.inner.lock();
+ for work in &inner.work {
+ work.debug_print(m, " ", " pending transaction ")?;
+ }
+ for _death in &inner.delivered_deaths {
+ seq_print!(m, " has delivered dead binder\n");
+ }
+ if let Some(mapping) = &inner.mapping {
+ mapping.alloc.debug_print(m)?;
+ }
+ drop(inner);
+
+ Ok(())
+ }
+
+ /// Attempts to fetch a work item from the process queue.
+ pub(crate) fn get_work(&self) -> Option<DLArc<dyn DeliverToRead>> {
+ self.inner.lock().work.pop_front()
+ }
+
+ /// Attempts to fetch a work item from the process queue. If none is available, it registers the
+ /// given thread as ready to receive work directly.
+ ///
+ /// This must only be called when the thread is not participating in a transaction chain; when
+ /// it is, work will always be delivered directly to the thread (and not through the process
+ /// queue).
+ pub(crate) fn get_work_or_register<'a>(
+ &'a self,
+ thread: &'a Arc<Thread>,
+ ) -> GetWorkOrRegister<'a> {
+ let mut inner = self.inner.lock();
+ // Try to get work from the process queue.
+ if let Some(work) = inner.work.pop_front() {
+ return GetWorkOrRegister::Work(work);
+ }
+
+ // Register the thread as ready.
+ GetWorkOrRegister::Register(Registration::new(thread, &mut inner))
+ }
+
+ fn get_current_thread(self: ArcBorrow<'_, Self>) -> Result<Arc<Thread>> {
+ let id = {
+ let current = kernel::current!();
+ if !core::ptr::eq(current.group_leader(), &*self.task) {
+ pr_err!("get_current_thread was called from the wrong process.");
+ return Err(EINVAL);
+ }
+ current.pid()
+ };
+
+ {
+ let inner = self.inner.lock();
+ if let Some(thread) = inner.threads.get(&id) {
+ return Ok(thread.clone());
+ }
+ }
+
+ // Allocate a new `Thread` without holding any locks.
+ let reservation = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ let ta: Arc<Thread> = Thread::new(id, self.into())?;
+
+ let mut inner = self.inner.lock();
+ match inner.threads.entry(id) {
+ rbtree::Entry::Vacant(entry) => {
+ entry.insert(ta.clone(), reservation);
+ Ok(ta)
+ }
+ rbtree::Entry::Occupied(_entry) => {
+ pr_err!("Cannot create two threads with the same id.");
+ Err(EINVAL)
+ }
+ }
+ }
+
+ pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult {
+ // If push_work fails, drop the work item outside the lock.
+ let res = self.inner.lock().push_work(work);
+ match res {
+ Ok(()) => Ok(()),
+ Err((err, work)) => {
+ drop(work);
+ Err(err)
+ }
+ }
+ }
+
+ fn set_as_manager(
+ self: ArcBorrow<'_, Self>,
+ info: Option<FlatBinderObject>,
+ thread: &Thread,
+ ) -> Result {
+ let (ptr, cookie, flags) = if let Some(obj) = info {
+ (
+ // SAFETY: The object type for this ioctl is implicitly `BINDER_TYPE_BINDER`, so it
+ // is safe to access the `binder` field.
+ unsafe { obj.__bindgen_anon_1.binder },
+ obj.cookie,
+ obj.flags,
+ )
+ } else {
+ (0, 0, 0)
+ };
+ let node_ref = self.get_node(ptr, cookie, flags as _, true, thread)?;
+ let node = node_ref.node.clone();
+ self.ctx.set_manager_node(node_ref)?;
+ self.inner.lock().is_manager = true;
+
+ // Force the state of the node to prevent the delivery of acquire/increfs.
+ let mut owner_inner = node.owner.inner.lock();
+ node.force_has_count(&mut owner_inner);
+ Ok(())
+ }
+
+ fn get_node_inner(
+ self: ArcBorrow<'_, Self>,
+ ptr: u64,
+ cookie: u64,
+ flags: u32,
+ strong: bool,
+ thread: &Thread,
+ wrapper: Option<CritIncrWrapper>,
+ ) -> Result<Result<NodeRef, CouldNotDeliverCriticalIncrement>> {
+ // Try to find an existing node.
+ {
+ let mut inner = self.inner.lock();
+ if let Some(node) = inner.get_existing_node(ptr, cookie)? {
+ return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper));
+ }
+ }
+
+ // Allocate the node before reacquiring the lock.
+ let node = DTRWrap::arc_pin_init(Node::new(ptr, cookie, flags, self.into()))?.into_arc();
+ let rbnode = RBTreeNode::new(ptr, node.clone(), GFP_KERNEL)?;
+ let mut inner = self.inner.lock();
+ if let Some(node) = inner.get_existing_node(ptr, cookie)? {
+ return Ok(inner.new_node_ref_with_thread(node, strong, thread, wrapper));
+ }
+
+ inner.nodes.insert(rbnode);
+ // This can only fail if someone has already pushed the node to a list, but we just created
+ // it and still hold the lock, so it can't fail right now.
+ let node_ref = inner
+ .new_node_ref_with_thread(node, strong, thread, wrapper)
+ .unwrap();
+
+ Ok(Ok(node_ref))
+ }
+
+ pub(crate) fn get_node(
+ self: ArcBorrow<'_, Self>,
+ ptr: u64,
+ cookie: u64,
+ flags: u32,
+ strong: bool,
+ thread: &Thread,
+ ) -> Result<NodeRef> {
+ let mut wrapper = None;
+ for _ in 0..2 {
+ match self.get_node_inner(ptr, cookie, flags, strong, thread, wrapper) {
+ Err(err) => return Err(err),
+ Ok(Ok(node_ref)) => return Ok(node_ref),
+ Ok(Err(CouldNotDeliverCriticalIncrement)) => {
+ wrapper = Some(CritIncrWrapper::new()?);
+ }
+ }
+ }
+ // We only get a `CouldNotDeliverCriticalIncrement` error if `wrapper` is `None`, so the
+ // loop should run at most twice.
+ unreachable!()
+ }
+
+ pub(crate) fn insert_or_update_handle(
+ self: ArcBorrow<'_, Process>,
+ node_ref: NodeRef,
+ is_manager: bool,
+ ) -> Result<u32> {
+ {
+ let mut refs = self.node_refs.lock();
+
+ // Do a lookup before inserting.
+ if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) {
+ let handle = *handle_ref;
+ let info = refs.by_handle.get_mut(&handle).unwrap();
+ info.node_ref().absorb(node_ref);
+ return Ok(handle);
+ }
+ }
+
+ // Reserve memory for tree nodes.
+ let reserve1 = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ let reserve2 = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ let info = UniqueArc::new_uninit(GFP_KERNEL)?;
+
+ let mut refs_lock = self.node_refs.lock();
+ let mut refs = &mut *refs_lock;
+
+ let (unused_id, by_handle_slot) = loop {
+ // ID 0 may only be used by the manager.
+ let start = if is_manager { 0 } else { 1 };
+
+ if let Some(res) = refs.handle_is_present.find_unused_id(start) {
+ match refs.by_handle.entry(res.as_u32()) {
+ rbtree::Entry::Vacant(entry) => break (res, entry),
+ rbtree::Entry::Occupied(_) => {
+ pr_err!("Detected mismatch between handle_is_present and by_handle");
+ res.acquire();
+ kernel::warn_on!(true);
+ return Err(EINVAL);
+ }
+ }
+ }
+
+ let grow_request = refs.handle_is_present.grow_request().ok_or(ENOMEM)?;
+ drop(refs_lock);
+ let resizer = grow_request.realloc(GFP_KERNEL)?;
+ refs_lock = self.node_refs.lock();
+ refs = &mut *refs_lock;
+ refs.handle_is_present.grow(resizer);
+ };
+ let handle = unused_id.as_u32();
+
+ // Do a lookup again as node may have been inserted before the lock was reacquired.
+ if let Some(handle_ref) = refs.by_node.get(&node_ref.node.global_id()) {
+ let handle = *handle_ref;
+ let info = refs.by_handle.get_mut(&handle).unwrap();
+ info.node_ref().absorb(node_ref);
+ return Ok(handle);
+ }
+
+ let gid = node_ref.node.global_id();
+ let (info_proc, info_node) = {
+ let info_init = NodeRefInfo::new(node_ref, handle, self.into());
+ match info.pin_init_with(info_init) {
+ Ok(info) => ListArc::pair_from_pin_unique(info),
+ // error is infallible
+ Err(err) => match err {},
+ }
+ };
+
+ // Ensure the process is still alive while we insert a new reference.
+ //
+ // This releases the lock before inserting the nodes, but since `is_dead` is set as the
+ // first thing in `deferred_release`, process cleanup will not miss the items inserted into
+ // `refs` below.
+ if self.inner.lock().is_dead {
+ return Err(ESRCH);
+ }
+
+ // SAFETY: `info_proc` and `info_node` reference the same node, so we are inserting
+ // `info_node` into the right node's `refs` list.
+ unsafe { info_proc.node_ref2().node.insert_node_info(info_node) };
+
+ refs.by_node.insert(reserve1.into_node(gid, handle));
+ by_handle_slot.insert(info_proc, reserve2);
+ unused_id.acquire();
+ Ok(handle)
+ }
+
+ pub(crate) fn get_transaction_node(&self, handle: u32) -> BinderResult<NodeRef> {
+ // When handle is zero, try to get the context manager.
+ if handle == 0 {
+ Ok(self.ctx.get_manager_node(true)?)
+ } else {
+ Ok(self.get_node_from_handle(handle, true)?)
+ }
+ }
+
+ pub(crate) fn get_node_from_handle(&self, handle: u32, strong: bool) -> Result<NodeRef> {
+ self.node_refs
+ .lock()
+ .by_handle
+ .get_mut(&handle)
+ .ok_or(ENOENT)?
+ .node_ref()
+ .clone(strong)
+ }
+
+ pub(crate) fn remove_from_delivered_deaths(&self, death: &DArc<NodeDeath>) {
+ let mut inner = self.inner.lock();
+ // SAFETY: By the invariant on the `delivered_links` field, this is the right linked list.
+ let removed = unsafe { inner.delivered_deaths.remove(death) };
+ drop(inner);
+ drop(removed);
+ }
+
+ pub(crate) fn update_ref(
+ self: ArcBorrow<'_, Process>,
+ handle: u32,
+ inc: bool,
+ strong: bool,
+ ) -> Result {
+ if inc && handle == 0 {
+ if let Ok(node_ref) = self.ctx.get_manager_node(strong) {
+ if core::ptr::eq(&*self, &*node_ref.node.owner) {
+ return Err(EINVAL);
+ }
+ let _ = self.insert_or_update_handle(node_ref, true);
+ return Ok(());
+ }
+ }
+
+ // To preserve original binder behaviour, we only fail requests where the manager tries to
+ // increment references on itself.
+ let mut refs = self.node_refs.lock();
+ if let Some(info) = refs.by_handle.get_mut(&handle) {
+ if info.node_ref().update(inc, strong) {
+ // Clean up death if there is one attached to this node reference.
+ if let Some(death) = info.death().take() {
+ death.set_cleared(true);
+ self.remove_from_delivered_deaths(&death);
+ }
+
+ // Remove reference from process tables, and from the node's `refs` list.
+
+ // SAFETY: We are removing the `NodeRefInfo` from the right node.
+ unsafe { info.node_ref2().node.remove_node_info(info) };
+
+ let id = info.node_ref().node.global_id();
+ refs.by_handle.remove(&handle);
+ refs.by_node.remove(&id);
+ refs.handle_is_present.release_id(handle as usize);
+
+ if let Some(shrink) = refs.handle_is_present.shrink_request() {
+ drop(refs);
+ // This intentionally ignores allocation failures.
+ if let Ok(new_bitmap) = shrink.realloc(GFP_KERNEL) {
+ refs = self.node_refs.lock();
+ refs.handle_is_present.shrink(new_bitmap);
+ }
+ }
+ }
+ } else {
+ // All refs are cleared in process exit, so this warning is expected in that case.
+ if !self.inner.lock().is_dead {
+ pr_warn!("{}: no such ref {handle}\n", self.pid_in_current_ns());
+ }
+ }
+ Ok(())
+ }
+
+ /// Decrements the refcount of the given node, if one exists.
+ pub(crate) fn update_node(&self, ptr: u64, cookie: u64, strong: bool) {
+ let mut inner = self.inner.lock();
+ if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) {
+ inner.update_node_refcount(&node, false, strong, 1, None);
+ }
+ }
+
+ pub(crate) fn inc_ref_done(&self, reader: &mut UserSliceReader, strong: bool) -> Result {
+ let ptr = reader.read::<u64>()?;
+ let cookie = reader.read::<u64>()?;
+ let mut inner = self.inner.lock();
+ if let Ok(Some(node)) = inner.get_existing_node(ptr, cookie) {
+ if let Some(node) = node.inc_ref_done_locked(strong, &mut inner) {
+ // This only fails if the process is dead.
+ let _ = inner.push_work(node);
+ }
+ }
+ Ok(())
+ }
+
+ pub(crate) fn buffer_alloc(
+ self: &Arc<Self>,
+ debug_id: usize,
+ size: usize,
+ is_oneway: bool,
+ from_pid: i32,
+ ) -> BinderResult<NewAllocation> {
+ use kernel::page::PAGE_SIZE;
+
+ let mut reserve_new_args = ReserveNewArgs {
+ debug_id,
+ size,
+ is_oneway,
+ pid: from_pid,
+ ..ReserveNewArgs::default()
+ };
+
+ let (new_alloc, addr) = loop {
+ let mut inner = self.inner.lock();
+ let mapping = inner.mapping.as_mut().ok_or_else(BinderError::new_dead)?;
+ let alloc_request = match mapping.alloc.reserve_new(reserve_new_args)? {
+ ReserveNew::Success(new_alloc) => break (new_alloc, mapping.address),
+ ReserveNew::NeedAlloc(request) => request,
+ };
+ drop(inner);
+ // We need to allocate memory and then call `reserve_new` again.
+ reserve_new_args = alloc_request.make_alloc()?;
+ };
+
+ let res = Allocation::new(
+ self.clone(),
+ debug_id,
+ new_alloc.offset,
+ size,
+ addr + new_alloc.offset,
+ new_alloc.oneway_spam_detected,
+ );
+
+ // This allocation will be marked as in use until the `Allocation` is used to free it.
+ //
+ // This method can't be called while holding a lock, so we release the lock first. It's
+ // okay for several threads to use the method on the same index at the same time. In that
+ // case, one of the calls will allocate the given page (if missing), and the other call
+ // will wait for the other call to finish allocating the page.
+ //
+ // We will not call `stop_using_range` in parallel with this on the same page, because the
+ // allocation can only be removed via the destructor of the `Allocation` object that we
+ // currently own.
+ match self.pages.use_range(
+ new_alloc.offset / PAGE_SIZE,
+ (new_alloc.offset + size).div_ceil(PAGE_SIZE),
+ ) {
+ Ok(()) => {}
+ Err(err) => {
+ pr_warn!("use_range failure {:?}", err);
+ return Err(err.into());
+ }
+ }
+
+ Ok(NewAllocation(res))
+ }
+
+ pub(crate) fn buffer_get(self: &Arc<Self>, ptr: usize) -> Option<Allocation> {
+ let mut inner = self.inner.lock();
+ let mapping = inner.mapping.as_mut()?;
+ let offset = ptr.checked_sub(mapping.address)?;
+ let (size, debug_id, odata) = mapping.alloc.reserve_existing(offset).ok()?;
+ let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false);
+ if let Some(data) = odata {
+ alloc.set_info(data);
+ }
+ Some(alloc)
+ }
+
+ pub(crate) fn buffer_raw_free(&self, ptr: usize) {
+ let mut inner = self.inner.lock();
+ if let Some(ref mut mapping) = &mut inner.mapping {
+ let offset = match ptr.checked_sub(mapping.address) {
+ Some(offset) => offset,
+ None => return,
+ };
+
+ let freed_range = match mapping.alloc.reservation_abort(offset) {
+ Ok(freed_range) => freed_range,
+ Err(_) => {
+ pr_warn!(
+ "Pointer {:x} failed to free, base = {:x}\n",
+ ptr,
+ mapping.address
+ );
+ return;
+ }
+ };
+
+ // No more allocations in this range. Mark them as not in use.
+ //
+ // Must be done before we release the lock so that `use_range` is not used on these
+ // indices until `stop_using_range` returns.
+ self.pages
+ .stop_using_range(freed_range.start_page_idx, freed_range.end_page_idx);
+ }
+ }
+
+ pub(crate) fn buffer_make_freeable(&self, offset: usize, mut data: Option<AllocationInfo>) {
+ let mut inner = self.inner.lock();
+ if let Some(ref mut mapping) = &mut inner.mapping {
+ if mapping.alloc.reservation_commit(offset, &mut data).is_err() {
+ pr_warn!("Offset {} failed to be marked freeable\n", offset);
+ }
+ }
+ }
+
+ fn create_mapping(&self, vma: &mm::virt::VmaNew) -> Result {
+ use kernel::page::PAGE_SIZE;
+ let size = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
+ let mapping = Mapping::new(vma.start(), size);
+ let page_count = self.pages.register_with_vma(vma)?;
+ if page_count * PAGE_SIZE != size {
+ return Err(EINVAL);
+ }
+
+ // Save range allocator for later.
+ self.inner.lock().mapping = Some(mapping);
+
+ Ok(())
+ }
+
+ fn version(&self, data: UserSlice) -> Result {
+ data.writer().write(&BinderVersion::current())
+ }
+
+ pub(crate) fn register_thread(&self) -> bool {
+ self.inner.lock().register_thread()
+ }
+
+ fn remove_thread(&self, thread: Arc<Thread>) {
+ self.inner.lock().threads.remove(&thread.id);
+ thread.release();
+ }
+
+ fn set_max_threads(&self, max: u32) {
+ self.inner.lock().max_threads = max;
+ }
+
+ fn set_oneway_spam_detection_enabled(&self, enabled: u32) {
+ self.inner.lock().oneway_spam_detection_enabled = enabled != 0;
+ }
+
+ pub(crate) fn is_oneway_spam_detection_enabled(&self) -> bool {
+ self.inner.lock().oneway_spam_detection_enabled
+ }
+
+ fn get_node_debug_info(&self, data: UserSlice) -> Result {
+ let (mut reader, mut writer) = data.reader_writer();
+
+ // Read the starting point.
+ let ptr = reader.read::<BinderNodeDebugInfo>()?.ptr;
+ let mut out = BinderNodeDebugInfo::default();
+
+ {
+ let inner = self.inner.lock();
+ for (node_ptr, node) in &inner.nodes {
+ if *node_ptr > ptr {
+ node.populate_debug_info(&mut out, &inner);
+ break;
+ }
+ }
+ }
+
+ writer.write(&out)
+ }
+
+ fn get_node_info_from_ref(&self, data: UserSlice) -> Result {
+ let (mut reader, mut writer) = data.reader_writer();
+ let mut out = reader.read::<BinderNodeInfoForRef>()?;
+
+ if out.strong_count != 0
+ || out.weak_count != 0
+ || out.reserved1 != 0
+ || out.reserved2 != 0
+ || out.reserved3 != 0
+ {
+ return Err(EINVAL);
+ }
+
+ // Only the context manager is allowed to use this ioctl.
+ if !self.inner.lock().is_manager {
+ return Err(EPERM);
+ }
+
+ {
+ let mut node_refs = self.node_refs.lock();
+ let node_info = node_refs.by_handle.get_mut(&out.handle).ok_or(ENOENT)?;
+ let node_ref = node_info.node_ref();
+ let owner_inner = node_ref.node.owner.inner.lock();
+ node_ref.node.populate_counts(&mut out, &owner_inner);
+ }
+
+ // Write the result back.
+ writer.write(&out)
+ }
+
+ pub(crate) fn needs_thread(&self) -> bool {
+ let mut inner = self.inner.lock();
+ let ret = inner.requested_thread_count == 0
+ && inner.ready_threads.is_empty()
+ && inner.started_thread_count < inner.max_threads;
+ if ret {
+ inner.requested_thread_count += 1
+ }
+ ret
+ }
+
+ pub(crate) fn request_death(
+ self: &Arc<Self>,
+ reader: &mut UserSliceReader,
+ thread: &Thread,
+ ) -> Result {
+ let handle: u32 = reader.read()?;
+ let cookie: u64 = reader.read()?;
+
+ // Queue BR_ERROR if we can't allocate memory for the death notification.
+ let death = UniqueArc::new_uninit(GFP_KERNEL).inspect_err(|_| {
+ thread.push_return_work(BR_ERROR);
+ })?;
+ let mut refs = self.node_refs.lock();
+ let Some(info) = refs.by_handle.get_mut(&handle) else {
+ pr_warn!("BC_REQUEST_DEATH_NOTIFICATION invalid ref {handle}\n");
+ return Ok(());
+ };
+
+ // Nothing to do if there is already a death notification request for this handle.
+ if info.death().is_some() {
+ pr_warn!("BC_REQUEST_DEATH_NOTIFICATION death notification already set\n");
+ return Ok(());
+ }
+
+ let death = {
+ let death_init = NodeDeath::new(info.node_ref().node.clone(), self.clone(), cookie);
+ match death.pin_init_with(death_init) {
+ Ok(death) => death,
+ // error is infallible
+ Err(err) => match err {},
+ }
+ };
+
+ // Register the death notification.
+ {
+ let owner = info.node_ref2().node.owner.clone();
+ let mut owner_inner = owner.inner.lock();
+ if owner_inner.is_dead {
+ let death = Arc::from(death);
+ *info.death() = Some(death.clone());
+ drop(owner_inner);
+ death.set_dead();
+ } else {
+ let death = ListArc::from(death);
+ *info.death() = Some(death.clone_arc());
+ info.node_ref().node.add_death(death, &mut owner_inner);
+ }
+ }
+ Ok(())
+ }
+
+ pub(crate) fn clear_death(&self, reader: &mut UserSliceReader, thread: &Thread) -> Result {
+ let handle: u32 = reader.read()?;
+ let cookie: u64 = reader.read()?;
+
+ let mut refs = self.node_refs.lock();
+ let Some(info) = refs.by_handle.get_mut(&handle) else {
+ pr_warn!("BC_CLEAR_DEATH_NOTIFICATION invalid ref {handle}\n");
+ return Ok(());
+ };
+
+ let Some(death) = info.death().take() else {
+ pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification not active\n");
+ return Ok(());
+ };
+ if death.cookie != cookie {
+ *info.death() = Some(death);
+ pr_warn!("BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch\n");
+ return Ok(());
+ }
+
+ // Update state and determine if we need to queue a work item. We only need to do it when
+ // the node is not dead or if the user already completed the death notification.
+ if death.set_cleared(false) {
+ if let Some(death) = ListArc::try_from_arc_or_drop(death) {
+ let _ = thread.push_work_if_looper(death);
+ }
+ }
+
+ Ok(())
+ }
+
+ pub(crate) fn dead_binder_done(&self, cookie: u64, thread: &Thread) {
+ if let Some(death) = self.inner.lock().pull_delivered_death(cookie) {
+ death.set_notification_done(thread);
+ }
+ }
+
+ /// Locks the spinlock and move the `nodes` rbtree out.
+ ///
+ /// This allows you to iterate through `nodes` while also allowing you to give other parts of
+ /// the codebase exclusive access to `ProcessInner`.
+ pub(crate) fn lock_with_nodes(&self) -> WithNodes<'_> {
+ let mut inner = self.inner.lock();
+ WithNodes {
+ nodes: take(&mut inner.nodes),
+ inner,
+ }
+ }
+
+ fn deferred_flush(&self) {
+ let inner = self.inner.lock();
+ for thread in inner.threads.values() {
+ thread.exit_looper();
+ }
+ }
+
+ fn deferred_release(self: Arc<Self>) {
+ let is_manager = {
+ let mut inner = self.inner.lock();
+ inner.is_dead = true;
+ inner.is_frozen = IsFrozen::No;
+ inner.sync_recv = false;
+ inner.async_recv = false;
+ inner.is_manager
+ };
+
+ if is_manager {
+ self.ctx.unset_manager_node();
+ }
+
+ self.ctx.deregister_process(&self);
+
+ let binderfs_file = self.inner.lock().binderfs_file.take();
+ drop(binderfs_file);
+
+ // Release threads.
+ let threads = {
+ let mut inner = self.inner.lock();
+ let threads = take(&mut inner.threads);
+ let ready = take(&mut inner.ready_threads);
+ drop(inner);
+ drop(ready);
+
+ for thread in threads.values() {
+ thread.release();
+ }
+ threads
+ };
+
+ // Release nodes.
+ {
+ while let Some(node) = {
+ let mut lock = self.inner.lock();
+ lock.nodes.cursor_front_mut().map(|c| c.remove_current().1)
+ } {
+ node.to_key_value().1.release();
+ }
+ }
+
+ // Clean up death listeners and remove nodes from external node info lists.
+ for info in self.node_refs.lock().by_handle.values_mut() {
+ // SAFETY: We are removing the `NodeRefInfo` from the right node.
+ unsafe { info.node_ref2().node.remove_node_info(info) };
+
+ // Remove all death notifications from the nodes (that belong to a different process).
+ let death = if let Some(existing) = info.death().take() {
+ existing
+ } else {
+ continue;
+ };
+ death.set_cleared(false);
+ }
+
+ // Clean up freeze listeners.
+ let freeze_listeners = take(&mut self.node_refs.lock().freeze_listeners);
+ for listener in freeze_listeners.values() {
+ listener.on_process_exit(&self);
+ }
+ drop(freeze_listeners);
+
+ // Release refs on foreign nodes.
+ {
+ let mut refs = self.node_refs.lock();
+ let by_handle = take(&mut refs.by_handle);
+ let by_node = take(&mut refs.by_node);
+ drop(refs);
+ drop(by_node);
+ drop(by_handle);
+ }
+
+ // Cancel all pending work items.
+ while let Some(work) = self.get_work() {
+ work.into_arc().cancel();
+ }
+
+ // Clear delivered_deaths list.
+ //
+ // Scope ensures that MutexGuard is dropped while executing the body.
+ while let Some(delivered_death) = { self.inner.lock().delivered_deaths.pop_front() } {
+ drop(delivered_death);
+ }
+
+ // Free any resources kept alive by allocated buffers.
+ let omapping = self.inner.lock().mapping.take();
+ if let Some(mut mapping) = omapping {
+ let address = mapping.address;
+ mapping
+ .alloc
+ .take_for_each(|offset, size, debug_id, odata| {
+ let ptr = offset + address;
+ let mut alloc =
+ Allocation::new(self.clone(), debug_id, offset, size, ptr, false);
+ if let Some(data) = odata {
+ alloc.set_info(data);
+ }
+ drop(alloc)
+ });
+ }
+
+ // calls to synchronize_rcu() in thread drop will happen here
+ drop(threads);
+ }
+
+ pub(crate) fn drop_outstanding_txn(&self) {
+ let wake = {
+ let mut inner = self.inner.lock();
+ if inner.outstanding_txns == 0 {
+ pr_err!("outstanding_txns underflow");
+ return;
+ }
+ inner.outstanding_txns -= 1;
+ inner.is_frozen.is_frozen() && inner.outstanding_txns == 0
+ };
+
+ if wake {
+ self.freeze_wait.notify_all();
+ }
+ }
+
+ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result {
+ if info.enable == 0 {
+ let msgs = self.prepare_freeze_messages()?;
+ let mut inner = self.inner.lock();
+ inner.sync_recv = false;
+ inner.async_recv = false;
+ inner.is_frozen = IsFrozen::No;
+ drop(inner);
+ msgs.send_messages();
+ return Ok(());
+ }
+
+ let mut inner = self.inner.lock();
+ inner.sync_recv = false;
+ inner.async_recv = false;
+ inner.is_frozen = IsFrozen::InProgress;
+
+ if info.timeout_ms > 0 {
+ let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms);
+ while jiffies > 0 {
+ if inner.outstanding_txns == 0 {
+ break;
+ }
+
+ match self
+ .freeze_wait
+ .wait_interruptible_timeout(&mut inner, jiffies)
+ {
+ CondVarTimeoutResult::Signal { .. } => {
+ inner.is_frozen = IsFrozen::No;
+ return Err(ERESTARTSYS);
+ }
+ CondVarTimeoutResult::Woken { jiffies: remaining } => {
+ jiffies = remaining;
+ }
+ CondVarTimeoutResult::Timeout => {
+ jiffies = 0;
+ }
+ }
+ }
+ }
+
+ if inner.txns_pending_locked() {
+ inner.is_frozen = IsFrozen::No;
+ Err(EAGAIN)
+ } else {
+ drop(inner);
+ match self.prepare_freeze_messages() {
+ Ok(batch) => {
+ self.inner.lock().is_frozen = IsFrozen::Yes;
+ batch.send_messages();
+ Ok(())
+ }
+ Err(kernel::alloc::AllocError) => {
+ self.inner.lock().is_frozen = IsFrozen::No;
+ Err(ENOMEM)
+ }
+ }
+ }
+ }
+}
+
+fn get_frozen_status(data: UserSlice) -> Result {
+ let (mut reader, mut writer) = data.reader_writer();
+
+ let mut info = reader.read::<BinderFrozenStatusInfo>()?;
+ info.sync_recv = 0;
+ info.async_recv = 0;
+ let mut found = false;
+
+ for ctx in crate::context::get_all_contexts()? {
+ ctx.for_each_proc(|proc| {
+ if proc.task.pid() == info.pid as _ {
+ found = true;
+ let inner = proc.inner.lock();
+ let txns_pending = inner.txns_pending_locked();
+ info.async_recv |= inner.async_recv as u32;
+ info.sync_recv |= inner.sync_recv as u32;
+ info.sync_recv |= (txns_pending as u32) << 1;
+ }
+ });
+ }
+
+ if found {
+ writer.write(&info)?;
+ Ok(())
+ } else {
+ Err(EINVAL)
+ }
+}
+
+fn ioctl_freeze(reader: &mut UserSliceReader) -> Result {
+ let info = reader.read::<BinderFreezeInfo>()?;
+
+ // Very unlikely for there to be more than 3, since a process normally uses at most binder and
+ // hwbinder.
+ let mut procs = KVec::with_capacity(3, GFP_KERNEL)?;
+
+ let ctxs = crate::context::get_all_contexts()?;
+ for ctx in ctxs {
+ for proc in ctx.get_procs_with_pid(info.pid as i32)? {
+ procs.push(proc, GFP_KERNEL)?;
+ }
+ }
+
+ for proc in procs {
+ proc.ioctl_freeze(&info)?;
+ }
+ Ok(())
+}
+
+/// The ioctl handler.
+impl Process {
+ /// Ioctls that are write-only from the perspective of userspace.
+ ///
+ /// The kernel will only read from the pointer that userspace provided to us.
+ fn ioctl_write_only(
+ this: ArcBorrow<'_, Process>,
+ _file: &File,
+ cmd: u32,
+ reader: &mut UserSliceReader,
+ ) -> Result {
+ let thread = this.get_current_thread()?;
+ match cmd {
+ uapi::BINDER_SET_MAX_THREADS => this.set_max_threads(reader.read()?),
+ uapi::BINDER_THREAD_EXIT => this.remove_thread(thread),
+ uapi::BINDER_SET_CONTEXT_MGR => this.set_as_manager(None, &thread)?,
+ uapi::BINDER_SET_CONTEXT_MGR_EXT => {
+ this.set_as_manager(Some(reader.read()?), &thread)?
+ }
+ uapi::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => {
+ this.set_oneway_spam_detection_enabled(reader.read()?)
+ }
+ uapi::BINDER_FREEZE => ioctl_freeze(reader)?,
+ _ => return Err(EINVAL),
+ }
+ Ok(())
+ }
+
+ /// Ioctls that are read/write from the perspective of userspace.
+ ///
+ /// The kernel will both read from and write to the pointer that userspace provided to us.
+ fn ioctl_write_read(
+ this: ArcBorrow<'_, Process>,
+ file: &File,
+ cmd: u32,
+ data: UserSlice,
+ ) -> Result {
+ let thread = this.get_current_thread()?;
+ let blocking = (file.flags() & file::flags::O_NONBLOCK) == 0;
+ match cmd {
+ uapi::BINDER_WRITE_READ => thread.write_read(data, blocking)?,
+ uapi::BINDER_GET_NODE_DEBUG_INFO => this.get_node_debug_info(data)?,
+ uapi::BINDER_GET_NODE_INFO_FOR_REF => this.get_node_info_from_ref(data)?,
+ uapi::BINDER_VERSION => this.version(data)?,
+ uapi::BINDER_GET_FROZEN_INFO => get_frozen_status(data)?,
+ uapi::BINDER_GET_EXTENDED_ERROR => thread.get_extended_error(data)?,
+ _ => return Err(EINVAL),
+ }
+ Ok(())
+ }
+}
+
+/// The file operations supported by `Process`.
+impl Process {
+ pub(crate) fn open(ctx: ArcBorrow<'_, Context>, file: &File) -> Result<Arc<Process>> {
+ Self::new(ctx.into(), ARef::from(file.cred()))
+ }
+
+ pub(crate) fn release(this: Arc<Process>, _file: &File) {
+ let binderfs_file;
+ let should_schedule;
+ {
+ let mut inner = this.inner.lock();
+ should_schedule = inner.defer_work == 0;
+ inner.defer_work |= PROC_DEFER_RELEASE;
+ binderfs_file = inner.binderfs_file.take();
+ }
+
+ if should_schedule {
+ // Ignore failures to schedule to the workqueue. Those just mean that we're already
+ // scheduled for execution.
+ let _ = workqueue::system().enqueue(this);
+ }
+
+ drop(binderfs_file);
+ }
+
+ pub(crate) fn flush(this: ArcBorrow<'_, Process>) -> Result {
+ let should_schedule;
+ {
+ let mut inner = this.inner.lock();
+ should_schedule = inner.defer_work == 0;
+ inner.defer_work |= PROC_DEFER_FLUSH;
+ }
+
+ if should_schedule {
+ // Ignore failures to schedule to the workqueue. Those just mean that we're already
+ // scheduled for execution.
+ let _ = workqueue::system().enqueue(Arc::from(this));
+ }
+ Ok(())
+ }
+
+ pub(crate) fn ioctl(this: ArcBorrow<'_, Process>, file: &File, cmd: u32, arg: usize) -> Result {
+ use kernel::ioctl::{_IOC_DIR, _IOC_SIZE};
+ use kernel::uapi::{_IOC_READ, _IOC_WRITE};
+
+ crate::trace::trace_ioctl(cmd, arg);
+
+ let user_slice = UserSlice::new(UserPtr::from_addr(arg), _IOC_SIZE(cmd));
+
+ const _IOC_READ_WRITE: u32 = _IOC_READ | _IOC_WRITE;
+
+ match _IOC_DIR(cmd) {
+ _IOC_WRITE => Self::ioctl_write_only(this, file, cmd, &mut user_slice.reader()),
+ _IOC_READ_WRITE => Self::ioctl_write_read(this, file, cmd, user_slice),
+ _ => Err(EINVAL),
+ }
+ }
+
+ pub(crate) fn mmap(
+ this: ArcBorrow<'_, Process>,
+ _file: &File,
+ vma: &mm::virt::VmaNew,
+ ) -> Result {
+ // We don't allow mmap to be used in a different process.
+ if !core::ptr::eq(kernel::current!().group_leader(), &*this.task) {
+ return Err(EINVAL);
+ }
+ if vma.start() == 0 {
+ return Err(EINVAL);
+ }
+
+ vma.try_clear_maywrite().map_err(|_| EPERM)?;
+ vma.set_dontcopy();
+ vma.set_mixedmap();
+
+ // TODO: Set ops. We need to learn when the user unmaps so that we can stop using it.
+ this.create_mapping(vma)
+ }
+
+ pub(crate) fn poll(
+ this: ArcBorrow<'_, Process>,
+ file: &File,
+ table: PollTable<'_>,
+ ) -> Result<u32> {
+ let thread = this.get_current_thread()?;
+ let (from_proc, mut mask) = thread.poll(file, table);
+ if mask == 0 && from_proc && !this.inner.lock().work.is_empty() {
+ mask |= bindings::POLLIN;
+ }
+ Ok(mask)
+ }
+}
+
+/// Represents that a thread has registered with the `ready_threads` list of its process.
+///
+/// The destructor of this type will unregister the thread from the list of ready threads.
+pub(crate) struct Registration<'a> {
+ thread: &'a Arc<Thread>,
+}
+
+impl<'a> Registration<'a> {
+ fn new(thread: &'a Arc<Thread>, guard: &mut Guard<'_, ProcessInner, SpinLockBackend>) -> Self {
+ assert!(core::ptr::eq(&thread.process.inner, guard.lock_ref()));
+ // INVARIANT: We are pushing this thread to the right `ready_threads` list.
+ if let Ok(list_arc) = ListArc::try_from_arc(thread.clone()) {
+ guard.ready_threads.push_front(list_arc);
+ } else {
+ // It is an error to hit this branch, and it should not be reachable. We try to do
+ // something reasonable when the failure path happens. Most likely, the thread in
+ // question will sleep forever.
+ pr_err!("Same thread registered with `ready_threads` twice.");
+ }
+ Self { thread }
+ }
+}
+
+impl Drop for Registration<'_> {
+ fn drop(&mut self) {
+ let mut inner = self.thread.process.inner.lock();
+ // SAFETY: The thread has the invariant that we never push it to any other linked list than
+ // the `ready_threads` list of its parent process. Therefore, the thread is either in that
+ // list, or in no list.
+ unsafe { inner.ready_threads.remove(self.thread) };
+ }
+}
+
+pub(crate) struct WithNodes<'a> {
+ pub(crate) inner: Guard<'a, ProcessInner, SpinLockBackend>,
+ pub(crate) nodes: RBTree<u64, DArc<Node>>,
+}
+
+impl Drop for WithNodes<'_> {
+ fn drop(&mut self) {
+ core::mem::swap(&mut self.nodes, &mut self.inner.nodes);
+ if self.nodes.iter().next().is_some() {
+ pr_err!("nodes array was modified while using lock_with_nodes\n");
+ }
+ }
+}
+
+pub(crate) enum GetWorkOrRegister<'a> {
+ Work(DLArc<dyn DeliverToRead>),
+ Register(Registration<'a>),
+}
diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs
new file mode 100644
index 000000000000..07e1dec2ce63
--- /dev/null
+++ b/drivers/android/binder/range_alloc/array.rs
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{
+ page::{PAGE_MASK, PAGE_SIZE},
+ prelude::*,
+ seq_file::SeqFile,
+ seq_print,
+ task::Pid,
+};
+
+use crate::range_alloc::{DescriptorState, FreedRange, Range};
+
+/// Keeps track of allocations in a process' mmap.
+///
+/// Each process has an mmap where the data for incoming transactions will be placed. This struct
+/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
+/// has metadata related to the allocation. We also keep track of available free space.
+pub(super) struct ArrayRangeAllocator<T> {
+ /// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not*
+ /// store the free ranges.
+ ///
+ /// Sorted by offset.
+ pub(super) ranges: KVec<Range<T>>,
+ size: usize,
+ free_oneway_space: usize,
+}
+
+struct FindEmptyRes {
+ /// Which index in `ranges` should we insert the new range at?
+ ///
+ /// Inserting the new range at this index keeps `ranges` sorted.
+ insert_at_idx: usize,
+ /// Which offset should we insert the new range at?
+ insert_at_offset: usize,
+}
+
+impl<T> ArrayRangeAllocator<T> {
+ pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self {
+ Self {
+ ranges: alloc.ranges,
+ size,
+ free_oneway_space: size / 2,
+ }
+ }
+
+ pub(crate) fn free_oneway_space(&self) -> usize {
+ self.free_oneway_space
+ }
+
+ pub(crate) fn count_buffers(&self) -> usize {
+ self.ranges.len()
+ }
+
+ pub(crate) fn total_size(&self) -> usize {
+ self.size
+ }
+
+ pub(crate) fn is_full(&self) -> bool {
+ self.ranges.len() == self.ranges.capacity()
+ }
+
+ pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
+ for range in &self.ranges {
+ seq_print!(
+ m,
+ " buffer {}: {} size {} pid {} oneway {}",
+ 0,
+ range.offset,
+ range.size,
+ range.state.pid(),
+ range.state.is_oneway(),
+ );
+ if let DescriptorState::Reserved(_) = range.state {
+ seq_print!(m, " reserved\n");
+ } else {
+ seq_print!(m, " allocated\n");
+ }
+ }
+ Ok(())
+ }
+
+ /// Find somewhere to put a new range.
+ ///
+ /// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that
+ /// fragmentation isn't a big issue when we don't have many ranges.
+ ///
+ /// Returns the index that the new range should have in `self.ranges` after insertion.
+ fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> {
+ let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0);
+
+ if size <= self.total_size() - after_last_range {
+ // We can put the range at the end, so just do that.
+ Some(FindEmptyRes {
+ insert_at_idx: self.ranges.len(),
+ insert_at_offset: after_last_range,
+ })
+ } else {
+ let mut end_of_prev = 0;
+ for (i, range) in self.ranges.iter().enumerate() {
+ // Does it fit before the i'th range?
+ if size <= range.offset - end_of_prev {
+ return Some(FindEmptyRes {
+ insert_at_idx: i,
+ insert_at_offset: end_of_prev,
+ });
+ }
+ end_of_prev = range.endpoint();
+ }
+ None
+ }
+ }
+
+ pub(crate) fn reserve_new(
+ &mut self,
+ debug_id: usize,
+ size: usize,
+ is_oneway: bool,
+ pid: Pid,
+ ) -> Result<usize> {
+ // Compute new value of free_oneway_space, which is set only on success.
+ let new_oneway_space = if is_oneway {
+ match self.free_oneway_space.checked_sub(size) {
+ Some(new_oneway_space) => new_oneway_space,
+ None => return Err(ENOSPC),
+ }
+ } else {
+ self.free_oneway_space
+ };
+
+ let FindEmptyRes {
+ insert_at_idx,
+ insert_at_offset,
+ } = self.find_empty_range(size).ok_or(ENOSPC)?;
+ self.free_oneway_space = new_oneway_space;
+
+ let new_range = Range {
+ offset: insert_at_offset,
+ size,
+ state: DescriptorState::new(is_oneway, debug_id, pid),
+ };
+ // Insert the value at the given index to keep the array sorted.
+ self.ranges
+ .insert_within_capacity(insert_at_idx, new_range)
+ .ok()
+ .unwrap();
+
+ Ok(insert_at_offset)
+ }
+
+ pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
+ // This could use a binary search, but linear scans are usually faster for small arrays.
+ let i = self
+ .ranges
+ .iter()
+ .position(|range| range.offset == offset)
+ .ok_or(EINVAL)?;
+ let range = &self.ranges[i];
+
+ if let DescriptorState::Allocated(_) = range.state {
+ return Err(EPERM);
+ }
+
+ let size = range.size;
+ let offset = range.offset;
+
+ if range.state.is_oneway() {
+ self.free_oneway_space += size;
+ }
+
+ // This computes the range of pages that are no longer used by *any* allocated range. The
+ // caller will mark them as unused, which means that they can be freed if the system comes
+ // under memory pressure.
+ let mut freed_range = FreedRange::interior_pages(offset, size);
+ #[expect(clippy::collapsible_if)] // reads better like this
+ if offset % PAGE_SIZE != 0 {
+ if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) {
+ freed_range.start_page_idx -= 1;
+ }
+ }
+ if range.endpoint() % PAGE_SIZE != 0 {
+ let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE;
+ if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset {
+ freed_range.end_page_idx += 1;
+ }
+ }
+
+ self.ranges.remove(i)?;
+ Ok(freed_range)
+ }
+
+ pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
+ // This could use a binary search, but linear scans are usually faster for small arrays.
+ let range = self
+ .ranges
+ .iter_mut()
+ .find(|range| range.offset == offset)
+ .ok_or(ENOENT)?;
+
+ let DescriptorState::Reserved(reservation) = &range.state else {
+ return Err(ENOENT);
+ };
+
+ range.state = DescriptorState::Allocated(reservation.clone().allocate(data.take()));
+ Ok(())
+ }
+
+ pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
+ // This could use a binary search, but linear scans are usually faster for small arrays.
+ let range = self
+ .ranges
+ .iter_mut()
+ .find(|range| range.offset == offset)
+ .ok_or(ENOENT)?;
+
+ let DescriptorState::Allocated(allocation) = &mut range.state else {
+ return Err(ENOENT);
+ };
+
+ let data = allocation.take();
+ let debug_id = allocation.reservation.debug_id;
+ range.state = DescriptorState::Reserved(allocation.reservation.clone());
+ Ok((range.size, debug_id, data))
+ }
+
+ pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
+ for range in self.ranges.iter_mut() {
+ if let DescriptorState::Allocated(allocation) = &mut range.state {
+ callback(
+ range.offset,
+ range.size,
+ allocation.reservation.debug_id,
+ allocation.data.take(),
+ );
+ }
+ }
+ }
+}
+
+pub(crate) struct EmptyArrayAlloc<T> {
+ ranges: KVec<Range<T>>,
+}
+
+impl<T> EmptyArrayAlloc<T> {
+ pub(crate) fn try_new(capacity: usize) -> Result<Self> {
+ Ok(Self {
+ ranges: KVec::with_capacity(capacity, GFP_KERNEL)?,
+ })
+ }
+}
diff --git a/drivers/android/binder/range_alloc/mod.rs b/drivers/android/binder/range_alloc/mod.rs
new file mode 100644
index 000000000000..2301e2bc1a1f
--- /dev/null
+++ b/drivers/android/binder/range_alloc/mod.rs
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{page::PAGE_SIZE, prelude::*, seq_file::SeqFile, task::Pid};
+
+mod tree;
+use self::tree::{FromArrayAllocs, ReserveNewTreeAlloc, TreeRangeAllocator};
+
+mod array;
+use self::array::{ArrayRangeAllocator, EmptyArrayAlloc};
+
+enum DescriptorState<T> {
+ Reserved(Reservation),
+ Allocated(Allocation<T>),
+}
+
+impl<T> DescriptorState<T> {
+ fn new(is_oneway: bool, debug_id: usize, pid: Pid) -> Self {
+ DescriptorState::Reserved(Reservation {
+ debug_id,
+ is_oneway,
+ pid,
+ })
+ }
+
+ fn pid(&self) -> Pid {
+ match self {
+ DescriptorState::Reserved(inner) => inner.pid,
+ DescriptorState::Allocated(inner) => inner.reservation.pid,
+ }
+ }
+
+ fn is_oneway(&self) -> bool {
+ match self {
+ DescriptorState::Reserved(inner) => inner.is_oneway,
+ DescriptorState::Allocated(inner) => inner.reservation.is_oneway,
+ }
+ }
+}
+
+#[derive(Clone)]
+struct Reservation {
+ debug_id: usize,
+ is_oneway: bool,
+ pid: Pid,
+}
+
+impl Reservation {
+ fn allocate<T>(self, data: Option<T>) -> Allocation<T> {
+ Allocation {
+ data,
+ reservation: self,
+ }
+ }
+}
+
+struct Allocation<T> {
+ reservation: Reservation,
+ data: Option<T>,
+}
+
+impl<T> Allocation<T> {
+ fn deallocate(self) -> (Reservation, Option<T>) {
+ (self.reservation, self.data)
+ }
+
+ fn debug_id(&self) -> usize {
+ self.reservation.debug_id
+ }
+
+ fn take(&mut self) -> Option<T> {
+ self.data.take()
+ }
+}
+
+/// The array implementation must switch to the tree if it wants to go beyond this number of
+/// ranges.
+const TREE_THRESHOLD: usize = 8;
+
+/// Represents a range of pages that have just become completely free.
+#[derive(Copy, Clone)]
+pub(crate) struct FreedRange {
+ pub(crate) start_page_idx: usize,
+ pub(crate) end_page_idx: usize,
+}
+
+impl FreedRange {
+ fn interior_pages(offset: usize, size: usize) -> FreedRange {
+ FreedRange {
+ // Divide round up
+ start_page_idx: offset.div_ceil(PAGE_SIZE),
+ // Divide round down
+ end_page_idx: (offset + size) / PAGE_SIZE,
+ }
+ }
+}
+
+struct Range<T> {
+ offset: usize,
+ size: usize,
+ state: DescriptorState<T>,
+}
+
+impl<T> Range<T> {
+ fn endpoint(&self) -> usize {
+ self.offset + self.size
+ }
+}
+
+pub(crate) struct RangeAllocator<T> {
+ inner: Impl<T>,
+}
+
+enum Impl<T> {
+ Empty(usize),
+ Array(ArrayRangeAllocator<T>),
+ Tree(TreeRangeAllocator<T>),
+}
+
+impl<T> RangeAllocator<T> {
+ pub(crate) fn new(size: usize) -> Self {
+ Self {
+ inner: Impl::Empty(size),
+ }
+ }
+
+ pub(crate) fn free_oneway_space(&self) -> usize {
+ match &self.inner {
+ Impl::Empty(size) => size / 2,
+ Impl::Array(array) => array.free_oneway_space(),
+ Impl::Tree(tree) => tree.free_oneway_space(),
+ }
+ }
+
+ pub(crate) fn count_buffers(&self) -> usize {
+ match &self.inner {
+ Impl::Empty(_size) => 0,
+ Impl::Array(array) => array.count_buffers(),
+ Impl::Tree(tree) => tree.count_buffers(),
+ }
+ }
+
+ pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
+ match &self.inner {
+ Impl::Empty(_size) => Ok(()),
+ Impl::Array(array) => array.debug_print(m),
+ Impl::Tree(tree) => tree.debug_print(m),
+ }
+ }
+
+ /// Try to reserve a new buffer, using the provided allocation if necessary.
+ pub(crate) fn reserve_new(&mut self, mut args: ReserveNewArgs<T>) -> Result<ReserveNew<T>> {
+ match &mut self.inner {
+ Impl::Empty(size) => {
+ let empty_array = match args.empty_array_alloc.take() {
+ Some(empty_array) => ArrayRangeAllocator::new(*size, empty_array),
+ None => {
+ return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
+ args,
+ need_empty_array_alloc: true,
+ need_new_tree_alloc: false,
+ need_tree_alloc: false,
+ }))
+ }
+ };
+
+ self.inner = Impl::Array(empty_array);
+ self.reserve_new(args)
+ }
+ Impl::Array(array) if array.is_full() => {
+ let allocs = match args.new_tree_alloc {
+ Some(ref mut allocs) => allocs,
+ None => {
+ return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
+ args,
+ need_empty_array_alloc: false,
+ need_new_tree_alloc: true,
+ need_tree_alloc: true,
+ }))
+ }
+ };
+
+ let new_tree =
+ TreeRangeAllocator::from_array(array.total_size(), &mut array.ranges, allocs);
+
+ self.inner = Impl::Tree(new_tree);
+ self.reserve_new(args)
+ }
+ Impl::Array(array) => {
+ let offset =
+ array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?;
+ Ok(ReserveNew::Success(ReserveNewSuccess {
+ offset,
+ oneway_spam_detected: false,
+ _empty_array_alloc: args.empty_array_alloc,
+ _new_tree_alloc: args.new_tree_alloc,
+ _tree_alloc: args.tree_alloc,
+ }))
+ }
+ Impl::Tree(tree) => {
+ let alloc = match args.tree_alloc {
+ Some(alloc) => alloc,
+ None => {
+ return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
+ args,
+ need_empty_array_alloc: false,
+ need_new_tree_alloc: false,
+ need_tree_alloc: true,
+ }));
+ }
+ };
+ let (offset, oneway_spam_detected) =
+ tree.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid, alloc)?;
+ Ok(ReserveNew::Success(ReserveNewSuccess {
+ offset,
+ oneway_spam_detected,
+ _empty_array_alloc: args.empty_array_alloc,
+ _new_tree_alloc: args.new_tree_alloc,
+ _tree_alloc: None,
+ }))
+ }
+ }
+ }
+
+ /// Deletes the allocations at `offset`.
+ pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
+ match &mut self.inner {
+ Impl::Empty(_size) => Err(EINVAL),
+ Impl::Array(array) => array.reservation_abort(offset),
+ Impl::Tree(tree) => {
+ let freed_range = tree.reservation_abort(offset)?;
+ if tree.is_empty() {
+ self.inner = Impl::Empty(tree.total_size());
+ }
+ Ok(freed_range)
+ }
+ }
+ }
+
+ /// Called when an allocation is no longer in use by the kernel.
+ ///
+ /// The value in `data` will be stored, if any. A mutable reference is used to avoid dropping
+ /// the `T` when an error is returned.
+ pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
+ match &mut self.inner {
+ Impl::Empty(_size) => Err(EINVAL),
+ Impl::Array(array) => array.reservation_commit(offset, data),
+ Impl::Tree(tree) => tree.reservation_commit(offset, data),
+ }
+ }
+
+ /// Called when the kernel starts using an allocation.
+ ///
+ /// Returns the size of the existing entry and the data associated with it.
+ pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
+ match &mut self.inner {
+ Impl::Empty(_size) => Err(EINVAL),
+ Impl::Array(array) => array.reserve_existing(offset),
+ Impl::Tree(tree) => tree.reserve_existing(offset),
+ }
+ }
+
+ /// Call the provided callback at every allocated region.
+ ///
+ /// This destroys the range allocator. Used only during shutdown.
+ pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
+ match &mut self.inner {
+ Impl::Empty(_size) => {}
+ Impl::Array(array) => array.take_for_each(callback),
+ Impl::Tree(tree) => tree.take_for_each(callback),
+ }
+ }
+}
+
+/// The arguments for `reserve_new`.
+#[derive(Default)]
+pub(crate) struct ReserveNewArgs<T> {
+ pub(crate) size: usize,
+ pub(crate) is_oneway: bool,
+ pub(crate) debug_id: usize,
+ pub(crate) pid: Pid,
+ pub(crate) empty_array_alloc: Option<EmptyArrayAlloc<T>>,
+ pub(crate) new_tree_alloc: Option<FromArrayAllocs<T>>,
+ pub(crate) tree_alloc: Option<ReserveNewTreeAlloc<T>>,
+}
+
+/// The return type of `ReserveNew`.
+pub(crate) enum ReserveNew<T> {
+ Success(ReserveNewSuccess<T>),
+ NeedAlloc(ReserveNewNeedAlloc<T>),
+}
+
+/// Returned by `reserve_new` when the reservation was successul.
+pub(crate) struct ReserveNewSuccess<T> {
+ pub(crate) offset: usize,
+ pub(crate) oneway_spam_detected: bool,
+
+ // If the user supplied an allocation that we did not end up using, then we return it here.
+ // The caller will kfree it outside of the lock.
+ _empty_array_alloc: Option<EmptyArrayAlloc<T>>,
+ _new_tree_alloc: Option<FromArrayAllocs<T>>,
+ _tree_alloc: Option<ReserveNewTreeAlloc<T>>,
+}
+
+/// Returned by `reserve_new` to request the caller to make an allocation before calling the method
+/// again.
+pub(crate) struct ReserveNewNeedAlloc<T> {
+ args: ReserveNewArgs<T>,
+ need_empty_array_alloc: bool,
+ need_new_tree_alloc: bool,
+ need_tree_alloc: bool,
+}
+
+impl<T> ReserveNewNeedAlloc<T> {
+ /// Make the necessary allocations for another call to `reserve_new`.
+ pub(crate) fn make_alloc(mut self) -> Result<ReserveNewArgs<T>> {
+ if self.need_empty_array_alloc && self.args.empty_array_alloc.is_none() {
+ self.args.empty_array_alloc = Some(EmptyArrayAlloc::try_new(TREE_THRESHOLD)?);
+ }
+ if self.need_new_tree_alloc && self.args.new_tree_alloc.is_none() {
+ self.args.new_tree_alloc = Some(FromArrayAllocs::try_new(TREE_THRESHOLD)?);
+ }
+ if self.need_tree_alloc && self.args.tree_alloc.is_none() {
+ self.args.tree_alloc = Some(ReserveNewTreeAlloc::try_new()?);
+ }
+ Ok(self.args)
+ }
+}
diff --git a/drivers/android/binder/range_alloc/tree.rs b/drivers/android/binder/range_alloc/tree.rs
new file mode 100644
index 000000000000..838fdd2b47ea
--- /dev/null
+++ b/drivers/android/binder/range_alloc/tree.rs
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::{
+ page::PAGE_SIZE,
+ prelude::*,
+ rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
+ seq_file::SeqFile,
+ seq_print,
+ task::Pid,
+};
+
+use crate::range_alloc::{DescriptorState, FreedRange, Range};
+
+/// Keeps track of allocations in a process' mmap.
+///
+/// Each process has an mmap where the data for incoming transactions will be placed. This struct
+/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
+/// has metadata related to the allocation. We also keep track of available free space.
+pub(super) struct TreeRangeAllocator<T> {
+ /// This collection contains descriptors for *both* ranges containing an allocation, *and* free
+ /// ranges between allocations. The free ranges get merged, so there are never two free ranges
+ /// next to each other.
+ tree: RBTree<usize, Descriptor<T>>,
+ /// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
+ /// letting us look up the smallest range whose size is at least some lower bound.
+ free_tree: RBTree<FreeKey, ()>,
+ size: usize,
+ free_oneway_space: usize,
+}
+
+impl<T> TreeRangeAllocator<T> {
+ pub(crate) fn from_array(
+ size: usize,
+ ranges: &mut KVec<Range<T>>,
+ alloc: &mut FromArrayAllocs<T>,
+ ) -> Self {
+ let mut tree = TreeRangeAllocator {
+ tree: RBTree::new(),
+ free_tree: RBTree::new(),
+ size,
+ free_oneway_space: size / 2,
+ };
+
+ let mut free_offset = 0;
+ for range in ranges.drain_all() {
+ let free_size = range.offset - free_offset;
+ if free_size > 0 {
+ let free_node = alloc.free_tree.pop().unwrap();
+ tree.free_tree
+ .insert(free_node.into_node((free_size, free_offset), ()));
+ let tree_node = alloc.tree.pop().unwrap();
+ tree.tree.insert(
+ tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
+ );
+ }
+ free_offset = range.endpoint();
+
+ if range.state.is_oneway() {
+ tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
+ }
+
+ let free_res = alloc.free_tree.pop().unwrap();
+ let tree_node = alloc.tree.pop().unwrap();
+ let mut desc = Descriptor::new(range.offset, range.size);
+ desc.state = Some((range.state, free_res));
+ tree.tree.insert(tree_node.into_node(range.offset, desc));
+ }
+
+ // After the last range, we may need a free range.
+ if free_offset < size {
+ let free_size = size - free_offset;
+ let free_node = alloc.free_tree.pop().unwrap();
+ tree.free_tree
+ .insert(free_node.into_node((free_size, free_offset), ()));
+ let tree_node = alloc.tree.pop().unwrap();
+ tree.tree
+ .insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
+ }
+
+ tree
+ }
+
+ pub(crate) fn is_empty(&self) -> bool {
+ let mut tree_iter = self.tree.values();
+ // There's always at least one range, because index zero is either the start of a free or
+ // allocated range.
+ let first_value = tree_iter.next().unwrap();
+ if tree_iter.next().is_some() {
+ // There are never two free ranges next to each other, so if there is more than one
+ // descriptor, then at least one of them must hold an allocated range.
+ return false;
+ }
+ // There is only one descriptor. Return true if it is for a free range.
+ first_value.state.is_none()
+ }
+
+ pub(crate) fn total_size(&self) -> usize {
+ self.size
+ }
+
+ pub(crate) fn free_oneway_space(&self) -> usize {
+ self.free_oneway_space
+ }
+
+ pub(crate) fn count_buffers(&self) -> usize {
+ self.tree
+ .values()
+ .filter(|desc| desc.state.is_some())
+ .count()
+ }
+
+ pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
+ for desc in self.tree.values() {
+ let state = match &desc.state {
+ Some(state) => &state.0,
+ None => continue,
+ };
+ seq_print!(
+ m,
+ " buffer: {} size {} pid {}",
+ desc.offset,
+ desc.size,
+ state.pid(),
+ );
+ if state.is_oneway() {
+ seq_print!(m, " oneway");
+ }
+ match state {
+ DescriptorState::Reserved(_res) => {
+ seq_print!(m, " reserved\n");
+ }
+ DescriptorState::Allocated(_alloc) => {
+ seq_print!(m, " allocated\n");
+ }
+ }
+ }
+ Ok(())
+ }
+
+ fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
+ let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
+ let ((_, offset), ()) = free_cursor.current();
+ self.tree.get_mut(offset)
+ }
+
+ /// Try to reserve a new buffer, using the provided allocation if necessary.
+ pub(crate) fn reserve_new(
+ &mut self,
+ debug_id: usize,
+ size: usize,
+ is_oneway: bool,
+ pid: Pid,
+ alloc: ReserveNewTreeAlloc<T>,
+ ) -> Result<(usize, bool)> {
+ // Compute new value of free_oneway_space, which is set only on success.
+ let new_oneway_space = if is_oneway {
+ match self.free_oneway_space.checked_sub(size) {
+ Some(new_oneway_space) => new_oneway_space,
+ None => return Err(ENOSPC),
+ }
+ } else {
+ self.free_oneway_space
+ };
+
+ // Start detecting spammers once we have less than 20%
+ // of async space left (which is less than 10% of total
+ // buffer size).
+ //
+ // (This will short-circut, so `low_oneway_space` is
+ // only called when necessary.)
+ let oneway_spam_detected =
+ is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);
+
+ let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
+ None => {
+ pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
+ return Err(ENOSPC);
+ }
+ Some(desc) => {
+ let found_size = desc.size;
+ let found_offset = desc.offset;
+
+ // In case we need to break up the descriptor
+ let new_desc = Descriptor::new(found_offset + size, found_size - size);
+ let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);
+
+ desc.state = Some((
+ DescriptorState::new(is_oneway, debug_id, pid),
+ desc_node_res,
+ ));
+ desc.size = size;
+
+ (found_size, found_offset, tree_node, free_tree_node)
+ }
+ };
+ self.free_oneway_space = new_oneway_space;
+ self.free_tree.remove(&(found_size, found_off));
+
+ if found_size != size {
+ self.tree.insert(tree_node);
+ self.free_tree.insert(free_tree_node);
+ }
+
+ Ok((found_off, oneway_spam_detected))
+ }
+
+ pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
+ let mut cursor = self.tree.cursor_lower_bound_mut(&offset).ok_or_else(|| {
+ pr_warn!(
+ "EINVAL from range_alloc.reservation_abort - offset: {}",
+ offset
+ );
+ EINVAL
+ })?;
+
+ let (_, desc) = cursor.current_mut();
+
+ if desc.offset != offset {
+ pr_warn!(
+ "EINVAL from range_alloc.reservation_abort - offset: {}",
+ offset
+ );
+ return Err(EINVAL);
+ }
+
+ let (reservation, free_node_res) = desc.try_change_state(|state| match state {
+ Some((DescriptorState::Reserved(reservation), free_node_res)) => {
+ (None, Ok((reservation, free_node_res)))
+ }
+ None => {
+ pr_warn!(
+ "EINVAL from range_alloc.reservation_abort - offset: {}",
+ offset
+ );
+ (None, Err(EINVAL))
+ }
+ allocated => {
+ pr_warn!(
+ "EPERM from range_alloc.reservation_abort - offset: {}",
+ offset
+ );
+ (allocated, Err(EPERM))
+ }
+ })?;
+
+ let mut size = desc.size;
+ let mut offset = desc.offset;
+ let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };
+
+ self.free_oneway_space += free_oneway_space_add;
+
+ let mut freed_range = FreedRange::interior_pages(offset, size);
+ // Compute how large the next free region needs to be to include one more page in
+ // the newly freed range.
+ let add_next_page_needed = match (offset + size) % PAGE_SIZE {
+ 0 => usize::MAX,
+ unalign => PAGE_SIZE - unalign,
+ };
+ // Compute how large the previous free region needs to be to include one more page
+ // in the newly freed range.
+ let add_prev_page_needed = match offset % PAGE_SIZE {
+ 0 => usize::MAX,
+ unalign => unalign,
+ };
+
+ // Merge next into current if next is free
+ let remove_next = match cursor.peek_next() {
+ Some((_, next)) if next.state.is_none() => {
+ if next.size >= add_next_page_needed {
+ freed_range.end_page_idx += 1;
+ }
+ self.free_tree.remove(&(next.size, next.offset));
+ size += next.size;
+ true
+ }
+ _ => false,
+ };
+
+ if remove_next {
+ let (_, desc) = cursor.current_mut();
+ desc.size = size;
+ cursor.remove_next();
+ }
+
+ // Merge current into prev if prev is free
+ match cursor.peek_prev_mut() {
+ Some((_, prev)) if prev.state.is_none() => {
+ if prev.size >= add_prev_page_needed {
+ freed_range.start_page_idx -= 1;
+ }
+ // merge previous with current, remove current
+ self.free_tree.remove(&(prev.size, prev.offset));
+ offset = prev.offset;
+ size += prev.size;
+ prev.size = size;
+ cursor.remove_current();
+ }
+ _ => {}
+ };
+
+ self.free_tree
+ .insert(free_node_res.into_node((size, offset), ()));
+
+ Ok(freed_range)
+ }
+
+ pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
+ let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;
+
+ desc.try_change_state(|state| match state {
+ Some((DescriptorState::Reserved(reservation), free_node_res)) => (
+ Some((
+ DescriptorState::Allocated(reservation.allocate(data.take())),
+ free_node_res,
+ )),
+ Ok(()),
+ ),
+ other => (other, Err(ENOENT)),
+ })
+ }
+
+ /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
+ /// [`DescriptorState::Reserved`].
+ ///
+ /// Returns the size of the existing entry and the data associated with it.
+ pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
+ let desc = self.tree.get_mut(&offset).ok_or_else(|| {
+ pr_warn!(
+ "ENOENT from range_alloc.reserve_existing - offset: {}",
+ offset
+ );
+ ENOENT
+ })?;
+
+ let (debug_id, data) = desc.try_change_state(|state| match state {
+ Some((DescriptorState::Allocated(allocation), free_node_res)) => {
+ let (reservation, data) = allocation.deallocate();
+ let debug_id = reservation.debug_id;
+ (
+ Some((DescriptorState::Reserved(reservation), free_node_res)),
+ Ok((debug_id, data)),
+ )
+ }
+ other => {
+ pr_warn!(
+ "ENOENT from range_alloc.reserve_existing - offset: {}",
+ offset
+ );
+ (other, Err(ENOENT))
+ }
+ })?;
+
+ Ok((desc.size, debug_id, data))
+ }
+
+ /// Call the provided callback at every allocated region.
+ ///
+ /// This destroys the range allocator. Used only during shutdown.
+ pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
+ for (_, desc) in self.tree.iter_mut() {
+ if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
+ callback(
+ desc.offset,
+ desc.size,
+ allocation.debug_id(),
+ allocation.take(),
+ );
+ }
+ }
+ }
+
+ /// Find the amount and size of buffers allocated by the current caller.
+ ///
+ /// The idea is that once we cross the threshold, whoever is responsible
+ /// for the low async space is likely to try to send another async transaction,
+ /// and at some point we'll catch them in the act. This is more efficient
+ /// than keeping a map per pid.
+ fn low_oneway_space(&self, calling_pid: Pid) -> bool {
+ let mut total_alloc_size = 0;
+ let mut num_buffers = 0;
+ for (_, desc) in self.tree.iter() {
+ if let Some((state, _)) = &desc.state {
+ if state.is_oneway() && state.pid() == calling_pid {
+ total_alloc_size += desc.size;
+ num_buffers += 1;
+ }
+ }
+ }
+
+ // Warn if this pid has more than 50 transactions, or more than 50% of
+ // async space (which is 25% of total buffer size). Oneway spam is only
+ // detected when the threshold is exceeded.
+ num_buffers > 50 || total_alloc_size > self.size / 4
+ }
+}
+
+type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
+struct Descriptor<T> {
+ size: usize,
+ offset: usize,
+ state: Option<TreeDescriptorState<T>>,
+}
+
+impl<T> Descriptor<T> {
+ fn new(offset: usize, size: usize) -> Self {
+ Self {
+ size,
+ offset,
+ state: None,
+ }
+ }
+
+ fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
+ where
+ F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
+ {
+ let (new_state, result) = f(self.state.take());
+ self.state = new_state;
+ result
+ }
+}
+
+// (Descriptor.size, Descriptor.offset)
+type FreeKey = (usize, usize);
+type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;
+
+/// An allocation for use by `reserve_new`.
+pub(crate) struct ReserveNewTreeAlloc<T> {
+ tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
+ free_tree_node_res: FreeNodeRes,
+ desc_node_res: FreeNodeRes,
+}
+
+impl<T> ReserveNewTreeAlloc<T> {
+ pub(crate) fn try_new() -> Result<Self> {
+ let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
+ Ok(Self {
+ tree_node_res,
+ free_tree_node_res,
+ desc_node_res,
+ })
+ }
+
+ fn initialize(
+ self,
+ desc: Descriptor<T>,
+ ) -> (
+ RBTreeNode<usize, Descriptor<T>>,
+ RBTreeNode<FreeKey, ()>,
+ FreeNodeRes,
+ ) {
+ let size = desc.size;
+ let offset = desc.offset;
+ (
+ self.tree_node_res.into_node(offset, desc),
+ self.free_tree_node_res.into_node((size, offset), ()),
+ self.desc_node_res,
+ )
+ }
+}
+
+/// An allocation for creating a tree from an `ArrayRangeAllocator`.
+pub(crate) struct FromArrayAllocs<T> {
+ tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
+ free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
+}
+
+impl<T> FromArrayAllocs<T> {
+ pub(crate) fn try_new(len: usize) -> Result<Self> {
+ let num_descriptors = 2 * len + 1;
+
+ let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
+ for _ in 0..num_descriptors {
+ tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
+ }
+
+ let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
+ for _ in 0..num_descriptors {
+ free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
+ }
+
+ Ok(Self { tree, free_tree })
+ }
+}
diff --git a/drivers/android/binder/rust_binder.h b/drivers/android/binder/rust_binder.h
new file mode 100644
index 000000000000..31806890ed1a
--- /dev/null
+++ b/drivers/android/binder/rust_binder.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Google, Inc.
+ */
+
+#ifndef _LINUX_RUST_BINDER_H
+#define _LINUX_RUST_BINDER_H
+
+#include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
+
+/*
+ * These symbols are exposed by `rust_binderfs.c` and exist here so that Rust
+ * Binder can call them.
+ */
+int init_rust_binderfs(void);
+
+struct dentry;
+struct inode;
+struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid);
+void rust_binderfs_remove_file(struct dentry *dentry);
+
+#endif
diff --git a/drivers/android/binder/rust_binder_events.c b/drivers/android/binder/rust_binder_events.c
new file mode 100644
index 000000000000..488b1470060c
--- /dev/null
+++ b/drivers/android/binder/rust_binder_events.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* rust_binder_events.c
+ *
+ * Rust Binder tracepoints.
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "rust_binder.h"
+
+const char * const binder_command_strings[] = {
+ "BC_TRANSACTION",
+ "BC_REPLY",
+ "BC_ACQUIRE_RESULT",
+ "BC_FREE_BUFFER",
+ "BC_INCREFS",
+ "BC_ACQUIRE",
+ "BC_RELEASE",
+ "BC_DECREFS",
+ "BC_INCREFS_DONE",
+ "BC_ACQUIRE_DONE",
+ "BC_ATTEMPT_ACQUIRE",
+ "BC_REGISTER_LOOPER",
+ "BC_ENTER_LOOPER",
+ "BC_EXIT_LOOPER",
+ "BC_REQUEST_DEATH_NOTIFICATION",
+ "BC_CLEAR_DEATH_NOTIFICATION",
+ "BC_DEAD_BINDER_DONE",
+ "BC_TRANSACTION_SG",
+ "BC_REPLY_SG",
+};
+
+const char * const binder_return_strings[] = {
+ "BR_ERROR",
+ "BR_OK",
+ "BR_TRANSACTION",
+ "BR_REPLY",
+ "BR_ACQUIRE_RESULT",
+ "BR_DEAD_REPLY",
+ "BR_TRANSACTION_COMPLETE",
+ "BR_INCREFS",
+ "BR_ACQUIRE",
+ "BR_RELEASE",
+ "BR_DECREFS",
+ "BR_ATTEMPT_ACQUIRE",
+ "BR_NOOP",
+ "BR_SPAWN_LOOPER",
+ "BR_FINISHED",
+ "BR_DEAD_BINDER",
+ "BR_CLEAR_DEATH_NOTIFICATION_DONE",
+ "BR_FAILED_REPLY",
+ "BR_FROZEN_REPLY",
+ "BR_ONEWAY_SPAM_SUSPECT",
+ "BR_TRANSACTION_PENDING_FROZEN"
+};
+
+#define CREATE_TRACE_POINTS
+#define CREATE_RUST_TRACE_POINTS
+#include "rust_binder_events.h"
diff --git a/drivers/android/binder/rust_binder_events.h b/drivers/android/binder/rust_binder_events.h
new file mode 100644
index 000000000000..2f3efbf9dba6
--- /dev/null
+++ b/drivers/android/binder/rust_binder_events.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Google, Inc.
+ */
+
+#undef TRACE_SYSTEM
+#undef TRACE_INCLUDE_FILE
+#undef TRACE_INCLUDE_PATH
+#define TRACE_SYSTEM rust_binder
+#define TRACE_INCLUDE_FILE rust_binder_events
+#define TRACE_INCLUDE_PATH ../drivers/android/binder
+
+#if !defined(_RUST_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _RUST_BINDER_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(rust_binder_ioctl,
+ TP_PROTO(unsigned int cmd, unsigned long arg),
+ TP_ARGS(cmd, arg),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cmd)
+ __field(unsigned long, arg)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->arg = arg;
+ ),
+ TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg)
+);
+
+#endif /* _RUST_BINDER_TRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/android/binder/rust_binder_internal.h b/drivers/android/binder/rust_binder_internal.h
new file mode 100644
index 000000000000..78288fe7964d
--- /dev/null
+++ b/drivers/android/binder/rust_binder_internal.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* rust_binder_internal.h
+ *
+ * This file contains internal data structures used by Rust Binder. Mostly,
+ * these are type definitions used only by binderfs or things that Rust Binder
+ * define and export to binderfs.
+ *
+ * It does not include things exported by binderfs to Rust Binder since this
+ * file is not included as input to bindgen.
+ *
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _LINUX_RUST_BINDER_INTERNAL_H
+#define _LINUX_RUST_BINDER_INTERNAL_H
+
+#define RUST_BINDERFS_SUPER_MAGIC 0x6c6f6f71
+
+#include <linux/seq_file.h>
+#include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
+
+/*
+ * The internal data types in the Rust Binder driver are opaque to C, so we use
+ * void pointer typedefs for these types.
+ */
+typedef void *rust_binder_context;
+
+/**
+ * struct binder_device - information about a binder device node
+ * @minor: the minor number used by this device
+ * @ctx: the Rust Context used by this device, or null for binder-control
+ *
+ * This is used as the private data for files directly in binderfs, but not
+ * files in the binder_logs subdirectory. This struct owns a refcount on `ctx`
+ * and the entry for `minor` in `binderfs_minors`. For binder-control `ctx` is
+ * null.
+ */
+struct binder_device {
+ int minor;
+ rust_binder_context ctx;
+};
+
+int rust_binder_stats_show(struct seq_file *m, void *unused);
+int rust_binder_state_show(struct seq_file *m, void *unused);
+int rust_binder_transactions_show(struct seq_file *m, void *unused);
+int rust_binder_proc_show(struct seq_file *m, void *pid);
+
+extern const struct file_operations rust_binder_fops;
+rust_binder_context rust_binder_new_context(char *name);
+void rust_binder_remove_context(rust_binder_context device);
+
+/**
+ * binderfs_mount_opts - mount options for binderfs
+ * @max: maximum number of allocatable binderfs binder devices
+ * @stats_mode: enable binder stats in binderfs.
+ */
+struct binderfs_mount_opts {
+ int max;
+ int stats_mode;
+};
+
+/**
+ * binderfs_info - information about a binderfs mount
+ * @ipc_ns: The ipc namespace the binderfs mount belongs to.
+ * @control_dentry: This records the dentry of this binderfs mount
+ * binder-control device.
+ * @root_uid: uid that needs to be used when a new binder device is
+ * created.
+ * @root_gid: gid that needs to be used when a new binder device is
+ * created.
+ * @mount_opts: The mount options in use.
+ * @device_count: The current number of allocated binder devices.
+ * @proc_log_dir: Pointer to the directory dentry containing process-specific
+ * logs.
+ */
+struct binderfs_info {
+ struct ipc_namespace *ipc_ns;
+ struct dentry *control_dentry;
+ kuid_t root_uid;
+ kgid_t root_gid;
+ struct binderfs_mount_opts mount_opts;
+ int device_count;
+ struct dentry *proc_log_dir;
+};
+
+#endif /* _LINUX_RUST_BINDER_INTERNAL_H */
diff --git a/drivers/android/binder/rust_binder_main.rs b/drivers/android/binder/rust_binder_main.rs
new file mode 100644
index 000000000000..c79a9e742240
--- /dev/null
+++ b/drivers/android/binder/rust_binder_main.rs
@@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Binder -- the Android IPC mechanism.
+#![recursion_limit = "256"]
+#![allow(
+ clippy::as_underscore,
+ clippy::ref_as_ptr,
+ clippy::ptr_as_ptr,
+ clippy::cast_lossless
+)]
+
+use kernel::{
+ bindings::{self, seq_file},
+ fs::File,
+ list::{ListArc, ListArcSafe, ListLinksSelfPtr, TryNewListArc},
+ prelude::*,
+ seq_file::SeqFile,
+ seq_print,
+ sync::poll::PollTable,
+ sync::Arc,
+ task::Pid,
+ transmute::AsBytes,
+ types::ForeignOwnable,
+ uaccess::UserSliceWriter,
+};
+
+use crate::{context::Context, page_range::Shrinker, process::Process, thread::Thread};
+
+use core::{
+ ptr::NonNull,
+ sync::atomic::{AtomicBool, AtomicUsize, Ordering},
+};
+
+mod allocation;
+mod context;
+mod deferred_close;
+mod defs;
+mod error;
+mod node;
+mod page_range;
+mod process;
+mod range_alloc;
+mod stats;
+mod thread;
+mod trace;
+mod transaction;
+
+#[allow(warnings)] // generated bindgen code
+mod binderfs {
+ use kernel::bindings::{dentry, inode};
+
+ extern "C" {
+ pub fn init_rust_binderfs() -> kernel::ffi::c_int;
+ }
+ extern "C" {
+ pub fn rust_binderfs_create_proc_file(
+ nodp: *mut inode,
+ pid: kernel::ffi::c_int,
+ ) -> *mut dentry;
+ }
+ extern "C" {
+ pub fn rust_binderfs_remove_file(dentry: *mut dentry);
+ }
+ pub type rust_binder_context = *mut kernel::ffi::c_void;
+ #[repr(C)]
+ #[derive(Copy, Clone)]
+ pub struct binder_device {
+ pub minor: kernel::ffi::c_int,
+ pub ctx: rust_binder_context,
+ }
+ impl Default for binder_device {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+ }
+}
+
+module! {
+ type: BinderModule,
+ name: "rust_binder",
+ authors: ["Wedson Almeida Filho", "Alice Ryhl"],
+ description: "Android Binder",
+ license: "GPL",
+}
+
+fn next_debug_id() -> usize {
+ static NEXT_DEBUG_ID: AtomicUsize = AtomicUsize::new(0);
+
+ NEXT_DEBUG_ID.fetch_add(1, Ordering::Relaxed)
+}
+
+/// Provides a single place to write Binder return values via the
+/// supplied `UserSliceWriter`.
+pub(crate) struct BinderReturnWriter<'a> {
+ writer: UserSliceWriter,
+ thread: &'a Thread,
+}
+
+impl<'a> BinderReturnWriter<'a> {
+ fn new(writer: UserSliceWriter, thread: &'a Thread) -> Self {
+ BinderReturnWriter { writer, thread }
+ }
+
+ /// Write a return code back to user space.
+ /// Should be a `BR_` constant from [`defs`] e.g. [`defs::BR_TRANSACTION_COMPLETE`].
+ fn write_code(&mut self, code: u32) -> Result {
+ stats::GLOBAL_STATS.inc_br(code);
+ self.thread.process.stats.inc_br(code);
+ self.writer.write(&code)
+ }
+
+ /// Write something *other than* a return code to user space.
+ fn write_payload<T: AsBytes>(&mut self, payload: &T) -> Result {
+ self.writer.write(payload)
+ }
+
+ fn len(&self) -> usize {
+ self.writer.len()
+ }
+}
+
+/// Specifies how a type should be delivered to the read part of a BINDER_WRITE_READ ioctl.
+///
+/// When a value is pushed to the todo list for a process or thread, it is stored as a trait object
+/// with the type `Arc<dyn DeliverToRead>`. Trait objects are a Rust feature that lets you
+/// implement dynamic dispatch over many different types. This lets us store many different types
+/// in the todo list.
+trait DeliverToRead: ListArcSafe + Send + Sync {
+ /// Performs work. Returns true if remaining work items in the queue should be processed
+ /// immediately, or false if it should return to caller before processing additional work
+ /// items.
+ fn do_work(
+ self: DArc<Self>,
+ thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool>;
+
+ /// Cancels the given work item. This is called instead of [`DeliverToRead::do_work`] when work
+ /// won't be delivered.
+ fn cancel(self: DArc<Self>);
+
+ /// Should we use `wake_up_interruptible_sync` or `wake_up_interruptible` when scheduling this
+ /// work item?
+ ///
+ /// Generally only set to true for non-oneway transactions.
+ fn should_sync_wakeup(&self) -> bool;
+
+ fn debug_print(&self, m: &SeqFile, prefix: &str, transaction_prefix: &str) -> Result<()>;
+}
+
+// Wrapper around a `DeliverToRead` with linked list links.
+#[pin_data]
+struct DTRWrap<T: ?Sized> {
+ #[pin]
+ links: ListLinksSelfPtr<DTRWrap<dyn DeliverToRead>>,
+ #[pin]
+ wrapped: T,
+}
+kernel::list::impl_list_arc_safe! {
+ impl{T: ListArcSafe + ?Sized} ListArcSafe<0> for DTRWrap<T> {
+ tracked_by wrapped: T;
+ }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<0> for DTRWrap<dyn DeliverToRead> {
+ using ListLinksSelfPtr { self.links };
+ }
+}
+
+impl<T: ?Sized> core::ops::Deref for DTRWrap<T> {
+ type Target = T;
+ fn deref(&self) -> &T {
+ &self.wrapped
+ }
+}
+
+type DArc<T> = kernel::sync::Arc<DTRWrap<T>>;
+type DLArc<T> = kernel::list::ListArc<DTRWrap<T>>;
+
+impl<T: ListArcSafe> DTRWrap<T> {
+ fn new(val: impl PinInit<T>) -> impl PinInit<Self> {
+ pin_init!(Self {
+ links <- ListLinksSelfPtr::new(),
+ wrapped <- val,
+ })
+ }
+
+ fn arc_try_new(val: T) -> Result<DLArc<T>, kernel::alloc::AllocError> {
+ ListArc::pin_init(
+ try_pin_init!(Self {
+ links <- ListLinksSelfPtr::new(),
+ wrapped: val,
+ }),
+ GFP_KERNEL,
+ )
+ .map_err(|_| kernel::alloc::AllocError)
+ }
+
+ fn arc_pin_init(init: impl PinInit<T>) -> Result<DLArc<T>, kernel::error::Error> {
+ ListArc::pin_init(
+ try_pin_init!(Self {
+ links <- ListLinksSelfPtr::new(),
+ wrapped <- init,
+ }),
+ GFP_KERNEL,
+ )
+ }
+}
+
+struct DeliverCode {
+ code: u32,
+ skip: AtomicBool,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for DeliverCode { untracked; }
+}
+
+impl DeliverCode {
+ fn new(code: u32) -> Self {
+ Self {
+ code,
+ skip: AtomicBool::new(false),
+ }
+ }
+
+ /// Disable this DeliverCode and make it do nothing.
+ ///
+ /// This is used instead of removing it from the work list, since `LinkedList::remove` is
+ /// unsafe, whereas this method is not.
+ fn skip(&self) {
+ self.skip.store(true, Ordering::Relaxed);
+ }
+}
+
+impl DeliverToRead for DeliverCode {
+ fn do_work(
+ self: DArc<Self>,
+ _thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ if !self.skip.load(Ordering::Relaxed) {
+ writer.write_code(self.code)?;
+ }
+ Ok(true)
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ seq_print!(m, "{}", prefix);
+ if self.skip.load(Ordering::Relaxed) {
+ seq_print!(m, "(skipped) ");
+ }
+ if self.code == defs::BR_TRANSACTION_COMPLETE {
+ seq_print!(m, "transaction complete\n");
+ } else {
+ seq_print!(m, "transaction error: {}\n", self.code);
+ }
+ Ok(())
+ }
+}
+
+fn ptr_align(value: usize) -> Option<usize> {
+ let size = core::mem::size_of::<usize>() - 1;
+ Some(value.checked_add(size)? & !size)
+}
+
+// SAFETY: We call register in `init`.
+static BINDER_SHRINKER: Shrinker = unsafe { Shrinker::new() };
+
+struct BinderModule {}
+
+impl kernel::Module for BinderModule {
+ fn init(_module: &'static kernel::ThisModule) -> Result<Self> {
+ // SAFETY: The module initializer never runs twice, so we only call this once.
+ unsafe { crate::context::CONTEXTS.init() };
+
+ pr_warn!("Loaded Rust Binder.");
+
+ BINDER_SHRINKER.register(kernel::c_str!("android-binder"))?;
+
+ // SAFETY: The module is being loaded, so we can initialize binderfs.
+ unsafe { kernel::error::to_result(binderfs::init_rust_binderfs())? };
+
+ Ok(Self {})
+ }
+}
+
+/// Makes the inner type Sync.
+#[repr(transparent)]
+pub struct AssertSync<T>(T);
+// SAFETY: Used only to insert `file_operations` into a global, which is safe.
+unsafe impl<T> Sync for AssertSync<T> {}
+
+/// File operations that rust_binderfs.c can use.
+#[no_mangle]
+#[used]
+pub static rust_binder_fops: AssertSync<kernel::bindings::file_operations> = {
+ // SAFETY: All zeroes is safe for the `file_operations` type.
+ let zeroed_ops = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+ let ops = kernel::bindings::file_operations {
+ owner: THIS_MODULE.as_ptr(),
+ poll: Some(rust_binder_poll),
+ unlocked_ioctl: Some(rust_binder_ioctl),
+ compat_ioctl: Some(bindings::compat_ptr_ioctl),
+ mmap: Some(rust_binder_mmap),
+ open: Some(rust_binder_open),
+ release: Some(rust_binder_release),
+ flush: Some(rust_binder_flush),
+ ..zeroed_ops
+ };
+ AssertSync(ops)
+};
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_new_context(
+ name: *const kernel::ffi::c_char,
+) -> *mut kernel::ffi::c_void {
+ // SAFETY: The caller will always provide a valid c string here.
+ let name = unsafe { kernel::str::CStr::from_char_ptr(name) };
+ match Context::new(name) {
+ Ok(ctx) => Arc::into_foreign(ctx),
+ Err(_err) => core::ptr::null_mut(),
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_remove_context(device: *mut kernel::ffi::c_void) {
+ if !device.is_null() {
+ // SAFETY: The caller ensures that the `device` pointer came from a previous call to
+ // `rust_binder_new_device`.
+ let ctx = unsafe { Arc::<Context>::from_foreign(device) };
+ ctx.deregister();
+ drop(ctx);
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_open(
+ inode: *mut bindings::inode,
+ file_ptr: *mut bindings::file,
+) -> kernel::ffi::c_int {
+ // SAFETY: The `rust_binderfs.c` file ensures that `i_private` is set to a
+ // `struct binder_device`.
+ let device = unsafe { (*inode).i_private } as *const binderfs::binder_device;
+
+ assert!(!device.is_null());
+
+ // SAFETY: The `rust_binderfs.c` file ensures that `device->ctx` holds a binder context when
+ // using the rust binder fops.
+ let ctx = unsafe { Arc::<Context>::borrow((*device).ctx) };
+
+ // SAFETY: The caller provides a valid file pointer to a new `struct file`.
+ let file = unsafe { File::from_raw_file(file_ptr) };
+ let process = match Process::open(ctx, file) {
+ Ok(process) => process,
+ Err(err) => return err.to_errno(),
+ };
+
+ // SAFETY: This is an `inode` for a newly created binder file.
+ match unsafe { BinderfsProcFile::new(inode, process.task.pid()) } {
+ Ok(Some(file)) => process.inner.lock().binderfs_file = Some(file),
+ Ok(None) => { /* pid already exists */ }
+ Err(err) => return err.to_errno(),
+ }
+
+ // SAFETY: This file is associated with Rust binder, so we own the `private_data` field.
+ unsafe { (*file_ptr).private_data = process.into_foreign() };
+ 0
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_release(
+ _inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> kernel::ffi::c_int {
+ // SAFETY: We previously set `private_data` in `rust_binder_open`.
+ let process = unsafe { Arc::<Process>::from_foreign((*file).private_data) };
+ // SAFETY: The caller ensures that the file is valid.
+ let file = unsafe { File::from_raw_file(file) };
+ Process::release(process, file);
+ 0
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_ioctl(
+ file: *mut bindings::file,
+ cmd: kernel::ffi::c_uint,
+ arg: kernel::ffi::c_ulong,
+) -> kernel::ffi::c_long {
+ // SAFETY: We previously set `private_data` in `rust_binder_open`.
+ let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
+ // SAFETY: The caller ensures that the file is valid.
+ match Process::ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) {
+ Ok(()) => 0,
+ Err(err) => err.to_errno() as isize,
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_mmap(
+ file: *mut bindings::file,
+ vma: *mut bindings::vm_area_struct,
+) -> kernel::ffi::c_int {
+ // SAFETY: We previously set `private_data` in `rust_binder_open`.
+ let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
+ // SAFETY: The caller ensures that the vma is valid.
+ let area = unsafe { kernel::mm::virt::VmaNew::from_raw(vma) };
+ // SAFETY: The caller ensures that the file is valid.
+ match Process::mmap(f, unsafe { File::from_raw_file(file) }, area) {
+ Ok(()) => 0,
+ Err(err) => err.to_errno(),
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_poll(
+ file: *mut bindings::file,
+ wait: *mut bindings::poll_table_struct,
+) -> bindings::__poll_t {
+ // SAFETY: We previously set `private_data` in `rust_binder_open`.
+ let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
+ // SAFETY: The caller ensures that the file is valid.
+ let fileref = unsafe { File::from_raw_file(file) };
+ // SAFETY: The caller ensures that the `PollTable` is valid.
+ match Process::poll(f, fileref, unsafe { PollTable::from_raw(wait) }) {
+ Ok(v) => v,
+ Err(_) => bindings::POLLERR,
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+unsafe extern "C" fn rust_binder_flush(
+ file: *mut bindings::file,
+ _id: bindings::fl_owner_t,
+) -> kernel::ffi::c_int {
+ // SAFETY: We previously set `private_data` in `rust_binder_open`.
+ let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
+ match Process::flush(f) {
+ Ok(()) => 0,
+ Err(err) => err.to_errno(),
+ }
+}
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_stats_show(
+ ptr: *mut seq_file,
+ _: *mut kernel::ffi::c_void,
+) -> kernel::ffi::c_int {
+ // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
+ // this method is called.
+ let m = unsafe { SeqFile::from_raw(ptr) };
+ if let Err(err) = rust_binder_stats_show_impl(m) {
+ seq_print!(m, "failed to generate state: {:?}\n", err);
+ }
+ 0
+}
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_state_show(
+ ptr: *mut seq_file,
+ _: *mut kernel::ffi::c_void,
+) -> kernel::ffi::c_int {
+ // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
+ // this method is called.
+ let m = unsafe { SeqFile::from_raw(ptr) };
+ if let Err(err) = rust_binder_state_show_impl(m) {
+ seq_print!(m, "failed to generate state: {:?}\n", err);
+ }
+ 0
+}
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_proc_show(
+ ptr: *mut seq_file,
+ _: *mut kernel::ffi::c_void,
+) -> kernel::ffi::c_int {
+ // SAFETY: Accessing the private field of `seq_file` is okay.
+ let pid = (unsafe { (*ptr).private }) as usize as Pid;
+ // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
+ // this method is called.
+ let m = unsafe { SeqFile::from_raw(ptr) };
+ if let Err(err) = rust_binder_proc_show_impl(m, pid) {
+ seq_print!(m, "failed to generate state: {:?}\n", err);
+ }
+ 0
+}
+
+/// # Safety
+/// Only called by binderfs.
+#[no_mangle]
+unsafe extern "C" fn rust_binder_transactions_show(
+ ptr: *mut seq_file,
+ _: *mut kernel::ffi::c_void,
+) -> kernel::ffi::c_int {
+ // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
+ // this method is called.
+ let m = unsafe { SeqFile::from_raw(ptr) };
+ if let Err(err) = rust_binder_transactions_show_impl(m) {
+ seq_print!(m, "failed to generate state: {:?}\n", err);
+ }
+ 0
+}
+
+fn rust_binder_transactions_show_impl(m: &SeqFile) -> Result<()> {
+ seq_print!(m, "binder transactions:\n");
+ let contexts = context::get_all_contexts()?;
+ for ctx in contexts {
+ let procs = ctx.get_all_procs()?;
+ for proc in procs {
+ proc.debug_print(m, &ctx, false)?;
+ seq_print!(m, "\n");
+ }
+ }
+ Ok(())
+}
+
+fn rust_binder_stats_show_impl(m: &SeqFile) -> Result<()> {
+ seq_print!(m, "binder stats:\n");
+ stats::GLOBAL_STATS.debug_print("", m);
+ let contexts = context::get_all_contexts()?;
+ for ctx in contexts {
+ let procs = ctx.get_all_procs()?;
+ for proc in procs {
+ proc.debug_print_stats(m, &ctx)?;
+ seq_print!(m, "\n");
+ }
+ }
+ Ok(())
+}
+
+fn rust_binder_state_show_impl(m: &SeqFile) -> Result<()> {
+ seq_print!(m, "binder state:\n");
+ let contexts = context::get_all_contexts()?;
+ for ctx in contexts {
+ let procs = ctx.get_all_procs()?;
+ for proc in procs {
+ proc.debug_print(m, &ctx, true)?;
+ seq_print!(m, "\n");
+ }
+ }
+ Ok(())
+}
+
+fn rust_binder_proc_show_impl(m: &SeqFile, pid: Pid) -> Result<()> {
+ seq_print!(m, "binder proc state:\n");
+ let contexts = context::get_all_contexts()?;
+ for ctx in contexts {
+ let procs = ctx.get_procs_with_pid(pid)?;
+ for proc in procs {
+ proc.debug_print(m, &ctx, true)?;
+ seq_print!(m, "\n");
+ }
+ }
+ Ok(())
+}
+
+struct BinderfsProcFile(NonNull<bindings::dentry>);
+
+// SAFETY: Safe to drop any thread.
+unsafe impl Send for BinderfsProcFile {}
+
+impl BinderfsProcFile {
+ /// # Safety
+ ///
+ /// Takes an inode from a newly created binder file.
+ unsafe fn new(nodp: *mut bindings::inode, pid: i32) -> Result<Option<Self>> {
+ // SAFETY: The caller passes an `inode` for a newly created binder file.
+ let dentry = unsafe { binderfs::rust_binderfs_create_proc_file(nodp, pid) };
+ match kernel::error::from_err_ptr(dentry) {
+ Ok(dentry) => Ok(NonNull::new(dentry).map(Self)),
+ Err(err) if err == EEXIST => Ok(None),
+ Err(err) => Err(err),
+ }
+ }
+}
+
+impl Drop for BinderfsProcFile {
+ fn drop(&mut self) {
+ // SAFETY: This is a dentry from `rust_binderfs_remove_file` that has not been deleted yet.
+ unsafe { binderfs::rust_binderfs_remove_file(self.0.as_ptr()) };
+ }
+}
diff --git a/drivers/android/binder/rust_binderfs.c b/drivers/android/binder/rust_binderfs.c
new file mode 100644
index 000000000000..c69026df775c
--- /dev/null
+++ b/drivers/android/binder/rust_binderfs.c
@@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compiler_types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/gfp.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/ipc_namespace.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/namei.h>
+#include <linux/magic.h>
+#include <linux/major.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/fs_parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/user_namespace.h>
+#include <linux/xarray.h>
+#include <uapi/asm-generic/errno-base.h>
+#include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
+
+#include "rust_binder.h"
+#include "rust_binder_internal.h"
+
+#define FIRST_INODE 1
+#define SECOND_INODE 2
+#define INODE_OFFSET 3
+#define BINDERFS_MAX_MINOR (1U << MINORBITS)
+/* Ensure that the initial ipc namespace always has devices available. */
+#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4)
+
+DEFINE_SHOW_ATTRIBUTE(rust_binder_stats);
+DEFINE_SHOW_ATTRIBUTE(rust_binder_state);
+DEFINE_SHOW_ATTRIBUTE(rust_binder_transactions);
+DEFINE_SHOW_ATTRIBUTE(rust_binder_proc);
+
+char *rust_binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
+module_param_named(rust_devices, rust_binder_devices_param, charp, 0444);
+
+static dev_t binderfs_dev;
+static DEFINE_MUTEX(binderfs_minors_mutex);
+static DEFINE_IDA(binderfs_minors);
+
+enum binderfs_param {
+ Opt_max,
+ Opt_stats_mode,
+};
+
+enum binderfs_stats_mode {
+ binderfs_stats_mode_unset,
+ binderfs_stats_mode_global,
+};
+
+struct binder_features {
+ bool oneway_spam_detection;
+ bool extended_error;
+ bool freeze_notification;
+};
+
+static const struct constant_table binderfs_param_stats[] = {
+ { "global", binderfs_stats_mode_global },
+ {}
+};
+
+static const struct fs_parameter_spec binderfs_fs_parameters[] = {
+ fsparam_u32("max", Opt_max),
+ fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats),
+ {}
+};
+
+static struct binder_features binder_features = {
+ .oneway_spam_detection = true,
+ .extended_error = true,
+ .freeze_notification = true,
+};
+
+static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+/**
+ * binderfs_binder_device_create - allocate inode from super block of a
+ * binderfs mount
+ * @ref_inode: inode from wich the super block will be taken
+ * @userp: buffer to copy information about new device for userspace to
+ * @req: struct binderfs_device as copied from userspace
+ *
+ * This function allocates a new binder_device and reserves a new minor
+ * number for it.
+ * Minor numbers are limited and tracked globally in binderfs_minors. The
+ * function will stash a struct binder_device for the specific binder
+ * device in i_private of the inode.
+ * It will go on to allocate a new inode from the super block of the
+ * filesystem mount, stash a struct binder_device in its i_private field
+ * and attach a dentry to that inode.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int binderfs_binder_device_create(struct inode *ref_inode,
+ struct binderfs_device __user *userp,
+ struct binderfs_device *req)
+{
+ int minor, ret;
+ struct dentry *dentry, *root;
+ struct binder_device *device = NULL;
+ rust_binder_context ctx = NULL;
+ struct inode *inode = NULL;
+ struct super_block *sb = ref_inode->i_sb;
+ struct binderfs_info *info = sb->s_fs_info;
+#if defined(CONFIG_IPC_NS)
+ bool use_reserve = (info->ipc_ns == &init_ipc_ns);
+#else
+ bool use_reserve = true;
+#endif
+
+ /* Reserve new minor number for the new device. */
+ mutex_lock(&binderfs_minors_mutex);
+ if (++info->device_count <= info->mount_opts.max)
+ minor = ida_alloc_max(&binderfs_minors,
+ use_reserve ? BINDERFS_MAX_MINOR :
+ BINDERFS_MAX_MINOR_CAPPED,
+ GFP_KERNEL);
+ else
+ minor = -ENOSPC;
+ if (minor < 0) {
+ --info->device_count;
+ mutex_unlock(&binderfs_minors_mutex);
+ return minor;
+ }
+ mutex_unlock(&binderfs_minors_mutex);
+
+ ret = -ENOMEM;
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device)
+ goto err;
+
+ req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */
+
+ ctx = rust_binder_new_context(req->name);
+ if (!ctx)
+ goto err;
+
+ inode = new_inode(sb);
+ if (!inode)
+ goto err;
+
+ inode->i_ino = minor + INODE_OFFSET;
+ simple_inode_init_ts(inode);
+ init_special_inode(inode, S_IFCHR | 0600,
+ MKDEV(MAJOR(binderfs_dev), minor));
+ inode->i_fop = &rust_binder_fops;
+ inode->i_uid = info->root_uid;
+ inode->i_gid = info->root_gid;
+
+ req->major = MAJOR(binderfs_dev);
+ req->minor = minor;
+ device->ctx = ctx;
+ device->minor = minor;
+
+ if (userp && copy_to_user(userp, req, sizeof(*req))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ root = sb->s_root;
+ dentry = simple_start_creating(root, req->name);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto err;
+ }
+
+ inode->i_private = device;
+ d_make_persistent(dentry, inode);
+
+ fsnotify_create(root->d_inode, dentry);
+ simple_done_creating(dentry);
+
+ return 0;
+
+err:
+ kfree(device);
+ rust_binder_remove_context(ctx);
+ mutex_lock(&binderfs_minors_mutex);
+ --info->device_count;
+ ida_free(&binderfs_minors, minor);
+ mutex_unlock(&binderfs_minors_mutex);
+ iput(inode);
+
+ return ret;
+}
+
+/**
+ * binder_ctl_ioctl - handle binder device node allocation requests
+ *
+ * The request handler for the binder-control device. All requests operate on
+ * the binderfs mount the binder-control device resides in:
+ * - BINDER_CTL_ADD
+ * Allocate a new binder device.
+ *
+ * Return: %0 on success, negative errno on failure.
+ */
+static long binder_ctl_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = -EINVAL;
+ struct inode *inode = file_inode(file);
+ struct binderfs_device __user *device = (struct binderfs_device __user *)arg;
+ struct binderfs_device device_req;
+
+ switch (cmd) {
+ case BINDER_CTL_ADD:
+ ret = copy_from_user(&device_req, device, sizeof(device_req));
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = binderfs_binder_device_create(inode, device, &device_req);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void binderfs_evict_inode(struct inode *inode)
+{
+ struct binder_device *device = inode->i_private;
+ struct binderfs_info *info = BINDERFS_SB(inode->i_sb);
+
+ clear_inode(inode);
+
+ if (!S_ISCHR(inode->i_mode) || !device)
+ return;
+
+ mutex_lock(&binderfs_minors_mutex);
+ --info->device_count;
+ ida_free(&binderfs_minors, device->minor);
+ mutex_unlock(&binderfs_minors_mutex);
+
+ /* ctx is null for binder-control, but this function ignores null pointers */
+ rust_binder_remove_context(device->ctx);
+
+ kfree(device);
+}
+
+static int binderfs_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ int opt;
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct fs_parse_result result;
+
+ opt = fs_parse(fc, binderfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_max:
+ if (result.uint_32 > BINDERFS_MAX_MINOR)
+ return invalfc(fc, "Bad value for '%s'", param->key);
+
+ ctx->max = result.uint_32;
+ break;
+ case Opt_stats_mode:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ctx->stats_mode = result.uint_32;
+ break;
+ default:
+ return invalfc(fc, "Unsupported parameter '%s'", param->key);
+ }
+
+ return 0;
+}
+
+static int binderfs_fs_context_reconfigure(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb);
+
+ if (info->mount_opts.stats_mode != ctx->stats_mode)
+ return invalfc(fc, "Binderfs stats mode cannot be changed during a remount");
+
+ info->mount_opts.stats_mode = ctx->stats_mode;
+ info->mount_opts.max = ctx->max;
+ return 0;
+}
+
+static int binderfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct binderfs_info *info = BINDERFS_SB(root->d_sb);
+
+ if (info->mount_opts.max <= BINDERFS_MAX_MINOR)
+ seq_printf(seq, ",max=%d", info->mount_opts.max);
+
+ switch (info->mount_opts.stats_mode) {
+ case binderfs_stats_mode_unset:
+ break;
+ case binderfs_stats_mode_global:
+ seq_puts(seq, ",stats=global");
+ break;
+ }
+
+ return 0;
+}
+
+static const struct super_operations binderfs_super_ops = {
+ .evict_inode = binderfs_evict_inode,
+ .show_options = binderfs_show_options,
+ .statfs = simple_statfs,
+};
+
+static inline bool is_binderfs_control_device(const struct dentry *dentry)
+{
+ struct binderfs_info *info = dentry->d_sb->s_fs_info;
+
+ return info->control_dentry == dentry;
+}
+
+static int binderfs_rename(struct mnt_idmap *idmap,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (is_binderfs_control_device(old_dentry) ||
+ is_binderfs_control_device(new_dentry))
+ return -EPERM;
+
+ return simple_rename(idmap, old_dir, old_dentry, new_dir,
+ new_dentry, flags);
+}
+
+static int binderfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ if (is_binderfs_control_device(dentry))
+ return -EPERM;
+
+ return simple_unlink(dir, dentry);
+}
+
+static const struct file_operations binder_ctl_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = binder_ctl_ioctl,
+ .compat_ioctl = binder_ctl_ioctl,
+ .llseek = noop_llseek,
+};
+
+/**
+ * binderfs_binder_ctl_create - create a new binder-control device
+ * @sb: super block of the binderfs mount
+ *
+ * This function creates a new binder-control device node in the binderfs mount
+ * referred to by @sb.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int binderfs_binder_ctl_create(struct super_block *sb)
+{
+ int minor, ret;
+ struct dentry *dentry;
+ struct binder_device *device;
+ struct inode *inode = NULL;
+ struct dentry *root = sb->s_root;
+ struct binderfs_info *info = sb->s_fs_info;
+#if defined(CONFIG_IPC_NS)
+ bool use_reserve = (info->ipc_ns == &init_ipc_ns);
+#else
+ bool use_reserve = true;
+#endif
+
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device)
+ return -ENOMEM;
+
+ /* If we have already created a binder-control node, return. */
+ if (info->control_dentry) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ inode = new_inode(sb);
+ if (!inode)
+ goto out;
+
+ /* Reserve a new minor number for the new device. */
+ mutex_lock(&binderfs_minors_mutex);
+ minor = ida_alloc_max(&binderfs_minors,
+ use_reserve ? BINDERFS_MAX_MINOR :
+ BINDERFS_MAX_MINOR_CAPPED,
+ GFP_KERNEL);
+ mutex_unlock(&binderfs_minors_mutex);
+ if (minor < 0) {
+ ret = minor;
+ goto out;
+ }
+
+ inode->i_ino = SECOND_INODE;
+ simple_inode_init_ts(inode);
+ init_special_inode(inode, S_IFCHR | 0600,
+ MKDEV(MAJOR(binderfs_dev), minor));
+ inode->i_fop = &binder_ctl_fops;
+ inode->i_uid = info->root_uid;
+ inode->i_gid = info->root_gid;
+
+ device->minor = minor;
+ device->ctx = NULL;
+
+ dentry = d_alloc_name(root, "binder-control");
+ if (!dentry)
+ goto out;
+
+ inode->i_private = device;
+ info->control_dentry = dentry;
+ d_add(dentry, inode);
+
+ return 0;
+
+out:
+ kfree(device);
+ iput(inode);
+
+ return ret;
+}
+
+static const struct inode_operations binderfs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .rename = binderfs_rename,
+ .unlink = binderfs_unlink,
+};
+
+static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
+{
+ struct inode *ret;
+
+ ret = new_inode(sb);
+ if (ret) {
+ ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
+ ret->i_mode = mode;
+ simple_inode_init_ts(ret);
+ }
+ return ret;
+}
+
+void rust_binderfs_remove_file(struct dentry *dentry)
+{
+ simple_recursive_removal(dentry, NULL);
+}
+
+static struct dentry *rust_binderfs_create_file(struct dentry *parent, const char *name,
+ const struct file_operations *fops,
+ void *data)
+{
+ struct dentry *dentry;
+ struct inode *new_inode;
+
+ new_inode = binderfs_make_inode(parent->d_sb, S_IFREG | 0444);
+ if (!new_inode)
+ return ERR_PTR(-ENOMEM);
+ new_inode->i_fop = fops;
+ new_inode->i_private = data;
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(new_inode);
+ return dentry;
+ }
+
+ d_make_persistent(dentry, new_inode);
+ fsnotify_create(parent->d_inode, dentry);
+ simple_done_creating(dentry);
+ return dentry;
+}
+
+struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid)
+{
+ struct binderfs_info *info = nodp->i_sb->s_fs_info;
+ struct dentry *dir = info->proc_log_dir;
+ char strbuf[20 + 1];
+ void *data = (void *)(unsigned long) pid;
+
+ if (!dir)
+ return NULL;
+
+ snprintf(strbuf, sizeof(strbuf), "%u", pid);
+ return rust_binderfs_create_file(dir, strbuf, &rust_binder_proc_fops, data);
+}
+
+static struct dentry *binderfs_create_dir(struct dentry *parent,
+ const char *name)
+{
+ struct dentry *dentry;
+ struct inode *new_inode;
+
+ new_inode = binderfs_make_inode(parent->d_sb, S_IFDIR | 0755);
+ if (!new_inode)
+ return ERR_PTR(-ENOMEM);
+
+ new_inode->i_fop = &simple_dir_operations;
+ new_inode->i_op = &simple_dir_inode_operations;
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(new_inode);
+ return dentry;
+ }
+
+ inc_nlink(parent->d_inode);
+ set_nlink(new_inode, 2);
+ d_make_persistent(dentry, new_inode);
+ fsnotify_mkdir(parent->d_inode, dentry);
+ simple_done_creating(dentry);
+ return dentry;
+}
+
+static int binder_features_show(struct seq_file *m, void *unused)
+{
+ bool *feature = m->private;
+
+ seq_printf(m, "%d\n", *feature);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(binder_features);
+
+static int init_binder_features(struct super_block *sb)
+{
+ struct dentry *dentry, *dir;
+
+ dir = binderfs_create_dir(sb->s_root, "features");
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ dentry = rust_binderfs_create_file(dir, "oneway_spam_detection",
+ &binder_features_fops,
+ &binder_features.oneway_spam_detection);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ dentry = rust_binderfs_create_file(dir, "extended_error",
+ &binder_features_fops,
+ &binder_features.extended_error);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ dentry = rust_binderfs_create_file(dir, "freeze_notification",
+ &binder_features_fops,
+ &binder_features.freeze_notification);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ return 0;
+}
+
+static int init_binder_logs(struct super_block *sb)
+{
+ struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
+ struct binderfs_info *info;
+ int ret = 0;
+
+ binder_logs_root_dir = binderfs_create_dir(sb->s_root,
+ "binder_logs");
+ if (IS_ERR(binder_logs_root_dir)) {
+ ret = PTR_ERR(binder_logs_root_dir);
+ goto out;
+ }
+
+ dentry = rust_binderfs_create_file(binder_logs_root_dir, "stats",
+ &rust_binder_stats_fops, NULL);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out;
+ }
+
+ dentry = rust_binderfs_create_file(binder_logs_root_dir, "state",
+ &rust_binder_state_fops, NULL);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out;
+ }
+
+ dentry = rust_binderfs_create_file(binder_logs_root_dir, "transactions",
+ &rust_binder_transactions_fops, NULL);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out;
+ }
+
+ proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc");
+ if (IS_ERR(proc_log_dir)) {
+ ret = PTR_ERR(proc_log_dir);
+ goto out;
+ }
+ info = sb->s_fs_info;
+ info->proc_log_dir = proc_log_dir;
+
+out:
+ return ret;
+}
+
+static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ int ret;
+ struct binderfs_info *info;
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct inode *inode = NULL;
+ struct binderfs_device device_info = {};
+ const char *name;
+ size_t len;
+
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+
+ /*
+ * The binderfs filesystem can be mounted by userns root in a
+ * non-initial userns. By default such mounts have the SB_I_NODEV flag
+ * set in s_iflags to prevent security issues where userns root can
+ * just create random device nodes via mknod() since it owns the
+ * filesystem mount. But binderfs does not allow to create any files
+ * including devices nodes. The only way to create binder devices nodes
+ * is through the binder-control device which userns root is explicitly
+ * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both
+ * necessary and safe.
+ */
+ sb->s_iflags &= ~SB_I_NODEV;
+ sb->s_iflags |= SB_I_NOEXEC;
+ sb->s_magic = RUST_BINDERFS_SUPER_MAGIC;
+ sb->s_op = &binderfs_super_ops;
+ sb->s_time_gran = 1;
+
+ sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL);
+ if (!sb->s_fs_info)
+ return -ENOMEM;
+ info = sb->s_fs_info;
+
+ info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
+
+ info->root_gid = make_kgid(sb->s_user_ns, 0);
+ if (!gid_valid(info->root_gid))
+ info->root_gid = GLOBAL_ROOT_GID;
+ info->root_uid = make_kuid(sb->s_user_ns, 0);
+ if (!uid_valid(info->root_uid))
+ info->root_uid = GLOBAL_ROOT_UID;
+ info->mount_opts.max = ctx->max;
+ info->mount_opts.stats_mode = ctx->stats_mode;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return -ENOMEM;
+
+ inode->i_ino = FIRST_INODE;
+ inode->i_fop = &simple_dir_operations;
+ inode->i_mode = S_IFDIR | 0755;
+ simple_inode_init_ts(inode);
+ inode->i_op = &binderfs_dir_inode_operations;
+ set_nlink(inode, 2);
+
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
+ return -ENOMEM;
+
+ ret = binderfs_binder_ctl_create(sb);
+ if (ret)
+ return ret;
+
+ name = rust_binder_devices_param;
+ for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
+ strscpy(device_info.name, name, len + 1);
+ ret = binderfs_binder_device_create(inode, NULL, &device_info);
+ if (ret)
+ return ret;
+ name += len;
+ if (*name == ',')
+ name++;
+ }
+
+ ret = init_binder_features(sb);
+ if (ret)
+ return ret;
+
+ if (info->mount_opts.stats_mode == binderfs_stats_mode_global)
+ return init_binder_logs(sb);
+
+ return 0;
+}
+
+static int binderfs_fs_context_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, binderfs_fill_super);
+}
+
+static void binderfs_fs_context_free(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+
+ kfree(ctx);
+}
+
+static const struct fs_context_operations binderfs_fs_context_ops = {
+ .free = binderfs_fs_context_free,
+ .get_tree = binderfs_fs_context_get_tree,
+ .parse_param = binderfs_fs_context_parse_param,
+ .reconfigure = binderfs_fs_context_reconfigure,
+};
+
+static int binderfs_init_fs_context(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx;
+
+ ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->max = BINDERFS_MAX_MINOR;
+ ctx->stats_mode = binderfs_stats_mode_unset;
+
+ fc->fs_private = ctx;
+ fc->ops = &binderfs_fs_context_ops;
+
+ return 0;
+}
+
+static void binderfs_kill_super(struct super_block *sb)
+{
+ struct binderfs_info *info = sb->s_fs_info;
+
+ /*
+ * During inode eviction struct binderfs_info is needed.
+ * So first wipe the super_block then free struct binderfs_info.
+ */
+ kill_anon_super(sb);
+
+ if (info && info->ipc_ns)
+ put_ipc_ns(info->ipc_ns);
+
+ kfree(info);
+}
+
+static struct file_system_type binder_fs_type = {
+ .name = "binder",
+ .init_fs_context = binderfs_init_fs_context,
+ .parameters = binderfs_fs_parameters,
+ .kill_sb = binderfs_kill_super,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+
+int init_rust_binderfs(void)
+{
+ int ret;
+ const char *name;
+ size_t len;
+
+ /* Verify that the default binderfs device names are valid. */
+ name = rust_binder_devices_param;
+ for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
+ if (len > BINDERFS_MAX_NAME)
+ return -E2BIG;
+ name += len;
+ if (*name == ',')
+ name++;
+ }
+
+ /* Allocate new major number for binderfs. */
+ ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR,
+ "rust_binder");
+ if (ret)
+ return ret;
+
+ ret = register_filesystem(&binder_fs_type);
+ if (ret) {
+ unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR);
+ return ret;
+ }
+
+ return ret;
+}
diff --git a/drivers/android/binder/stats.rs b/drivers/android/binder/stats.rs
new file mode 100644
index 000000000000..037002651941
--- /dev/null
+++ b/drivers/android/binder/stats.rs
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! Keep track of statistics for binder_logs.
+
+use crate::defs::*;
+use core::sync::atomic::{AtomicU32, Ordering::Relaxed};
+use kernel::{ioctl::_IOC_NR, seq_file::SeqFile, seq_print};
+
+const BC_COUNT: usize = _IOC_NR(BC_REPLY_SG) as usize + 1;
+const BR_COUNT: usize = _IOC_NR(BR_TRANSACTION_PENDING_FROZEN) as usize + 1;
+
+pub(crate) static GLOBAL_STATS: BinderStats = BinderStats::new();
+
+pub(crate) struct BinderStats {
+ bc: [AtomicU32; BC_COUNT],
+ br: [AtomicU32; BR_COUNT],
+}
+
+impl BinderStats {
+ pub(crate) const fn new() -> Self {
+ #[expect(clippy::declare_interior_mutable_const)]
+ const ZERO: AtomicU32 = AtomicU32::new(0);
+
+ Self {
+ bc: [ZERO; BC_COUNT],
+ br: [ZERO; BR_COUNT],
+ }
+ }
+
+ pub(crate) fn inc_bc(&self, bc: u32) {
+ let idx = _IOC_NR(bc) as usize;
+ if let Some(bc_ref) = self.bc.get(idx) {
+ bc_ref.fetch_add(1, Relaxed);
+ }
+ }
+
+ pub(crate) fn inc_br(&self, br: u32) {
+ let idx = _IOC_NR(br) as usize;
+ if let Some(br_ref) = self.br.get(idx) {
+ br_ref.fetch_add(1, Relaxed);
+ }
+ }
+
+ pub(crate) fn debug_print(&self, prefix: &str, m: &SeqFile) {
+ for (i, cnt) in self.bc.iter().enumerate() {
+ let cnt = cnt.load(Relaxed);
+ if cnt > 0 {
+ seq_print!(m, "{}{}: {}\n", prefix, command_string(i), cnt);
+ }
+ }
+ for (i, cnt) in self.br.iter().enumerate() {
+ let cnt = cnt.load(Relaxed);
+ if cnt > 0 {
+ seq_print!(m, "{}{}: {}\n", prefix, return_string(i), cnt);
+ }
+ }
+ }
+}
+
+mod strings {
+ use core::str::from_utf8_unchecked;
+ use kernel::str::{CStr, CStrExt as _};
+
+ extern "C" {
+ static binder_command_strings: [*const u8; super::BC_COUNT];
+ static binder_return_strings: [*const u8; super::BR_COUNT];
+ }
+
+ pub(super) fn command_string(i: usize) -> &'static str {
+ // SAFETY: Accessing `binder_command_strings` is always safe.
+ let c_str_ptr = unsafe { binder_command_strings[i] };
+ // SAFETY: The `binder_command_strings` array only contains nul-terminated strings.
+ let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.to_bytes();
+ // SAFETY: The `binder_command_strings` array only contains strings with ascii-chars.
+ unsafe { from_utf8_unchecked(bytes) }
+ }
+
+ pub(super) fn return_string(i: usize) -> &'static str {
+ // SAFETY: Accessing `binder_return_strings` is always safe.
+ let c_str_ptr = unsafe { binder_return_strings[i] };
+ // SAFETY: The `binder_command_strings` array only contains nul-terminated strings.
+ let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.to_bytes();
+ // SAFETY: The `binder_command_strings` array only contains strings with ascii-chars.
+ unsafe { from_utf8_unchecked(bytes) }
+ }
+}
+use strings::{command_string, return_string};
diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs
new file mode 100644
index 000000000000..1a8e6fdc0dc4
--- /dev/null
+++ b/drivers/android/binder/thread.rs
@@ -0,0 +1,1596 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+//! This module defines the `Thread` type, which represents a userspace thread that is using
+//! binder.
+//!
+//! The `Process` object stores all of the threads in an rb tree.
+
+use kernel::{
+ bindings,
+ fs::{File, LocalFile},
+ list::{AtomicTracker, List, ListArc, ListLinks, TryNewListArc},
+ prelude::*,
+ security,
+ seq_file::SeqFile,
+ seq_print,
+ sync::poll::{PollCondVar, PollTable},
+ sync::{Arc, SpinLock},
+ task::Task,
+ types::ARef,
+ uaccess::UserSlice,
+ uapi,
+};
+
+use crate::{
+ allocation::{Allocation, AllocationView, BinderObject, BinderObjectRef, NewAllocation},
+ defs::*,
+ error::BinderResult,
+ process::{GetWorkOrRegister, Process},
+ ptr_align,
+ stats::GLOBAL_STATS,
+ transaction::Transaction,
+ BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverCode, DeliverToRead,
+};
+
+use core::{
+ mem::size_of,
+ sync::atomic::{AtomicU32, Ordering},
+};
+
+/// Stores the layout of the scatter-gather entries. This is used during the `translate_objects`
+/// call and is discarded when it returns.
+struct ScatterGatherState {
+ /// A struct that tracks the amount of unused buffer space.
+ unused_buffer_space: UnusedBufferSpace,
+ /// Scatter-gather entries to copy.
+ sg_entries: KVec<ScatterGatherEntry>,
+ /// Indexes into `sg_entries` corresponding to the last binder_buffer_object that
+ /// was processed and all of its ancestors. The array is in sorted order.
+ ancestors: KVec<usize>,
+}
+
+/// This entry specifies an additional buffer that should be copied using the scatter-gather
+/// mechanism.
+struct ScatterGatherEntry {
+ /// The index in the offset array of the BINDER_TYPE_PTR that this entry originates from.
+ obj_index: usize,
+ /// Offset in target buffer.
+ offset: usize,
+ /// User address in source buffer.
+ sender_uaddr: usize,
+ /// Number of bytes to copy.
+ length: usize,
+ /// The minimum offset of the next fixup in this buffer.
+ fixup_min_offset: usize,
+ /// The offsets within this buffer that contain pointers which should be translated.
+ pointer_fixups: KVec<PointerFixupEntry>,
+}
+
+/// This entry specifies that a fixup should happen at `target_offset` of the
+/// buffer. If `skip` is nonzero, then the fixup is a `binder_fd_array_object`
+/// and is applied later. Otherwise if `skip` is zero, then the size of the
+/// fixup is `sizeof::<u64>()` and `pointer_value` is written to the buffer.
+struct PointerFixupEntry {
+ /// The number of bytes to skip, or zero for a `binder_buffer_object` fixup.
+ skip: usize,
+ /// The translated pointer to write when `skip` is zero.
+ pointer_value: u64,
+ /// The offset at which the value should be written. The offset is relative
+ /// to the original buffer.
+ target_offset: usize,
+}
+
+/// Return type of `apply_and_validate_fixup_in_parent`.
+struct ParentFixupInfo {
+ /// The index of the parent buffer in `sg_entries`.
+ parent_sg_index: usize,
+ /// The number of ancestors of the buffer.
+ ///
+ /// The buffer is considered an ancestor of itself, so this is always at
+ /// least one.
+ num_ancestors: usize,
+ /// New value of `fixup_min_offset` if this fixup is applied.
+ new_min_offset: usize,
+ /// The offset of the fixup in the target buffer.
+ target_offset: usize,
+}
+
+impl ScatterGatherState {
+ /// Called when a `binder_buffer_object` or `binder_fd_array_object` tries
+ /// to access a region in its parent buffer. These accesses have various
+ /// restrictions, which this method verifies.
+ ///
+ /// The `parent_offset` and `length` arguments describe the offset and
+ /// length of the access in the parent buffer.
+ ///
+ /// # Detailed restrictions
+ ///
+ /// Obviously the fixup must be in-bounds for the parent buffer.
+ ///
+ /// For safety reasons, we only allow fixups inside a buffer to happen
+ /// at increasing offsets; additionally, we only allow fixup on the last
+ /// buffer object that was verified, or one of its parents.
+ ///
+ /// Example of what is allowed:
+ ///
+ /// A
+ /// B (parent = A, offset = 0)
+ /// C (parent = A, offset = 16)
+ /// D (parent = C, offset = 0)
+ /// E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset)
+ ///
+ /// Examples of what is not allowed:
+ ///
+ /// Decreasing offsets within the same parent:
+ /// A
+ /// C (parent = A, offset = 16)
+ /// B (parent = A, offset = 0) // decreasing offset within A
+ ///
+ /// Arcerring to a parent that wasn't the last object or any of its parents:
+ /// A
+ /// B (parent = A, offset = 0)
+ /// C (parent = A, offset = 0)
+ /// C (parent = A, offset = 16)
+ /// D (parent = B, offset = 0) // B is not A or any of A's parents
+ fn validate_parent_fixup(
+ &self,
+ parent: usize,
+ parent_offset: usize,
+ length: usize,
+ ) -> Result<ParentFixupInfo> {
+ // Using `position` would also be correct, but `rposition` avoids
+ // quadratic running times.
+ let ancestors_i = self
+ .ancestors
+ .iter()
+ .copied()
+ .rposition(|sg_idx| self.sg_entries[sg_idx].obj_index == parent)
+ .ok_or(EINVAL)?;
+ let sg_idx = self.ancestors[ancestors_i];
+ let sg_entry = match self.sg_entries.get(sg_idx) {
+ Some(sg_entry) => sg_entry,
+ None => {
+ pr_err!(
+ "self.ancestors[{}] is {}, but self.sg_entries.len() is {}",
+ ancestors_i,
+ sg_idx,
+ self.sg_entries.len()
+ );
+ return Err(EINVAL);
+ }
+ };
+ if sg_entry.fixup_min_offset > parent_offset {
+ pr_warn!(
+ "validate_parent_fixup: fixup_min_offset={}, parent_offset={}",
+ sg_entry.fixup_min_offset,
+ parent_offset
+ );
+ return Err(EINVAL);
+ }
+ let new_min_offset = parent_offset.checked_add(length).ok_or(EINVAL)?;
+ if new_min_offset > sg_entry.length {
+ pr_warn!(
+ "validate_parent_fixup: new_min_offset={}, sg_entry.length={}",
+ new_min_offset,
+ sg_entry.length
+ );
+ return Err(EINVAL);
+ }
+ let target_offset = sg_entry.offset.checked_add(parent_offset).ok_or(EINVAL)?;
+ // The `ancestors_i + 1` operation can't overflow since the output of the addition is at
+ // most `self.ancestors.len()`, which also fits in a usize.
+ Ok(ParentFixupInfo {
+ parent_sg_index: sg_idx,
+ num_ancestors: ancestors_i + 1,
+ new_min_offset,
+ target_offset,
+ })
+ }
+}
+
+/// Keeps track of how much unused buffer space is left. The initial amount is the number of bytes
+/// requested by the user using the `buffers_size` field of `binder_transaction_data_sg`. Each time
+/// we translate an object of type `BINDER_TYPE_PTR`, some of the unused buffer space is consumed.
+struct UnusedBufferSpace {
+ /// The start of the remaining space.
+ offset: usize,
+ /// The end of the remaining space.
+ limit: usize,
+}
+impl UnusedBufferSpace {
+ /// Claim the next `size` bytes from the unused buffer space. The offset for the claimed chunk
+ /// into the buffer is returned.
+ fn claim_next(&mut self, size: usize) -> Result<usize> {
+ // We require every chunk to be aligned.
+ let size = ptr_align(size).ok_or(EINVAL)?;
+ let new_offset = self.offset.checked_add(size).ok_or(EINVAL)?;
+
+ if new_offset <= self.limit {
+ let offset = self.offset;
+ self.offset = new_offset;
+ Ok(offset)
+ } else {
+ Err(EINVAL)
+ }
+ }
+}
+
+pub(crate) enum PushWorkRes {
+ Ok,
+ FailedDead(DLArc<dyn DeliverToRead>),
+}
+
+impl PushWorkRes {
+ fn is_ok(&self) -> bool {
+ match self {
+ PushWorkRes::Ok => true,
+ PushWorkRes::FailedDead(_) => false,
+ }
+ }
+}
+
+/// The fields of `Thread` protected by the spinlock.
+struct InnerThread {
+ /// Determines the looper state of the thread. It is a bit-wise combination of the constants
+ /// prefixed with `LOOPER_`.
+ looper_flags: u32,
+
+ /// Determines whether the looper should return.
+ looper_need_return: bool,
+
+ /// Determines if thread is dead.
+ is_dead: bool,
+
+ /// Work item used to deliver error codes to the thread that started a transaction. Stored here
+ /// so that it can be reused.
+ reply_work: DArc<ThreadError>,
+
+ /// Work item used to deliver error codes to the current thread. Stored here so that it can be
+ /// reused.
+ return_work: DArc<ThreadError>,
+
+ /// Determines whether the work list below should be processed. When set to false, `work_list`
+ /// is treated as if it were empty.
+ process_work_list: bool,
+ /// List of work items to deliver to userspace.
+ work_list: List<DTRWrap<dyn DeliverToRead>>,
+ current_transaction: Option<DArc<Transaction>>,
+
+ /// Extended error information for this thread.
+ extended_error: ExtendedError,
+}
+
+const LOOPER_REGISTERED: u32 = 0x01;
+const LOOPER_ENTERED: u32 = 0x02;
+const LOOPER_EXITED: u32 = 0x04;
+const LOOPER_INVALID: u32 = 0x08;
+const LOOPER_WAITING: u32 = 0x10;
+const LOOPER_WAITING_PROC: u32 = 0x20;
+const LOOPER_POLL: u32 = 0x40;
+
+impl InnerThread {
+ fn new() -> Result<Self> {
+ fn next_err_id() -> u32 {
+ static EE_ID: AtomicU32 = AtomicU32::new(0);
+ EE_ID.fetch_add(1, Ordering::Relaxed)
+ }
+
+ Ok(Self {
+ looper_flags: 0,
+ looper_need_return: false,
+ is_dead: false,
+ process_work_list: false,
+ reply_work: ThreadError::try_new()?,
+ return_work: ThreadError::try_new()?,
+ work_list: List::new(),
+ current_transaction: None,
+ extended_error: ExtendedError::new(next_err_id(), BR_OK, 0),
+ })
+ }
+
+ fn pop_work(&mut self) -> Option<DLArc<dyn DeliverToRead>> {
+ if !self.process_work_list {
+ return None;
+ }
+
+ let ret = self.work_list.pop_front();
+ self.process_work_list = !self.work_list.is_empty();
+ ret
+ }
+
+ fn push_work(&mut self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes {
+ if self.is_dead {
+ PushWorkRes::FailedDead(work)
+ } else {
+ self.work_list.push_back(work);
+ self.process_work_list = true;
+ PushWorkRes::Ok
+ }
+ }
+
+ fn push_reply_work(&mut self, code: u32) {
+ if let Ok(work) = ListArc::try_from_arc(self.reply_work.clone()) {
+ work.set_error_code(code);
+ self.push_work(work);
+ } else {
+ pr_warn!("Thread reply work is already in use.");
+ }
+ }
+
+ fn push_return_work(&mut self, reply: u32) {
+ if let Ok(work) = ListArc::try_from_arc(self.return_work.clone()) {
+ work.set_error_code(reply);
+ self.push_work(work);
+ } else {
+ pr_warn!("Thread return work is already in use.");
+ }
+ }
+
+ /// Used to push work items that do not need to be processed immediately and can wait until the
+ /// thread gets another work item.
+ fn push_work_deferred(&mut self, work: DLArc<dyn DeliverToRead>) {
+ self.work_list.push_back(work);
+ }
+
+ /// Fetches the transaction this thread can reply to. If the thread has a pending transaction
+ /// (that it could respond to) but it has also issued a transaction, it must first wait for the
+ /// previously-issued transaction to complete.
+ ///
+ /// The `thread` parameter should be the thread containing this `ThreadInner`.
+ fn pop_transaction_to_reply(&mut self, thread: &Thread) -> Result<DArc<Transaction>> {
+ let transaction = self.current_transaction.take().ok_or(EINVAL)?;
+ if core::ptr::eq(thread, transaction.from.as_ref()) {
+ self.current_transaction = Some(transaction);
+ return Err(EINVAL);
+ }
+ // Find a new current transaction for this thread.
+ self.current_transaction = transaction.find_from(thread).cloned();
+ Ok(transaction)
+ }
+
+ fn pop_transaction_replied(&mut self, transaction: &DArc<Transaction>) -> bool {
+ match self.current_transaction.take() {
+ None => false,
+ Some(old) => {
+ if !Arc::ptr_eq(transaction, &old) {
+ self.current_transaction = Some(old);
+ return false;
+ }
+ self.current_transaction = old.clone_next();
+ true
+ }
+ }
+ }
+
+ fn looper_enter(&mut self) {
+ self.looper_flags |= LOOPER_ENTERED;
+ if self.looper_flags & LOOPER_REGISTERED != 0 {
+ self.looper_flags |= LOOPER_INVALID;
+ }
+ }
+
+ fn looper_register(&mut self, valid: bool) {
+ self.looper_flags |= LOOPER_REGISTERED;
+ if !valid || self.looper_flags & LOOPER_ENTERED != 0 {
+ self.looper_flags |= LOOPER_INVALID;
+ }
+ }
+
+ fn looper_exit(&mut self) {
+ self.looper_flags |= LOOPER_EXITED;
+ }
+
+ /// Determines whether the thread is part of a pool, i.e., if it is a looper.
+ fn is_looper(&self) -> bool {
+ self.looper_flags & (LOOPER_ENTERED | LOOPER_REGISTERED) != 0
+ }
+
+ /// Determines whether the thread should attempt to fetch work items from the process queue.
+ /// This is generally case when the thread is registered as a looper and not part of a
+ /// transaction stack. But if there is local work, we want to return to userspace before we
+ /// deliver any remote work.
+ fn should_use_process_work_queue(&self) -> bool {
+ self.current_transaction.is_none() && !self.process_work_list && self.is_looper()
+ }
+
+ fn poll(&mut self) -> u32 {
+ self.looper_flags |= LOOPER_POLL;
+ if self.process_work_list || self.looper_need_return {
+ bindings::POLLIN
+ } else {
+ 0
+ }
+ }
+}
+
+/// This represents a thread that's used with binder.
+#[pin_data]
+pub(crate) struct Thread {
+ pub(crate) id: i32,
+ pub(crate) process: Arc<Process>,
+ pub(crate) task: ARef<Task>,
+ #[pin]
+ inner: SpinLock<InnerThread>,
+ #[pin]
+ work_condvar: PollCondVar,
+ /// Used to insert this thread into the process' `ready_threads` list.
+ ///
+ /// INVARIANT: May never be used for any other list than the `self.process.ready_threads`.
+ #[pin]
+ links: ListLinks,
+ #[pin]
+ links_track: AtomicTracker,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for Thread {
+ tracked_by links_track: AtomicTracker;
+ }
+}
+kernel::list::impl_list_item! {
+ impl ListItem<0> for Thread {
+ using ListLinks { self.links };
+ }
+}
+
+impl Thread {
+ pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> {
+ let inner = InnerThread::new()?;
+
+ Arc::pin_init(
+ try_pin_init!(Thread {
+ id,
+ process,
+ task: ARef::from(&**kernel::current!()),
+ inner <- kernel::new_spinlock!(inner, "Thread::inner"),
+ work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"),
+ links <- ListLinks::new(),
+ links_track <- AtomicTracker::new(),
+ }),
+ GFP_KERNEL,
+ )
+ }
+
+ #[inline(never)]
+ pub(crate) fn debug_print(self: &Arc<Self>, m: &SeqFile, print_all: bool) -> Result<()> {
+ let inner = self.inner.lock();
+
+ if print_all || inner.current_transaction.is_some() || !inner.work_list.is_empty() {
+ seq_print!(
+ m,
+ " thread {}: l {:02x} need_return {}\n",
+ self.id,
+ inner.looper_flags,
+ inner.looper_need_return,
+ );
+ }
+
+ let mut t_opt = inner.current_transaction.as_ref();
+ while let Some(t) = t_opt {
+ if Arc::ptr_eq(&t.from, self) {
+ t.debug_print_inner(m, " outgoing transaction ");
+ t_opt = t.from_parent.as_ref();
+ } else if Arc::ptr_eq(&t.to, &self.process) {
+ t.debug_print_inner(m, " incoming transaction ");
+ t_opt = t.find_from(self);
+ } else {
+ t.debug_print_inner(m, " bad transaction ");
+ t_opt = None;
+ }
+ }
+
+ for work in &inner.work_list {
+ work.debug_print(m, " ", " pending transaction ")?;
+ }
+ Ok(())
+ }
+
+ pub(crate) fn get_extended_error(&self, data: UserSlice) -> Result {
+ let mut writer = data.writer();
+ let ee = self.inner.lock().extended_error;
+ writer.write(&ee)?;
+ Ok(())
+ }
+
+ pub(crate) fn set_current_transaction(&self, transaction: DArc<Transaction>) {
+ self.inner.lock().current_transaction = Some(transaction);
+ }
+
+ pub(crate) fn has_current_transaction(&self) -> bool {
+ self.inner.lock().current_transaction.is_some()
+ }
+
+ /// Attempts to fetch a work item from the thread-local queue. The behaviour if the queue is
+ /// empty depends on `wait`: if it is true, the function waits for some work to be queued (or a
+ /// signal); otherwise it returns indicating that none is available.
+ fn get_work_local(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> {
+ {
+ let mut inner = self.inner.lock();
+ if inner.looper_need_return {
+ return Ok(inner.pop_work());
+ }
+ }
+
+ // Try once if the caller does not want to wait.
+ if !wait {
+ return self.inner.lock().pop_work().ok_or(EAGAIN).map(Some);
+ }
+
+ // Loop waiting only on the local queue (i.e., not registering with the process queue).
+ let mut inner = self.inner.lock();
+ loop {
+ if let Some(work) = inner.pop_work() {
+ return Ok(Some(work));
+ }
+
+ inner.looper_flags |= LOOPER_WAITING;
+ let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner);
+ inner.looper_flags &= !LOOPER_WAITING;
+
+ if signal_pending {
+ return Err(EINTR);
+ }
+ if inner.looper_need_return {
+ return Ok(None);
+ }
+ }
+ }
+
+ /// Attempts to fetch a work item from the thread-local queue, falling back to the process-wide
+ /// queue if none is available locally.
+ ///
+ /// This must only be called when the thread is not participating in a transaction chain. If it
+ /// is, the local version (`get_work_local`) should be used instead.
+ fn get_work(self: &Arc<Self>, wait: bool) -> Result<Option<DLArc<dyn DeliverToRead>>> {
+ // Try to get work from the thread's work queue, using only a local lock.
+ {
+ let mut inner = self.inner.lock();
+ if let Some(work) = inner.pop_work() {
+ return Ok(Some(work));
+ }
+ if inner.looper_need_return {
+ drop(inner);
+ return Ok(self.process.get_work());
+ }
+ }
+
+ // If the caller doesn't want to wait, try to grab work from the process queue.
+ //
+ // We know nothing will have been queued directly to the thread queue because it is not in
+ // a transaction and it is not in the process' ready list.
+ if !wait {
+ return self.process.get_work().ok_or(EAGAIN).map(Some);
+ }
+
+ // Get work from the process queue. If none is available, atomically register as ready.
+ let reg = match self.process.get_work_or_register(self) {
+ GetWorkOrRegister::Work(work) => return Ok(Some(work)),
+ GetWorkOrRegister::Register(reg) => reg,
+ };
+
+ let mut inner = self.inner.lock();
+ loop {
+ if let Some(work) = inner.pop_work() {
+ return Ok(Some(work));
+ }
+
+ inner.looper_flags |= LOOPER_WAITING | LOOPER_WAITING_PROC;
+ let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner);
+ inner.looper_flags &= !(LOOPER_WAITING | LOOPER_WAITING_PROC);
+
+ if signal_pending || inner.looper_need_return {
+ // We need to return now. We need to pull the thread off the list of ready threads
+ // (by dropping `reg`), then check the state again after it's off the list to
+ // ensure that something was not queued in the meantime. If something has been
+ // queued, we just return it (instead of the error).
+ drop(inner);
+ drop(reg);
+
+ let res = match self.inner.lock().pop_work() {
+ Some(work) => Ok(Some(work)),
+ None if signal_pending => Err(EINTR),
+ None => Ok(None),
+ };
+ return res;
+ }
+ }
+ }
+
+ /// Push the provided work item to be delivered to user space via this thread.
+ ///
+ /// Returns whether the item was successfully pushed. This can only fail if the thread is dead.
+ pub(crate) fn push_work(&self, work: DLArc<dyn DeliverToRead>) -> PushWorkRes {
+ let sync = work.should_sync_wakeup();
+
+ let res = self.inner.lock().push_work(work);
+
+ if res.is_ok() {
+ if sync {
+ self.work_condvar.notify_sync();
+ } else {
+ self.work_condvar.notify_one();
+ }
+ }
+
+ res
+ }
+
+ /// Attempts to push to given work item to the thread if it's a looper thread (i.e., if it's
+ /// part of a thread pool) and is alive. Otherwise, push the work item to the process instead.
+ pub(crate) fn push_work_if_looper(&self, work: DLArc<dyn DeliverToRead>) -> BinderResult {
+ let mut inner = self.inner.lock();
+ if inner.is_looper() && !inner.is_dead {
+ inner.push_work(work);
+ Ok(())
+ } else {
+ drop(inner);
+ self.process.push_work(work)
+ }
+ }
+
+ pub(crate) fn push_work_deferred(&self, work: DLArc<dyn DeliverToRead>) {
+ self.inner.lock().push_work_deferred(work);
+ }
+
+ pub(crate) fn push_return_work(&self, reply: u32) {
+ self.inner.lock().push_return_work(reply);
+ }
+
+ fn translate_object(
+ &self,
+ obj_index: usize,
+ offset: usize,
+ object: BinderObjectRef<'_>,
+ view: &mut AllocationView<'_>,
+ allow_fds: bool,
+ sg_state: &mut ScatterGatherState,
+ ) -> BinderResult {
+ match object {
+ BinderObjectRef::Binder(obj) => {
+ let strong = obj.hdr.type_ == BINDER_TYPE_BINDER;
+ // SAFETY: `binder` is a `binder_uintptr_t`; any bit pattern is a valid
+ // representation.
+ let ptr = unsafe { obj.__bindgen_anon_1.binder } as _;
+ let cookie = obj.cookie as _;
+ let flags = obj.flags as _;
+ let node = self
+ .process
+ .as_arc_borrow()
+ .get_node(ptr, cookie, flags, strong, self)?;
+ security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?;
+ view.transfer_binder_object(offset, obj, strong, node)?;
+ }
+ BinderObjectRef::Handle(obj) => {
+ let strong = obj.hdr.type_ == BINDER_TYPE_HANDLE;
+ // SAFETY: `handle` is a `u32`; any bit pattern is a valid representation.
+ let handle = unsafe { obj.__bindgen_anon_1.handle } as _;
+ let node = self.process.get_node_from_handle(handle, strong)?;
+ security::binder_transfer_binder(&self.process.cred, &view.alloc.process.cred)?;
+ view.transfer_binder_object(offset, obj, strong, node)?;
+ }
+ BinderObjectRef::Fd(obj) => {
+ if !allow_fds {
+ return Err(EPERM.into());
+ }
+
+ // SAFETY: `fd` is a `u32`; any bit pattern is a valid representation.
+ let fd = unsafe { obj.__bindgen_anon_1.fd };
+ let file = LocalFile::fget(fd)?;
+ // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an
+ // ioctl, so there are no active calls to `fdget_pos` on this thread.
+ let file = unsafe { LocalFile::assume_no_fdget_pos(file) };
+ security::binder_transfer_file(
+ &self.process.cred,
+ &view.alloc.process.cred,
+ &file,
+ )?;
+
+ let mut obj_write = BinderFdObject::default();
+ obj_write.hdr.type_ = BINDER_TYPE_FD;
+ // This will be overwritten with the actual fd when the transaction is received.
+ obj_write.__bindgen_anon_1.fd = u32::MAX;
+ obj_write.cookie = obj.cookie;
+ view.write::<BinderFdObject>(offset, &obj_write)?;
+
+ const FD_FIELD_OFFSET: usize =
+ core::mem::offset_of!(uapi::binder_fd_object, __bindgen_anon_1.fd);
+
+ let field_offset = offset + FD_FIELD_OFFSET;
+
+ view.alloc.info_add_fd(file, field_offset, false)?;
+ }
+ BinderObjectRef::Ptr(obj) => {
+ let obj_length = obj.length.try_into().map_err(|_| EINVAL)?;
+ let alloc_offset = match sg_state.unused_buffer_space.claim_next(obj_length) {
+ Ok(alloc_offset) => alloc_offset,
+ Err(err) => {
+ pr_warn!(
+ "Failed to claim space for a BINDER_TYPE_PTR. (offset: {}, limit: {}, size: {})",
+ sg_state.unused_buffer_space.offset,
+ sg_state.unused_buffer_space.limit,
+ obj_length,
+ );
+ return Err(err.into());
+ }
+ };
+
+ let sg_state_idx = sg_state.sg_entries.len();
+ sg_state.sg_entries.push(
+ ScatterGatherEntry {
+ obj_index,
+ offset: alloc_offset,
+ sender_uaddr: obj.buffer as _,
+ length: obj_length,
+ pointer_fixups: KVec::new(),
+ fixup_min_offset: 0,
+ },
+ GFP_KERNEL,
+ )?;
+
+ let buffer_ptr_in_user_space = (view.alloc.ptr + alloc_offset) as u64;
+
+ if obj.flags & uapi::BINDER_BUFFER_FLAG_HAS_PARENT == 0 {
+ sg_state.ancestors.clear();
+ sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?;
+ } else {
+ // Another buffer also has a pointer to this buffer, and we need to fixup that
+ // pointer too.
+
+ let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?;
+ let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?;
+
+ let info = sg_state.validate_parent_fixup(
+ parent_index,
+ parent_offset,
+ size_of::<u64>(),
+ )?;
+
+ sg_state.ancestors.truncate(info.num_ancestors);
+ sg_state.ancestors.push(sg_state_idx, GFP_KERNEL)?;
+
+ let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) {
+ Some(parent_entry) => parent_entry,
+ None => {
+ pr_err!(
+ "validate_parent_fixup returned index out of bounds for sg.entries"
+ );
+ return Err(EINVAL.into());
+ }
+ };
+
+ parent_entry.fixup_min_offset = info.new_min_offset;
+ parent_entry.pointer_fixups.push(
+ PointerFixupEntry {
+ skip: 0,
+ pointer_value: buffer_ptr_in_user_space,
+ target_offset: info.target_offset,
+ },
+ GFP_KERNEL,
+ )?;
+ }
+
+ let mut obj_write = BinderBufferObject::default();
+ obj_write.hdr.type_ = BINDER_TYPE_PTR;
+ obj_write.flags = obj.flags;
+ obj_write.buffer = buffer_ptr_in_user_space;
+ obj_write.length = obj.length;
+ obj_write.parent = obj.parent;
+ obj_write.parent_offset = obj.parent_offset;
+ view.write::<BinderBufferObject>(offset, &obj_write)?;
+ }
+ BinderObjectRef::Fda(obj) => {
+ if !allow_fds {
+ return Err(EPERM.into());
+ }
+ let parent_index = usize::try_from(obj.parent).map_err(|_| EINVAL)?;
+ let parent_offset = usize::try_from(obj.parent_offset).map_err(|_| EINVAL)?;
+ let num_fds = usize::try_from(obj.num_fds).map_err(|_| EINVAL)?;
+ let fds_len = num_fds.checked_mul(size_of::<u32>()).ok_or(EINVAL)?;
+
+ let info = sg_state.validate_parent_fixup(parent_index, parent_offset, fds_len)?;
+ view.alloc.info_add_fd_reserve(num_fds)?;
+
+ sg_state.ancestors.truncate(info.num_ancestors);
+ let parent_entry = match sg_state.sg_entries.get_mut(info.parent_sg_index) {
+ Some(parent_entry) => parent_entry,
+ None => {
+ pr_err!(
+ "validate_parent_fixup returned index out of bounds for sg.entries"
+ );
+ return Err(EINVAL.into());
+ }
+ };
+
+ parent_entry.fixup_min_offset = info.new_min_offset;
+ parent_entry
+ .pointer_fixups
+ .push(
+ PointerFixupEntry {
+ skip: fds_len,
+ pointer_value: 0,
+ target_offset: info.target_offset,
+ },
+ GFP_KERNEL,
+ )
+ .map_err(|_| ENOMEM)?;
+
+ let fda_uaddr = parent_entry
+ .sender_uaddr
+ .checked_add(parent_offset)
+ .ok_or(EINVAL)?;
+ let mut fda_bytes = KVec::new();
+ UserSlice::new(UserPtr::from_addr(fda_uaddr as _), fds_len)
+ .read_all(&mut fda_bytes, GFP_KERNEL)?;
+
+ if fds_len != fda_bytes.len() {
+ pr_err!("UserSlice::read_all returned wrong length in BINDER_TYPE_FDA");
+ return Err(EINVAL.into());
+ }
+
+ for i in (0..fds_len).step_by(size_of::<u32>()) {
+ let fd = {
+ let mut fd_bytes = [0u8; size_of::<u32>()];
+ fd_bytes.copy_from_slice(&fda_bytes[i..i + size_of::<u32>()]);
+ u32::from_ne_bytes(fd_bytes)
+ };
+
+ let file = LocalFile::fget(fd)?;
+ // SAFETY: The binder driver never calls `fdget_pos` and this code runs from an
+ // ioctl, so there are no active calls to `fdget_pos` on this thread.
+ let file = unsafe { LocalFile::assume_no_fdget_pos(file) };
+ security::binder_transfer_file(
+ &self.process.cred,
+ &view.alloc.process.cred,
+ &file,
+ )?;
+
+ // The `validate_parent_fixup` call ensuers that this addition will not
+ // overflow.
+ view.alloc.info_add_fd(file, info.target_offset + i, true)?;
+ }
+ drop(fda_bytes);
+
+ let mut obj_write = BinderFdArrayObject::default();
+ obj_write.hdr.type_ = BINDER_TYPE_FDA;
+ obj_write.num_fds = obj.num_fds;
+ obj_write.parent = obj.parent;
+ obj_write.parent_offset = obj.parent_offset;
+ view.write::<BinderFdArrayObject>(offset, &obj_write)?;
+ }
+ }
+ Ok(())
+ }
+
+ fn apply_sg(&self, alloc: &mut Allocation, sg_state: &mut ScatterGatherState) -> BinderResult {
+ for sg_entry in &mut sg_state.sg_entries {
+ let mut end_of_previous_fixup = sg_entry.offset;
+ let offset_end = sg_entry.offset.checked_add(sg_entry.length).ok_or(EINVAL)?;
+
+ let mut reader =
+ UserSlice::new(UserPtr::from_addr(sg_entry.sender_uaddr), sg_entry.length).reader();
+ for fixup in &mut sg_entry.pointer_fixups {
+ let fixup_len = if fixup.skip == 0 {
+ size_of::<u64>()
+ } else {
+ fixup.skip
+ };
+
+ let target_offset_end = fixup.target_offset.checked_add(fixup_len).ok_or(EINVAL)?;
+ if fixup.target_offset < end_of_previous_fixup || offset_end < target_offset_end {
+ pr_warn!(
+ "Fixups oob {} {} {} {}",
+ fixup.target_offset,
+ end_of_previous_fixup,
+ offset_end,
+ target_offset_end
+ );
+ return Err(EINVAL.into());
+ }
+
+ let copy_off = end_of_previous_fixup;
+ let copy_len = fixup.target_offset - end_of_previous_fixup;
+ if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) {
+ pr_warn!("Failed copying into alloc: {:?}", err);
+ return Err(err.into());
+ }
+ if fixup.skip == 0 {
+ let res = alloc.write::<u64>(fixup.target_offset, &fixup.pointer_value);
+ if let Err(err) = res {
+ pr_warn!("Failed copying ptr into alloc: {:?}", err);
+ return Err(err.into());
+ }
+ }
+ if let Err(err) = reader.skip(fixup_len) {
+ pr_warn!("Failed skipping {} from reader: {:?}", fixup_len, err);
+ return Err(err.into());
+ }
+ end_of_previous_fixup = target_offset_end;
+ }
+ let copy_off = end_of_previous_fixup;
+ let copy_len = offset_end - end_of_previous_fixup;
+ if let Err(err) = alloc.copy_into(&mut reader, copy_off, copy_len) {
+ pr_warn!("Failed copying remainder into alloc: {:?}", err);
+ return Err(err.into());
+ }
+ }
+ Ok(())
+ }
+
+ /// This method copies the payload of a transaction into the target process.
+ ///
+ /// The resulting payload will have several different components, which will be stored next to
+ /// each other in the allocation. Furthermore, various objects can be embedded in the payload,
+ /// and those objects have to be translated so that they make sense to the target transaction.
+ pub(crate) fn copy_transaction_data(
+ &self,
+ to_process: Arc<Process>,
+ tr: &BinderTransactionDataSg,
+ debug_id: usize,
+ allow_fds: bool,
+ txn_security_ctx_offset: Option<&mut usize>,
+ ) -> BinderResult<NewAllocation> {
+ let trd = &tr.transaction_data;
+ let is_oneway = trd.flags & TF_ONE_WAY != 0;
+ let mut secctx = if let Some(offset) = txn_security_ctx_offset {
+ let secid = self.process.cred.get_secid();
+ let ctx = match security::SecurityCtx::from_secid(secid) {
+ Ok(ctx) => ctx,
+ Err(err) => {
+ pr_warn!("Failed to get security ctx for id {}: {:?}", secid, err);
+ return Err(err.into());
+ }
+ };
+ Some((offset, ctx))
+ } else {
+ None
+ };
+
+ let data_size = trd.data_size.try_into().map_err(|_| EINVAL)?;
+ let aligned_data_size = ptr_align(data_size).ok_or(EINVAL)?;
+ let offsets_size = trd.offsets_size.try_into().map_err(|_| EINVAL)?;
+ let aligned_offsets_size = ptr_align(offsets_size).ok_or(EINVAL)?;
+ let buffers_size = tr.buffers_size.try_into().map_err(|_| EINVAL)?;
+ let aligned_buffers_size = ptr_align(buffers_size).ok_or(EINVAL)?;
+ let aligned_secctx_size = match secctx.as_ref() {
+ Some((_offset, ctx)) => ptr_align(ctx.len()).ok_or(EINVAL)?,
+ None => 0,
+ };
+
+ // This guarantees that at least `sizeof(usize)` bytes will be allocated.
+ let len = usize::max(
+ aligned_data_size
+ .checked_add(aligned_offsets_size)
+ .and_then(|sum| sum.checked_add(aligned_buffers_size))
+ .and_then(|sum| sum.checked_add(aligned_secctx_size))
+ .ok_or(ENOMEM)?,
+ size_of::<usize>(),
+ );
+ let secctx_off = aligned_data_size + aligned_offsets_size + aligned_buffers_size;
+ let mut alloc =
+ match to_process.buffer_alloc(debug_id, len, is_oneway, self.process.task.pid()) {
+ Ok(alloc) => alloc,
+ Err(err) => {
+ pr_warn!(
+ "Failed to allocate buffer. len:{}, is_oneway:{}",
+ len,
+ is_oneway
+ );
+ return Err(err);
+ }
+ };
+
+ // SAFETY: This accesses a union field, but it's okay because the field's type is valid for
+ // all bit-patterns.
+ let trd_data_ptr = unsafe { &trd.data.ptr };
+ let mut buffer_reader =
+ UserSlice::new(UserPtr::from_addr(trd_data_ptr.buffer as _), data_size).reader();
+ let mut end_of_previous_object = 0;
+ let mut sg_state = None;
+
+ // Copy offsets if there are any.
+ if offsets_size > 0 {
+ {
+ let mut reader =
+ UserSlice::new(UserPtr::from_addr(trd_data_ptr.offsets as _), offsets_size)
+ .reader();
+ alloc.copy_into(&mut reader, aligned_data_size, offsets_size)?;
+ }
+
+ let offsets_start = aligned_data_size;
+ let offsets_end = aligned_data_size + aligned_offsets_size;
+
+ // This state is used for BINDER_TYPE_PTR objects.
+ let sg_state = sg_state.insert(ScatterGatherState {
+ unused_buffer_space: UnusedBufferSpace {
+ offset: offsets_end,
+ limit: len,
+ },
+ sg_entries: KVec::new(),
+ ancestors: KVec::new(),
+ });
+
+ // Traverse the objects specified.
+ let mut view = AllocationView::new(&mut alloc, data_size);
+ for (index, index_offset) in (offsets_start..offsets_end)
+ .step_by(size_of::<usize>())
+ .enumerate()
+ {
+ let offset = view.alloc.read(index_offset)?;
+
+ if offset < end_of_previous_object {
+ pr_warn!("Got transaction with invalid offset.");
+ return Err(EINVAL.into());
+ }
+
+ // Copy data between two objects.
+ if end_of_previous_object < offset {
+ view.copy_into(
+ &mut buffer_reader,
+ end_of_previous_object,
+ offset - end_of_previous_object,
+ )?;
+ }
+
+ let mut object = BinderObject::read_from(&mut buffer_reader)?;
+
+ match self.translate_object(
+ index,
+ offset,
+ object.as_ref(),
+ &mut view,
+ allow_fds,
+ sg_state,
+ ) {
+ Ok(()) => end_of_previous_object = offset + object.size(),
+ Err(err) => {
+ pr_warn!("Error while translating object.");
+ return Err(err);
+ }
+ }
+
+ // Update the indexes containing objects to clean up.
+ let offset_after_object = index_offset + size_of::<usize>();
+ view.alloc
+ .set_info_offsets(offsets_start..offset_after_object);
+ }
+ }
+
+ // Copy remaining raw data.
+ alloc.copy_into(
+ &mut buffer_reader,
+ end_of_previous_object,
+ data_size - end_of_previous_object,
+ )?;
+
+ if let Some(sg_state) = sg_state.as_mut() {
+ if let Err(err) = self.apply_sg(&mut alloc, sg_state) {
+ pr_warn!("Failure in apply_sg: {:?}", err);
+ return Err(err);
+ }
+ }
+
+ if let Some((off_out, secctx)) = secctx.as_mut() {
+ if let Err(err) = alloc.write(secctx_off, secctx.as_bytes()) {
+ pr_warn!("Failed to write security context: {:?}", err);
+ return Err(err.into());
+ }
+ **off_out = secctx_off;
+ }
+ Ok(alloc)
+ }
+
+ fn unwind_transaction_stack(self: &Arc<Self>) {
+ let mut thread = self.clone();
+ while let Ok(transaction) = {
+ let mut inner = thread.inner.lock();
+ inner.pop_transaction_to_reply(thread.as_ref())
+ } {
+ let reply = Err(BR_DEAD_REPLY);
+ if !transaction.from.deliver_single_reply(reply, &transaction) {
+ break;
+ }
+
+ thread = transaction.from.clone();
+ }
+ }
+
+ pub(crate) fn deliver_reply(
+ &self,
+ reply: Result<DLArc<Transaction>, u32>,
+ transaction: &DArc<Transaction>,
+ ) {
+ if self.deliver_single_reply(reply, transaction) {
+ transaction.from.unwind_transaction_stack();
+ }
+ }
+
+ /// Delivers a reply to the thread that started a transaction. The reply can either be a
+ /// reply-transaction or an error code to be delivered instead.
+ ///
+ /// Returns whether the thread is dead. If it is, the caller is expected to unwind the
+ /// transaction stack by completing transactions for threads that are dead.
+ fn deliver_single_reply(
+ &self,
+ reply: Result<DLArc<Transaction>, u32>,
+ transaction: &DArc<Transaction>,
+ ) -> bool {
+ if let Ok(transaction) = &reply {
+ transaction.set_outstanding(&mut self.process.inner.lock());
+ }
+
+ {
+ let mut inner = self.inner.lock();
+ if !inner.pop_transaction_replied(transaction) {
+ return false;
+ }
+
+ if inner.is_dead {
+ return true;
+ }
+
+ match reply {
+ Ok(work) => {
+ inner.push_work(work);
+ }
+ Err(code) => inner.push_reply_work(code),
+ }
+ }
+
+ // Notify the thread now that we've released the inner lock.
+ self.work_condvar.notify_sync();
+ false
+ }
+
+ /// Determines if the given transaction is the current transaction for this thread.
+ fn is_current_transaction(&self, transaction: &DArc<Transaction>) -> bool {
+ let inner = self.inner.lock();
+ match &inner.current_transaction {
+ None => false,
+ Some(current) => Arc::ptr_eq(current, transaction),
+ }
+ }
+
+ /// Determines the current top of the transaction stack. It fails if the top is in another
+ /// thread (i.e., this thread belongs to a stack but it has called another thread). The top is
+ /// [`None`] if the thread is not currently participating in a transaction stack.
+ fn top_of_transaction_stack(&self) -> Result<Option<DArc<Transaction>>> {
+ let inner = self.inner.lock();
+ if let Some(cur) = &inner.current_transaction {
+ if core::ptr::eq(self, cur.from.as_ref()) {
+ pr_warn!("got new transaction with bad transaction stack");
+ return Err(EINVAL);
+ }
+ Ok(Some(cur.clone()))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn transaction<T>(self: &Arc<Self>, tr: &BinderTransactionDataSg, inner: T)
+ where
+ T: FnOnce(&Arc<Self>, &BinderTransactionDataSg) -> BinderResult,
+ {
+ if let Err(err) = inner(self, tr) {
+ if err.should_pr_warn() {
+ let mut ee = self.inner.lock().extended_error;
+ ee.command = err.reply;
+ ee.param = err.as_errno();
+ pr_warn!(
+ "Transaction failed: {:?} my_pid:{}",
+ err,
+ self.process.pid_in_current_ns()
+ );
+ }
+
+ self.push_return_work(err.reply);
+ }
+ }
+
+ fn transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult {
+ // SAFETY: Handle's type has no invalid bit patterns.
+ let handle = unsafe { tr.transaction_data.target.handle };
+ let node_ref = self.process.get_transaction_node(handle)?;
+ security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?;
+ // TODO: We need to ensure that there isn't a pending transaction in the work queue. How
+ // could this happen?
+ let top = self.top_of_transaction_stack()?;
+ let list_completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?;
+ let completion = list_completion.clone_arc();
+ let transaction = Transaction::new(node_ref, top, self, tr)?;
+
+ // Check that the transaction stack hasn't changed while the lock was released, then update
+ // it with the new transaction.
+ {
+ let mut inner = self.inner.lock();
+ if !transaction.is_stacked_on(&inner.current_transaction) {
+ pr_warn!("Transaction stack changed during transaction!");
+ return Err(EINVAL.into());
+ }
+ inner.current_transaction = Some(transaction.clone_arc());
+ // We push the completion as a deferred work so that we wait for the reply before
+ // returning to userland.
+ inner.push_work_deferred(list_completion);
+ }
+
+ if let Err(e) = transaction.submit() {
+ completion.skip();
+ // Define `transaction` first to drop it after `inner`.
+ let transaction;
+ let mut inner = self.inner.lock();
+ transaction = inner.current_transaction.take().unwrap();
+ inner.current_transaction = transaction.clone_next();
+ Err(e)
+ } else {
+ Ok(())
+ }
+ }
+
+ fn reply_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult {
+ let orig = self.inner.lock().pop_transaction_to_reply(self)?;
+ if !orig.from.is_current_transaction(&orig) {
+ return Err(EINVAL.into());
+ }
+
+ // We need to complete the transaction even if we cannot complete building the reply.
+ let out = (|| -> BinderResult<_> {
+ let completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?;
+ let process = orig.from.process.clone();
+ let allow_fds = orig.flags & TF_ACCEPT_FDS != 0;
+ let reply = Transaction::new_reply(self, process, tr, allow_fds)?;
+ self.inner.lock().push_work(completion);
+ orig.from.deliver_reply(Ok(reply), &orig);
+ Ok(())
+ })()
+ .map_err(|mut err| {
+ // At this point we only return `BR_TRANSACTION_COMPLETE` to the caller, and we must let
+ // the sender know that the transaction has completed (with an error in this case).
+ pr_warn!(
+ "Failure {:?} during reply - delivering BR_FAILED_REPLY to sender.",
+ err
+ );
+ let reply = Err(BR_FAILED_REPLY);
+ orig.from.deliver_reply(reply, &orig);
+ err.reply = BR_TRANSACTION_COMPLETE;
+ err
+ });
+
+ out
+ }
+
+ fn oneway_transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult {
+ // SAFETY: The `handle` field is valid for all possible byte values, so reading from the
+ // union is okay.
+ let handle = unsafe { tr.transaction_data.target.handle };
+ let node_ref = self.process.get_transaction_node(handle)?;
+ security::binder_transaction(&self.process.cred, &node_ref.node.owner.cred)?;
+ let transaction = Transaction::new(node_ref, None, self, tr)?;
+ let code = if self.process.is_oneway_spam_detection_enabled()
+ && transaction.oneway_spam_detected
+ {
+ BR_ONEWAY_SPAM_SUSPECT
+ } else {
+ BR_TRANSACTION_COMPLETE
+ };
+ let list_completion = DTRWrap::arc_try_new(DeliverCode::new(code))?;
+ let completion = list_completion.clone_arc();
+ self.inner.lock().push_work(list_completion);
+ match transaction.submit() {
+ Ok(()) => Ok(()),
+ Err(err) => {
+ completion.skip();
+ Err(err)
+ }
+ }
+ }
+
+ fn write(self: &Arc<Self>, req: &mut BinderWriteRead) -> Result {
+ let write_start = req.write_buffer.wrapping_add(req.write_consumed);
+ let write_len = req.write_size.saturating_sub(req.write_consumed);
+ let mut reader =
+ UserSlice::new(UserPtr::from_addr(write_start as _), write_len as _).reader();
+
+ while reader.len() >= size_of::<u32>() && self.inner.lock().return_work.is_unused() {
+ let before = reader.len();
+ let cmd = reader.read::<u32>()?;
+ GLOBAL_STATS.inc_bc(cmd);
+ self.process.stats.inc_bc(cmd);
+ match cmd {
+ BC_TRANSACTION => {
+ let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0);
+ if tr.transaction_data.flags & TF_ONE_WAY != 0 {
+ self.transaction(&tr, Self::oneway_transaction_inner);
+ } else {
+ self.transaction(&tr, Self::transaction_inner);
+ }
+ }
+ BC_TRANSACTION_SG => {
+ let tr = reader.read::<BinderTransactionDataSg>()?;
+ if tr.transaction_data.flags & TF_ONE_WAY != 0 {
+ self.transaction(&tr, Self::oneway_transaction_inner);
+ } else {
+ self.transaction(&tr, Self::transaction_inner);
+ }
+ }
+ BC_REPLY => {
+ let tr = reader.read::<BinderTransactionData>()?.with_buffers_size(0);
+ self.transaction(&tr, Self::reply_inner)
+ }
+ BC_REPLY_SG => {
+ let tr = reader.read::<BinderTransactionDataSg>()?;
+ self.transaction(&tr, Self::reply_inner)
+ }
+ BC_FREE_BUFFER => {
+ let buffer = self.process.buffer_get(reader.read()?);
+ if let Some(buffer) = buffer {
+ if buffer.looper_need_return_on_free() {
+ self.inner.lock().looper_need_return = true;
+ }
+ drop(buffer);
+ }
+ }
+ BC_INCREFS => {
+ self.process
+ .as_arc_borrow()
+ .update_ref(reader.read()?, true, false)?
+ }
+ BC_ACQUIRE => {
+ self.process
+ .as_arc_borrow()
+ .update_ref(reader.read()?, true, true)?
+ }
+ BC_RELEASE => {
+ self.process
+ .as_arc_borrow()
+ .update_ref(reader.read()?, false, true)?
+ }
+ BC_DECREFS => {
+ self.process
+ .as_arc_borrow()
+ .update_ref(reader.read()?, false, false)?
+ }
+ BC_INCREFS_DONE => self.process.inc_ref_done(&mut reader, false)?,
+ BC_ACQUIRE_DONE => self.process.inc_ref_done(&mut reader, true)?,
+ BC_REQUEST_DEATH_NOTIFICATION => self.process.request_death(&mut reader, self)?,
+ BC_CLEAR_DEATH_NOTIFICATION => self.process.clear_death(&mut reader, self)?,
+ BC_DEAD_BINDER_DONE => self.process.dead_binder_done(reader.read()?, self),
+ BC_REGISTER_LOOPER => {
+ let valid = self.process.register_thread();
+ self.inner.lock().looper_register(valid);
+ }
+ BC_ENTER_LOOPER => self.inner.lock().looper_enter(),
+ BC_EXIT_LOOPER => self.inner.lock().looper_exit(),
+ BC_REQUEST_FREEZE_NOTIFICATION => self.process.request_freeze_notif(&mut reader)?,
+ BC_CLEAR_FREEZE_NOTIFICATION => self.process.clear_freeze_notif(&mut reader)?,
+ BC_FREEZE_NOTIFICATION_DONE => self.process.freeze_notif_done(&mut reader)?,
+
+ // Fail if given an unknown error code.
+ // BC_ATTEMPT_ACQUIRE and BC_ACQUIRE_RESULT are no longer supported.
+ _ => return Err(EINVAL),
+ }
+ // Update the number of write bytes consumed.
+ req.write_consumed += (before - reader.len()) as u64;
+ }
+
+ Ok(())
+ }
+
+ fn read(self: &Arc<Self>, req: &mut BinderWriteRead, wait: bool) -> Result {
+ let read_start = req.read_buffer.wrapping_add(req.read_consumed);
+ let read_len = req.read_size.saturating_sub(req.read_consumed);
+ let mut writer = BinderReturnWriter::new(
+ UserSlice::new(UserPtr::from_addr(read_start as _), read_len as _).writer(),
+ self,
+ );
+ let (in_pool, use_proc_queue) = {
+ let inner = self.inner.lock();
+ (inner.is_looper(), inner.should_use_process_work_queue())
+ };
+
+ let getter = if use_proc_queue {
+ Self::get_work
+ } else {
+ Self::get_work_local
+ };
+
+ // Reserve some room at the beginning of the read buffer so that we can send a
+ // BR_SPAWN_LOOPER if we need to.
+ let mut has_noop_placeholder = false;
+ if req.read_consumed == 0 {
+ if let Err(err) = writer.write_code(BR_NOOP) {
+ pr_warn!("Failure when writing BR_NOOP at beginning of buffer.");
+ return Err(err);
+ }
+ has_noop_placeholder = true;
+ }
+
+ // Loop doing work while there is room in the buffer.
+ let initial_len = writer.len();
+ while writer.len() >= size_of::<uapi::binder_transaction_data_secctx>() + 4 {
+ match getter(self, wait && initial_len == writer.len()) {
+ Ok(Some(work)) => match work.into_arc().do_work(self, &mut writer) {
+ Ok(true) => {}
+ Ok(false) => break,
+ Err(err) => {
+ return Err(err);
+ }
+ },
+ Ok(None) => {
+ break;
+ }
+ Err(err) => {
+ // Propagate the error if we haven't written anything else.
+ if err != EINTR && err != EAGAIN {
+ pr_warn!("Failure in work getter: {:?}", err);
+ }
+ if initial_len == writer.len() {
+ return Err(err);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ req.read_consumed += read_len - writer.len() as u64;
+
+ // Write BR_SPAWN_LOOPER if the process needs more threads for its pool.
+ if has_noop_placeholder && in_pool && self.process.needs_thread() {
+ let mut writer =
+ UserSlice::new(UserPtr::from_addr(req.read_buffer as _), req.read_size as _)
+ .writer();
+ writer.write(&BR_SPAWN_LOOPER)?;
+ }
+ Ok(())
+ }
+
+ pub(crate) fn write_read(self: &Arc<Self>, data: UserSlice, wait: bool) -> Result {
+ let (mut reader, mut writer) = data.reader_writer();
+ let mut req = reader.read::<BinderWriteRead>()?;
+
+ // Go through the write buffer.
+ let mut ret = Ok(());
+ if req.write_size > 0 {
+ ret = self.write(&mut req);
+ if let Err(err) = ret {
+ pr_warn!(
+ "Write failure {:?} in pid:{}",
+ err,
+ self.process.pid_in_current_ns()
+ );
+ req.read_consumed = 0;
+ writer.write(&req)?;
+ self.inner.lock().looper_need_return = false;
+ return ret;
+ }
+ }
+
+ // Go through the work queue.
+ if req.read_size > 0 {
+ ret = self.read(&mut req, wait);
+ if ret.is_err() && ret != Err(EINTR) {
+ pr_warn!(
+ "Read failure {:?} in pid:{}",
+ ret,
+ self.process.pid_in_current_ns()
+ );
+ }
+ }
+
+ // Write the request back so that the consumed fields are visible to the caller.
+ writer.write(&req)?;
+
+ self.inner.lock().looper_need_return = false;
+
+ ret
+ }
+
+ pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) {
+ table.register_wait(file, &self.work_condvar);
+ let mut inner = self.inner.lock();
+ (inner.should_use_process_work_queue(), inner.poll())
+ }
+
+ /// Make the call to `get_work` or `get_work_local` return immediately, if any.
+ pub(crate) fn exit_looper(&self) {
+ let mut inner = self.inner.lock();
+ let should_notify = inner.looper_flags & LOOPER_WAITING != 0;
+ if should_notify {
+ inner.looper_need_return = true;
+ }
+ drop(inner);
+
+ if should_notify {
+ self.work_condvar.notify_one();
+ }
+ }
+
+ pub(crate) fn notify_if_poll_ready(&self, sync: bool) {
+ // Determine if we need to notify. This requires the lock.
+ let inner = self.inner.lock();
+ let notify = inner.looper_flags & LOOPER_POLL != 0 && inner.should_use_process_work_queue();
+ drop(inner);
+
+ // Now that the lock is no longer held, notify the waiters if we have to.
+ if notify {
+ if sync {
+ self.work_condvar.notify_sync();
+ } else {
+ self.work_condvar.notify_one();
+ }
+ }
+ }
+
+ pub(crate) fn release(self: &Arc<Self>) {
+ self.inner.lock().is_dead = true;
+
+ //self.work_condvar.clear();
+ self.unwind_transaction_stack();
+
+ // Cancel all pending work items.
+ while let Ok(Some(work)) = self.get_work_local(false) {
+ work.into_arc().cancel();
+ }
+ }
+}
+
+#[pin_data]
+struct ThreadError {
+ error_code: AtomicU32,
+ #[pin]
+ links_track: AtomicTracker,
+}
+
+impl ThreadError {
+ fn try_new() -> Result<DArc<Self>> {
+ DTRWrap::arc_pin_init(pin_init!(Self {
+ error_code: AtomicU32::new(BR_OK),
+ links_track <- AtomicTracker::new(),
+ }))
+ .map(ListArc::into_arc)
+ }
+
+ fn set_error_code(&self, code: u32) {
+ self.error_code.store(code, Ordering::Relaxed);
+ }
+
+ fn is_unused(&self) -> bool {
+ self.error_code.load(Ordering::Relaxed) == BR_OK
+ }
+}
+
+impl DeliverToRead for ThreadError {
+ fn do_work(
+ self: DArc<Self>,
+ _thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let code = self.error_code.load(Ordering::Relaxed);
+ self.error_code.store(BR_OK, Ordering::Relaxed);
+ writer.write_code(code)?;
+ Ok(true)
+ }
+
+ fn cancel(self: DArc<Self>) {}
+
+ fn should_sync_wakeup(&self) -> bool {
+ false
+ }
+
+ fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
+ seq_print!(
+ m,
+ "{}transaction error: {}\n",
+ prefix,
+ self.error_code.load(Ordering::Relaxed)
+ );
+ Ok(())
+ }
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for ThreadError {
+ tracked_by links_track: AtomicTracker;
+ }
+}
diff --git a/drivers/android/binder/trace.rs b/drivers/android/binder/trace.rs
new file mode 100644
index 000000000000..af0e4392805e
--- /dev/null
+++ b/drivers/android/binder/trace.rs
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use kernel::ffi::{c_uint, c_ulong};
+use kernel::tracepoint::declare_trace;
+
+declare_trace! {
+ unsafe fn rust_binder_ioctl(cmd: c_uint, arg: c_ulong);
+}
+
+#[inline]
+pub(crate) fn trace_ioctl(cmd: u32, arg: usize) {
+ // SAFETY: Always safe to call.
+ unsafe { rust_binder_ioctl(cmd, arg as c_ulong) }
+}
diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs
new file mode 100644
index 000000000000..4bd3c0e417eb
--- /dev/null
+++ b/drivers/android/binder/transaction.rs
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2025 Google LLC.
+
+use core::sync::atomic::{AtomicBool, Ordering};
+use kernel::{
+ prelude::*,
+ seq_file::SeqFile,
+ seq_print,
+ sync::{Arc, SpinLock},
+ task::Kuid,
+ time::{Instant, Monotonic},
+ types::ScopeGuard,
+};
+
+use crate::{
+ allocation::{Allocation, TranslatedFds},
+ defs::*,
+ error::{BinderError, BinderResult},
+ node::{Node, NodeRef},
+ process::{Process, ProcessInner},
+ ptr_align,
+ thread::{PushWorkRes, Thread},
+ BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead,
+};
+
+#[pin_data(PinnedDrop)]
+pub(crate) struct Transaction {
+ pub(crate) debug_id: usize,
+ target_node: Option<DArc<Node>>,
+ pub(crate) from_parent: Option<DArc<Transaction>>,
+ pub(crate) from: Arc<Thread>,
+ pub(crate) to: Arc<Process>,
+ #[pin]
+ allocation: SpinLock<Option<Allocation>>,
+ is_outstanding: AtomicBool,
+ code: u32,
+ pub(crate) flags: u32,
+ data_size: usize,
+ offsets_size: usize,
+ data_address: usize,
+ sender_euid: Kuid,
+ txn_security_ctx_off: Option<usize>,
+ pub(crate) oneway_spam_detected: bool,
+ start_time: Instant<Monotonic>,
+}
+
+kernel::list::impl_list_arc_safe! {
+ impl ListArcSafe<0> for Transaction { untracked; }
+}
+
+impl Transaction {
+ pub(crate) fn new(
+ node_ref: NodeRef,
+ from_parent: Option<DArc<Transaction>>,
+ from: &Arc<Thread>,
+ tr: &BinderTransactionDataSg,
+ ) -> BinderResult<DLArc<Self>> {
+ let debug_id = super::next_debug_id();
+ let trd = &tr.transaction_data;
+ let allow_fds = node_ref.node.flags & FLAT_BINDER_FLAG_ACCEPTS_FDS != 0;
+ let txn_security_ctx = node_ref.node.flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX != 0;
+ let mut txn_security_ctx_off = if txn_security_ctx { Some(0) } else { None };
+ let to = node_ref.node.owner.clone();
+ let mut alloc = match from.copy_transaction_data(
+ to.clone(),
+ tr,
+ debug_id,
+ allow_fds,
+ txn_security_ctx_off.as_mut(),
+ ) {
+ Ok(alloc) => alloc,
+ Err(err) => {
+ if !err.is_dead() {
+ pr_warn!("Failure in copy_transaction_data: {:?}", err);
+ }
+ return Err(err);
+ }
+ };
+ let oneway_spam_detected = alloc.oneway_spam_detected;
+ if trd.flags & TF_ONE_WAY != 0 {
+ if from_parent.is_some() {
+ pr_warn!("Oneway transaction should not be in a transaction stack.");
+ return Err(EINVAL.into());
+ }
+ alloc.set_info_oneway_node(node_ref.node.clone());
+ }
+ if trd.flags & TF_CLEAR_BUF != 0 {
+ alloc.set_info_clear_on_drop();
+ }
+ let target_node = node_ref.node.clone();
+ alloc.set_info_target_node(node_ref);
+ let data_address = alloc.ptr;
+
+ Ok(DTRWrap::arc_pin_init(pin_init!(Transaction {
+ debug_id,
+ target_node: Some(target_node),
+ from_parent,
+ sender_euid: from.process.task.euid(),
+ from: from.clone(),
+ to,
+ code: trd.code,
+ flags: trd.flags,
+ data_size: trd.data_size as _,
+ offsets_size: trd.offsets_size as _,
+ data_address,
+ allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"),
+ is_outstanding: AtomicBool::new(false),
+ txn_security_ctx_off,
+ oneway_spam_detected,
+ start_time: Instant::now(),
+ }))?)
+ }
+
+ pub(crate) fn new_reply(
+ from: &Arc<Thread>,
+ to: Arc<Process>,
+ tr: &BinderTransactionDataSg,
+ allow_fds: bool,
+ ) -> BinderResult<DLArc<Self>> {
+ let debug_id = super::next_debug_id();
+ let trd = &tr.transaction_data;
+ let mut alloc = match from.copy_transaction_data(to.clone(), tr, debug_id, allow_fds, None)
+ {
+ Ok(alloc) => alloc,
+ Err(err) => {
+ pr_warn!("Failure in copy_transaction_data: {:?}", err);
+ return Err(err);
+ }
+ };
+ let oneway_spam_detected = alloc.oneway_spam_detected;
+ if trd.flags & TF_CLEAR_BUF != 0 {
+ alloc.set_info_clear_on_drop();
+ }
+ Ok(DTRWrap::arc_pin_init(pin_init!(Transaction {
+ debug_id,
+ target_node: None,
+ from_parent: None,
+ sender_euid: from.process.task.euid(),
+ from: from.clone(),
+ to,
+ code: trd.code,
+ flags: trd.flags,
+ data_size: trd.data_size as _,
+ offsets_size: trd.offsets_size as _,
+ data_address: alloc.ptr,
+ allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"),
+ is_outstanding: AtomicBool::new(false),
+ txn_security_ctx_off: None,
+ oneway_spam_detected,
+ start_time: Instant::now(),
+ }))?)
+ }
+
+ #[inline(never)]
+ pub(crate) fn debug_print_inner(&self, m: &SeqFile, prefix: &str) {
+ seq_print!(
+ m,
+ "{}{}: from {}:{} to {} code {:x} flags {:x} elapsed {}ms",
+ prefix,
+ self.debug_id,
+ self.from.process.task.pid(),
+ self.from.id,
+ self.to.task.pid(),
+ self.code,
+ self.flags,
+ self.start_time.elapsed().as_millis(),
+ );
+ if let Some(target_node) = &self.target_node {
+ seq_print!(m, " node {}", target_node.debug_id);
+ }
+ seq_print!(m, " size {}:{}\n", self.data_size, self.offsets_size);
+ }
+
+ /// Determines if the transaction is stacked on top of the given transaction.
+ pub(crate) fn is_stacked_on(&self, onext: &Option<DArc<Self>>) -> bool {
+ match (&self.from_parent, onext) {
+ (None, None) => true,
+ (Some(from_parent), Some(next)) => Arc::ptr_eq(from_parent, next),
+ _ => false,
+ }
+ }
+
+ /// Returns a pointer to the next transaction on the transaction stack, if there is one.
+ pub(crate) fn clone_next(&self) -> Option<DArc<Self>> {
+ Some(self.from_parent.as_ref()?.clone())
+ }
+
+ /// Searches in the transaction stack for a thread that belongs to the target process. This is
+ /// useful when finding a target for a new transaction: if the node belongs to a process that
+ /// is already part of the transaction stack, we reuse the thread.
+ fn find_target_thread(&self) -> Option<Arc<Thread>> {
+ let mut it = &self.from_parent;
+ while let Some(transaction) = it {
+ if Arc::ptr_eq(&transaction.from.process, &self.to) {
+ return Some(transaction.from.clone());
+ }
+ it = &transaction.from_parent;
+ }
+ None
+ }
+
+ /// Searches in the transaction stack for a transaction originating at the given thread.
+ pub(crate) fn find_from(&self, thread: &Thread) -> Option<&DArc<Transaction>> {
+ let mut it = &self.from_parent;
+ while let Some(transaction) = it {
+ if core::ptr::eq(thread, transaction.from.as_ref()) {
+ return Some(transaction);
+ }
+
+ it = &transaction.from_parent;
+ }
+ None
+ }
+
+ pub(crate) fn set_outstanding(&self, to_process: &mut ProcessInner) {
+ // No race because this method is only called once.
+ if !self.is_outstanding.load(Ordering::Relaxed) {
+ self.is_outstanding.store(true, Ordering::Relaxed);
+ to_process.add_outstanding_txn();
+ }
+ }
+
+ /// Decrement `outstanding_txns` in `to` if it hasn't already been decremented.
+ fn drop_outstanding_txn(&self) {
+ // No race because this is called at most twice, and one of the calls are in the
+ // destructor, which is guaranteed to not race with any other operations on the
+ // transaction. It also cannot race with `set_outstanding`, since submission happens
+ // before delivery.
+ if self.is_outstanding.load(Ordering::Relaxed) {
+ self.is_outstanding.store(false, Ordering::Relaxed);
+ self.to.drop_outstanding_txn();
+ }
+ }
+
+ /// Submits the transaction to a work queue. Uses a thread if there is one in the transaction
+ /// stack, otherwise uses the destination process.
+ ///
+ /// Not used for replies.
+ pub(crate) fn submit(self: DLArc<Self>) -> BinderResult {
+ // Defined before `process_inner` so that the destructor runs after releasing the lock.
+ let mut _t_outdated;
+
+ let oneway = self.flags & TF_ONE_WAY != 0;
+ let process = self.to.clone();
+ let mut process_inner = process.inner.lock();
+
+ self.set_outstanding(&mut process_inner);
+
+ if oneway {
+ if let Some(target_node) = self.target_node.clone() {
+ if process_inner.is_frozen.is_frozen() {
+ process_inner.async_recv = true;
+ if self.flags & TF_UPDATE_TXN != 0 {
+ if let Some(t_outdated) =
+ target_node.take_outdated_transaction(&self, &mut process_inner)
+ {
+ // Save the transaction to be dropped after locks are released.
+ _t_outdated = t_outdated;
+ }
+ }
+ }
+ match target_node.submit_oneway(self, &mut process_inner) {
+ Ok(()) => {}
+ Err((err, work)) => {
+ drop(process_inner);
+ // Drop work after releasing process lock.
+ drop(work);
+ return Err(err);
+ }
+ }
+
+ if process_inner.is_frozen.is_frozen() {
+ return Err(BinderError::new_frozen_oneway());
+ } else {
+ return Ok(());
+ }
+ } else {
+ pr_err!("Failed to submit oneway transaction to node.");
+ }
+ }
+
+ if process_inner.is_frozen.is_frozen() {
+ process_inner.sync_recv = true;
+ return Err(BinderError::new_frozen());
+ }
+
+ let res = if let Some(thread) = self.find_target_thread() {
+ match thread.push_work(self) {
+ PushWorkRes::Ok => Ok(()),
+ PushWorkRes::FailedDead(me) => Err((BinderError::new_dead(), me)),
+ }
+ } else {
+ process_inner.push_work(self)
+ };
+ drop(process_inner);
+
+ match res {
+ Ok(()) => Ok(()),
+ Err((err, work)) => {
+ // Drop work after releasing process lock.
+ drop(work);
+ Err(err)
+ }
+ }
+ }
+
+ /// Check whether one oneway transaction can supersede another.
+ pub(crate) fn can_replace(&self, old: &Transaction) -> bool {
+ if self.from.process.task.pid() != old.from.process.task.pid() {
+ return false;
+ }
+
+ if self.flags & old.flags & (TF_ONE_WAY | TF_UPDATE_TXN) != (TF_ONE_WAY | TF_UPDATE_TXN) {
+ return false;
+ }
+
+ let target_node_match = match (self.target_node.as_ref(), old.target_node.as_ref()) {
+ (None, None) => true,
+ (Some(tn1), Some(tn2)) => Arc::ptr_eq(tn1, tn2),
+ _ => false,
+ };
+
+ self.code == old.code && self.flags == old.flags && target_node_match
+ }
+
+ fn prepare_file_list(&self) -> Result<TranslatedFds> {
+ let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?;
+
+ match alloc.translate_fds() {
+ Ok(translated) => {
+ *self.allocation.lock() = Some(alloc);
+ Ok(translated)
+ }
+ Err(err) => {
+ // Free the allocation eagerly.
+ drop(alloc);
+ Err(err)
+ }
+ }
+ }
+}
+
+impl DeliverToRead for Transaction {
+ fn do_work(
+ self: DArc<Self>,
+ thread: &Thread,
+ writer: &mut BinderReturnWriter<'_>,
+ ) -> Result<bool> {
+ let send_failed_reply = ScopeGuard::new(|| {
+ if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
+ let reply = Err(BR_FAILED_REPLY);
+ self.from.deliver_reply(reply, &self);
+ }
+ self.drop_outstanding_txn();
+ });
+
+ let files = if let Ok(list) = self.prepare_file_list() {
+ list
+ } else {
+ // On failure to process the list, we send a reply back to the sender and ignore the
+ // transaction on the recipient.
+ return Ok(true);
+ };
+
+ let mut tr_sec = BinderTransactionDataSecctx::default();
+ let tr = tr_sec.tr_data();
+ if let Some(target_node) = &self.target_node {
+ let (ptr, cookie) = target_node.get_id();
+ tr.target.ptr = ptr as _;
+ tr.cookie = cookie as _;
+ };
+ tr.code = self.code;
+ tr.flags = self.flags;
+ tr.data_size = self.data_size as _;
+ tr.data.ptr.buffer = self.data_address as _;
+ tr.offsets_size = self.offsets_size as _;
+ if tr.offsets_size > 0 {
+ tr.data.ptr.offsets = (self.data_address + ptr_align(self.data_size).unwrap()) as _;
+ }
+ tr.sender_euid = self.sender_euid.into_uid_in_current_ns();
+ tr.sender_pid = 0;
+ if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
+ // Not a reply and not one-way.
+ tr.sender_pid = self.from.process.pid_in_current_ns();
+ }
+ let code = if self.target_node.is_none() {
+ BR_REPLY
+ } else if self.txn_security_ctx_off.is_some() {
+ BR_TRANSACTION_SEC_CTX
+ } else {
+ BR_TRANSACTION
+ };
+
+ // Write the transaction code and data to the user buffer.
+ writer.write_code(code)?;
+ if let Some(off) = self.txn_security_ctx_off {
+ tr_sec.secctx = (self.data_address + off) as u64;
+ writer.write_payload(&tr_sec)?;
+ } else {
+ writer.write_payload(&*tr)?;
+ }
+
+ let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?;
+
+ // Dismiss the completion of transaction with a failure. No failure paths are allowed from
+ // here on out.
+ send_failed_reply.dismiss();
+
+ // Commit files, and set FDs in FDA to be closed on buffer free.
+ let close_on_free = files.commit();
+ alloc.set_info_close_on_free(close_on_free);
+
+ // It is now the user's responsibility to clear the allocation.
+ alloc.keep_alive();
+
+ self.drop_outstanding_txn();
+
+ // When this is not a reply and not a oneway transaction, update `current_transaction`. If
+ // it's a reply, `current_transaction` has already been updated appropriately.
+ if self.target_node.is_some() && tr_sec.transaction_data.flags & TF_ONE_WAY == 0 {
+ thread.set_current_transaction(self);
+ }
+
+ Ok(false)
+ }
+
+ fn cancel(self: DArc<Self>) {
+ let allocation = self.allocation.lock().take();
+ drop(allocation);
+
+ // If this is not a reply or oneway transaction, then send a dead reply.
+ if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
+ let reply = Err(BR_DEAD_REPLY);
+ self.from.deliver_reply(reply, &self);
+ }
+
+ self.drop_outstanding_txn();
+ }
+
+ fn should_sync_wakeup(&self) -> bool {
+ self.flags & TF_ONE_WAY == 0
+ }
+
+ fn debug_print(&self, m: &SeqFile, _prefix: &str, tprefix: &str) -> Result<()> {
+ self.debug_print_inner(m, tprefix);
+ Ok(())
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for Transaction {
+ fn drop(self: Pin<&mut Self>) {
+ self.drop_outstanding_txn();
+ }
+}
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
new file mode 100644
index 000000000000..979c96b74cad
--- /dev/null
+++ b/drivers/android/binder_alloc.c
@@ -0,0 +1,1410 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* binder_alloc.c
+ *
+ * Android IPC Subsystem
+ *
+ * Copyright (C) 2007-2017 Google, Inc.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/sched/mm.h>
+#include <linux/module.h>
+#include <linux/rtmutex.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/ratelimit.h>
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+#include <linux/highmem.h>
+#include <linux/sizes.h>
+#include <kunit/visibility.h>
+#include "binder_alloc.h"
+#include "binder_trace.h"
+
+static struct list_lru binder_freelist;
+
+static DEFINE_MUTEX(binder_alloc_mmap_lock);
+
+enum {
+ BINDER_DEBUG_USER_ERROR = 1U << 0,
+ BINDER_DEBUG_OPEN_CLOSE = 1U << 1,
+ BINDER_DEBUG_BUFFER_ALLOC = 1U << 2,
+ BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 3,
+};
+static uint32_t binder_alloc_debug_mask = BINDER_DEBUG_USER_ERROR;
+
+module_param_named(debug_mask, binder_alloc_debug_mask,
+ uint, 0644);
+
+#define binder_alloc_debug(mask, x...) \
+ do { \
+ if (binder_alloc_debug_mask & mask) \
+ pr_info_ratelimited(x); \
+ } while (0)
+
+static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer)
+{
+ return list_entry(buffer->entry.next, struct binder_buffer, entry);
+}
+
+static struct binder_buffer *binder_buffer_prev(struct binder_buffer *buffer)
+{
+ return list_entry(buffer->entry.prev, struct binder_buffer, entry);
+}
+
+VISIBLE_IF_KUNIT size_t binder_alloc_buffer_size(struct binder_alloc *alloc,
+ struct binder_buffer *buffer)
+{
+ if (list_is_last(&buffer->entry, &alloc->buffers))
+ return alloc->vm_start + alloc->buffer_size - buffer->user_data;
+ return binder_buffer_next(buffer)->user_data - buffer->user_data;
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_buffer_size);
+
+static void binder_insert_free_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *new_buffer)
+{
+ struct rb_node **p = &alloc->free_buffers.rb_node;
+ struct rb_node *parent = NULL;
+ struct binder_buffer *buffer;
+ size_t buffer_size;
+ size_t new_buffer_size;
+
+ BUG_ON(!new_buffer->free);
+
+ new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer);
+
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: add free buffer, size %zd, at %pK\n",
+ alloc->pid, new_buffer_size, new_buffer);
+
+ while (*p) {
+ parent = *p;
+ buffer = rb_entry(parent, struct binder_buffer, rb_node);
+ BUG_ON(!buffer->free);
+
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+ if (new_buffer_size < buffer_size)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ rb_link_node(&new_buffer->rb_node, parent, p);
+ rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers);
+}
+
+static void binder_insert_allocated_buffer_locked(
+ struct binder_alloc *alloc, struct binder_buffer *new_buffer)
+{
+ struct rb_node **p = &alloc->allocated_buffers.rb_node;
+ struct rb_node *parent = NULL;
+ struct binder_buffer *buffer;
+
+ BUG_ON(new_buffer->free);
+
+ while (*p) {
+ parent = *p;
+ buffer = rb_entry(parent, struct binder_buffer, rb_node);
+ BUG_ON(buffer->free);
+
+ if (new_buffer->user_data < buffer->user_data)
+ p = &parent->rb_left;
+ else if (new_buffer->user_data > buffer->user_data)
+ p = &parent->rb_right;
+ else
+ BUG();
+ }
+ rb_link_node(&new_buffer->rb_node, parent, p);
+ rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers);
+}
+
+static struct binder_buffer *binder_alloc_prepare_to_free_locked(
+ struct binder_alloc *alloc,
+ unsigned long user_ptr)
+{
+ struct rb_node *n = alloc->allocated_buffers.rb_node;
+ struct binder_buffer *buffer;
+
+ while (n) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ BUG_ON(buffer->free);
+
+ if (user_ptr < buffer->user_data) {
+ n = n->rb_left;
+ } else if (user_ptr > buffer->user_data) {
+ n = n->rb_right;
+ } else {
+ /*
+ * Guard against user threads attempting to
+ * free the buffer when in use by kernel or
+ * after it's already been freed.
+ */
+ if (!buffer->allow_user_free)
+ return ERR_PTR(-EPERM);
+ buffer->allow_user_free = 0;
+ return buffer;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * binder_alloc_prepare_to_free() - get buffer given user ptr
+ * @alloc: binder_alloc for this proc
+ * @user_ptr: User pointer to buffer data
+ *
+ * Validate userspace pointer to buffer data and return buffer corresponding to
+ * that user pointer. Search the rb tree for buffer that matches user data
+ * pointer.
+ *
+ * Return: Pointer to buffer or NULL
+ */
+struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc,
+ unsigned long user_ptr)
+{
+ guard(mutex)(&alloc->mutex);
+ return binder_alloc_prepare_to_free_locked(alloc, user_ptr);
+}
+
+static inline void
+binder_set_installed_page(struct binder_alloc *alloc,
+ unsigned long index,
+ struct page *page)
+{
+ /* Pairs with acquire in binder_get_installed_page() */
+ smp_store_release(&alloc->pages[index], page);
+}
+
+static inline struct page *
+binder_get_installed_page(struct binder_alloc *alloc, unsigned long index)
+{
+ /* Pairs with release in binder_set_installed_page() */
+ return smp_load_acquire(&alloc->pages[index]);
+}
+
+static void binder_lru_freelist_add(struct binder_alloc *alloc,
+ unsigned long start, unsigned long end)
+{
+ unsigned long page_addr;
+ struct page *page;
+
+ trace_binder_update_page_range(alloc, false, start, end);
+
+ for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+ size_t index;
+ int ret;
+
+ index = (page_addr - alloc->vm_start) / PAGE_SIZE;
+ page = binder_get_installed_page(alloc, index);
+ if (!page)
+ continue;
+
+ trace_binder_free_lru_start(alloc, index);
+
+ ret = list_lru_add(alloc->freelist,
+ page_to_lru(page),
+ page_to_nid(page),
+ NULL);
+ WARN_ON(!ret);
+
+ trace_binder_free_lru_end(alloc, index);
+ }
+}
+
+static inline
+void binder_alloc_set_mapped(struct binder_alloc *alloc, bool state)
+{
+ /* pairs with smp_load_acquire in binder_alloc_is_mapped() */
+ smp_store_release(&alloc->mapped, state);
+}
+
+static inline bool binder_alloc_is_mapped(struct binder_alloc *alloc)
+{
+ /* pairs with smp_store_release in binder_alloc_set_mapped() */
+ return smp_load_acquire(&alloc->mapped);
+}
+
+static struct page *binder_page_lookup(struct binder_alloc *alloc,
+ unsigned long addr)
+{
+ struct mm_struct *mm = alloc->mm;
+ struct page *page;
+ long npages = 0;
+
+ /*
+ * Find an existing page in the remote mm. If missing,
+ * don't attempt to fault-in just propagate an error.
+ */
+ mmap_read_lock(mm);
+ if (binder_alloc_is_mapped(alloc))
+ npages = get_user_pages_remote(mm, addr, 1, FOLL_NOFAULT,
+ &page, NULL);
+ mmap_read_unlock(mm);
+
+ return npages > 0 ? page : NULL;
+}
+
+static int binder_page_insert(struct binder_alloc *alloc,
+ unsigned long addr,
+ struct page *page)
+{
+ struct mm_struct *mm = alloc->mm;
+ struct vm_area_struct *vma;
+ int ret = -ESRCH;
+
+ /* attempt per-vma lock first */
+ vma = lock_vma_under_rcu(mm, addr);
+ if (vma) {
+ if (binder_alloc_is_mapped(alloc))
+ ret = vm_insert_page(vma, addr, page);
+ vma_end_read(vma);
+ return ret;
+ }
+
+ /* fall back to mmap_lock */
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, addr);
+ if (vma && binder_alloc_is_mapped(alloc))
+ ret = vm_insert_page(vma, addr, page);
+ mmap_read_unlock(mm);
+
+ return ret;
+}
+
+static struct page *binder_page_alloc(struct binder_alloc *alloc,
+ unsigned long index)
+{
+ struct binder_shrinker_mdata *mdata;
+ struct page *page;
+
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ if (!page)
+ return NULL;
+
+ /* allocate and install shrinker metadata under page->private */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata) {
+ __free_page(page);
+ return NULL;
+ }
+
+ mdata->alloc = alloc;
+ mdata->page_index = index;
+ INIT_LIST_HEAD(&mdata->lru);
+ set_page_private(page, (unsigned long)mdata);
+
+ return page;
+}
+
+static void binder_free_page(struct page *page)
+{
+ kfree((struct binder_shrinker_mdata *)page_private(page));
+ __free_page(page);
+}
+
+static int binder_install_single_page(struct binder_alloc *alloc,
+ unsigned long index,
+ unsigned long addr)
+{
+ struct page *page;
+ int ret;
+
+ if (!mmget_not_zero(alloc->mm))
+ return -ESRCH;
+
+ page = binder_page_alloc(alloc, index);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = binder_page_insert(alloc, addr, page);
+ switch (ret) {
+ case -EBUSY:
+ /*
+ * EBUSY is ok. Someone installed the pte first but the
+ * alloc->pages[index] has not been updated yet. Discard
+ * our page and look up the one already installed.
+ */
+ ret = 0;
+ binder_free_page(page);
+ page = binder_page_lookup(alloc, addr);
+ if (!page) {
+ pr_err("%d: failed to find page at offset %lx\n",
+ alloc->pid, addr - alloc->vm_start);
+ ret = -ESRCH;
+ break;
+ }
+ fallthrough;
+ case 0:
+ /* Mark page installation complete and safe to use */
+ binder_set_installed_page(alloc, index, page);
+ break;
+ default:
+ binder_free_page(page);
+ pr_err("%d: %s failed to insert page at offset %lx with %d\n",
+ alloc->pid, __func__, addr - alloc->vm_start, ret);
+ break;
+ }
+out:
+ mmput_async(alloc->mm);
+ return ret;
+}
+
+static int binder_install_buffer_pages(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ size_t size)
+{
+ unsigned long start, final;
+ unsigned long page_addr;
+
+ start = buffer->user_data & PAGE_MASK;
+ final = PAGE_ALIGN(buffer->user_data + size);
+
+ for (page_addr = start; page_addr < final; page_addr += PAGE_SIZE) {
+ unsigned long index;
+ int ret;
+
+ index = (page_addr - alloc->vm_start) / PAGE_SIZE;
+ if (binder_get_installed_page(alloc, index))
+ continue;
+
+ trace_binder_alloc_page_start(alloc, index);
+
+ ret = binder_install_single_page(alloc, index, page_addr);
+ if (ret)
+ return ret;
+
+ trace_binder_alloc_page_end(alloc, index);
+ }
+
+ return 0;
+}
+
+/* The range of pages should exclude those shared with other buffers */
+static void binder_lru_freelist_del(struct binder_alloc *alloc,
+ unsigned long start, unsigned long end)
+{
+ unsigned long page_addr;
+ struct page *page;
+
+ trace_binder_update_page_range(alloc, true, start, end);
+
+ for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+ unsigned long index;
+ bool on_lru;
+
+ index = (page_addr - alloc->vm_start) / PAGE_SIZE;
+ page = binder_get_installed_page(alloc, index);
+
+ if (page) {
+ trace_binder_alloc_lru_start(alloc, index);
+
+ on_lru = list_lru_del(alloc->freelist,
+ page_to_lru(page),
+ page_to_nid(page),
+ NULL);
+ WARN_ON(!on_lru);
+
+ trace_binder_alloc_lru_end(alloc, index);
+ continue;
+ }
+
+ if (index + 1 > alloc->pages_high)
+ alloc->pages_high = index + 1;
+ }
+}
+
+static void debug_no_space_locked(struct binder_alloc *alloc)
+{
+ size_t largest_alloc_size = 0;
+ struct binder_buffer *buffer;
+ size_t allocated_buffers = 0;
+ size_t largest_free_size = 0;
+ size_t total_alloc_size = 0;
+ size_t total_free_size = 0;
+ size_t free_buffers = 0;
+ size_t buffer_size;
+ struct rb_node *n;
+
+ for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+ allocated_buffers++;
+ total_alloc_size += buffer_size;
+ if (buffer_size > largest_alloc_size)
+ largest_alloc_size = buffer_size;
+ }
+
+ for (n = rb_first(&alloc->free_buffers); n; n = rb_next(n)) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+ free_buffers++;
+ total_free_size += buffer_size;
+ if (buffer_size > largest_free_size)
+ largest_free_size = buffer_size;
+ }
+
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n",
+ total_alloc_size, allocated_buffers,
+ largest_alloc_size, total_free_size,
+ free_buffers, largest_free_size);
+}
+
+static bool debug_low_async_space_locked(struct binder_alloc *alloc)
+{
+ /*
+ * Find the amount and size of buffers allocated by the current caller;
+ * The idea is that once we cross the threshold, whoever is responsible
+ * for the low async space is likely to try to send another async txn,
+ * and at some point we'll catch them in the act. This is more efficient
+ * than keeping a map per pid.
+ */
+ struct binder_buffer *buffer;
+ size_t total_alloc_size = 0;
+ int pid = current->tgid;
+ size_t num_buffers = 0;
+ struct rb_node *n;
+
+ /*
+ * Only start detecting spammers once we have less than 20% of async
+ * space left (which is less than 10% of total buffer size).
+ */
+ if (alloc->free_async_space >= alloc->buffer_size / 10) {
+ alloc->oneway_spam_detected = false;
+ return false;
+ }
+
+ for (n = rb_first(&alloc->allocated_buffers); n != NULL;
+ n = rb_next(n)) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ if (buffer->pid != pid)
+ continue;
+ if (!buffer->async_transaction)
+ continue;
+ total_alloc_size += binder_alloc_buffer_size(alloc, buffer);
+ num_buffers++;
+ }
+
+ /*
+ * Warn if this pid has more than 50 transactions, or more than 50% of
+ * async space (which is 25% of total buffer size). Oneway spam is only
+ * detected when the threshold is exceeded.
+ */
+ if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
+ alloc->pid, pid, num_buffers, total_alloc_size);
+ if (!alloc->oneway_spam_detected) {
+ alloc->oneway_spam_detected = true;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Callers preallocate @new_buffer, it is freed by this function if unused */
+static struct binder_buffer *binder_alloc_new_buf_locked(
+ struct binder_alloc *alloc,
+ struct binder_buffer *new_buffer,
+ size_t size,
+ int is_async)
+{
+ struct rb_node *n = alloc->free_buffers.rb_node;
+ struct rb_node *best_fit = NULL;
+ struct binder_buffer *buffer;
+ unsigned long next_used_page;
+ unsigned long curr_last_page;
+ size_t buffer_size;
+
+ if (is_async && alloc->free_async_space < size) {
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: binder_alloc_buf size %zd failed, no async space left\n",
+ alloc->pid, size);
+ buffer = ERR_PTR(-ENOSPC);
+ goto out;
+ }
+
+ while (n) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ BUG_ON(!buffer->free);
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+ if (size < buffer_size) {
+ best_fit = n;
+ n = n->rb_left;
+ } else if (size > buffer_size) {
+ n = n->rb_right;
+ } else {
+ best_fit = n;
+ break;
+ }
+ }
+
+ if (unlikely(!best_fit)) {
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%d: binder_alloc_buf size %zd failed, no address space\n",
+ alloc->pid, size);
+ debug_no_space_locked(alloc);
+ buffer = ERR_PTR(-ENOSPC);
+ goto out;
+ }
+
+ if (buffer_size != size) {
+ /* Found an oversized buffer and needs to be split */
+ buffer = rb_entry(best_fit, struct binder_buffer, rb_node);
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+ WARN_ON(n || buffer_size == size);
+ new_buffer->user_data = buffer->user_data + size;
+ list_add(&new_buffer->entry, &buffer->entry);
+ new_buffer->free = 1;
+ binder_insert_free_buffer(alloc, new_buffer);
+ new_buffer = NULL;
+ }
+
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
+ alloc->pid, size, buffer, buffer_size);
+
+ /*
+ * Now we remove the pages from the freelist. A clever calculation
+ * with buffer_size determines if the last page is shared with an
+ * adjacent in-use buffer. In such case, the page has been already
+ * removed from the freelist so we trim our range short.
+ */
+ next_used_page = (buffer->user_data + buffer_size) & PAGE_MASK;
+ curr_last_page = PAGE_ALIGN(buffer->user_data + size);
+ binder_lru_freelist_del(alloc, PAGE_ALIGN(buffer->user_data),
+ min(next_used_page, curr_last_page));
+
+ rb_erase(&buffer->rb_node, &alloc->free_buffers);
+ buffer->free = 0;
+ buffer->allow_user_free = 0;
+ binder_insert_allocated_buffer_locked(alloc, buffer);
+ buffer->async_transaction = is_async;
+ buffer->oneway_spam_suspect = false;
+ if (is_async) {
+ alloc->free_async_space -= size;
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
+ "%d: binder_alloc_buf size %zd async free %zd\n",
+ alloc->pid, size, alloc->free_async_space);
+ if (debug_low_async_space_locked(alloc))
+ buffer->oneway_spam_suspect = true;
+ }
+
+out:
+ /* Discard possibly unused new_buffer */
+ kfree(new_buffer);
+ return buffer;
+}
+
+/* Calculate the sanitized total size, returns 0 for invalid request */
+static inline size_t sanitized_size(size_t data_size,
+ size_t offsets_size,
+ size_t extra_buffers_size)
+{
+ size_t total, tmp;
+
+ /* Align to pointer size and check for overflows */
+ tmp = ALIGN(data_size, sizeof(void *)) +
+ ALIGN(offsets_size, sizeof(void *));
+ if (tmp < data_size || tmp < offsets_size)
+ return 0;
+ total = tmp + ALIGN(extra_buffers_size, sizeof(void *));
+ if (total < tmp || total < extra_buffers_size)
+ return 0;
+
+ /* Pad 0-sized buffers so they get a unique address */
+ total = max(total, sizeof(void *));
+
+ return total;
+}
+
+/**
+ * binder_alloc_new_buf() - Allocate a new binder buffer
+ * @alloc: binder_alloc for this proc
+ * @data_size: size of user data buffer
+ * @offsets_size: user specified buffer offset
+ * @extra_buffers_size: size of extra space for meta-data (eg, security context)
+ * @is_async: buffer for async transaction
+ *
+ * Allocate a new buffer given the requested sizes. Returns
+ * the kernel version of the buffer pointer. The size allocated
+ * is the sum of the three given sizes (each rounded up to
+ * pointer-sized boundary)
+ *
+ * Return: The allocated buffer or %ERR_PTR(-errno) if error
+ */
+struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+ size_t data_size,
+ size_t offsets_size,
+ size_t extra_buffers_size,
+ int is_async)
+{
+ struct binder_buffer *buffer, *next;
+ size_t size;
+ int ret;
+
+ /* Check binder_alloc is fully initialized */
+ if (!binder_alloc_is_mapped(alloc)) {
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%d: binder_alloc_buf, no vma\n",
+ alloc->pid);
+ return ERR_PTR(-ESRCH);
+ }
+
+ size = sanitized_size(data_size, offsets_size, extra_buffers_size);
+ if (unlikely(!size)) {
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: got transaction with invalid size %zd-%zd-%zd\n",
+ alloc->pid, data_size, offsets_size,
+ extra_buffers_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Preallocate the next buffer */
+ next = kzalloc(sizeof(*next), GFP_KERNEL);
+ if (!next)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&alloc->mutex);
+ buffer = binder_alloc_new_buf_locked(alloc, next, size, is_async);
+ if (IS_ERR(buffer)) {
+ mutex_unlock(&alloc->mutex);
+ goto out;
+ }
+
+ buffer->data_size = data_size;
+ buffer->offsets_size = offsets_size;
+ buffer->extra_buffers_size = extra_buffers_size;
+ buffer->pid = current->tgid;
+ mutex_unlock(&alloc->mutex);
+
+ ret = binder_install_buffer_pages(alloc, buffer, size);
+ if (ret) {
+ binder_alloc_free_buf(alloc, buffer);
+ buffer = ERR_PTR(ret);
+ }
+out:
+ return buffer;
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_new_buf);
+
+static unsigned long buffer_start_page(struct binder_buffer *buffer)
+{
+ return buffer->user_data & PAGE_MASK;
+}
+
+static unsigned long prev_buffer_end_page(struct binder_buffer *buffer)
+{
+ return (buffer->user_data - 1) & PAGE_MASK;
+}
+
+static void binder_delete_free_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer)
+{
+ struct binder_buffer *prev, *next;
+
+ if (PAGE_ALIGNED(buffer->user_data))
+ goto skip_freelist;
+
+ BUG_ON(alloc->buffers.next == &buffer->entry);
+ prev = binder_buffer_prev(buffer);
+ BUG_ON(!prev->free);
+ if (prev_buffer_end_page(prev) == buffer_start_page(buffer))
+ goto skip_freelist;
+
+ if (!list_is_last(&buffer->entry, &alloc->buffers)) {
+ next = binder_buffer_next(buffer);
+ if (buffer_start_page(next) == buffer_start_page(buffer))
+ goto skip_freelist;
+ }
+
+ binder_lru_freelist_add(alloc, buffer_start_page(buffer),
+ buffer_start_page(buffer) + PAGE_SIZE);
+skip_freelist:
+ list_del(&buffer->entry);
+ kfree(buffer);
+}
+
+static void binder_free_buf_locked(struct binder_alloc *alloc,
+ struct binder_buffer *buffer)
+{
+ size_t size, buffer_size;
+
+ buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+ size = ALIGN(buffer->data_size, sizeof(void *)) +
+ ALIGN(buffer->offsets_size, sizeof(void *)) +
+ ALIGN(buffer->extra_buffers_size, sizeof(void *));
+
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: binder_free_buf %pK size %zd buffer_size %zd\n",
+ alloc->pid, buffer, size, buffer_size);
+
+ BUG_ON(buffer->free);
+ BUG_ON(size > buffer_size);
+ BUG_ON(buffer->transaction != NULL);
+ BUG_ON(buffer->user_data < alloc->vm_start);
+ BUG_ON(buffer->user_data > alloc->vm_start + alloc->buffer_size);
+
+ if (buffer->async_transaction) {
+ alloc->free_async_space += buffer_size;
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
+ "%d: binder_free_buf size %zd async free %zd\n",
+ alloc->pid, size, alloc->free_async_space);
+ }
+
+ binder_lru_freelist_add(alloc, PAGE_ALIGN(buffer->user_data),
+ (buffer->user_data + buffer_size) & PAGE_MASK);
+
+ rb_erase(&buffer->rb_node, &alloc->allocated_buffers);
+ buffer->free = 1;
+ if (!list_is_last(&buffer->entry, &alloc->buffers)) {
+ struct binder_buffer *next = binder_buffer_next(buffer);
+
+ if (next->free) {
+ rb_erase(&next->rb_node, &alloc->free_buffers);
+ binder_delete_free_buffer(alloc, next);
+ }
+ }
+ if (alloc->buffers.next != &buffer->entry) {
+ struct binder_buffer *prev = binder_buffer_prev(buffer);
+
+ if (prev->free) {
+ binder_delete_free_buffer(alloc, buffer);
+ rb_erase(&prev->rb_node, &alloc->free_buffers);
+ buffer = prev;
+ }
+ }
+ binder_insert_free_buffer(alloc, buffer);
+}
+
+/**
+ * binder_alloc_get_page() - get kernel pointer for given buffer offset
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @buffer_offset: offset into @buffer data
+ * @pgoffp: address to copy final page offset to
+ *
+ * Lookup the struct page corresponding to the address
+ * at @buffer_offset into @buffer->user_data. If @pgoffp is not
+ * NULL, the byte-offset into the page is written there.
+ *
+ * The caller is responsible to ensure that the offset points
+ * to a valid address within the @buffer and that @buffer is
+ * not freeable by the user. Since it can't be freed, we are
+ * guaranteed that the corresponding elements of @alloc->pages[]
+ * cannot change.
+ *
+ * Return: struct page
+ */
+static struct page *binder_alloc_get_page(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ pgoff_t *pgoffp)
+{
+ binder_size_t buffer_space_offset = buffer_offset +
+ (buffer->user_data - alloc->vm_start);
+ pgoff_t pgoff = buffer_space_offset & ~PAGE_MASK;
+ size_t index = buffer_space_offset >> PAGE_SHIFT;
+
+ *pgoffp = pgoff;
+
+ return alloc->pages[index];
+}
+
+/**
+ * binder_alloc_clear_buf() - zero out buffer
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be cleared
+ *
+ * memset the given buffer to 0
+ */
+static void binder_alloc_clear_buf(struct binder_alloc *alloc,
+ struct binder_buffer *buffer)
+{
+ size_t bytes = binder_alloc_buffer_size(alloc, buffer);
+ binder_size_t buffer_offset = 0;
+
+ while (bytes) {
+ unsigned long size;
+ struct page *page;
+ pgoff_t pgoff;
+
+ page = binder_alloc_get_page(alloc, buffer,
+ buffer_offset, &pgoff);
+ size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+ memset_page(page, pgoff, 0, size);
+ bytes -= size;
+ buffer_offset += size;
+ }
+}
+
+/**
+ * binder_alloc_free_buf() - free a binder buffer
+ * @alloc: binder_alloc for this proc
+ * @buffer: kernel pointer to buffer
+ *
+ * Free the buffer allocated via binder_alloc_new_buf()
+ */
+void binder_alloc_free_buf(struct binder_alloc *alloc,
+ struct binder_buffer *buffer)
+{
+ /*
+ * We could eliminate the call to binder_alloc_clear_buf()
+ * from binder_alloc_deferred_release() by moving this to
+ * binder_free_buf_locked(). However, that could
+ * increase contention for the alloc mutex if clear_on_free
+ * is used frequently for large buffers. The mutex is not
+ * needed for correctness here.
+ */
+ if (buffer->clear_on_free) {
+ binder_alloc_clear_buf(alloc, buffer);
+ buffer->clear_on_free = false;
+ }
+ mutex_lock(&alloc->mutex);
+ binder_free_buf_locked(alloc, buffer);
+ mutex_unlock(&alloc->mutex);
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_free_buf);
+
+/**
+ * binder_alloc_mmap_handler() - map virtual address space for proc
+ * @alloc: alloc structure for this proc
+ * @vma: vma passed to mmap()
+ *
+ * Called by binder_mmap() to initialize the space specified in
+ * vma for allocating binder buffers
+ *
+ * Return:
+ * 0 = success
+ * -EBUSY = address space already mapped
+ * -ENOMEM = failed to map memory to given address space
+ */
+int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+ struct vm_area_struct *vma)
+{
+ struct binder_buffer *buffer;
+ const char *failure_string;
+ int ret;
+
+ if (unlikely(vma->vm_mm != alloc->mm)) {
+ ret = -EINVAL;
+ failure_string = "invalid vma->vm_mm";
+ goto err_invalid_mm;
+ }
+
+ mutex_lock(&binder_alloc_mmap_lock);
+ if (alloc->buffer_size) {
+ ret = -EBUSY;
+ failure_string = "already mapped";
+ goto err_already_mapped;
+ }
+ alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
+ SZ_4M);
+ mutex_unlock(&binder_alloc_mmap_lock);
+
+ alloc->vm_start = vma->vm_start;
+
+ alloc->pages = kvcalloc(alloc->buffer_size / PAGE_SIZE,
+ sizeof(alloc->pages[0]),
+ GFP_KERNEL);
+ if (!alloc->pages) {
+ ret = -ENOMEM;
+ failure_string = "alloc page array";
+ goto err_alloc_pages_failed;
+ }
+
+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+ if (!buffer) {
+ ret = -ENOMEM;
+ failure_string = "alloc buffer struct";
+ goto err_alloc_buf_struct_failed;
+ }
+
+ buffer->user_data = alloc->vm_start;
+ list_add(&buffer->entry, &alloc->buffers);
+ buffer->free = 1;
+ binder_insert_free_buffer(alloc, buffer);
+ alloc->free_async_space = alloc->buffer_size / 2;
+
+ /* Signal binder_alloc is fully initialized */
+ binder_alloc_set_mapped(alloc, true);
+
+ return 0;
+
+err_alloc_buf_struct_failed:
+ kvfree(alloc->pages);
+ alloc->pages = NULL;
+err_alloc_pages_failed:
+ alloc->vm_start = 0;
+ mutex_lock(&binder_alloc_mmap_lock);
+ alloc->buffer_size = 0;
+err_already_mapped:
+ mutex_unlock(&binder_alloc_mmap_lock);
+err_invalid_mm:
+ binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
+ "%s: %d %lx-%lx %s failed %d\n", __func__,
+ alloc->pid, vma->vm_start, vma->vm_end,
+ failure_string, ret);
+ return ret;
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_mmap_handler);
+
+void binder_alloc_deferred_release(struct binder_alloc *alloc)
+{
+ struct rb_node *n;
+ int buffers, page_count;
+ struct binder_buffer *buffer;
+
+ buffers = 0;
+ mutex_lock(&alloc->mutex);
+ BUG_ON(alloc->mapped);
+
+ while ((n = rb_first(&alloc->allocated_buffers))) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+
+ /* Transaction should already have been freed */
+ BUG_ON(buffer->transaction);
+
+ if (buffer->clear_on_free) {
+ binder_alloc_clear_buf(alloc, buffer);
+ buffer->clear_on_free = false;
+ }
+ binder_free_buf_locked(alloc, buffer);
+ buffers++;
+ }
+
+ while (!list_empty(&alloc->buffers)) {
+ buffer = list_first_entry(&alloc->buffers,
+ struct binder_buffer, entry);
+ WARN_ON(!buffer->free);
+
+ list_del(&buffer->entry);
+ WARN_ON_ONCE(!list_empty(&alloc->buffers));
+ kfree(buffer);
+ }
+
+ page_count = 0;
+ if (alloc->pages) {
+ int i;
+
+ for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+ struct page *page;
+ bool on_lru;
+
+ page = binder_get_installed_page(alloc, i);
+ if (!page)
+ continue;
+
+ on_lru = list_lru_del(alloc->freelist,
+ page_to_lru(page),
+ page_to_nid(page),
+ NULL);
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%s: %d: page %d %s\n",
+ __func__, alloc->pid, i,
+ on_lru ? "on lru" : "active");
+ binder_free_page(page);
+ page_count++;
+ }
+ }
+ mutex_unlock(&alloc->mutex);
+ kvfree(alloc->pages);
+ if (alloc->mm)
+ mmdrop(alloc->mm);
+
+ binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE,
+ "%s: %d buffers %d, pages %d\n",
+ __func__, alloc->pid, buffers, page_count);
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_deferred_release);
+
+/**
+ * binder_alloc_print_allocated() - print buffer info
+ * @m: seq_file for output via seq_printf()
+ * @alloc: binder_alloc for this proc
+ *
+ * Prints information about every buffer associated with
+ * the binder_alloc state to the given seq_file
+ */
+void binder_alloc_print_allocated(struct seq_file *m,
+ struct binder_alloc *alloc)
+{
+ struct binder_buffer *buffer;
+ struct rb_node *n;
+
+ guard(mutex)(&alloc->mutex);
+ for (n = rb_first(&alloc->allocated_buffers); n; n = rb_next(n)) {
+ buffer = rb_entry(n, struct binder_buffer, rb_node);
+ seq_printf(m, " buffer %d: %lx size %zd:%zd:%zd %s\n",
+ buffer->debug_id,
+ buffer->user_data - alloc->vm_start,
+ buffer->data_size, buffer->offsets_size,
+ buffer->extra_buffers_size,
+ buffer->transaction ? "active" : "delivered");
+ }
+}
+
+/**
+ * binder_alloc_print_pages() - print page usage
+ * @m: seq_file for output via seq_printf()
+ * @alloc: binder_alloc for this proc
+ */
+void binder_alloc_print_pages(struct seq_file *m,
+ struct binder_alloc *alloc)
+{
+ struct page *page;
+ int i;
+ int active = 0;
+ int lru = 0;
+ int free = 0;
+
+ mutex_lock(&alloc->mutex);
+ /*
+ * Make sure the binder_alloc is fully initialized, otherwise we might
+ * read inconsistent state.
+ */
+ if (binder_alloc_is_mapped(alloc)) {
+ for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+ page = binder_get_installed_page(alloc, i);
+ if (!page)
+ free++;
+ else if (list_empty(page_to_lru(page)))
+ active++;
+ else
+ lru++;
+ }
+ }
+ mutex_unlock(&alloc->mutex);
+ seq_printf(m, " pages: %d:%d:%d\n", active, lru, free);
+ seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high);
+}
+
+/**
+ * binder_alloc_get_allocated_count() - return count of buffers
+ * @alloc: binder_alloc for this proc
+ *
+ * Return: count of allocated buffers
+ */
+int binder_alloc_get_allocated_count(struct binder_alloc *alloc)
+{
+ struct rb_node *n;
+ int count = 0;
+
+ guard(mutex)(&alloc->mutex);
+ for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n))
+ count++;
+ return count;
+}
+
+
+/**
+ * binder_alloc_vma_close() - invalidate address space
+ * @alloc: binder_alloc for this proc
+ *
+ * Called from binder_vma_close() when releasing address space.
+ * Clears alloc->mapped to prevent new incoming transactions from
+ * allocating more buffers.
+ */
+void binder_alloc_vma_close(struct binder_alloc *alloc)
+{
+ binder_alloc_set_mapped(alloc, false);
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_vma_close);
+
+/**
+ * binder_alloc_free_page() - shrinker callback to free pages
+ * @item: item to free
+ * @lru: list_lru instance of the item
+ * @cb_arg: callback argument
+ *
+ * Called from list_lru_walk() in binder_shrink_scan() to free
+ * up pages when the system is under memory pressure.
+ */
+enum lru_status binder_alloc_free_page(struct list_head *item,
+ struct list_lru_one *lru,
+ void *cb_arg)
+ __must_hold(&lru->lock)
+{
+ struct binder_shrinker_mdata *mdata = container_of(item, typeof(*mdata), lru);
+ struct binder_alloc *alloc = mdata->alloc;
+ struct mm_struct *mm = alloc->mm;
+ struct vm_area_struct *vma;
+ struct page *page_to_free;
+ unsigned long page_addr;
+ int mm_locked = 0;
+ size_t index;
+
+ if (!mmget_not_zero(mm))
+ goto err_mmget;
+
+ index = mdata->page_index;
+ page_addr = alloc->vm_start + index * PAGE_SIZE;
+
+ /* attempt per-vma lock first */
+ vma = lock_vma_under_rcu(mm, page_addr);
+ if (!vma) {
+ /* fall back to mmap_lock */
+ if (!mmap_read_trylock(mm))
+ goto err_mmap_read_lock_failed;
+ mm_locked = 1;
+ vma = vma_lookup(mm, page_addr);
+ }
+
+ if (!mutex_trylock(&alloc->mutex))
+ goto err_get_alloc_mutex_failed;
+
+ /*
+ * Since a binder_alloc can only be mapped once, we ensure
+ * the vma corresponds to this mapping by checking whether
+ * the binder_alloc is still mapped.
+ */
+ if (vma && !binder_alloc_is_mapped(alloc))
+ goto err_invalid_vma;
+
+ trace_binder_unmap_kernel_start(alloc, index);
+
+ page_to_free = alloc->pages[index];
+ binder_set_installed_page(alloc, index, NULL);
+
+ trace_binder_unmap_kernel_end(alloc, index);
+
+ list_lru_isolate(lru, item);
+ spin_unlock(&lru->lock);
+
+ if (vma) {
+ trace_binder_unmap_user_start(alloc, index);
+
+ zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);
+
+ trace_binder_unmap_user_end(alloc, index);
+ }
+
+ mutex_unlock(&alloc->mutex);
+ if (mm_locked)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
+ mmput_async(mm);
+ binder_free_page(page_to_free);
+
+ return LRU_REMOVED_RETRY;
+
+err_invalid_vma:
+ mutex_unlock(&alloc->mutex);
+err_get_alloc_mutex_failed:
+ if (mm_locked)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
+err_mmap_read_lock_failed:
+ mmput_async(mm);
+err_mmget:
+ return LRU_SKIP;
+}
+EXPORT_SYMBOL_IF_KUNIT(binder_alloc_free_page);
+
+static unsigned long
+binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return list_lru_count(&binder_freelist);
+}
+
+static unsigned long
+binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return list_lru_walk(&binder_freelist, binder_alloc_free_page,
+ NULL, sc->nr_to_scan);
+}
+
+static struct shrinker *binder_shrinker;
+
+VISIBLE_IF_KUNIT void __binder_alloc_init(struct binder_alloc *alloc,
+ struct list_lru *freelist)
+{
+ alloc->pid = current->group_leader->pid;
+ alloc->mm = current->mm;
+ mmgrab(alloc->mm);
+ mutex_init(&alloc->mutex);
+ INIT_LIST_HEAD(&alloc->buffers);
+ alloc->freelist = freelist;
+}
+EXPORT_SYMBOL_IF_KUNIT(__binder_alloc_init);
+
+/**
+ * binder_alloc_init() - called by binder_open() for per-proc initialization
+ * @alloc: binder_alloc for this proc
+ *
+ * Called from binder_open() to initialize binder_alloc fields for
+ * new binder proc
+ */
+void binder_alloc_init(struct binder_alloc *alloc)
+{
+ __binder_alloc_init(alloc, &binder_freelist);
+}
+
+int binder_alloc_shrinker_init(void)
+{
+ int ret;
+
+ ret = list_lru_init(&binder_freelist);
+ if (ret)
+ return ret;
+
+ binder_shrinker = shrinker_alloc(0, "android-binder");
+ if (!binder_shrinker) {
+ list_lru_destroy(&binder_freelist);
+ return -ENOMEM;
+ }
+
+ binder_shrinker->count_objects = binder_shrink_count;
+ binder_shrinker->scan_objects = binder_shrink_scan;
+
+ shrinker_register(binder_shrinker);
+
+ return 0;
+}
+
+void binder_alloc_shrinker_exit(void)
+{
+ shrinker_free(binder_shrinker);
+ list_lru_destroy(&binder_freelist);
+}
+
+/**
+ * check_buffer() - verify that buffer/offset is safe to access
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @offset: offset into @buffer data
+ * @bytes: bytes to access from offset
+ *
+ * Check that the @offset/@bytes are within the size of the given
+ * @buffer and that the buffer is currently active and not freeable.
+ * Offsets must also be multiples of sizeof(u32). The kernel is
+ * allowed to touch the buffer in two cases:
+ *
+ * 1) when the buffer is being created:
+ * (buffer->free == 0 && buffer->allow_user_free == 0)
+ * 2) when the buffer is being torn down:
+ * (buffer->free == 0 && buffer->transaction == NULL).
+ *
+ * Return: true if the buffer is safe to access
+ */
+static inline bool check_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t offset, size_t bytes)
+{
+ size_t buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+ return buffer_size >= bytes &&
+ offset <= buffer_size - bytes &&
+ IS_ALIGNED(offset, sizeof(u32)) &&
+ !buffer->free &&
+ (!buffer->allow_user_free || !buffer->transaction);
+}
+
+/**
+ * binder_alloc_copy_user_to_buffer() - copy src user to tgt user
+ * @alloc: binder_alloc for this proc
+ * @buffer: binder buffer to be accessed
+ * @buffer_offset: offset into @buffer data
+ * @from: userspace pointer to source buffer
+ * @bytes: bytes to copy
+ *
+ * Copy bytes from source userspace to target buffer.
+ *
+ * Return: bytes remaining to be copied
+ */
+unsigned long
+binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ const void __user *from,
+ size_t bytes)
+{
+ if (!check_buffer(alloc, buffer, buffer_offset, bytes))
+ return bytes;
+
+ while (bytes) {
+ unsigned long size;
+ unsigned long ret;
+ struct page *page;
+ pgoff_t pgoff;
+ void *kptr;
+
+ page = binder_alloc_get_page(alloc, buffer,
+ buffer_offset, &pgoff);
+ size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+ kptr = kmap_local_page(page) + pgoff;
+ ret = copy_from_user(kptr, from, size);
+ kunmap_local(kptr);
+ if (ret)
+ return bytes - size + ret;
+ bytes -= size;
+ from += size;
+ buffer_offset += size;
+ }
+ return 0;
+}
+
+static int binder_alloc_do_buffer_copy(struct binder_alloc *alloc,
+ bool to_buffer,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ void *ptr,
+ size_t bytes)
+{
+ /* All copies must be 32-bit aligned and 32-bit size */
+ if (!check_buffer(alloc, buffer, buffer_offset, bytes))
+ return -EINVAL;
+
+ while (bytes) {
+ unsigned long size;
+ struct page *page;
+ pgoff_t pgoff;
+
+ page = binder_alloc_get_page(alloc, buffer,
+ buffer_offset, &pgoff);
+ size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
+ if (to_buffer)
+ memcpy_to_page(page, pgoff, ptr, size);
+ else
+ memcpy_from_page(ptr, page, pgoff, size);
+ bytes -= size;
+ pgoff = 0;
+ ptr = ptr + size;
+ buffer_offset += size;
+ }
+ return 0;
+}
+
+int binder_alloc_copy_to_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ void *src,
+ size_t bytes)
+{
+ return binder_alloc_do_buffer_copy(alloc, true, buffer, buffer_offset,
+ src, bytes);
+}
+
+int binder_alloc_copy_from_buffer(struct binder_alloc *alloc,
+ void *dest,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ size_t bytes)
+{
+ return binder_alloc_do_buffer_copy(alloc, false, buffer, buffer_offset,
+ dest, bytes);
+}
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
new file mode 100644
index 000000000000..d6f1f6f2d00e
--- /dev/null
+++ b/drivers/android/binder_alloc.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Google, Inc.
+ */
+
+#ifndef _LINUX_BINDER_ALLOC_H
+#define _LINUX_BINDER_ALLOC_H
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rtmutex.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/list_lru.h>
+#include <uapi/linux/android/binder.h>
+
+struct binder_transaction;
+
+/**
+ * struct binder_buffer - buffer used for binder transactions
+ * @entry: entry alloc->buffers
+ * @rb_node: node for allocated_buffers/free_buffers rb trees
+ * @free: %true if buffer is free
+ * @clear_on_free: %true if buffer must be zeroed after use
+ * @allow_user_free: %true if user is allowed to free buffer
+ * @async_transaction: %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
+ * @debug_id: unique ID for debugging
+ * @transaction: pointer to associated struct binder_transaction
+ * @target_node: struct binder_node associated with this buffer
+ * @data_size: size of @transaction data
+ * @offsets_size: size of array of offsets
+ * @extra_buffers_size: size of space for other objects (like sg lists)
+ * @user_data: user pointer to base of buffer space
+ * @pid: pid to attribute the buffer to (caller)
+ *
+ * Bookkeeping structure for binder transaction buffers
+ */
+struct binder_buffer {
+ struct list_head entry; /* free and allocated entries by address */
+ struct rb_node rb_node; /* free entry by size or allocated entry */
+ /* by address */
+ unsigned free:1;
+ unsigned clear_on_free:1;
+ unsigned allow_user_free:1;
+ unsigned async_transaction:1;
+ unsigned oneway_spam_suspect:1;
+ unsigned debug_id:27;
+ struct binder_transaction *transaction;
+ struct binder_node *target_node;
+ size_t data_size;
+ size_t offsets_size;
+ size_t extra_buffers_size;
+ unsigned long user_data;
+ int pid;
+};
+
+/**
+ * struct binder_shrinker_mdata - binder metadata used to reclaim pages
+ * @lru: LRU entry in binder_freelist
+ * @alloc: binder_alloc owning the page to reclaim
+ * @page_index: offset in @alloc->pages[] into the page to reclaim
+ */
+struct binder_shrinker_mdata {
+ struct list_head lru;
+ struct binder_alloc *alloc;
+ unsigned long page_index;
+};
+
+static inline struct list_head *page_to_lru(struct page *p)
+{
+ struct binder_shrinker_mdata *mdata;
+
+ mdata = (struct binder_shrinker_mdata *)page_private(p);
+
+ return &mdata->lru;
+}
+
+/**
+ * struct binder_alloc - per-binder proc state for binder allocator
+ * @mutex: protects binder_alloc fields
+ * @mm: copy of task->mm (invariant after open)
+ * @vm_start: base of per-proc address space mapped via mmap
+ * @buffers: list of all buffers for this proc
+ * @free_buffers: rb tree of buffers available for allocation
+ * sorted by size
+ * @allocated_buffers: rb tree of allocated buffers sorted by address
+ * @free_async_space: VA space available for async buffers. This is
+ * initialized at mmap time to 1/2 the full VA space
+ * @pages: array of struct page *
+ * @freelist: lru list to use for free pages (invariant after init)
+ * @buffer_size: size of address space specified via mmap
+ * @pid: pid for associated binder_proc (invariant after init)
+ * @pages_high: high watermark of offset in @pages
+ * @mapped: whether the vm area is mapped, each binder instance is
+ * allowed a single mapping throughout its lifetime
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
+ *
+ * Bookkeeping structure for per-proc address space management for binder
+ * buffers. It is normally initialized during binder_init() and binder_mmap()
+ * calls. The address space is used for both user-visible buffers and for
+ * struct binder_buffer objects used to track the user buffers
+ */
+struct binder_alloc {
+ struct mutex mutex;
+ struct mm_struct *mm;
+ unsigned long vm_start;
+ struct list_head buffers;
+ struct rb_root free_buffers;
+ struct rb_root allocated_buffers;
+ size_t free_async_space;
+ struct page **pages;
+ struct list_lru *freelist;
+ size_t buffer_size;
+ int pid;
+ size_t pages_high;
+ bool mapped;
+ bool oneway_spam_detected;
+};
+
+enum lru_status binder_alloc_free_page(struct list_head *item,
+ struct list_lru_one *lru,
+ void *cb_arg);
+struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+ size_t data_size,
+ size_t offsets_size,
+ size_t extra_buffers_size,
+ int is_async);
+void binder_alloc_init(struct binder_alloc *alloc);
+int binder_alloc_shrinker_init(void);
+void binder_alloc_shrinker_exit(void);
+void binder_alloc_vma_close(struct binder_alloc *alloc);
+struct binder_buffer *
+binder_alloc_prepare_to_free(struct binder_alloc *alloc,
+ unsigned long user_ptr);
+void binder_alloc_free_buf(struct binder_alloc *alloc,
+ struct binder_buffer *buffer);
+int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+ struct vm_area_struct *vma);
+void binder_alloc_deferred_release(struct binder_alloc *alloc);
+int binder_alloc_get_allocated_count(struct binder_alloc *alloc);
+void binder_alloc_print_allocated(struct seq_file *m,
+ struct binder_alloc *alloc);
+void binder_alloc_print_pages(struct seq_file *m,
+ struct binder_alloc *alloc);
+
+/**
+ * binder_alloc_get_free_async_space() - get free space available for async
+ * @alloc: binder_alloc for this proc
+ *
+ * Return: the bytes remaining in the address-space for async transactions
+ */
+static inline size_t
+binder_alloc_get_free_async_space(struct binder_alloc *alloc)
+{
+ guard(mutex)(&alloc->mutex);
+ return alloc->free_async_space;
+}
+
+unsigned long
+binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ const void __user *from,
+ size_t bytes);
+
+int binder_alloc_copy_to_buffer(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ void *src,
+ size_t bytes);
+
+int binder_alloc_copy_from_buffer(struct binder_alloc *alloc,
+ void *dest,
+ struct binder_buffer *buffer,
+ binder_size_t buffer_offset,
+ size_t bytes);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+void __binder_alloc_init(struct binder_alloc *alloc, struct list_lru *freelist);
+size_t binder_alloc_buffer_size(struct binder_alloc *alloc,
+ struct binder_buffer *buffer);
+#endif
+
+#endif /* _LINUX_BINDER_ALLOC_H */
+
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
new file mode 100644
index 000000000000..342574bfd28a
--- /dev/null
+++ b/drivers/android/binder_internal.h
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_BINDER_INTERNAL_H
+#define _LINUX_BINDER_INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/uidgid.h>
+#include <uapi/linux/android/binderfs.h>
+#include "binder_alloc.h"
+#include "dbitmap.h"
+
+struct binder_context {
+ struct binder_node *binder_context_mgr_node;
+ struct mutex context_mgr_node_lock;
+ kuid_t binder_context_mgr_uid;
+ const char *name;
+};
+
+/**
+ * struct binder_device - information about a binder device node
+ * @hlist: list of binder devices
+ * @miscdev: information about a binder character device node
+ * @context: binder context information
+ * @binderfs_inode: This is the inode of the root dentry of the super block
+ * belonging to a binderfs mount.
+ */
+struct binder_device {
+ struct hlist_node hlist;
+ struct miscdevice miscdev;
+ struct binder_context context;
+ struct inode *binderfs_inode;
+ refcount_t ref;
+};
+
+/**
+ * binderfs_mount_opts - mount options for binderfs
+ * @max: maximum number of allocatable binderfs binder devices
+ * @stats_mode: enable binder stats in binderfs.
+ */
+struct binderfs_mount_opts {
+ int max;
+ int stats_mode;
+};
+
+/**
+ * binderfs_info - information about a binderfs mount
+ * @ipc_ns: The ipc namespace the binderfs mount belongs to.
+ * @control_dentry: This records the dentry of this binderfs mount
+ * binder-control device.
+ * @root_uid: uid that needs to be used when a new binder device is
+ * created.
+ * @root_gid: gid that needs to be used when a new binder device is
+ * created.
+ * @mount_opts: The mount options in use.
+ * @device_count: The current number of allocated binder devices.
+ * @proc_log_dir: Pointer to the directory dentry containing process-specific
+ * logs.
+ */
+struct binderfs_info {
+ struct ipc_namespace *ipc_ns;
+ struct dentry *control_dentry;
+ kuid_t root_uid;
+ kgid_t root_gid;
+ struct binderfs_mount_opts mount_opts;
+ int device_count;
+ struct dentry *proc_log_dir;
+};
+
+extern const struct file_operations binder_fops;
+
+extern char *binder_devices_param;
+
+#ifdef CONFIG_ANDROID_BINDERFS
+extern bool is_binderfs_device(const struct inode *inode);
+extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name,
+ const struct file_operations *fops,
+ void *data);
+#else
+static inline bool is_binderfs_device(const struct inode *inode)
+{
+ return false;
+}
+static inline struct dentry *binderfs_create_file(struct dentry *dir,
+ const char *name,
+ const struct file_operations *fops,
+ void *data)
+{
+ return NULL;
+}
+#endif
+
+#ifdef CONFIG_ANDROID_BINDERFS
+extern int __init init_binderfs(void);
+#else
+static inline int __init init_binderfs(void)
+{
+ return 0;
+}
+#endif
+
+struct binder_debugfs_entry {
+ const char *name;
+ umode_t mode;
+ const struct file_operations *fops;
+ void *data;
+};
+
+extern const struct binder_debugfs_entry binder_debugfs_entries[];
+
+#define binder_for_each_debugfs_entry(entry) \
+ for ((entry) = binder_debugfs_entries; \
+ (entry)->name; \
+ (entry)++)
+
+enum binder_stat_types {
+ BINDER_STAT_PROC,
+ BINDER_STAT_THREAD,
+ BINDER_STAT_NODE,
+ BINDER_STAT_REF,
+ BINDER_STAT_DEATH,
+ BINDER_STAT_TRANSACTION,
+ BINDER_STAT_TRANSACTION_COMPLETE,
+ BINDER_STAT_FREEZE,
+ BINDER_STAT_COUNT
+};
+
+struct binder_stats {
+ atomic_t br[_IOC_NR(BR_CLEAR_FREEZE_NOTIFICATION_DONE) + 1];
+ atomic_t bc[_IOC_NR(BC_FREEZE_NOTIFICATION_DONE) + 1];
+ atomic_t obj_created[BINDER_STAT_COUNT];
+ atomic_t obj_deleted[BINDER_STAT_COUNT];
+};
+
+/**
+ * struct binder_work - work enqueued on a worklist
+ * @entry: node enqueued on list
+ * @type: type of work to be performed
+ *
+ * There are separate work lists for proc, thread, and node (async).
+ */
+struct binder_work {
+ struct list_head entry;
+
+ enum binder_work_type {
+ BINDER_WORK_TRANSACTION = 1,
+ BINDER_WORK_TRANSACTION_COMPLETE,
+ BINDER_WORK_TRANSACTION_PENDING,
+ BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
+ BINDER_WORK_RETURN_ERROR,
+ BINDER_WORK_NODE,
+ BINDER_WORK_DEAD_BINDER,
+ BINDER_WORK_DEAD_BINDER_AND_CLEAR,
+ BINDER_WORK_CLEAR_DEATH_NOTIFICATION,
+ BINDER_WORK_FROZEN_BINDER,
+ BINDER_WORK_CLEAR_FREEZE_NOTIFICATION,
+ } type;
+};
+
+struct binder_error {
+ struct binder_work work;
+ uint32_t cmd;
+};
+
+/**
+ * struct binder_node - binder node bookkeeping
+ * @debug_id: unique ID for debugging
+ * (invariant after initialized)
+ * @lock: lock for node fields
+ * @work: worklist element for node work
+ * (protected by @proc->inner_lock)
+ * @rb_node: element for proc->nodes tree
+ * (protected by @proc->inner_lock)
+ * @dead_node: element for binder_dead_nodes list
+ * (protected by binder_dead_nodes_lock)
+ * @proc: binder_proc that owns this node
+ * (invariant after initialized)
+ * @refs: list of references on this node
+ * (protected by @lock)
+ * @internal_strong_refs: used to take strong references when
+ * initiating a transaction
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @local_weak_refs: weak user refs from local process
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @local_strong_refs: strong user refs from local process
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @tmp_refs: temporary kernel refs
+ * (protected by @proc->inner_lock while @proc
+ * is valid, and by binder_dead_nodes_lock
+ * if @proc is NULL. During inc/dec and node release
+ * it is also protected by @lock to provide safety
+ * as the node dies and @proc becomes NULL)
+ * @ptr: userspace pointer for node
+ * (invariant, no lock needed)
+ * @cookie: userspace cookie for node
+ * (invariant, no lock needed)
+ * @has_strong_ref: userspace notified of strong ref
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @pending_strong_ref: userspace has acked notification of strong ref
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @has_weak_ref: userspace notified of weak ref
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @pending_weak_ref: userspace has acked notification of weak ref
+ * (protected by @proc->inner_lock if @proc
+ * and by @lock)
+ * @has_async_transaction: async transaction to node in progress
+ * (protected by @lock)
+ * @accept_fds: file descriptor operations supported for node
+ * (invariant after initialized)
+ * @min_priority: minimum scheduling priority
+ * (invariant after initialized)
+ * @txn_security_ctx: require sender's security context
+ * (invariant after initialized)
+ * @async_todo: list of async work items
+ * (protected by @proc->inner_lock)
+ *
+ * Bookkeeping structure for binder nodes.
+ */
+struct binder_node {
+ int debug_id;
+ spinlock_t lock;
+ struct binder_work work;
+ union {
+ struct rb_node rb_node;
+ struct hlist_node dead_node;
+ };
+ struct binder_proc *proc;
+ struct hlist_head refs;
+ int internal_strong_refs;
+ int local_weak_refs;
+ int local_strong_refs;
+ int tmp_refs;
+ binder_uintptr_t ptr;
+ binder_uintptr_t cookie;
+ struct {
+ /*
+ * bitfield elements protected by
+ * proc inner_lock
+ */
+ u8 has_strong_ref:1;
+ u8 pending_strong_ref:1;
+ u8 has_weak_ref:1;
+ u8 pending_weak_ref:1;
+ };
+ struct {
+ /*
+ * invariant after initialization
+ */
+ u8 accept_fds:1;
+ u8 txn_security_ctx:1;
+ u8 min_priority;
+ };
+ bool has_async_transaction;
+ struct list_head async_todo;
+};
+
+struct binder_ref_death {
+ /**
+ * @work: worklist element for death notifications
+ * (protected by inner_lock of the proc that
+ * this ref belongs to)
+ */
+ struct binder_work work;
+ binder_uintptr_t cookie;
+};
+
+struct binder_ref_freeze {
+ struct binder_work work;
+ binder_uintptr_t cookie;
+ bool is_frozen:1;
+ bool sent:1;
+ bool resend:1;
+};
+
+/**
+ * struct binder_ref_data - binder_ref counts and id
+ * @debug_id: unique ID for the ref
+ * @desc: unique userspace handle for ref
+ * @strong: strong ref count (debugging only if not locked)
+ * @weak: weak ref count (debugging only if not locked)
+ *
+ * Structure to hold ref count and ref id information. Since
+ * the actual ref can only be accessed with a lock, this structure
+ * is used to return information about the ref to callers of
+ * ref inc/dec functions.
+ */
+struct binder_ref_data {
+ int debug_id;
+ uint32_t desc;
+ int strong;
+ int weak;
+};
+
+/**
+ * struct binder_ref - struct to track references on nodes
+ * @data: binder_ref_data containing id, handle, and current refcounts
+ * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree
+ * @rb_node_node: node for lookup by @node in proc's rb_tree
+ * @node_entry: list entry for node->refs list in target node
+ * (protected by @node->lock)
+ * @proc: binder_proc containing ref
+ * @node: binder_node of target node. When cleaning up a
+ * ref for deletion in binder_cleanup_ref, a non-NULL
+ * @node indicates the node must be freed
+ * @death: pointer to death notification (ref_death) if requested
+ * (protected by @node->lock)
+ * @freeze: pointer to freeze notification (ref_freeze) if requested
+ * (protected by @node->lock)
+ *
+ * Structure to track references from procA to target node (on procB). This
+ * structure is unsafe to access without holding @proc->outer_lock.
+ */
+struct binder_ref {
+ /* Lookups needed: */
+ /* node + proc => ref (transaction) */
+ /* desc + proc => ref (transaction, inc/dec ref) */
+ /* node => refs + procs (proc exit) */
+ struct binder_ref_data data;
+ struct rb_node rb_node_desc;
+ struct rb_node rb_node_node;
+ struct hlist_node node_entry;
+ struct binder_proc *proc;
+ struct binder_node *node;
+ struct binder_ref_death *death;
+ struct binder_ref_freeze *freeze;
+};
+
+/**
+ * struct binder_proc - binder process bookkeeping
+ * @proc_node: element for binder_procs list
+ * @threads: rbtree of binder_threads in this proc
+ * (protected by @inner_lock)
+ * @nodes: rbtree of binder nodes associated with
+ * this proc ordered by node->ptr
+ * (protected by @inner_lock)
+ * @refs_by_desc: rbtree of refs ordered by ref->desc
+ * (protected by @outer_lock)
+ * @refs_by_node: rbtree of refs ordered by ref->node
+ * (protected by @outer_lock)
+ * @waiting_threads: threads currently waiting for proc work
+ * (protected by @inner_lock)
+ * @pid PID of group_leader of process
+ * (invariant after initialized)
+ * @tsk task_struct for group_leader of process
+ * (invariant after initialized)
+ * @cred struct cred associated with the `struct file`
+ * in binder_open()
+ * (invariant after initialized)
+ * @deferred_work_node: element for binder_deferred_list
+ * (protected by binder_deferred_lock)
+ * @deferred_work: bitmap of deferred work to perform
+ * (protected by binder_deferred_lock)
+ * @outstanding_txns: number of transactions to be transmitted before
+ * processes in freeze_wait are woken up
+ * (protected by @inner_lock)
+ * @is_dead: process is dead and awaiting free
+ * when outstanding transactions are cleaned up
+ * (protected by @inner_lock)
+ * @is_frozen: process is frozen and unable to service
+ * binder transactions
+ * (protected by @inner_lock)
+ * @sync_recv: process received sync transactions since last frozen
+ * bit 0: received sync transaction after being frozen
+ * bit 1: new pending sync transaction during freezing
+ * (protected by @inner_lock)
+ * @async_recv: process received async transactions since last frozen
+ * (protected by @inner_lock)
+ * @freeze_wait: waitqueue of processes waiting for all outstanding
+ * transactions to be processed
+ * (protected by @inner_lock)
+ * @dmap dbitmap to manage available reference descriptors
+ * (protected by @outer_lock)
+ * @todo: list of work for this process
+ * (protected by @inner_lock)
+ * @stats: per-process binder statistics
+ * (atomics, no lock needed)
+ * @delivered_death: list of delivered death notification
+ * (protected by @inner_lock)
+ * @delivered_freeze: list of delivered freeze notification
+ * (protected by @inner_lock)
+ * @max_threads: cap on number of binder threads
+ * (protected by @inner_lock)
+ * @requested_threads: number of binder threads requested but not
+ * yet started. In current implementation, can
+ * only be 0 or 1.
+ * (protected by @inner_lock)
+ * @requested_threads_started: number binder threads started
+ * (protected by @inner_lock)
+ * @tmp_ref: temporary reference to indicate proc is in use
+ * (protected by @inner_lock)
+ * @default_priority: default scheduler priority
+ * (invariant after initialized)
+ * @debugfs_entry: debugfs node
+ * @alloc: binder allocator bookkeeping
+ * @context: binder_context for this proc
+ * (invariant after initialized)
+ * @inner_lock: can nest under outer_lock and/or node lock
+ * @outer_lock: no nesting under innor or node lock
+ * Lock order: 1) outer, 2) node, 3) inner
+ * @binderfs_entry: process-specific binderfs log file
+ * @oneway_spam_detection_enabled: process enabled oneway spam detection
+ * or not
+ *
+ * Bookkeeping structure for binder processes
+ */
+struct binder_proc {
+ struct hlist_node proc_node;
+ struct rb_root threads;
+ struct rb_root nodes;
+ struct rb_root refs_by_desc;
+ struct rb_root refs_by_node;
+ struct list_head waiting_threads;
+ int pid;
+ struct task_struct *tsk;
+ const struct cred *cred;
+ struct hlist_node deferred_work_node;
+ int deferred_work;
+ int outstanding_txns;
+ bool is_dead;
+ bool is_frozen;
+ bool sync_recv;
+ bool async_recv;
+ wait_queue_head_t freeze_wait;
+ struct dbitmap dmap;
+ struct list_head todo;
+ struct binder_stats stats;
+ struct list_head delivered_death;
+ struct list_head delivered_freeze;
+ u32 max_threads;
+ int requested_threads;
+ int requested_threads_started;
+ int tmp_ref;
+ long default_priority;
+ struct dentry *debugfs_entry;
+ struct binder_alloc alloc;
+ struct binder_context *context;
+ spinlock_t inner_lock;
+ spinlock_t outer_lock;
+ struct dentry *binderfs_entry;
+ bool oneway_spam_detection_enabled;
+};
+
+/**
+ * struct binder_thread - binder thread bookkeeping
+ * @proc: binder process for this thread
+ * (invariant after initialization)
+ * @rb_node: element for proc->threads rbtree
+ * (protected by @proc->inner_lock)
+ * @waiting_thread_node: element for @proc->waiting_threads list
+ * (protected by @proc->inner_lock)
+ * @pid: PID for this thread
+ * (invariant after initialization)
+ * @looper: bitmap of looping state
+ * (only accessed by this thread)
+ * @looper_needs_return: looping thread needs to exit driver
+ * (no lock needed)
+ * @transaction_stack: stack of in-progress transactions for this thread
+ * (protected by @proc->inner_lock)
+ * @todo: list of work to do for this thread
+ * (protected by @proc->inner_lock)
+ * @process_todo: whether work in @todo should be processed
+ * (protected by @proc->inner_lock)
+ * @return_error: transaction errors reported by this thread
+ * (only accessed by this thread)
+ * @reply_error: transaction errors reported by target thread
+ * (protected by @proc->inner_lock)
+ * @ee: extended error information from this thread
+ * (protected by @proc->inner_lock)
+ * @wait: wait queue for thread work
+ * @stats: per-thread statistics
+ * (atomics, no lock needed)
+ * @tmp_ref: temporary reference to indicate thread is in use
+ * (atomic since @proc->inner_lock cannot
+ * always be acquired)
+ * @is_dead: thread is dead and awaiting free
+ * when outstanding transactions are cleaned up
+ * (protected by @proc->inner_lock)
+ *
+ * Bookkeeping structure for binder threads.
+ */
+struct binder_thread {
+ struct binder_proc *proc;
+ struct rb_node rb_node;
+ struct list_head waiting_thread_node;
+ int pid;
+ int looper; /* only modified by this thread */
+ bool looper_need_return; /* can be written by other thread */
+ struct binder_transaction *transaction_stack;
+ struct list_head todo;
+ bool process_todo;
+ struct binder_error return_error;
+ struct binder_error reply_error;
+ struct binder_extended_error ee;
+ wait_queue_head_t wait;
+ struct binder_stats stats;
+ atomic_t tmp_ref;
+ bool is_dead;
+};
+
+/**
+ * struct binder_txn_fd_fixup - transaction fd fixup list element
+ * @fixup_entry: list entry
+ * @file: struct file to be associated with new fd
+ * @offset: offset in buffer data to this fixup
+ * @target_fd: fd to use by the target to install @file
+ *
+ * List element for fd fixups in a transaction. Since file
+ * descriptors need to be allocated in the context of the
+ * target process, we pass each fd to be processed in this
+ * struct.
+ */
+struct binder_txn_fd_fixup {
+ struct list_head fixup_entry;
+ struct file *file;
+ size_t offset;
+ int target_fd;
+};
+
+struct binder_transaction {
+ int debug_id;
+ struct binder_work work;
+ struct binder_thread *from;
+ pid_t from_pid;
+ pid_t from_tid;
+ struct binder_transaction *from_parent;
+ struct binder_proc *to_proc;
+ struct binder_thread *to_thread;
+ struct binder_transaction *to_parent;
+ unsigned is_async:1;
+ unsigned is_reply:1;
+
+ struct binder_buffer *buffer;
+ unsigned int code;
+ unsigned int flags;
+ long priority;
+ long saved_priority;
+ kuid_t sender_euid;
+ ktime_t start_time;
+ struct list_head fd_fixups;
+ binder_uintptr_t security_ctx;
+ /**
+ * @lock: protects @from, @to_proc, and @to_thread
+ *
+ * @from, @to_proc, and @to_thread can be set to NULL
+ * during thread teardown
+ */
+ spinlock_t lock;
+};
+
+/**
+ * struct binder_object - union of flat binder object types
+ * @hdr: generic object header
+ * @fbo: binder object (nodes and refs)
+ * @fdo: file descriptor object
+ * @bbo: binder buffer pointer
+ * @fdao: file descriptor array
+ *
+ * Used for type-independent object copies
+ */
+struct binder_object {
+ union {
+ struct binder_object_header hdr;
+ struct flat_binder_object fbo;
+ struct binder_fd_object fdo;
+ struct binder_buffer_object bbo;
+ struct binder_fd_array_object fdao;
+ };
+};
+
+/**
+ * Add a binder device to binder_devices
+ * @device: the new binder device to add to the global list
+ */
+void binder_add_device(struct binder_device *device);
+
+/**
+ * Remove a binder device to binder_devices
+ * @device: the binder device to remove from the global list
+ */
+void binder_remove_device(struct binder_device *device);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+vm_fault_t binder_vm_fault(struct vm_fault *vmf);
+#endif
+
+#endif /* _LINUX_BINDER_INTERNAL_H */
diff --git a/drivers/android/binder_netlink.c b/drivers/android/binder_netlink.c
new file mode 100644
index 000000000000..81e8432b5904
--- /dev/null
+++ b/drivers/android/binder_netlink.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/binder.yaml */
+/* YNL-GEN kernel source */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "binder_netlink.h"
+
+#include <uapi/linux/android/binder_netlink.h>
+
+/* Ops table for binder */
+static const struct genl_split_ops binder_nl_ops[] = {
+};
+
+static const struct genl_multicast_group binder_nl_mcgrps[] = {
+ [BINDER_NLGRP_REPORT] = { "report", },
+};
+
+struct genl_family binder_nl_family __ro_after_init = {
+ .name = BINDER_FAMILY_NAME,
+ .version = BINDER_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = binder_nl_ops,
+ .n_split_ops = ARRAY_SIZE(binder_nl_ops),
+ .mcgrps = binder_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(binder_nl_mcgrps),
+};
diff --git a/drivers/android/binder_netlink.h b/drivers/android/binder_netlink.h
new file mode 100644
index 000000000000..57399942a5e3
--- /dev/null
+++ b/drivers/android/binder_netlink.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/binder.yaml */
+/* YNL-GEN kernel header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#ifndef _LINUX_BINDER_GEN_H
+#define _LINUX_BINDER_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/android/binder_netlink.h>
+
+enum {
+ BINDER_NLGRP_REPORT,
+};
+
+extern struct genl_family binder_nl_family;
+
+#endif /* _LINUX_BINDER_GEN_H */
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
new file mode 100644
index 000000000000..fa5eb61cf580
--- /dev/null
+++ b/drivers/android/binder_trace.h
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Google, Inc.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM binder
+
+#if !defined(_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _BINDER_TRACE_H
+
+#include <linux/tracepoint.h>
+
+struct binder_buffer;
+struct binder_node;
+struct binder_proc;
+struct binder_alloc;
+struct binder_ref_data;
+struct binder_thread;
+struct binder_transaction;
+
+TRACE_EVENT(binder_ioctl,
+ TP_PROTO(unsigned int cmd, unsigned long arg),
+ TP_ARGS(cmd, arg),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cmd)
+ __field(unsigned long, arg)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->arg = arg;
+ ),
+ TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg)
+);
+
+DECLARE_EVENT_CLASS(binder_function_return_class,
+ TP_PROTO(int ret),
+ TP_ARGS(ret),
+ TP_STRUCT__entry(
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->ret = ret;
+ ),
+ TP_printk("ret=%d", __entry->ret)
+);
+
+#define DEFINE_BINDER_FUNCTION_RETURN_EVENT(name) \
+DEFINE_EVENT(binder_function_return_class, name, \
+ TP_PROTO(int ret), \
+ TP_ARGS(ret))
+
+DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_ioctl_done);
+DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done);
+DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done);
+
+TRACE_EVENT(binder_wait_for_work,
+ TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo),
+ TP_ARGS(proc_work, transaction_stack, thread_todo),
+
+ TP_STRUCT__entry(
+ __field(bool, proc_work)
+ __field(bool, transaction_stack)
+ __field(bool, thread_todo)
+ ),
+ TP_fast_assign(
+ __entry->proc_work = proc_work;
+ __entry->transaction_stack = transaction_stack;
+ __entry->thread_todo = thread_todo;
+ ),
+ TP_printk("proc_work=%d transaction_stack=%d thread_todo=%d",
+ __entry->proc_work, __entry->transaction_stack,
+ __entry->thread_todo)
+);
+
+TRACE_EVENT(binder_txn_latency_free,
+ TP_PROTO(struct binder_transaction *t,
+ int from_proc, int from_thread,
+ int to_proc, int to_thread),
+ TP_ARGS(t, from_proc, from_thread, to_proc, to_thread),
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, from_proc)
+ __field(int, from_thread)
+ __field(int, to_proc)
+ __field(int, to_thread)
+ __field(unsigned int, code)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->from_proc = from_proc;
+ __entry->from_thread = from_thread;
+ __entry->to_proc = to_proc;
+ __entry->to_thread = to_thread;
+ __entry->code = t->code;
+ __entry->flags = t->flags;
+ ),
+ TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x",
+ __entry->debug_id, __entry->from_proc, __entry->from_thread,
+ __entry->to_proc, __entry->to_thread, __entry->code,
+ __entry->flags)
+);
+
+TRACE_EVENT(binder_transaction,
+ TP_PROTO(bool reply, struct binder_transaction *t,
+ struct binder_node *target_node),
+ TP_ARGS(reply, t, target_node),
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, target_node)
+ __field(int, to_proc)
+ __field(int, to_thread)
+ __field(int, reply)
+ __field(unsigned int, code)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->target_node = target_node ? target_node->debug_id : 0;
+ __entry->to_proc = t->to_proc->pid;
+ __entry->to_thread = t->to_thread ? t->to_thread->pid : 0;
+ __entry->reply = reply;
+ __entry->code = t->code;
+ __entry->flags = t->flags;
+ ),
+ TP_printk("transaction=%d dest_node=%d dest_proc=%d dest_thread=%d reply=%d flags=0x%x code=0x%x",
+ __entry->debug_id, __entry->target_node,
+ __entry->to_proc, __entry->to_thread,
+ __entry->reply, __entry->flags, __entry->code)
+);
+
+TRACE_EVENT(binder_transaction_received,
+ TP_PROTO(struct binder_transaction *t),
+ TP_ARGS(t),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ ),
+ TP_printk("transaction=%d", __entry->debug_id)
+);
+
+TRACE_EVENT(binder_transaction_node_to_ref,
+ TP_PROTO(struct binder_transaction *t, struct binder_node *node,
+ struct binder_ref_data *rdata),
+ TP_ARGS(t, node, rdata),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, node_debug_id)
+ __field(binder_uintptr_t, node_ptr)
+ __field(int, ref_debug_id)
+ __field(uint32_t, ref_desc)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->node_debug_id = node->debug_id;
+ __entry->node_ptr = node->ptr;
+ __entry->ref_debug_id = rdata->debug_id;
+ __entry->ref_desc = rdata->desc;
+ ),
+ TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d",
+ __entry->debug_id, __entry->node_debug_id,
+ (u64)__entry->node_ptr,
+ __entry->ref_debug_id, __entry->ref_desc)
+);
+
+TRACE_EVENT(binder_transaction_ref_to_node,
+ TP_PROTO(struct binder_transaction *t, struct binder_node *node,
+ struct binder_ref_data *rdata),
+ TP_ARGS(t, node, rdata),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, ref_debug_id)
+ __field(uint32_t, ref_desc)
+ __field(int, node_debug_id)
+ __field(binder_uintptr_t, node_ptr)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->ref_debug_id = rdata->debug_id;
+ __entry->ref_desc = rdata->desc;
+ __entry->node_debug_id = node->debug_id;
+ __entry->node_ptr = node->ptr;
+ ),
+ TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%016llx",
+ __entry->debug_id, __entry->node_debug_id,
+ __entry->ref_debug_id, __entry->ref_desc,
+ (u64)__entry->node_ptr)
+);
+
+TRACE_EVENT(binder_transaction_ref_to_ref,
+ TP_PROTO(struct binder_transaction *t, struct binder_node *node,
+ struct binder_ref_data *src_ref,
+ struct binder_ref_data *dest_ref),
+ TP_ARGS(t, node, src_ref, dest_ref),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, node_debug_id)
+ __field(int, src_ref_debug_id)
+ __field(uint32_t, src_ref_desc)
+ __field(int, dest_ref_debug_id)
+ __field(uint32_t, dest_ref_desc)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->node_debug_id = node->debug_id;
+ __entry->src_ref_debug_id = src_ref->debug_id;
+ __entry->src_ref_desc = src_ref->desc;
+ __entry->dest_ref_debug_id = dest_ref->debug_id;
+ __entry->dest_ref_desc = dest_ref->desc;
+ ),
+ TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ref=%d dest_desc=%d",
+ __entry->debug_id, __entry->node_debug_id,
+ __entry->src_ref_debug_id, __entry->src_ref_desc,
+ __entry->dest_ref_debug_id, __entry->dest_ref_desc)
+);
+
+TRACE_EVENT(binder_transaction_fd_send,
+ TP_PROTO(struct binder_transaction *t, int fd, size_t offset),
+ TP_ARGS(t, fd, offset),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, fd)
+ __field(size_t, offset)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->fd = fd;
+ __entry->offset = offset;
+ ),
+ TP_printk("transaction=%d src_fd=%d offset=%zu",
+ __entry->debug_id, __entry->fd, __entry->offset)
+);
+
+TRACE_EVENT(binder_transaction_fd_recv,
+ TP_PROTO(struct binder_transaction *t, int fd, size_t offset),
+ TP_ARGS(t, fd, offset),
+
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(int, fd)
+ __field(size_t, offset)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = t->debug_id;
+ __entry->fd = fd;
+ __entry->offset = offset;
+ ),
+ TP_printk("transaction=%d dest_fd=%d offset=%zu",
+ __entry->debug_id, __entry->fd, __entry->offset)
+);
+
+DECLARE_EVENT_CLASS(binder_buffer_class,
+ TP_PROTO(struct binder_buffer *buf),
+ TP_ARGS(buf),
+ TP_STRUCT__entry(
+ __field(int, debug_id)
+ __field(size_t, data_size)
+ __field(size_t, offsets_size)
+ __field(size_t, extra_buffers_size)
+ ),
+ TP_fast_assign(
+ __entry->debug_id = buf->debug_id;
+ __entry->data_size = buf->data_size;
+ __entry->offsets_size = buf->offsets_size;
+ __entry->extra_buffers_size = buf->extra_buffers_size;
+ ),
+ TP_printk("transaction=%d data_size=%zd offsets_size=%zd extra_buffers_size=%zd",
+ __entry->debug_id, __entry->data_size, __entry->offsets_size,
+ __entry->extra_buffers_size)
+);
+
+DEFINE_EVENT(binder_buffer_class, binder_transaction_alloc_buf,
+ TP_PROTO(struct binder_buffer *buffer),
+ TP_ARGS(buffer));
+
+DEFINE_EVENT(binder_buffer_class, binder_transaction_buffer_release,
+ TP_PROTO(struct binder_buffer *buffer),
+ TP_ARGS(buffer));
+
+DEFINE_EVENT(binder_buffer_class, binder_transaction_failed_buffer_release,
+ TP_PROTO(struct binder_buffer *buffer),
+ TP_ARGS(buffer));
+
+DEFINE_EVENT(binder_buffer_class, binder_transaction_update_buffer_release,
+ TP_PROTO(struct binder_buffer *buffer),
+ TP_ARGS(buffer));
+
+TRACE_EVENT(binder_update_page_range,
+ TP_PROTO(struct binder_alloc *alloc, bool allocate,
+ unsigned long start, unsigned long end),
+ TP_ARGS(alloc, allocate, start, end),
+ TP_STRUCT__entry(
+ __field(int, proc)
+ __field(bool, allocate)
+ __field(size_t, offset)
+ __field(size_t, size)
+ ),
+ TP_fast_assign(
+ __entry->proc = alloc->pid;
+ __entry->allocate = allocate;
+ __entry->offset = start - alloc->vm_start;
+ __entry->size = end - start;
+ ),
+ TP_printk("proc=%d allocate=%d offset=%zu size=%zu",
+ __entry->proc, __entry->allocate,
+ __entry->offset, __entry->size)
+);
+
+DECLARE_EVENT_CLASS(binder_lru_page_class,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index),
+ TP_STRUCT__entry(
+ __field(int, proc)
+ __field(size_t, page_index)
+ ),
+ TP_fast_assign(
+ __entry->proc = alloc->pid;
+ __entry->page_index = page_index;
+ ),
+ TP_printk("proc=%d page_index=%zu",
+ __entry->proc, __entry->page_index)
+);
+
+DEFINE_EVENT(binder_lru_page_class, binder_alloc_lru_start,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_alloc_lru_end,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_free_lru_start,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_free_lru_end,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_alloc_page_start,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_alloc_page_end,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_unmap_user_start,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_unmap_user_end,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_unmap_kernel_start,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+DEFINE_EVENT(binder_lru_page_class, binder_unmap_kernel_end,
+ TP_PROTO(const struct binder_alloc *alloc, size_t page_index),
+ TP_ARGS(alloc, page_index));
+
+TRACE_EVENT(binder_command,
+ TP_PROTO(uint32_t cmd),
+ TP_ARGS(cmd),
+ TP_STRUCT__entry(
+ __field(uint32_t, cmd)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ ),
+ TP_printk("cmd=0x%x %s",
+ __entry->cmd,
+ _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_command_strings) ?
+ binder_command_strings[_IOC_NR(__entry->cmd)] :
+ "unknown")
+);
+
+TRACE_EVENT(binder_return,
+ TP_PROTO(uint32_t cmd),
+ TP_ARGS(cmd),
+ TP_STRUCT__entry(
+ __field(uint32_t, cmd)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ ),
+ TP_printk("cmd=0x%x %s",
+ __entry->cmd,
+ _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_return_strings) ?
+ binder_return_strings[_IOC_NR(__entry->cmd)] :
+ "unknown")
+);
+
+TRACE_EVENT(binder_netlink_report,
+ TP_PROTO(const char *context,
+ struct binder_transaction *t,
+ u32 data_size,
+ u32 error),
+ TP_ARGS(context, t, data_size, error),
+ TP_STRUCT__entry(
+ __field(const char *, context)
+ __field(u32, error)
+ __field(int, from_pid)
+ __field(int, from_tid)
+ __field(int, to_pid)
+ __field(int, to_tid)
+ __field(bool, is_reply)
+ __field(unsigned int, flags)
+ __field(unsigned int, code)
+ __field(size_t, data_size)
+ ),
+ TP_fast_assign(
+ __entry->context = context;
+ __entry->error = error;
+ __entry->from_pid = t->from_pid;
+ __entry->from_tid = t->from_tid;
+ __entry->to_pid = t->to_proc ? t->to_proc->pid : 0;
+ __entry->to_tid = t->to_thread ? t->to_thread->pid : 0;
+ __entry->is_reply = t->is_reply;
+ __entry->flags = t->flags;
+ __entry->code = t->code;
+ __entry->data_size = data_size;
+ ),
+ TP_printk("from %d:%d to %d:%d context=%s error=%d is_reply=%d flags=0x%x code=0x%x size=%zu",
+ __entry->from_pid, __entry->from_tid,
+ __entry->to_pid, __entry->to_tid,
+ __entry->context, __entry->error, __entry->is_reply,
+ __entry->flags, __entry->code, __entry->data_size)
+);
+
+#endif /* _BINDER_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE binder_trace
+#include <trace/define_trace.h>
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
new file mode 100644
index 000000000000..b46bcb91072d
--- /dev/null
+++ b/drivers/android/binderfs.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compiler_types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/gfp.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/ipc_namespace.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/namei.h>
+#include <linux/magic.h>
+#include <linux/major.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/fs_parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/user_namespace.h>
+#include <linux/xarray.h>
+#include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
+
+#include "binder_internal.h"
+
+#define FIRST_INODE 1
+#define SECOND_INODE 2
+#define INODE_OFFSET 3
+#define BINDERFS_MAX_MINOR (1U << MINORBITS)
+/* Ensure that the initial ipc namespace always has devices available. */
+#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4)
+
+static dev_t binderfs_dev;
+static DEFINE_MUTEX(binderfs_minors_mutex);
+static DEFINE_IDA(binderfs_minors);
+
+enum binderfs_param {
+ Opt_max,
+ Opt_stats_mode,
+};
+
+enum binderfs_stats_mode {
+ binderfs_stats_mode_unset,
+ binderfs_stats_mode_global,
+};
+
+struct binder_features {
+ bool oneway_spam_detection;
+ bool extended_error;
+ bool freeze_notification;
+ bool transaction_report;
+};
+
+static const struct constant_table binderfs_param_stats[] = {
+ { "global", binderfs_stats_mode_global },
+ {}
+};
+
+static const struct fs_parameter_spec binderfs_fs_parameters[] = {
+ fsparam_u32("max", Opt_max),
+ fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats),
+ {}
+};
+
+static struct binder_features binder_features = {
+ .oneway_spam_detection = true,
+ .extended_error = true,
+ .freeze_notification = true,
+ .transaction_report = true,
+};
+
+static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+bool is_binderfs_device(const struct inode *inode)
+{
+ if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC)
+ return true;
+
+ return false;
+}
+
+/**
+ * binderfs_binder_device_create - allocate inode from super block of a
+ * binderfs mount
+ * @ref_inode: inode from which the super block will be taken
+ * @userp: buffer to copy information about new device for userspace to
+ * @req: struct binderfs_device as copied from userspace
+ *
+ * This function allocates a new binder_device and reserves a new minor
+ * number for it.
+ * Minor numbers are limited and tracked globally in binderfs_minors. The
+ * function will stash a struct binder_device for the specific binder
+ * device in i_private of the inode.
+ * It will go on to allocate a new inode from the super block of the
+ * filesystem mount, stash a struct binder_device in its i_private field
+ * and attach a dentry to that inode.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int binderfs_binder_device_create(struct inode *ref_inode,
+ struct binderfs_device __user *userp,
+ struct binderfs_device *req)
+{
+ int minor, ret;
+ struct dentry *dentry, *root;
+ struct binder_device *device;
+ char *name = NULL;
+ struct inode *inode = NULL;
+ struct super_block *sb = ref_inode->i_sb;
+ struct binderfs_info *info = sb->s_fs_info;
+#if defined(CONFIG_IPC_NS)
+ bool use_reserve = (info->ipc_ns == &init_ipc_ns);
+#else
+ bool use_reserve = true;
+#endif
+
+ /* Reserve new minor number for the new device. */
+ mutex_lock(&binderfs_minors_mutex);
+ if (++info->device_count <= info->mount_opts.max)
+ minor = ida_alloc_max(&binderfs_minors,
+ use_reserve ? BINDERFS_MAX_MINOR :
+ BINDERFS_MAX_MINOR_CAPPED,
+ GFP_KERNEL);
+ else
+ minor = -ENOSPC;
+ if (minor < 0) {
+ --info->device_count;
+ mutex_unlock(&binderfs_minors_mutex);
+ return minor;
+ }
+ mutex_unlock(&binderfs_minors_mutex);
+
+ ret = -ENOMEM;
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device)
+ goto err;
+
+ inode = new_inode(sb);
+ if (!inode)
+ goto err;
+
+ inode->i_ino = minor + INODE_OFFSET;
+ simple_inode_init_ts(inode);
+ init_special_inode(inode, S_IFCHR | 0600,
+ MKDEV(MAJOR(binderfs_dev), minor));
+ inode->i_fop = &binder_fops;
+ inode->i_uid = info->root_uid;
+ inode->i_gid = info->root_gid;
+
+ req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */
+ name = kstrdup(req->name, GFP_KERNEL);
+ if (!name)
+ goto err;
+
+ refcount_set(&device->ref, 1);
+ device->binderfs_inode = inode;
+ device->context.binder_context_mgr_uid = INVALID_UID;
+ device->context.name = name;
+ device->miscdev.name = name;
+ device->miscdev.minor = minor;
+ mutex_init(&device->context.context_mgr_node_lock);
+
+ req->major = MAJOR(binderfs_dev);
+ req->minor = minor;
+
+ if (userp && copy_to_user(userp, req, sizeof(*req))) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ root = sb->s_root;
+ dentry = simple_start_creating(root, name);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto err;
+ }
+ inode->i_private = device;
+ d_make_persistent(dentry, inode);
+ fsnotify_create(root->d_inode, dentry);
+ simple_done_creating(dentry);
+
+ binder_add_device(device);
+
+ return 0;
+
+err:
+ kfree(name);
+ kfree(device);
+ mutex_lock(&binderfs_minors_mutex);
+ --info->device_count;
+ ida_free(&binderfs_minors, minor);
+ mutex_unlock(&binderfs_minors_mutex);
+ iput(inode);
+
+ return ret;
+}
+
+/**
+ * binder_ctl_ioctl - handle binder device node allocation requests
+ * @file: The file pointer for the binder-control device node.
+ * @cmd: The ioctl command.
+ * @arg: The ioctl argument.
+ *
+ * The request handler for the binder-control device. All requests operate on
+ * the binderfs mount the binder-control device resides in:
+ * - BINDER_CTL_ADD
+ * Allocate a new binder device.
+ *
+ * Return: %0 on success, negative errno on failure.
+ */
+static long binder_ctl_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = -EINVAL;
+ struct inode *inode = file_inode(file);
+ struct binderfs_device __user *device = (struct binderfs_device __user *)arg;
+ struct binderfs_device device_req;
+
+ switch (cmd) {
+ case BINDER_CTL_ADD:
+ ret = copy_from_user(&device_req, device, sizeof(device_req));
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = binderfs_binder_device_create(inode, device, &device_req);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void binderfs_evict_inode(struct inode *inode)
+{
+ struct binder_device *device = inode->i_private;
+ struct binderfs_info *info = BINDERFS_SB(inode->i_sb);
+
+ clear_inode(inode);
+
+ if (!S_ISCHR(inode->i_mode) || !device)
+ return;
+
+ mutex_lock(&binderfs_minors_mutex);
+ --info->device_count;
+ ida_free(&binderfs_minors, device->miscdev.minor);
+ mutex_unlock(&binderfs_minors_mutex);
+
+ if (refcount_dec_and_test(&device->ref)) {
+ binder_remove_device(device);
+ kfree(device->context.name);
+ kfree(device);
+ }
+}
+
+static int binderfs_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ int opt;
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct fs_parse_result result;
+
+ opt = fs_parse(fc, binderfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_max:
+ if (result.uint_32 > BINDERFS_MAX_MINOR)
+ return invalfc(fc, "Bad value for '%s'", param->key);
+
+ ctx->max = result.uint_32;
+ break;
+ case Opt_stats_mode:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ctx->stats_mode = result.uint_32;
+ break;
+ default:
+ return invalfc(fc, "Unsupported parameter '%s'", param->key);
+ }
+
+ return 0;
+}
+
+static int binderfs_fs_context_reconfigure(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb);
+
+ if (info->mount_opts.stats_mode != ctx->stats_mode)
+ return invalfc(fc, "Binderfs stats mode cannot be changed during a remount");
+
+ info->mount_opts.stats_mode = ctx->stats_mode;
+ info->mount_opts.max = ctx->max;
+ return 0;
+}
+
+static int binderfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct binderfs_info *info = BINDERFS_SB(root->d_sb);
+
+ if (info->mount_opts.max <= BINDERFS_MAX_MINOR)
+ seq_printf(seq, ",max=%d", info->mount_opts.max);
+
+ switch (info->mount_opts.stats_mode) {
+ case binderfs_stats_mode_unset:
+ break;
+ case binderfs_stats_mode_global:
+ seq_printf(seq, ",stats=global");
+ break;
+ }
+
+ return 0;
+}
+
+static const struct super_operations binderfs_super_ops = {
+ .evict_inode = binderfs_evict_inode,
+ .show_options = binderfs_show_options,
+ .statfs = simple_statfs,
+};
+
+static inline bool is_binderfs_control_device(const struct dentry *dentry)
+{
+ struct binderfs_info *info = dentry->d_sb->s_fs_info;
+
+ return info->control_dentry == dentry;
+}
+
+static int binderfs_rename(struct mnt_idmap *idmap,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (is_binderfs_control_device(old_dentry) ||
+ is_binderfs_control_device(new_dentry))
+ return -EPERM;
+
+ return simple_rename(idmap, old_dir, old_dentry, new_dir,
+ new_dentry, flags);
+}
+
+static int binderfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ if (is_binderfs_control_device(dentry))
+ return -EPERM;
+
+ return simple_unlink(dir, dentry);
+}
+
+static const struct file_operations binder_ctl_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = binder_ctl_ioctl,
+ .compat_ioctl = binder_ctl_ioctl,
+ .llseek = noop_llseek,
+};
+
+/**
+ * binderfs_binder_ctl_create - create a new binder-control device
+ * @sb: super block of the binderfs mount
+ *
+ * This function creates a new binder-control device node in the binderfs mount
+ * referred to by @sb.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+static int binderfs_binder_ctl_create(struct super_block *sb)
+{
+ int minor, ret;
+ struct dentry *dentry;
+ struct binder_device *device;
+ struct inode *inode = NULL;
+ struct dentry *root = sb->s_root;
+ struct binderfs_info *info = sb->s_fs_info;
+#if defined(CONFIG_IPC_NS)
+ bool use_reserve = (info->ipc_ns == &init_ipc_ns);
+#else
+ bool use_reserve = true;
+#endif
+
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ inode = new_inode(sb);
+ if (!inode)
+ goto out;
+
+ /* Reserve a new minor number for the new device. */
+ mutex_lock(&binderfs_minors_mutex);
+ minor = ida_alloc_max(&binderfs_minors,
+ use_reserve ? BINDERFS_MAX_MINOR :
+ BINDERFS_MAX_MINOR_CAPPED,
+ GFP_KERNEL);
+ mutex_unlock(&binderfs_minors_mutex);
+ if (minor < 0) {
+ ret = minor;
+ goto out;
+ }
+
+ inode->i_ino = SECOND_INODE;
+ simple_inode_init_ts(inode);
+ init_special_inode(inode, S_IFCHR | 0600,
+ MKDEV(MAJOR(binderfs_dev), minor));
+ inode->i_fop = &binder_ctl_fops;
+ inode->i_uid = info->root_uid;
+ inode->i_gid = info->root_gid;
+
+ refcount_set(&device->ref, 1);
+ device->binderfs_inode = inode;
+ device->miscdev.minor = minor;
+
+ dentry = d_alloc_name(root, "binder-control");
+ if (!dentry)
+ goto out;
+
+ inode->i_private = device;
+ info->control_dentry = dentry;
+ d_make_persistent(dentry, inode);
+ dput(dentry);
+
+ return 0;
+
+out:
+ kfree(device);
+ iput(inode);
+
+ return ret;
+}
+
+static const struct inode_operations binderfs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .rename = binderfs_rename,
+ .unlink = binderfs_unlink,
+};
+
+static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
+{
+ struct inode *ret;
+
+ ret = new_inode(sb);
+ if (ret) {
+ ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
+ ret->i_mode = mode;
+ simple_inode_init_ts(ret);
+ }
+ return ret;
+}
+
+struct dentry *binderfs_create_file(struct dentry *parent, const char *name,
+ const struct file_operations *fops,
+ void *data)
+{
+ struct dentry *dentry;
+ struct inode *new_inode, *parent_inode;
+ struct super_block *sb;
+
+ parent_inode = d_inode(parent);
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry))
+ return dentry;
+
+ sb = parent_inode->i_sb;
+ new_inode = binderfs_make_inode(sb, S_IFREG | 0444);
+ if (!new_inode) {
+ simple_done_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ new_inode->i_fop = fops;
+ new_inode->i_private = data;
+ d_make_persistent(dentry, new_inode);
+ fsnotify_create(parent_inode, dentry);
+ simple_done_creating(dentry);
+ return dentry; // borrowed
+}
+
+static struct dentry *binderfs_create_dir(struct dentry *parent,
+ const char *name)
+{
+ struct dentry *dentry;
+ struct inode *new_inode, *parent_inode;
+ struct super_block *sb;
+
+ parent_inode = d_inode(parent);
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry))
+ return dentry;
+
+ sb = parent_inode->i_sb;
+ new_inode = binderfs_make_inode(sb, S_IFDIR | 0755);
+ if (!new_inode) {
+ simple_done_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ new_inode->i_fop = &simple_dir_operations;
+ new_inode->i_op = &simple_dir_inode_operations;
+
+ set_nlink(new_inode, 2);
+ d_make_persistent(dentry, new_inode);
+ inc_nlink(parent_inode);
+ fsnotify_mkdir(parent_inode, dentry);
+ simple_done_creating(dentry);
+ return dentry;
+}
+
+static int binder_features_show(struct seq_file *m, void *unused)
+{
+ bool *feature = m->private;
+
+ seq_printf(m, "%d\n", *feature);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(binder_features);
+
+static int init_binder_features(struct super_block *sb)
+{
+ struct dentry *dentry, *dir;
+
+ dir = binderfs_create_dir(sb->s_root, "features");
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ dentry = binderfs_create_file(dir, "oneway_spam_detection",
+ &binder_features_fops,
+ &binder_features.oneway_spam_detection);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ dentry = binderfs_create_file(dir, "extended_error",
+ &binder_features_fops,
+ &binder_features.extended_error);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ dentry = binderfs_create_file(dir, "freeze_notification",
+ &binder_features_fops,
+ &binder_features.freeze_notification);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ dentry = binderfs_create_file(dir, "transaction_report",
+ &binder_features_fops,
+ &binder_features.transaction_report);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ return 0;
+}
+
+static int init_binder_logs(struct super_block *sb)
+{
+ struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
+ const struct binder_debugfs_entry *db_entry;
+ struct binderfs_info *info;
+ int ret = 0;
+
+ binder_logs_root_dir = binderfs_create_dir(sb->s_root,
+ "binder_logs");
+ if (IS_ERR(binder_logs_root_dir)) {
+ ret = PTR_ERR(binder_logs_root_dir);
+ goto out;
+ }
+
+ binder_for_each_debugfs_entry(db_entry) {
+ dentry = binderfs_create_file(binder_logs_root_dir,
+ db_entry->name,
+ db_entry->fops,
+ db_entry->data);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out;
+ }
+ }
+
+ proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc");
+ if (IS_ERR(proc_log_dir)) {
+ ret = PTR_ERR(proc_log_dir);
+ goto out;
+ }
+ info = sb->s_fs_info;
+ info->proc_log_dir = proc_log_dir;
+
+out:
+ return ret;
+}
+
+static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ int ret;
+ struct binderfs_info *info;
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+ struct inode *inode = NULL;
+ struct binderfs_device device_info = {};
+ const char *name;
+ size_t len;
+
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+
+ /*
+ * The binderfs filesystem can be mounted by userns root in a
+ * non-initial userns. By default such mounts have the SB_I_NODEV flag
+ * set in s_iflags to prevent security issues where userns root can
+ * just create random device nodes via mknod() since it owns the
+ * filesystem mount. But binderfs does not allow to create any files
+ * including devices nodes. The only way to create binder devices nodes
+ * is through the binder-control device which userns root is explicitly
+ * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both
+ * necessary and safe.
+ */
+ sb->s_iflags &= ~SB_I_NODEV;
+ sb->s_iflags |= SB_I_NOEXEC;
+ sb->s_magic = BINDERFS_SUPER_MAGIC;
+ sb->s_op = &binderfs_super_ops;
+ sb->s_time_gran = 1;
+
+ sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL);
+ if (!sb->s_fs_info)
+ return -ENOMEM;
+ info = sb->s_fs_info;
+
+ info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
+
+ info->root_gid = make_kgid(sb->s_user_ns, 0);
+ if (!gid_valid(info->root_gid))
+ info->root_gid = GLOBAL_ROOT_GID;
+ info->root_uid = make_kuid(sb->s_user_ns, 0);
+ if (!uid_valid(info->root_uid))
+ info->root_uid = GLOBAL_ROOT_UID;
+ info->mount_opts.max = ctx->max;
+ info->mount_opts.stats_mode = ctx->stats_mode;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return -ENOMEM;
+
+ inode->i_ino = FIRST_INODE;
+ inode->i_fop = &simple_dir_operations;
+ inode->i_mode = S_IFDIR | 0755;
+ simple_inode_init_ts(inode);
+ inode->i_op = &binderfs_dir_inode_operations;
+ set_nlink(inode, 2);
+
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
+ return -ENOMEM;
+
+ ret = binderfs_binder_ctl_create(sb);
+ if (ret)
+ return ret;
+
+ name = binder_devices_param;
+ for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
+ strscpy(device_info.name, name, len + 1);
+ ret = binderfs_binder_device_create(inode, NULL, &device_info);
+ if (ret)
+ return ret;
+ name += len;
+ if (*name == ',')
+ name++;
+ }
+
+ ret = init_binder_features(sb);
+ if (ret)
+ return ret;
+
+ if (info->mount_opts.stats_mode == binderfs_stats_mode_global)
+ return init_binder_logs(sb);
+
+ return 0;
+}
+
+static int binderfs_fs_context_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, binderfs_fill_super);
+}
+
+static void binderfs_fs_context_free(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx = fc->fs_private;
+
+ kfree(ctx);
+}
+
+static const struct fs_context_operations binderfs_fs_context_ops = {
+ .free = binderfs_fs_context_free,
+ .get_tree = binderfs_fs_context_get_tree,
+ .parse_param = binderfs_fs_context_parse_param,
+ .reconfigure = binderfs_fs_context_reconfigure,
+};
+
+static int binderfs_init_fs_context(struct fs_context *fc)
+{
+ struct binderfs_mount_opts *ctx;
+
+ ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->max = BINDERFS_MAX_MINOR;
+ ctx->stats_mode = binderfs_stats_mode_unset;
+
+ fc->fs_private = ctx;
+ fc->ops = &binderfs_fs_context_ops;
+
+ return 0;
+}
+
+static void binderfs_kill_super(struct super_block *sb)
+{
+ struct binderfs_info *info = sb->s_fs_info;
+
+ /*
+ * During inode eviction struct binderfs_info is needed.
+ * So first wipe the super_block then free struct binderfs_info.
+ */
+ kill_anon_super(sb);
+
+ if (info && info->ipc_ns)
+ put_ipc_ns(info->ipc_ns);
+
+ kfree(info);
+}
+
+static struct file_system_type binder_fs_type = {
+ .name = "binder",
+ .init_fs_context = binderfs_init_fs_context,
+ .parameters = binderfs_fs_parameters,
+ .kill_sb = binderfs_kill_super,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+
+int __init init_binderfs(void)
+{
+ int ret;
+ const char *name;
+ size_t len;
+
+ /* Verify that the default binderfs device names are valid. */
+ name = binder_devices_param;
+ for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
+ if (len > BINDERFS_MAX_NAME)
+ return -E2BIG;
+ name += len;
+ if (*name == ',')
+ name++;
+ }
+
+ /* Allocate new major number for binderfs. */
+ ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR,
+ "binder");
+ if (ret)
+ return ret;
+
+ ret = register_filesystem(&binder_fs_type);
+ if (ret) {
+ unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR);
+ return ret;
+ }
+
+ return ret;
+}
diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h
new file mode 100644
index 000000000000..c7299ce8b374
--- /dev/null
+++ b/drivers/android/dbitmap.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2024 Google LLC
+ *
+ * dbitmap - dynamically sized bitmap library.
+ *
+ * Used by the binder driver to optimize the allocation of the smallest
+ * available descriptor ID. Each bit in the bitmap represents the state
+ * of an ID.
+ *
+ * A dbitmap can grow or shrink as needed. This part has been designed
+ * considering that users might need to briefly release their locks in
+ * order to allocate memory for the new bitmap. These operations then,
+ * are verified to determine if the grow or shrink is sill valid.
+ *
+ * This library does not provide protection against concurrent access
+ * by itself. Binder uses the proc->outer_lock for this purpose.
+ */
+
+#ifndef _LINUX_DBITMAP_H
+#define _LINUX_DBITMAP_H
+#include <linux/bitmap.h>
+
+#define NBITS_MIN BITS_PER_TYPE(unsigned long)
+
+struct dbitmap {
+ unsigned int nbits;
+ unsigned long *map;
+};
+
+static inline int dbitmap_enabled(struct dbitmap *dmap)
+{
+ return !!dmap->nbits;
+}
+
+static inline void dbitmap_free(struct dbitmap *dmap)
+{
+ dmap->nbits = 0;
+ kfree(dmap->map);
+ dmap->map = NULL;
+}
+
+/* Returns the nbits that a dbitmap can shrink to, 0 if not possible. */
+static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap)
+{
+ unsigned int bit;
+
+ if (dmap->nbits <= NBITS_MIN)
+ return 0;
+
+ /*
+ * Determine if the bitmap can shrink based on the position of
+ * its last set bit. If the bit is within the first quarter of
+ * the bitmap then shrinking is possible. In this case, the
+ * bitmap should shrink to half its current size.
+ */
+ bit = find_last_bit(dmap->map, dmap->nbits);
+ if (bit < (dmap->nbits >> 2))
+ return dmap->nbits >> 1;
+
+ /* find_last_bit() returns dmap->nbits when no bits are set. */
+ if (bit == dmap->nbits)
+ return NBITS_MIN;
+
+ return 0;
+}
+
+/* Replace the internal bitmap with a new one of different size */
+static inline void
+dbitmap_replace(struct dbitmap *dmap, unsigned long *new, unsigned int nbits)
+{
+ bitmap_copy(new, dmap->map, min(dmap->nbits, nbits));
+ kfree(dmap->map);
+ dmap->map = new;
+ dmap->nbits = nbits;
+}
+
+static inline void
+dbitmap_shrink(struct dbitmap *dmap, unsigned long *new, unsigned int nbits)
+{
+ if (!new)
+ return;
+
+ /*
+ * Verify that shrinking to @nbits is still possible. The @new
+ * bitmap might have been allocated without locks, so this call
+ * could now be outdated. In this case, free @new and move on.
+ */
+ if (!dbitmap_enabled(dmap) || dbitmap_shrink_nbits(dmap) != nbits) {
+ kfree(new);
+ return;
+ }
+
+ dbitmap_replace(dmap, new, nbits);
+}
+
+/* Returns the nbits that a dbitmap can grow to. */
+static inline unsigned int dbitmap_grow_nbits(struct dbitmap *dmap)
+{
+ return dmap->nbits << 1;
+}
+
+static inline void
+dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits)
+{
+ /*
+ * Verify that growing to @nbits is still possible. The @new
+ * bitmap might have been allocated without locks, so this call
+ * could now be outdated. In this case, free @new and move on.
+ */
+ if (!dbitmap_enabled(dmap) || nbits <= dmap->nbits) {
+ kfree(new);
+ return;
+ }
+
+ /*
+ * Check for ENOMEM after confirming the grow operation is still
+ * required. This ensures we only disable the dbitmap when it's
+ * necessary. Once the dbitmap is disabled, binder will fallback
+ * to slow_desc_lookup_olocked().
+ */
+ if (!new) {
+ dbitmap_free(dmap);
+ return;
+ }
+
+ dbitmap_replace(dmap, new, nbits);
+}
+
+/*
+ * Finds and sets the next zero bit in the bitmap. Upon success @bit
+ * is populated with the index and 0 is returned. Otherwise, -ENOSPC
+ * is returned to indicate that a dbitmap_grow() is needed.
+ */
+static inline int
+dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset,
+ unsigned long *bit)
+{
+ unsigned long n;
+
+ n = find_next_zero_bit(dmap->map, dmap->nbits, offset);
+ if (n == dmap->nbits)
+ return -ENOSPC;
+
+ *bit = n;
+ set_bit(n, dmap->map);
+
+ return 0;
+}
+
+static inline void
+dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit)
+{
+ clear_bit(bit, dmap->map);
+}
+
+static inline int dbitmap_init(struct dbitmap *dmap)
+{
+ dmap->map = bitmap_zalloc(NBITS_MIN, GFP_KERNEL);
+ if (!dmap->map) {
+ dmap->nbits = 0;
+ return -ENOMEM;
+ }
+
+ dmap->nbits = NBITS_MIN;
+
+ return 0;
+}
+#endif
diff --git a/drivers/android/tests/.kunitconfig b/drivers/android/tests/.kunitconfig
new file mode 100644
index 000000000000..39b76bab9d9a
--- /dev/null
+++ b/drivers/android/tests/.kunitconfig
@@ -0,0 +1,7 @@
+#
+# Copyright 2025 Google LLC.
+#
+
+CONFIG_KUNIT=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST=y
diff --git a/drivers/android/tests/Makefile b/drivers/android/tests/Makefile
new file mode 100644
index 000000000000..27268418eb03
--- /dev/null
+++ b/drivers/android/tests/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Copyright 2025 Google LLC.
+#
+
+obj-$(CONFIG_ANDROID_BINDER_ALLOC_KUNIT_TEST) += binder_alloc_kunit.o
diff --git a/drivers/android/tests/binder_alloc_kunit.c b/drivers/android/tests/binder_alloc_kunit.c
new file mode 100644
index 000000000000..7f9cc003bbe3
--- /dev/null
+++ b/drivers/android/tests/binder_alloc_kunit.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for binder allocator code.
+ *
+ * Copyright 2025 Google LLC.
+ * Author: Tiffany Yang <ynaffit@google.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <kunit/test.h>
+#include <linux/anon_inodes.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/seq_buf.h>
+#include <linux/sizes.h>
+
+#include "../binder_alloc.h"
+#include "../binder_internal.h"
+
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+
+#define BINDER_MMAP_SIZE SZ_128K
+
+#define BUFFER_NUM 5
+#define BUFFER_MIN_SIZE (PAGE_SIZE / 8)
+
+#define FREESEQ_BUFLEN ((3 * BUFFER_NUM) + 1)
+
+#define ALIGN_TYPE_STRLEN (12)
+
+#define ALIGNMENTS_BUFLEN (((ALIGN_TYPE_STRLEN + 6) * BUFFER_NUM) + 1)
+
+#define PRINT_ALL_CASES (0)
+
+/* 5^5 alignment combinations * 2 places to share pages * 5! free sequences */
+#define TOTAL_EXHAUSTIVE_CASES (3125 * 2 * 120)
+
+/**
+ * enum buf_end_align_type - Page alignment of a buffer
+ * end with regard to the end of the previous buffer.
+ *
+ * In the pictures below, buf2 refers to the buffer we
+ * are aligning. buf1 refers to previous buffer by addr.
+ * Symbol [ means the start of a buffer, ] means the end
+ * of a buffer, and | means page boundaries.
+ */
+enum buf_end_align_type {
+ /**
+ * @SAME_PAGE_UNALIGNED: The end of this buffer is on
+ * the same page as the end of the previous buffer and
+ * is not page aligned. Examples:
+ * buf1 ][ buf2 ][ ...
+ * buf1 ]|[ buf2 ][ ...
+ */
+ SAME_PAGE_UNALIGNED = 0,
+ /**
+ * @SAME_PAGE_ALIGNED: When the end of the previous buffer
+ * is not page aligned, the end of this buffer is on the
+ * same page as the end of the previous buffer and is page
+ * aligned. When the previous buffer is page aligned, the
+ * end of this buffer is aligned to the next page boundary.
+ * Examples:
+ * buf1 ][ buf2 ]| ...
+ * buf1 ]|[ buf2 ]| ...
+ */
+ SAME_PAGE_ALIGNED,
+ /**
+ * @NEXT_PAGE_UNALIGNED: The end of this buffer is on
+ * the page next to the end of the previous buffer and
+ * is not page aligned. Examples:
+ * buf1 ][ buf2 | buf2 ][ ...
+ * buf1 ]|[ buf2 | buf2 ][ ...
+ */
+ NEXT_PAGE_UNALIGNED,
+ /**
+ * @NEXT_PAGE_ALIGNED: The end of this buffer is on
+ * the page next to the end of the previous buffer and
+ * is page aligned. Examples:
+ * buf1 ][ buf2 | buf2 ]| ...
+ * buf1 ]|[ buf2 | buf2 ]| ...
+ */
+ NEXT_PAGE_ALIGNED,
+ /**
+ * @NEXT_NEXT_UNALIGNED: The end of this buffer is on
+ * the page that follows the page after the end of the
+ * previous buffer and is not page aligned. Examples:
+ * buf1 ][ buf2 | buf2 | buf2 ][ ...
+ * buf1 ]|[ buf2 | buf2 | buf2 ][ ...
+ */
+ NEXT_NEXT_UNALIGNED,
+ /**
+ * @LOOP_END: The number of enum values in &buf_end_align_type.
+ * It is used for controlling loop termination.
+ */
+ LOOP_END,
+};
+
+static const char *const buf_end_align_type_strs[LOOP_END] = {
+ [SAME_PAGE_UNALIGNED] = "SP_UNALIGNED",
+ [SAME_PAGE_ALIGNED] = " SP_ALIGNED ",
+ [NEXT_PAGE_UNALIGNED] = "NP_UNALIGNED",
+ [NEXT_PAGE_ALIGNED] = " NP_ALIGNED ",
+ [NEXT_NEXT_UNALIGNED] = "NN_UNALIGNED",
+};
+
+struct binder_alloc_test_case_info {
+ char alignments[ALIGNMENTS_BUFLEN];
+ struct seq_buf alignments_sb;
+ size_t *buffer_sizes;
+ int *free_sequence;
+ bool front_pages;
+};
+
+static void stringify_free_seq(struct kunit *test, int *seq, struct seq_buf *sb)
+{
+ int i;
+
+ for (i = 0; i < BUFFER_NUM; i++)
+ seq_buf_printf(sb, "[%d]", seq[i]);
+
+ KUNIT_EXPECT_FALSE(test, seq_buf_has_overflowed(sb));
+}
+
+static void stringify_alignments(struct kunit *test, int *alignments,
+ struct seq_buf *sb)
+{
+ int i;
+
+ for (i = 0; i < BUFFER_NUM; i++)
+ seq_buf_printf(sb, "[ %d:%s ]", i,
+ buf_end_align_type_strs[alignments[i]]);
+
+ KUNIT_EXPECT_FALSE(test, seq_buf_has_overflowed(sb));
+}
+
+static bool check_buffer_pages_allocated(struct kunit *test,
+ struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ size_t size)
+{
+ unsigned long page_addr;
+ unsigned long end;
+ int page_index;
+
+ end = PAGE_ALIGN(buffer->user_data + size);
+ page_addr = buffer->user_data;
+ for (; page_addr < end; page_addr += PAGE_SIZE) {
+ page_index = (page_addr - alloc->vm_start) / PAGE_SIZE;
+ if (!alloc->pages[page_index] ||
+ !list_empty(page_to_lru(alloc->pages[page_index]))) {
+ kunit_err(test, "expect alloc but is %s at page index %d\n",
+ alloc->pages[page_index] ?
+ "lru" : "free", page_index);
+ return false;
+ }
+ }
+ return true;
+}
+
+static unsigned long binder_alloc_test_alloc_buf(struct kunit *test,
+ struct binder_alloc *alloc,
+ struct binder_buffer *buffers[],
+ size_t *sizes, int *seq)
+{
+ unsigned long failures = 0;
+ int i;
+
+ for (i = 0; i < BUFFER_NUM; i++) {
+ buffers[i] = binder_alloc_new_buf(alloc, sizes[i], 0, 0, 0);
+ if (IS_ERR(buffers[i]) ||
+ !check_buffer_pages_allocated(test, alloc, buffers[i], sizes[i]))
+ failures++;
+ }
+
+ return failures;
+}
+
+static unsigned long binder_alloc_test_free_buf(struct kunit *test,
+ struct binder_alloc *alloc,
+ struct binder_buffer *buffers[],
+ size_t *sizes, int *seq, size_t end)
+{
+ unsigned long failures = 0;
+ int i;
+
+ for (i = 0; i < BUFFER_NUM; i++)
+ binder_alloc_free_buf(alloc, buffers[seq[i]]);
+
+ for (i = 0; i <= (end - 1) / PAGE_SIZE; i++) {
+ if (list_empty(page_to_lru(alloc->pages[i]))) {
+ kunit_err(test, "expect lru but is %s at page index %d\n",
+ alloc->pages[i] ? "alloc" : "free", i);
+ failures++;
+ }
+ }
+
+ return failures;
+}
+
+static unsigned long binder_alloc_test_free_page(struct kunit *test,
+ struct binder_alloc *alloc)
+{
+ unsigned long failures = 0;
+ unsigned long count;
+ int i;
+
+ while ((count = list_lru_count(alloc->freelist))) {
+ list_lru_walk(alloc->freelist, binder_alloc_free_page,
+ NULL, count);
+ }
+
+ for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) {
+ if (alloc->pages[i]) {
+ kunit_err(test, "expect free but is %s at page index %d\n",
+ list_empty(page_to_lru(alloc->pages[i])) ?
+ "alloc" : "lru", i);
+ failures++;
+ }
+ }
+
+ return failures;
+}
+
+/* Executes one full test run for the given test case. */
+static bool binder_alloc_test_alloc_free(struct kunit *test,
+ struct binder_alloc *alloc,
+ struct binder_alloc_test_case_info *tc,
+ size_t end)
+{
+ unsigned long pages = PAGE_ALIGN(end) / PAGE_SIZE;
+ struct binder_buffer *buffers[BUFFER_NUM];
+ unsigned long failures;
+ bool failed = false;
+
+ failures = binder_alloc_test_alloc_buf(test, alloc, buffers,
+ tc->buffer_sizes,
+ tc->free_sequence);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "Initial allocation failed: %lu/%u buffers with errors",
+ failures, BUFFER_NUM);
+
+ failures = binder_alloc_test_free_buf(test, alloc, buffers,
+ tc->buffer_sizes,
+ tc->free_sequence, end);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "Initial buffers not freed correctly: %lu/%lu pages not on lru list",
+ failures, pages);
+
+ /* Allocate from lru. */
+ failures = binder_alloc_test_alloc_buf(test, alloc, buffers,
+ tc->buffer_sizes,
+ tc->free_sequence);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "Reallocation failed: %lu/%u buffers with errors",
+ failures, BUFFER_NUM);
+
+ failures = list_lru_count(alloc->freelist);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "lru list should be empty after reallocation but still has %lu pages",
+ failures);
+
+ failures = binder_alloc_test_free_buf(test, alloc, buffers,
+ tc->buffer_sizes,
+ tc->free_sequence, end);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "Reallocated buffers not freed correctly: %lu/%lu pages not on lru list",
+ failures, pages);
+
+ failures = binder_alloc_test_free_page(test, alloc);
+ failed = failed || failures;
+ KUNIT_EXPECT_EQ_MSG(test, failures, 0,
+ "Failed to clean up allocated pages: %lu/%lu pages still installed",
+ failures, (alloc->buffer_size / PAGE_SIZE));
+
+ return failed;
+}
+
+static bool is_dup(int *seq, int index, int val)
+{
+ int i;
+
+ for (i = 0; i < index; i++) {
+ if (seq[i] == val)
+ return true;
+ }
+ return false;
+}
+
+/* Generate BUFFER_NUM factorial free orders. */
+static void permute_frees(struct kunit *test, struct binder_alloc *alloc,
+ struct binder_alloc_test_case_info *tc,
+ unsigned long *runs, unsigned long *failures,
+ int index, size_t end)
+{
+ bool case_failed;
+ int i;
+
+ if (index == BUFFER_NUM) {
+ DECLARE_SEQ_BUF(freeseq_sb, FREESEQ_BUFLEN);
+
+ case_failed = binder_alloc_test_alloc_free(test, alloc, tc, end);
+ *runs += 1;
+ *failures += case_failed;
+
+ if (case_failed || PRINT_ALL_CASES) {
+ stringify_free_seq(test, tc->free_sequence,
+ &freeseq_sb);
+ kunit_err(test, "case %lu: [%s] | %s - %s - %s", *runs,
+ case_failed ? "FAILED" : "PASSED",
+ tc->front_pages ? "front" : "back ",
+ seq_buf_str(&tc->alignments_sb),
+ seq_buf_str(&freeseq_sb));
+ }
+
+ return;
+ }
+ for (i = 0; i < BUFFER_NUM; i++) {
+ if (is_dup(tc->free_sequence, index, i))
+ continue;
+ tc->free_sequence[index] = i;
+ permute_frees(test, alloc, tc, runs, failures, index + 1, end);
+ }
+}
+
+static void gen_buf_sizes(struct kunit *test,
+ struct binder_alloc *alloc,
+ struct binder_alloc_test_case_info *tc,
+ size_t *end_offset, unsigned long *runs,
+ unsigned long *failures)
+{
+ size_t last_offset, offset = 0;
+ size_t front_sizes[BUFFER_NUM];
+ size_t back_sizes[BUFFER_NUM];
+ int seq[BUFFER_NUM] = {0};
+ int i;
+
+ tc->free_sequence = seq;
+ for (i = 0; i < BUFFER_NUM; i++) {
+ last_offset = offset;
+ offset = end_offset[i];
+ front_sizes[i] = offset - last_offset;
+ back_sizes[BUFFER_NUM - i - 1] = front_sizes[i];
+ }
+ back_sizes[0] += alloc->buffer_size - end_offset[BUFFER_NUM - 1];
+
+ /*
+ * Buffers share the first or last few pages.
+ * Only BUFFER_NUM - 1 buffer sizes are adjustable since
+ * we need one giant buffer before getting to the last page.
+ */
+ tc->front_pages = true;
+ tc->buffer_sizes = front_sizes;
+ permute_frees(test, alloc, tc, runs, failures, 0,
+ end_offset[BUFFER_NUM - 1]);
+
+ tc->front_pages = false;
+ tc->buffer_sizes = back_sizes;
+ permute_frees(test, alloc, tc, runs, failures, 0, alloc->buffer_size);
+}
+
+static void gen_buf_offsets(struct kunit *test, struct binder_alloc *alloc,
+ size_t *end_offset, int *alignments,
+ unsigned long *runs, unsigned long *failures,
+ int index)
+{
+ size_t end, prev;
+ int align;
+
+ if (index == BUFFER_NUM) {
+ struct binder_alloc_test_case_info tc = {0};
+
+ seq_buf_init(&tc.alignments_sb, tc.alignments,
+ ALIGNMENTS_BUFLEN);
+ stringify_alignments(test, alignments, &tc.alignments_sb);
+
+ gen_buf_sizes(test, alloc, &tc, end_offset, runs, failures);
+ return;
+ }
+ prev = index == 0 ? 0 : end_offset[index - 1];
+ end = prev;
+
+ BUILD_BUG_ON(BUFFER_MIN_SIZE * BUFFER_NUM >= PAGE_SIZE);
+
+ for (align = SAME_PAGE_UNALIGNED; align < LOOP_END; align++) {
+ if (align % 2)
+ end = ALIGN(end, PAGE_SIZE);
+ else
+ end += BUFFER_MIN_SIZE;
+ end_offset[index] = end;
+ alignments[index] = align;
+ gen_buf_offsets(test, alloc, end_offset, alignments, runs,
+ failures, index + 1);
+ }
+}
+
+struct binder_alloc_test {
+ struct binder_alloc alloc;
+ struct list_lru binder_test_freelist;
+ struct file *filp;
+ unsigned long mmap_uaddr;
+};
+
+static void binder_alloc_test_init_freelist(struct kunit *test)
+{
+ struct binder_alloc_test *priv = test->priv;
+
+ KUNIT_EXPECT_PTR_EQ(test, priv->alloc.freelist,
+ &priv->binder_test_freelist);
+}
+
+static void binder_alloc_test_mmap(struct kunit *test)
+{
+ struct binder_alloc_test *priv = test->priv;
+ struct binder_alloc *alloc = &priv->alloc;
+ struct binder_buffer *buf;
+ struct rb_node *n;
+
+ KUNIT_EXPECT_EQ(test, alloc->mapped, true);
+ KUNIT_EXPECT_EQ(test, alloc->buffer_size, BINDER_MMAP_SIZE);
+
+ n = rb_first(&alloc->allocated_buffers);
+ KUNIT_EXPECT_PTR_EQ(test, n, NULL);
+
+ n = rb_first(&alloc->free_buffers);
+ buf = rb_entry(n, struct binder_buffer, rb_node);
+ KUNIT_EXPECT_EQ(test, binder_alloc_buffer_size(alloc, buf),
+ BINDER_MMAP_SIZE);
+ KUNIT_EXPECT_TRUE(test, list_is_last(&buf->entry, &alloc->buffers));
+}
+
+/**
+ * binder_alloc_exhaustive_test() - Exhaustively test alloc and free of buffer pages.
+ * @test: The test context object.
+ *
+ * Allocate BUFFER_NUM buffers to cover all page alignment cases,
+ * then free them in all orders possible. Check that pages are
+ * correctly allocated, put onto lru when buffers are freed, and
+ * are freed when binder_alloc_free_page() is called.
+ */
+static void binder_alloc_exhaustive_test(struct kunit *test)
+{
+ struct binder_alloc_test *priv = test->priv;
+ size_t end_offset[BUFFER_NUM];
+ int alignments[BUFFER_NUM];
+ unsigned long failures = 0;
+ unsigned long runs = 0;
+
+ gen_buf_offsets(test, &priv->alloc, end_offset, alignments, &runs,
+ &failures, 0);
+
+ KUNIT_EXPECT_EQ(test, runs, TOTAL_EXHAUSTIVE_CASES);
+ KUNIT_EXPECT_EQ(test, failures, 0);
+}
+
+/* ===== End test cases ===== */
+
+static void binder_alloc_test_vma_close(struct vm_area_struct *vma)
+{
+ struct binder_alloc *alloc = vma->vm_private_data;
+
+ binder_alloc_vma_close(alloc);
+}
+
+static const struct vm_operations_struct binder_alloc_test_vm_ops = {
+ .close = binder_alloc_test_vma_close,
+ .fault = binder_vm_fault,
+};
+
+static int binder_alloc_test_mmap_handler(struct file *filp,
+ struct vm_area_struct *vma)
+{
+ struct binder_alloc *alloc = filp->private_data;
+
+ vm_flags_mod(vma, VM_DONTCOPY | VM_MIXEDMAP, VM_MAYWRITE);
+
+ vma->vm_ops = &binder_alloc_test_vm_ops;
+ vma->vm_private_data = alloc;
+
+ return binder_alloc_mmap_handler(alloc, vma);
+}
+
+static const struct file_operations binder_alloc_test_fops = {
+ .mmap = binder_alloc_test_mmap_handler,
+};
+
+static int binder_alloc_test_init(struct kunit *test)
+{
+ struct binder_alloc_test *priv;
+ int ret;
+
+ priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ test->priv = priv;
+
+ ret = list_lru_init(&priv->binder_test_freelist);
+ if (ret) {
+ kunit_err(test, "Failed to initialize test freelist\n");
+ return ret;
+ }
+
+ /* __binder_alloc_init requires mm to be attached */
+ ret = kunit_attach_mm();
+ if (ret) {
+ kunit_err(test, "Failed to attach mm\n");
+ return ret;
+ }
+ __binder_alloc_init(&priv->alloc, &priv->binder_test_freelist);
+
+ priv->filp = anon_inode_getfile("binder_alloc_kunit",
+ &binder_alloc_test_fops, &priv->alloc,
+ O_RDWR | O_CLOEXEC);
+ if (IS_ERR_OR_NULL(priv->filp)) {
+ kunit_err(test, "Failed to open binder alloc test driver file\n");
+ return priv->filp ? PTR_ERR(priv->filp) : -ENOMEM;
+ }
+
+ priv->mmap_uaddr = kunit_vm_mmap(test, priv->filp, 0, BINDER_MMAP_SIZE,
+ PROT_READ, MAP_PRIVATE | MAP_NORESERVE,
+ 0);
+ if (!priv->mmap_uaddr) {
+ kunit_err(test, "Could not map the test's transaction memory\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void binder_alloc_test_exit(struct kunit *test)
+{
+ struct binder_alloc_test *priv = test->priv;
+
+ /* Close the backing file to make sure binder_alloc_vma_close runs */
+ if (!IS_ERR_OR_NULL(priv->filp))
+ fput(priv->filp);
+
+ if (priv->alloc.mm)
+ binder_alloc_deferred_release(&priv->alloc);
+
+ /* Make sure freelist is empty */
+ KUNIT_EXPECT_EQ(test, list_lru_count(&priv->binder_test_freelist), 0);
+ list_lru_destroy(&priv->binder_test_freelist);
+}
+
+static struct kunit_case binder_alloc_test_cases[] = {
+ KUNIT_CASE(binder_alloc_test_init_freelist),
+ KUNIT_CASE(binder_alloc_test_mmap),
+ KUNIT_CASE_SLOW(binder_alloc_exhaustive_test),
+ {}
+};
+
+static struct kunit_suite binder_alloc_test_suite = {
+ .name = "binder_alloc",
+ .test_cases = binder_alloc_test_cases,
+ .init = binder_alloc_test_init,
+ .exit = binder_alloc_test_exit,
+};
+
+kunit_test_suite(binder_alloc_test_suite);
+
+MODULE_AUTHOR("Tiffany Yang <ynaffit@google.com>");
+MODULE_DESCRIPTION("Binder Alloc KUnit tests");
+MODULE_LICENSE("GPL");