summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/file_table.c91
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namei.c24
-rw-r--r--fs/open.c76
-rw-r--r--fs/overlayfs/file.c8
-rw-r--r--fs/overlayfs/overlayfs.h5
-rw-r--r--include/linux/fs.h42
-rw-r--r--include/linux/fsnotify.h4
9 files changed, 205 insertions, 62 deletions
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 66482c193e86..d9d22d0ec38a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -451,10 +451,10 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
ret = cachefiles_inject_write_error();
if (ret == 0) {
- file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath,
- S_IFREG | 0600,
- O_RDWR | O_LARGEFILE | O_DIRECT,
- cache->cache_cred);
+ file = kernel_tmpfile_open(&nop_mnt_idmap, &parentpath,
+ S_IFREG | 0600,
+ O_RDWR | O_LARGEFILE | O_DIRECT,
+ cache->cache_cred);
ret = PTR_ERR_OR_ZERO(file);
}
if (ret) {
@@ -561,8 +561,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
*/
path.mnt = cache->mnt;
path.dentry = dentry;
- file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
- d_backing_inode(dentry), cache->cache_cred);
+ file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+ d_backing_inode(dentry), cache->cache_cred);
if (IS_ERR(file)) {
trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
PTR_ERR(file),
diff --git a/fs/file_table.c b/fs/file_table.c
index 372653b92617..e06c68e2d757 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+/* Container for backing file with optional real path */
+struct backing_file {
+ struct file file;
+ struct path real_path;
+};
+
+static inline struct backing_file *backing_file(struct file *f)
+{
+ return container_of(f, struct backing_file, file);
+}
+
+struct path *backing_file_real_path(struct file *f)
+{
+ return &backing_file(f)->real_path;
+}
+EXPORT_SYMBOL_GPL(backing_file_real_path);
+
static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_rcuhead);
put_cred(f->f_cred);
- kmem_cache_free(filp_cachep, f);
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ kfree(backing_file(f));
+ else
+ kmem_cache_free(filp_cachep, f);
}
static inline void file_free(struct file *f)
{
security_file_free(f);
- if (!(f->f_mode & FMODE_NOACCOUNT))
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ path_put(backing_file_real_path(f));
+ if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
percpu_counter_dec(&nr_files);
call_rcu(&f->f_rcuhead, file_free_rcu);
}
@@ -131,20 +153,15 @@ static int __init init_fs_stat_sysctls(void)
fs_initcall(init_fs_stat_sysctls);
#endif
-static struct file *__alloc_file(int flags, const struct cred *cred)
+static int init_file(struct file *f, int flags, const struct cred *cred)
{
- struct file *f;
int error;
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
- if (unlikely(!f))
- return ERR_PTR(-ENOMEM);
-
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
file_free_rcu(&f->f_rcuhead);
- return ERR_PTR(error);
+ return error;
}
atomic_long_set(&f->f_count, 1);
@@ -155,7 +172,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */
- return f;
+ return 0;
}
/* Find an unused file structure and return a pointer to it.
@@ -172,6 +189,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
{
static long old_max;
struct file *f;
+ int error;
/*
* Privileged users can go above max_files
@@ -185,9 +203,15 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
goto over;
}
- f = __alloc_file(flags, cred);
- if (!IS_ERR(f))
- percpu_counter_inc(&nr_files);
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(f, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ percpu_counter_inc(&nr_files);
return f;
@@ -203,18 +227,51 @@ over:
/*
* Variant of alloc_empty_file() that doesn't check and modify nr_files.
*
- * Should not be used unless there's a very good reason to do so.
+ * This is only for kernel internal use, and the allocate file must not be
+ * installed into file tables or such.
*/
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
{
- struct file *f = __alloc_file(flags, cred);
+ struct file *f;
+ int error;
- if (!IS_ERR(f))
- f->f_mode |= FMODE_NOACCOUNT;
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(f, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ f->f_mode |= FMODE_NOACCOUNT;
return f;
}
+/*
+ * Variant of alloc_empty_file() that allocates a backing_file container
+ * and doesn't check and modify nr_files.
+ *
+ * This is only for kernel internal use, and the allocate file must not be
+ * installed into file tables or such.
+ */
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+{
+ struct backing_file *ff;
+ int error;
+
+ ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
+ if (unlikely(!ff))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(&ff->file, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+ return &ff->file;
+}
+
/**
* alloc_file - allocate and initialize a 'struct file'
*
diff --git a/fs/internal.h b/fs/internal.h
index 5cec33cdf70f..f7a3dc111026 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
-extern struct file *alloc_empty_file(int, const struct cred *);
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+struct file *alloc_empty_file(int flags, const struct cred *cred);
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
static inline void put_file_access(struct file *file)
{
diff --git a/fs/namei.c b/fs/namei.c
index 6a5e26a529e1..91171da719c5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3703,7 +3703,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
}
/**
- * vfs_tmpfile_open - open a tmpfile for kernel internal use
+ * kernel_tmpfile_open - open a tmpfile for kernel internal use
* @idmap: idmap of the mount the inode was found from
* @parentpath: path of the base directory
* @mode: mode of the new tmpfile
@@ -3714,24 +3714,26 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
* hence this is only for kernel internal use, and must not be installed into
* file tables or such.
*/
-struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
- const struct path *parentpath,
- umode_t mode, int open_flag, const struct cred *cred)
+struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
+ const struct path *parentpath,
+ umode_t mode, int open_flag,
+ const struct cred *cred)
{
struct file *file;
int error;
file = alloc_empty_file_noaccount(open_flag, cred);
- if (!IS_ERR(file)) {
- error = vfs_tmpfile(idmap, parentpath, file, mode);
- if (error) {
- fput(file);
- file = ERR_PTR(error);
- }
+ if (IS_ERR(file))
+ return file;
+
+ error = vfs_tmpfile(idmap, parentpath, file, mode);
+ if (error) {
+ fput(file);
+ file = ERR_PTR(error);
}
return file;
}
-EXPORT_SYMBOL(vfs_tmpfile_open);
+EXPORT_SYMBOL(kernel_tmpfile_open);
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
diff --git a/fs/open.c b/fs/open.c
index fa5d53282dfe..fb07b2840eb4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1108,23 +1108,77 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
}
EXPORT_SYMBOL(dentry_create);
-struct file *open_with_fake_path(const struct path *path, int flags,
+/**
+ * kernel_file_open - open a file for kernel internal use
+ * @path: path of the file to open
+ * @flags: open flags
+ * @inode: the inode
+ * @cred: credentials for open
+ *
+ * Open a file for use by in-kernel consumers. The file is not accounted
+ * against nr_files and must not be installed into the file descriptor
+ * table.
+ *
+ * Return: Opened file on success, an error pointer on failure.
+ */
+struct file *kernel_file_open(const struct path *path, int flags,
struct inode *inode, const struct cred *cred)
{
- struct file *f = alloc_empty_file_noaccount(flags, cred);
- if (!IS_ERR(f)) {
- int error;
+ struct file *f;
+ int error;
- f->f_path = *path;
- error = do_dentry_open(f, inode, NULL);
- if (error) {
- fput(f);
- f = ERR_PTR(error);
- }
+ f = alloc_empty_file_noaccount(flags, cred);
+ if (IS_ERR(f))
+ return f;
+
+ f->f_path = *path;
+ error = do_dentry_open(f, inode, NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
}
return f;
}
-EXPORT_SYMBOL(open_with_fake_path);
+EXPORT_SYMBOL_GPL(kernel_file_open);
+
+/**
+ * backing_file_open - open a backing file for kernel internal use
+ * @path: path of the file to open
+ * @flags: open flags
+ * @path: path of the backing file
+ * @cred: credentials for open
+ *
+ * Open a backing file for a stackable filesystem (e.g., overlayfs).
+ * @path may be on the stackable filesystem and backing inode on the
+ * underlying filesystem. In this case, we want to be able to return
+ * the @real_path of the backing inode. This is done by embedding the
+ * returned file into a container structure that also stores the path of
+ * the backing inode on the underlying filesystem, which can be
+ * retrieved using backing_file_real_path().
+ */
+struct file *backing_file_open(const struct path *path, int flags,
+ const struct path *real_path,
+ const struct cred *cred)
+{
+ struct file *f;
+ int error;
+
+ f = alloc_empty_backing_file(flags, cred);
+ if (IS_ERR(f))
+ return f;
+
+ f->f_path = *path;
+ path_get(real_path);
+ *backing_file_real_path(f) = *real_path;
+ error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+
+ return f;
+}
+EXPORT_SYMBOL_GPL(backing_file_open);
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0801917f932e..dbbb156c2d67 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -34,8 +34,8 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
return 'm';
}
-/* No atime modification nor notify on underlying */
-#define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY)
+/* No atime modification on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME)
static struct file *ovl_open_realfile(const struct file *file,
const struct path *realpath)
@@ -61,8 +61,8 @@ static struct file *ovl_open_realfile(const struct file *file,
if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
- realfile = open_with_fake_path(&file->f_path, flags, realinode,
- current_cred());
+ realfile = backing_file_open(&file->f_path, flags, realpath,
+ current_cred());
}
revert_creds(old_cred);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4d0b278f5630..23686e8a06c4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -329,8 +329,9 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
struct dentry *dentry, umode_t mode)
{
struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
- struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode,
- O_LARGEFILE | O_WRONLY, current_cred());
+ struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path,
+ mode, O_LARGEFILE | O_WRONLY,
+ current_cred());
int err = PTR_ERR_OR_ZERO(file);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 66f105ef3427..4ca804f2b3ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -171,6 +171,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000)
+/* File is embedded in backing_file object */
+#define FMODE_BACKING ((__force fmode_t)0x2000000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
@@ -1678,9 +1681,12 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
WHITEOUT_DEV);
}
-struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
- const struct path *parentpath,
- umode_t mode, int open_flag, const struct cred *cred);
+struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
+ const struct path *parentpath,
+ umode_t mode, int open_flag,
+ const struct cred *cred);
+struct file *kernel_file_open(const struct path *path, int flags,
+ struct inode *inode, const struct cred *cred);
int vfs_mkobj(struct dentry *, umode_t,
int (*f)(struct dentry *, umode_t, void *),
@@ -2355,11 +2361,31 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
name, flags, mode);
}
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *dentry_create(const struct path *path, int flags,
- umode_t mode, const struct cred *cred);
-extern struct file * open_with_fake_path(const struct path *, int,
- struct inode*, const struct cred *);
+struct file *dentry_open(const struct path *path, int flags,
+ const struct cred *creds);
+struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+ const struct cred *cred);
+struct file *backing_file_open(const struct path *path, int flags,
+ const struct path *real_path,
+ const struct cred *cred);
+struct path *backing_file_real_path(struct file *f);
+
+/*
+ * file_real_path - get the path corresponding to f_inode
+ *
+ * When opening a backing file for a stackable filesystem (e.g.,
+ * overlayfs) f_path may be on the stackable filesystem and f_inode on
+ * the underlying filesystem. When the path associated with f_inode is
+ * needed, this helper should be used instead of accessing f_path
+ * directly.
+*/
+static inline const struct path *file_real_path(struct file *f)
+{
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ return backing_file_real_path(f);
+ return &f->f_path;
+}
+
static inline struct file *file_clone_open(struct file *file)
{
return dentry_open(&file->f_path, file->f_flags, file->f_cred);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bb8467cd11ae..ed48e4f1e755 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -91,11 +91,13 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
static inline int fsnotify_file(struct file *file, __u32 mask)
{
- const struct path *path = &file->f_path;
+ const struct path *path;
if (file->f_mode & FMODE_NONOTIFY)
return 0;
+ /* Overlayfs internal files have fake f_path */
+ path = file_real_path(file);
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}