summaryrefslogtreecommitdiff
path: root/fs/overlayfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/overlayfs/inode.c')
-rw-r--r--fs/overlayfs/inode.c188
1 files changed, 121 insertions, 67 deletions
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b1bd469accd..6e3815fb006b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -16,13 +16,6 @@
#include "overlayfs.h"
-static dev_t ovl_get_pseudo_dev(struct dentry *dentry)
-{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->lowerstack[0].layer->pseudo_dev;
-}
-
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
@@ -66,6 +59,69 @@ out:
return err;
}
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
+ struct ovl_layer *lower_layer)
+{
+ bool samefs = ovl_same_sb(dentry->d_sb);
+ unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+
+ if (samefs) {
+ /*
+ * When all layers are on the same fs, all real inode
+ * number are unique, so we use the overlay st_dev,
+ * which is friendly to du -x.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ return 0;
+ } else if (xinobits) {
+ unsigned int shift = 64 - xinobits;
+ /*
+ * All inode numbers of underlying fs should not be using the
+ * high xinobits, so we use high xinobits to partition the
+ * overlay st_ino address space. The high bits holds the fsid
+ * (upper fsid is 0). This way overlay inode numbers are unique
+ * and all inodes use overlay st_dev. Inode numbers are also
+ * persistent for a given layer configuration.
+ */
+ if (stat->ino >> shift) {
+ pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+ dentry, stat->ino, xinobits);
+ } else {
+ if (lower_layer)
+ stat->ino |= ((u64)lower_layer->fsid) << shift;
+
+ stat->dev = dentry->d_sb->s_dev;
+ return 0;
+ }
+ }
+
+ /* The inode could not be mapped to a unified st_ino address space */
+ if (S_ISDIR(dentry->d_inode->i_mode)) {
+ /*
+ * Always use the overlay st_dev for directories, so 'find
+ * -xdev' will scan the entire overlay mount and won't cross the
+ * overlay mount boundaries.
+ *
+ * If not all layers are on the same fs the pair {real st_ino;
+ * overlay st_dev} is not unique, so use the non persistent
+ * overlay st_ino for directories.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ stat->ino = dentry->d_inode->i_ino;
+ } else if (lower_layer && lower_layer->fsid) {
+ /*
+ * For non-samefs setup, if we cannot map all layers st_ino
+ * to a unified address space, we need to make sure that st_dev
+ * is unique per lower fs. Upper layer uses real st_dev and
+ * lower layers use the unique anonymous bdev assigned to the
+ * lower fs.
+ */
+ stat->dev = lower_layer->fs->pseudo_dev;
+ }
+
+ return 0;
+}
+
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -75,6 +131,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
bool samefs = ovl_same_sb(dentry->d_sb);
+ struct ovl_layer *lower_layer = NULL;
int err;
type = ovl_path_real(dentry, &realpath);
@@ -84,14 +141,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
goto out;
/*
- * For non-dir or same fs, we use st_ino of the copy up origin, if we
- * know it. This guaranties constant st_dev/st_ino across copy up.
+ * For non-dir or same fs, we use st_ino of the copy up origin.
+ * This guaranties constant st_dev/st_ino across copy up.
+ * With xino feature and non-samefs, we use st_ino of the copy up
+ * origin masked with high bits that represent the layer id.
*
- * If filesystem supports NFS export ops, this also guaranties
+ * If lower filesystem supports NFS file handles, this also guaranties
* persistent st_ino across mount cycle.
*/
- if (!is_dir || samefs) {
- if (OVL_TYPE_ORIGIN(type)) {
+ if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+ if (!OVL_TYPE_UPPER(type)) {
+ lower_layer = ovl_layer_lower(dentry);
+ } else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
@@ -118,43 +179,17 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
*/
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
- (is_dir || lowerstat.nlink == 1)))
+ (is_dir || lowerstat.nlink == 1))) {
stat->ino = lowerstat.ino;
-
- if (samefs)
- WARN_ON_ONCE(stat->dev != lowerstat.dev);
- else
- stat->dev = ovl_get_pseudo_dev(dentry);
- }
- if (samefs) {
- /*
- * When all layers are on the same fs, all real inode
- * number are unique, so we use the overlay st_dev,
- * which is friendly to du -x.
- */
- stat->dev = dentry->d_sb->s_dev;
- } else if (!OVL_TYPE_UPPER(type)) {
- /*
- * For non-samefs setup, to make sure that st_dev/st_ino
- * pair is unique across the system, we use a unique
- * anonymous st_dev for lower layer inode.
- */
- stat->dev = ovl_get_pseudo_dev(dentry);
+ lower_layer = ovl_layer_lower(dentry);
+ }
}
- } else {
- /*
- * Always use the overlay st_dev for directories, so 'find
- * -xdev' will scan the entire overlay mount and won't cross the
- * overlay mount boundaries.
- *
- * If not all layers are on the same fs the pair {real st_ino;
- * overlay st_dev} is not unique, so use the non persistent
- * overlay st_ino for directories.
- */
- stat->dev = dentry->d_sb->s_dev;
- stat->ino = dentry->d_inode->i_ino;
}
+ err = ovl_map_dev_ino(dentry, stat, lower_layer);
+ if (err)
+ goto out;
+
/*
* It's probably not worth it to count subdirs to get the
* correct link count. nlink=1 seems to pacify 'find' and
@@ -383,24 +418,18 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
{
- struct dentry *alias;
- struct path upperpath;
-
- if (!(flags & S_ATIME))
- return 0;
-
- alias = d_find_any_alias(inode);
- if (!alias)
- return 0;
-
- ovl_path_upper(alias, &upperpath);
- if (upperpath.dentry) {
- touch_atime(&upperpath);
- inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+ if (flags & S_ATIME) {
+ struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+ struct path upperpath = {
+ .mnt = ofs->upper_mnt,
+ .dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+ };
+
+ if (upperpath.dentry) {
+ touch_atime(&upperpath);
+ inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+ }
}
-
- dput(alias);
-
return 0;
}
@@ -459,9 +488,27 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
#endif
}
-static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
+ unsigned long ino, int fsid)
{
- inode->i_ino = get_next_ino();
+ int xinobits = ovl_xino_bits(inode->i_sb);
+
+ /*
+ * When NFS export is enabled and d_ino is consistent with st_ino
+ * (samefs or i_ino has enough bits to encode layer), set the same
+ * value used for d_ino to i_ino, because nfsd readdirplus compares
+ * d_ino values to i_ino values of child entries. When called from
+ * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
+ * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+ */
+ if (inode->i_sb->s_export_op &&
+ (ovl_same_sb(inode->i_sb) || xinobits)) {
+ inode->i_ino = ino;
+ if (xinobits && fsid && !(ino >> (64 - xinobits)))
+ inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
+ } else {
+ inode->i_ino = get_next_ino();
+ }
inode->i_mode = mode;
inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
@@ -597,7 +644,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
inode = new_inode(sb);
if (inode)
- ovl_fill_inode(inode, mode, rdev);
+ ovl_fill_inode(inode, mode, rdev, 0, 0);
return inode;
}
@@ -703,13 +750,16 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
}
struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
- struct dentry *lowerdentry, struct dentry *index,
+ struct ovl_path *lowerpath, struct dentry *index,
unsigned int numlower)
{
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
+ struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
+ int fsid = bylower ? lowerpath->layer->fsid : 0;
bool is_dir;
+ unsigned long ino = 0;
if (!realinode)
realinode = d_inode(lowerdentry);
@@ -748,18 +798,22 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
if (!is_dir)
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
+ ino = key->i_ino;
} else {
/* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
if (!inode)
goto out_nomem;
}
- ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+ ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
ovl_inode_init(inode, upperdentry, lowerdentry);
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
+ if (index)
+ ovl_set_flag(OVL_INDEX, inode);
+
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || numlower > 1) ||