summaryrefslogtreecommitdiff
path: root/fs/overlayfs
diff options
context:
space:
mode:
authorAmir Goldstein <amir73il@gmail.com>2020-02-21 16:34:44 +0200
committerMiklos Szeredi <mszeredi@redhat.com>2020-03-27 16:51:02 +0100
commitdfe51d47b7eeb5642ed92558b62eeff558f00eda (patch)
treefe0716c7482cdaa3395aa306375c90715ffaf412 /fs/overlayfs
parent4d314f7859dc1925ee28b4d4e74f6f3a80e6f34b (diff)
ovl: avoid possible inode number collisions with xino=on
When xino feature is enabled and a real directory inode number overflows the lower xino bits, we cannot map this directory inode number to a unique and persistent inode number and we fall back to the real inode st_ino and overlay st_dev. The real inode st_ino with high bits may collide with a lower inode number on overlay st_dev that was mapped using xino. To avoid possible collision with legitimate xino values, map a non persistent inode number to a dedicated range in the xino address space. The dedicated range is created by adding one more bit to the number of reserved high xino bits. We could have added just one more fsid, but that would have had the undesired effect of changing persistent overlay inode numbers on kernel or require more complex xino mapping code. Signed-off-by: Amir Goldstein <amir73il@gmail.com> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Diffstat (limited to 'fs/overlayfs')
-rw-r--r--fs/overlayfs/inode.c39
-rw-r--r--fs/overlayfs/readdir.c10
-rw-r--r--fs/overlayfs/super.c13
3 files changed, 45 insertions, 17 deletions
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 8130ae54a332..2d6251a4caaf 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -79,6 +79,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
bool samefs = ovl_same_fs(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+ unsigned int xinoshift = 64 - xinobits;
if (samefs) {
/*
@@ -89,20 +90,20 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
stat->dev = dentry->d_sb->s_dev;
return 0;
} else if (xinobits) {
- unsigned int shift = 64 - xinobits;
/*
* All inode numbers of underlying fs should not be using the
* high xinobits, so we use high xinobits to partition the
* overlay st_ino address space. The high bits holds the fsid
- * (upper fsid is 0). This way overlay inode numbers are unique
- * and all inodes use overlay st_dev. Inode numbers are also
- * persistent for a given layer configuration.
+ * (upper fsid is 0). The lowest xinobit is reserved for mapping
+ * the non-peresistent inode numbers range in case of overflow.
+ * This way all overlay inode numbers are unique and use the
+ * overlay st_dev.
*/
- if (stat->ino >> shift) {
+ if (unlikely(stat->ino >> xinoshift)) {
pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits);
} else {
- stat->ino |= ((u64)fsid) << shift;
+ stat->ino |= ((u64)fsid) << (xinoshift + 1);
stat->dev = dentry->d_sb->s_dev;
return 0;
}
@@ -573,6 +574,7 @@ static void ovl_next_ino(struct inode *inode)
static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
{
int xinobits = ovl_xino_bits(inode->i_sb);
+ unsigned int xinoshift = 64 - xinobits;
/*
* When d_ino is consistent with st_ino (samefs or i_ino has enough
@@ -582,11 +584,28 @@ static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid)
* with d_ino also causes nfsd readdirplus to fail.
*/
inode->i_ino = ino;
- if (ovl_same_dev(inode->i_sb)) {
- if (xinobits && fsid && !(ino >> (64 - xinobits)))
- inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
- } else if (S_ISDIR(inode->i_mode)) {
+ if (ovl_same_fs(inode->i_sb)) {
+ return;
+ } else if (xinobits && likely(!(ino >> xinoshift))) {
+ inode->i_ino |= (unsigned long)fsid << (xinoshift + 1);
+ return;
+ }
+
+ /*
+ * For directory inodes on non-samefs with xino disabled or xino
+ * overflow, we allocate a non-persistent inode number, to be used for
+ * resolving st_ino collisions in ovl_map_dev_ino().
+ *
+ * To avoid ino collision with legitimate xino values from upper
+ * layer (fsid 0), use the lowest xinobit to map the non
+ * persistent inode numbers to the unified st_ino address space.
+ */
+ if (S_ISDIR(inode->i_mode)) {
ovl_next_ino(inode);
+ if (xinobits) {
+ inode->i_ino &= ~0UL >> xinobits;
+ inode->i_ino |= 1UL << xinoshift;
+ }
}
}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 40ac9ce2465a..6325dcc4c48b 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -440,13 +440,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
const char *name, int namelen)
{
- if (ino >> (64 - xinobits)) {
+ unsigned int xinoshift = 64 - xinobits;
+
+ if (unlikely(ino >> xinoshift)) {
pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
namelen, name, ino, xinobits);
return ino;
}
- return ino | ((u64)fsid) << (64 - xinobits);
+ /*
+ * The lowest xinobit is reserved for mapping the non-peresistent inode
+ * numbers range, but this range is only exposed via st_ino, not here.
+ */
+ return ino | ((u64)fsid) << (xinoshift + 1);
}
/*
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1f1d40fc5e33..53dd094f970e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1483,7 +1483,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
* free high bits in underlying fs to hold the unique fsid.
* If overlayfs does encounter underlying inodes using the high xino
* bits reserved for fsid, it emits a warning and uses the original
- * inode number.
+ * inode number or a non persistent inode number allocated from a
+ * dedicated range.
*/
if (ofs->numfs - !ofs->upper_mnt == 1) {
if (ofs->config.xino == OVL_XINO_ON)
@@ -1494,11 +1495,13 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/*
* This is a roundup of number of bits needed for encoding
- * fsid, where fsid 0 is reserved for upper fs even with
- * lower only overlay.
+ * fsid, where fsid 0 is reserved for upper fs (even with
+ * lower only overlay) +1 extra bit is reserved for the non
+ * persistent inode number range that is used for resolving
+ * xino lower bits overflow.
*/
- BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
- ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
+ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
+ ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
}
if (ofs->xino_mode > 0) {