From 41e327b586762833e48b3703d53312ac32f05f24 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Mon, 7 Aug 2017 21:35:05 +0800 Subject: quota: correct space limit check Currently we compare the total space (curspace + rsvspace) with the space limit in quota-tools when setting the grace time and also in check_bdq(), but we are missing rsvspace elsewhere; correct that. This patch also fixes an incorrectly zeroed dqb_btime and a grace time update failure when rsvspace is used (e.g. with the ext4 delayed allocation feature). Signed-off-by: zhangyi (F) Signed-off-by: Jan Kara --- fs/quota/dquot.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 53a17496c5c5..566e6ef99f07 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1124,6 +1124,10 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_btime = (time64_t) 0; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) @@ -1145,7 +1149,8 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; - if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1381,14 +1386,18 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) static int info_bdq_free(struct dquot *dquot, qsize_t space) { + qsize_t tspace; + + tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; - if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit) + if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; - if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit && - dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit) + if (tspace >= dquot->dq_dqb.dqb_bhardlimit && + tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } @@ -2681,7 +2690,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) if (check_blim) { if (!dm->dqb_bsoftlimit || - dm->dqb_curspace < dm->dqb_bsoftlimit) { + dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) -- cgit From c44245b3d5435f533ca8346ece65918f84c057f9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Aug 2017 09:00:06 -0700 Subject: xfs: fix inobt inode allocation search optimization When we try to allocate a free inode by searching the inobt, we try to find the inode nearest the parent inode by searching chunks both left and right of the chunk containing the parent. As an optimization, we cache the leftmost and rightmost records that we previously searched; if we do another allocation with the same parent inode, we'll pick up the search where it last left off.
There's a bug in the case where we found a free inode to the left of the parent's chunk: we need to update the cached left and right records, but because we already reassigned the right record to point to the left, we end up assigning the left record to both the cached left and right records. Strictly speaking this isn't a correctness problem, but it can result in the next allocation rechecking chunks unnecessarily or allocating inodes further away from the parent than it needs to. Fix it by assigning the record pointer after we update the cached left and right records. Fixes: bd169565993b ("xfs: speed up free inode search") Signed-off-by: Omar Sandoval Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index ffd5a15d1bb6..abf5beaae907 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1246,13 +1246,13 @@ xfs_dialloc_ag_inobt( /* free inodes to the left? */ if (useleft && trec.ir_freecount) { - rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; + rec = trec; goto alloc_inode; } -- cgit From e28ae8e428fefe2facd72cea9f29906ecb9c861d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Aug 2017 12:45:35 -0700 Subject: iomap: fix integer truncation issues in the zeroing and dirtying helpers Fix the min_t calls in the zeroing and dirtying helpers to perform the comparisons on 64-bit types, which prevents them from being incorrectly truncated and prevents larger zeroing operations from getting stuck in a never-ending loop. Special thanks to Markus Stockhausen for spotting the bug. Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/iomap.c b/fs/iomap.c index 039266128b7f..59cc98ad7577 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -278,7 +278,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ offset = (pos & (PAGE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_SIZE - offset, length); + bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); if (IS_ERR(rpage)) @@ -373,7 +373,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, unsigned offset, bytes; offset = pos & (PAGE_SIZE - 1); /* Within page */ - bytes = min_t(unsigned, PAGE_SIZE - offset, count); + bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) status = iomap_dax_zero(pos, offset, bytes, iomap); -- cgit From b80b32b6d5e79798b85cd4644206aaa069059390 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Aug 2017 08:29:18 -0400 Subject: ext4: fix clang build regression Arnd Bergmann As Stefan pointed out, I misremembered what clang can do specifically, and it turns out that the variable-length array at the end of the structure did not work (a flexible array would have worked here but not solved the problem): fs/ext4/mballoc.c:2303:17: error: fields must have a constant size: 'variable length array in structure' extension will never be supported ext4_grpblk_t counters[blocksize_bits + 2]; This reverts part of my previous patch, using a fixed-size array again, but keeping the check for the array overflow. Fixes: 2df2c3402fc8 ("ext4: fix warning about stack corruption") Reported-by: Stefan Agner Tested-by: Chandan Rajendra Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5a1052627a81..701085620cd8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2300,7 +2300,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) EXT4_MAX_BLOCK_LOG_SIZE); struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[blocksize_bits + 2]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; @@ -2309,6 +2309,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); + i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + sizeof(struct ext4_group_info); + grinfo = ext4_get_group_info(sb, group); /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { @@ -2320,7 +2323,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) buddy_loaded = 1; } - memcpy(&sg, ext4_get_group_info(sb, group), sizeof(sg)); + memcpy(&sg, ext4_get_group_info(sb, group), i); if (buddy_loaded) ext4_mb_unload_buddy(&e4b); -- cgit From 32aaf194201e98db4235b7b71ac62a22e2ac355f Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 14 Aug 2017 08:30:06 -0400 Subject: ext4: add missing xattr hash update When updating an extended attribute, if the padded value sizes are the same, a shortcut is taken to avoid the bulk of the work. This was fine until the xattr hash update was moved inside ext4_xattr_set_entry(). With that change, the hash update got missed in the shortcut case. Thanks to ZhangYi (yizhang089@gmail.com) for root causing the problem. 
Fixes: daf8328172df ("ext4: eliminate xattr entry e_hash recalculation for removes") Reported-by: Miklos Szeredi Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 82a5af9f6668..3dd970168448 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1543,7 +1543,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Clear padding bytes. */ memset(val + i->value_len, 0, new_size - i->value_len); } - return 0; + goto update_hash; } /* Compute min_offs and last. */ @@ -1707,6 +1707,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_value_size = cpu_to_le32(i->value_len); } +update_hash: if (i->value) { __le32 hash = 0; @@ -1725,7 +1726,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_name_len, &crc32c_hash, 1); } else if (is_block) { - __le32 *value = s->base + min_offs - new_size; + __le32 *value = s->base + le16_to_cpu( + here->e_value_offs); hash = ext4_xattr_hash_entry(here->e_name, here->e_name_len, value, -- cgit From 01578e36163cdd0e4fd61d9976de15f13364e26d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Aug 2017 17:40:11 +0200 Subject: x86/elf: Remove the unnecessary ADDR_NO_RANDOMIZE checks The ADDR_NO_RANDOMIZE checks in stack_maxrandom_size() and randomize_stack_top() are not required. PF_RANDOMIZE is set by load_elf_binary() only if ADDR_NO_RANDOMIZE is not set, no need to re-check after that. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Reviewed-by: Dmitry Safonov Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20170815154011.GB1076@redhat.com --- fs/binfmt_elf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 879ff9c7ffd0..6466153f2bf0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -664,8 +664,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; -- cgit From c8c03f1858331e85d397bacccd34ef409aae993c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 16 Aug 2017 17:08:07 -0700 Subject: pty: fix the cached path of the pty slave file descriptor in the master Christian Brauner reported that if you use the TIOCGPTPEER ioctl() to get a slave pty file descriptor, the resulting file descriptor doesn't look right in /proc//fd/. In particular, he wanted to use readlink() on /proc/self/fd/ to get the pathname of the slave pty (basically implementing "ptsname{_r}()"). The reason for that was that we had generated the wrong 'struct path' when we create the pty in ptmx_open(). In particular, the dentry was correct, but the vfsmount pointed to the mount of the ptmx node. That _can_ be correct - in case you use "/dev/pts/ptmx" to open the master - but usually is not. The normal case is to use /dev/ptmx, which then looks up the pts/ directory, and then the vfsmount of the ptmx node is obviously the /dev directory, not the /dev/pts/ directory. 
We actually did have the right vfsmount available, but in the wrong place (it gets looked up in 'devpts_acquire()' when we get a reference to the pts filesystem), and so ptmx_open() used the wrong mnt pointer. The end result of this confusion was that the pty worked fine, but if you did TIOCGPTPEER to get the slave side of the pty, the end result would also work, but have that dodgy 'struct path'. And then when doing "d_path()" on it to get the pathname, the vfsmount would not match the root of the pts directory, and d_path() would return an empty pathname thinking that the entry had escaped a bind mount into another mount. This fixes the problem by making devpts_acquire() return the vfsmount for the pts filesystem, allowing ptmx_open() to trivially just use the right mount for the pts dentry, and create the proper 'struct path'. Reported-by: Christian Brauner Cc: Al Viro Acked-by: Eric Biederman Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..44dfbca9306f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp) +struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) { struct pts_fs_info *result; struct path path; @@ -142,6 +142,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); + *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -165,6 +166,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); + *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: -- cgit From 8204f8ddaafafcae074746fcf2a05a45e6827603 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:28 -0700 Subject: xfs: clear MS_ACTIVE after finishing log recovery Way back when we established inode block-map redo log items, it was discovered that we needed to prevent the VFS from evicting inodes during log recovery because any given inode might have bmap redo items to replay even if the inode has no link count and is ultimately deleted, and any eviction of an unlinked inode causes the inode to be truncated and freed too early. To make this possible, we set MS_ACTIVE so that inodes would not be torn down immediately upon release. Unfortunately, this also results in the quota inodes not being released at all if a later part of the mount process should fail, because we never reclaim the inodes. So, set MS_ACTIVE right before we do the last part of log recovery and clear it immediately after we finish the log recovery so that everything will be torn down properly if we abort the mount. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J.
Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_log.c | 11 +++++++++++ fs/xfs/xfs_mount.c | 10 ---------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0053bcf2b10a..4ebd0bafc914 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -749,9 +749,20 @@ xfs_log_mount_finish( return 0; } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. Turn it off immediately after recovery finishes + * so that we don't leak the quota inodes if subsequent mount + * activities fail. + */ + mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); + mp->m_super->s_flags &= ~MS_ACTIVE; return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40d4e8b4e193..151a82db0945 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -944,15 +944,6 @@ xfs_mountfs( } } - /* - * During the second phase of log recovery, we need iget and - * iput to behave like they do for an active filesystem. - * xfs_fs_drop_inode needs to be able to prevent the deletion - * of inodes before we're done replaying log items on those - * inodes. - */ - mp->m_super->s_flags |= MS_ACTIVE; - /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently @@ -1028,7 +1019,6 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: - mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); -- cgit From 77aff8c76425c8f49b50d0b9009915066739e7d2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:29 -0700 Subject: xfs: don't leak quotacheck dquots when cow recovery fails If we fail a mount on account of cow recovery errors, it's possible that a previous quotacheck left some dquots in memory. The bailout clause of xfs_mountfs forgets to purge these, and so we leak them. Fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_mount.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 151a82db0945..ea7d4b4e50d0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1024,6 +1024,8 @@ xfs_mountfs( IRELE(rip); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); + /* Clean out dquots that might be in memory after quotacheck. */ + xfs_qm_unmount(mp); out_log_dealloc: mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); -- cgit From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 3 Aug 2017 13:09:03 +0530 Subject: cifs: Fix df output for users with quota limits The df for an SMB2 share triggers a GetInfo call for FS_FULL_SIZE_INFORMATION. The values returned are used to populate struct statfs. The problem is that none of the information returned by the call contains the total blocks available on the filesystem. Instead we use the blocks available to the user, i.e. the quota limitation, when filling out statfs.f_blocks. The information returned does contain the actual free units on the filesystem and is used to populate statfs.f_bfree.
For users with quota enabled, it can lead to situations where the total free space reported is more than the total blocks on the system, ending up with df reports like the following: # df -h /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a To fix this problem, we instead populate statfs.f_bfree with the same value as statfs.f_bavail, i.e. CallerAvailableAllocationUnits. This is similar to what is already done in the code for cifs, and df now reports the quota information for the user used to mount the share. # df --si /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a Signed-off-by: Sachin Prabhu Signed-off-by: Pierguido Lambri Signed-off-by: Steve French Cc: --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5fb2fc2d0080..97edb4d376cd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3219,8 +3219,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } -- cgit From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Aug 2017 14:48:14 +1000 Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Add checking for the path component length and verify it is <= the maximum that the server advertizes via FileFsAttributeInformation. With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT when users try to access an overlong path. To test this, try to cd into a (non-existing) directory on a CIFS share that has a too long name: cd /mnt/aaaaaaaaaaaaaaa... and it should now show a good error message from the shell: bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long rh bz 1153996 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: --- fs/cifs/dir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 56366e984076..569d3fb736be 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,15 +194,20 @@ cifs_bp_rename_retry: } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix.
*/ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + tcon->fsAttrInfo.MaxPathNameComponentLength)) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; -- cgit From 143c97cc652949893c8056c679012f0aeccb80e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Aug 2017 18:16:11 -0700 Subject: Revert "pty: fix the cached path of the pty slave file descriptor in the master" This reverts commit c8c03f1858331e85d397bacccd34ef409aae993c. It turns out that while fixing the ptmx file descriptor to have the correct 'struct path' to the associated slave pty is a really good thing, it breaks some user space tools for a very annoying reason. The problem is that /dev/ptmx and its associated slave pty (/dev/pts/X) are on different mounts. That was what caused us to have the wrong path in the first place (we would mix up the vfsmount of the 'ptmx' node, with the dentry of the pty slave node), but it also means that now while we use the right vfsmount, having the pty master open also keeps the pts mount busy. And it turns out that that makes 'pbuilder' very unhappy, as noted by Stefan Lippers-Hollmann: "This patch introduces a regression for me when using pbuilder 0.228.7[2] (a helper to build Debian packages in a chroot and to create and update its chroots) when trying to umount /dev/ptmx (inside the chroot) on Debian/ unstable (full log and pbuilder configuration file[3] attached). [...] Setting up build-essential (12.3) ... Processing triggers for libc-bin (2.24-15) ... I: unmounting dev/ptmx filesystem W: Could not unmount dev/ptmx: umount: /var/cache/pbuilder/build/1340/dev/ptmx: target is busy (In some cases useful info about processes that use the device is found by lsof(8) or fuser(1).)" Apparently pbuilder tries to unmount the /dev/pts filesystem while still holding at least one master node open, which is arguably not very nice, but we don't break user space even when fixing other bugs. So this commit has to be reverted. I'll try to figure out a way to avoid caching the path to the slave pty in the master pty. The only thing that actually wants that slave pty path is the "TIOCGPTPEER" ioctl, and I think we could just recreate the path at that time.
Reported-by: Stefan Lippers-Hollmann Cc: Eric W Biederman Cc: Christian Brauner Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 44dfbca9306f..108df2e3602c 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) +struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; struct path path; @@ -142,7 +142,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) path = filp->f_path; path_get(&path); - *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -166,7 +165,6 @@ struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); - *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: -- cgit From 58efbc9f5463308c43d812fd0748a4dee44c8a16 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 22 Aug 2017 23:45:59 -0700 Subject: Btrfs: fix blk_status_t/errno confusion This fixes several instances of blk_status_t and bare errno ints being mixed up, some of which are real bugs. In the normal case, 0 matches BLK_STS_OK, so we don't observe any effects of the missing conversion, but in case of errors or passes through the repair/retry paths, the errors get mixed up. The changes were identified using 'sparse', we don't have reports of the buggy behaviour. Fixes: 4e4cbee93d56 ("block: switch bios to blk_status_t") Signed-off-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/inode.c | 70 +++++++++++++++++++++++++++++------------------------- fs/btrfs/raid56.c | 34 +++++++++++++------------- fs/btrfs/volumes.c | 10 ++++---- fs/btrfs/volumes.h | 6 ++--- 5 files changed, 64 insertions(+), 60 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 080e2ebb8aa0..f45b61fe9a9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3516,7 +3516,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) struct bio *bio = device->flush_bio; if (!device->flush_bio_sent) - return 0; + return BLK_STS_OK; device->flush_bio_sent = 0; wait_for_completion_io(&device->flush_wait); @@ -3563,7 +3563,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = 0; + dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 95c212037095..24bcd5cd9cf2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7924,11 +7924,12 @@ err: return ret; } -static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, - int mirror_num) +static inline blk_status_t submit_dio_repair_bio(struct inode *inode, + struct bio *bio, + int mirror_num) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; + blk_status_t ret; BUG_ON(bio_op(bio) == REQ_OP_WRITE); @@ -7980,10 +7981,10 @@ static int btrfs_check_dio_repairable(struct inode *inode, return 1; } -static int dio_read_error(struct inode *inode, struct bio *failed_bio, - struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - 
bio_end_io_t *repair_endio, void *repair_arg) +static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, + struct page *page, unsigned int pgoff, + u64 start, u64 end, int failed_mirror, + bio_end_io_t *repair_endio, void *repair_arg) { struct io_failure_record *failrec; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -7993,18 +7994,19 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, int read_mode = 0; int segs; int ret; + blk_status_t status; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); ret = btrfs_get_io_failure_record(inode, start, end, &failrec); if (ret) - return ret; + return errno_to_blk_status(ret); ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { free_io_failure(failure_tree, io_tree, failrec); - return -EIO; + return BLK_STS_IOERR; } segs = bio_segments(failed_bio); @@ -8022,13 +8024,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", read_mode, failrec->this_mirror, failrec->in_validation); - ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); - if (ret) { + status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); + if (status) { free_io_failure(failure_tree, io_tree, failrec); bio_put(bio); } - return ret; + return status; } struct btrfs_retry_complete { @@ -8065,8 +8067,8 @@ end: bio_put(bio); } -static int __btrfs_correct_data_nocsum(struct inode *inode, - struct btrfs_io_bio *io_bio) +static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, + struct btrfs_io_bio *io_bio) { struct btrfs_fs_info *fs_info; struct bio_vec bvec; @@ -8076,8 +8078,8 @@ static int __btrfs_correct_data_nocsum(struct inode *inode, unsigned int pgoff; u32 sectorsize; int nr_sectors; - int ret; - int err = 0; + blk_status_t ret; + blk_status_t err = BLK_STS_OK; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; @@ -8183,11 +8185,12 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode, int csum_pos; bool uptodate = (err == 0); int ret; + blk_status_t status; fs_info = BTRFS_I(inode)->root->fs_info; sectorsize = fs_info->sectorsize; - err = 0; + err = BLK_STS_OK; start = io_bio->logical; done.inode = inode; io_bio->bio.bi_iter = io_bio->iter; @@ -8209,12 +8212,12 @@ try_again: done.start = start; init_completion(&done.done); - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, - pgoff, start, start + sectorsize - 1, - io_bio->mirror_num, - btrfs_retry_endio, &done); - if (ret) { - err = errno_to_blk_status(ret); + status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, + pgoff, start, start + sectorsize - 1, + io_bio->mirror_num, btrfs_retry_endio, + &done); + if (status) { + err = status; goto next; } @@ -8250,7 +8253,7 @@ static blk_status_t btrfs_subio_endio_read(struct inode *inode, if (unlikely(err)) return __btrfs_correct_data_nocsum(inode, io_bio); else - return 0; + return BLK_STS_OK; } else { return __btrfs_subio_endio_read(inode, io_bio, err); } @@ -8423,9 +8426,9 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, return 0; } -static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, - u64 file_offset, int skip_sum, - int async_submit) +static inline blk_status_t +__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, + int skip_sum, int async_submit) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct 
btrfs_dio_private *dip = bio->bi_private; @@ -8488,6 +8491,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, int clone_offset = 0; int clone_len; int ret; + blk_status_t status; map_length = orig_bio->bi_iter.bi_size; submit_len = map_length; @@ -8537,9 +8541,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, */ atomic_inc(&dip->pending_bios); - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (ret) { + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (status) { bio_put(bio); atomic_dec(&dip->pending_bios); goto out_err; @@ -8557,9 +8561,9 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, } while (submit_len > 0); submit: - ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, - async_submit); - if (!ret) + status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, + async_submit); + if (!status) return 0; bio_put(bio); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 208638384cd2..2cf6ba40f7c4 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -905,7 +905,7 @@ static void raid_write_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - err = 0; + err = BLK_STS_OK; /* OK, we have read all the stripes we need to. */ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? @@ -1324,7 +1324,7 @@ write_data: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -1475,7 +1475,7 @@ static void raid_rmw_end_io(struct bio *bio) cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static void async_rmw_stripe(struct btrfs_raid_bio *rbio) @@ -1579,7 +1579,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) return 0; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; finish: @@ -1795,12 +1795,12 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) void **pointers; int faila = -1, failb = -1; struct page *page; - int err; + blk_status_t err; int i; pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers) { - err = -ENOMEM; + err = BLK_STS_RESOURCE; goto cleanup_io; } @@ -1856,7 +1856,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * a bad data or Q stripe. * TODO, we should redo the xor here. 
*/ - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1882,7 +1882,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { if (rbio->bbio->raid_map[faila] == RAID5_P_STRIPE) { - err = -EIO; + err = BLK_STS_IOERR; goto cleanup; } /* @@ -1954,13 +1954,13 @@ pstripe: } } - err = 0; + err = BLK_STS_OK; cleanup: kfree(pointers); cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == 0) + if (err == BLK_STS_OK) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -1968,7 +1968,7 @@ cleanup_io: rbio_orig_end_io(rbio, err); } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { rbio_orig_end_io(rbio, err); - } else if (err == 0) { + } else if (err == BLK_STS_OK) { rbio->faila = -1; rbio->failb = -1; @@ -2005,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio) return; if (atomic_read(&rbio->error) > rbio->bbio->max_errors) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); else __raid_recover_end_io(rbio); } @@ -2104,7 +2104,7 @@ out: cleanup: if (rbio->operation == BTRFS_RBIO_READ_REBUILD || rbio->operation == BTRFS_RBIO_REBUILD_MISSING) - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return -EIO; } @@ -2431,7 +2431,7 @@ submit_write: nr_data = bio_list_size(&bio_list); if (!nr_data) { /* Every parity is right */ - rbio_orig_end_io(rbio, 0); + rbio_orig_end_io(rbio, BLK_STS_OK); return; } @@ -2451,7 +2451,7 @@ submit_write: return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) @@ -2519,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -2633,7 +2633,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO); + rbio_orig_end_io(rbio, BLK_STS_IOERR); return; finish: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e8b9a269fdde..bd679bc7a1a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6212,8 +6212,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) } } -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit) +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit) { struct btrfs_device *dev; struct bio *first_bio = bio; @@ -6233,7 +6233,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } total_devs = bbio->num_stripes; @@ -6256,7 +6256,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } if (map_length < length) { @@ -6283,7 +6283,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, dev_nr, async_submit); } btrfs_bio_counter_dec(fs_info); - return 0; + return BLK_STS_OK; } struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6f45fd60d15a..93277fc60930 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -74,7 +74,7 @@ struct btrfs_device { int missing; int can_discard; int is_tgtdev_for_dev_replace; - int 
last_flush_error; + blk_status_t last_flush_error; int flush_bio_sent; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED @@ -416,8 +416,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit); +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, -- cgit From 311fc65c9fb9c966bca8e6f3ff8132ce57344ab9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 Aug 2017 15:13:29 -0500 Subject: pty: Repair TIOCGPTPEER The implementation of TIOCGPTPEER has two issues. When /dev/ptmx (as opposed to /dev/pts/ptmx) is opened the wrong vfsmount is passed to dentry_open. Which results in the kernel displaying the wrong pathname for the peer. The second is that simply by caching the vfsmount and dentry of the peer, it leaves them open in a way they were not previously. Which, because of the increased reference counts, can cause unnecessary behaviour differences resulting in regressions. To fix these, move the ioctl into tty_io.c at a generic level, allowing the ioctl to have access to the struct file on which the ioctl is being called. This allows the path of the slave to be derived when opening the slave through TIOCGPTPEER instead of requiring the path to the slave to be cached. Thus removing the need for caching the path. A new function devpts_ptmx_path is factored out of devpts_acquire and used to implement a function devpts_mntget. The new function devpts_mntget takes a filp to perform the lookup on and fsi so that it can confirm that the superblock that is found by devpts_ptmx_path is the proper superblock. v2: Lots of fixes to make the code actually work v3: Suggestions by Linus - Removed the unnecessary initialization of filp in ptm_open_peer - Simplified devpts_ptmx_path as gotos are no longer required [ This is the fix for the issue that was reverted in commit 143c97cc6529, but this time without breaking 'pbuilder' due to increased reference counts - Linus ] Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Reported-by: Christian Brauner Reported-and-tested-by: Stefan Lippers-Hollmann Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 65 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..7eae33ffa3fc 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,6 +133,50 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } +static int devpts_ptmx_path(struct path *path) +{ + struct super_block *sb; + int err; + + /* Has the devpts filesystem already been found? */ + if (path->mnt->mnt_sb->s_magic == DEVPTS_SUPER_MAGIC) + return 0; + + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(path); + if (err) + return err; + + /* Is the path the root of a devpts filesystem?
*/ + sb = path->mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path->mnt->mnt_root != sb->s_root)) + return -ENODEV; + + return 0; +} + +struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi) +{ + struct path path; + int err; + + path = filp->f_path; + path_get(&path); + + err = devpts_ptmx_path(&path); + dput(path.dentry); + if (err) { + mntput(path.mnt); + path.mnt = ERR_PTR(err); + } + if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { + mntput(path.mnt); + path.mnt = ERR_PTR(-ENODEV); + } + return path.mnt; +} + struct pts_fs_info *devpts_acquire(struct file *filp) { struct pts_fs_info *result; @@ -143,27 +187,16 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); - /* Has the devpts filesystem already been found? */ - sb = path.mnt->mnt_sb; - if (sb->s_magic != DEVPTS_SUPER_MAGIC) { - /* Is a devpts filesystem at "pts" in the same directory? */ - err = path_pts(&path); - if (err) { - result = ERR_PTR(err); - goto out; - } - - /* Is the path the root of a devpts filesystem? */ - result = ERR_PTR(-ENODEV); - sb = path.mnt->mnt_sb; - if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || - (path.mnt->mnt_root != sb->s_root)) - goto out; + err = devpts_ptmx_path(&path); + if (err) { + result = ERR_PTR(err); + goto out; } /* * pty code needs to hold extra references in case of last /dev/tty close */ + sb = path.mnt->mnt_sb; atomic_inc(&sb->s_active); result = DEVPTS_SB(sb); -- cgit From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:19 -0400 Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE When processing an NFSv4 WRITE operation, argp->end should never point past the end of the data in the final page of the page list. Otherwise, nfsd4_decode_compound can walk into uninitialized memory. More critical, nfsd4_decode_write is failing to increment argp->pagelen when it increments argp->pagelist. This can cause later xdr decoders to assume more data is available than really is, which can cause server crashes on malformed requests. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..5f940d2a136b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); -- cgit From fffa281b48a91ad6dac1a18c5907ece58fa3879b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 25 Aug 2017 15:55:36 -0700 Subject: dax: fix deadlock due to misaligned PMD faults In DAX there are two separate places where the 2MiB range of a PMD is defined. The first is in the page tables, where a PMD mapping inserted for a given address spans from (vmf->address & PMD_MASK) to ((vmf->address & PMD_MASK) + PMD_SIZE - 1). 
That is, from the 2MiB boundary below the address to the 2MiB boundary above the address. So, for example, a fault at address 3MiB (0x30 0000) falls within the PMD that ranges from 2MiB (0x20 0000) to 4MiB (0x40 0000). The second PMD range is in the mapping->page_tree, where a given file offset is covered by a radix tree entry that spans from one 2MiB aligned file offset to another 2MiB aligned file offset. So, for example, the file offset for 3MiB (pgoff 768) falls within the PMD range for the order 9 radix tree entry that ranges from 2MiB (pgoff 512) to 4MiB (pgoff 1024). This system works so long as the addresses and file offsets for a given mapping both have the same offsets relative to the start of each PMD. Consider the case where the starting address for a given file isn't 2MiB aligned - say our faulting address is 3 MiB (0x30 0000), but that corresponds to the beginning of our file (pgoff 0). Now all the PMDs in the mapping are misaligned so that the 2MiB range defined in the page tables never matches up with the 2MiB range defined in the radix tree. The current code notices this case for DAX faults to storage with the following test in dax_pmd_insert_mapping(): if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) goto unlock_fallback; This test makes sure that the pfn we get from the driver is 2MiB aligned, and relies on the assumption that the 2MiB alignment of the pfn we get back from the driver matches the 2MiB alignment of the faulting address. However, faults to holes were not checked and we could hit the problem described above. This was reported in response to the NVML nvml/src/test/pmempool_sync TEST5: $ cd nvml/src/test/pmempool_sync $ make TEST5 You can grab NVML here: https://github.com/pmem/nvml/ The dmesg warning you see when you hit this error is: WARNING: CPU: 13 PID: 2900 at fs/dax.c:641 dax_insert_mapping_entry+0x2df/0x310 Where we notice in dax_insert_mapping_entry() that the radix tree entry we are about to replace doesn't match the locked entry that we had previously inserted into the tree. This happens because the initial insertion was done in grab_mapping_entry() using a pgoff calculated from the faulting address (vmf->address), and the replacement in dax_pmd_load_hole() => dax_insert_mapping_entry() is done using vmf->pgoff. In our failure case those two page offsets (one calculated from vmf->address, one using vmf->pgoff) point to different order 9 radix tree entries. This failure case can result in a deadlock because the radix tree unlock also happens on the pgoff calculated from vmf->address. This means that the locked radix tree entry that we swapped in to the tree in dax_insert_mapping_entry() using vmf->pgoff is never unlocked, so all future faults to that 2MiB range will block forever. Fix this by validating that the faulting address's PMD offset matches the PMD offset from the start of the file. This check is done at the very beginning of the fault and covers faults that would have mapped to storage as well as faults to holes. I left the COLOUR check in dax_pmd_insert_mapping() in place in case we ever hit the insanity condition where the alignment of the pfn we get from the driver doesn't match the alignment of the userspace address. 
Link: http://lkml.kernel.org/r/20170822222436.18926-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: "Slusarz, Marcin" Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 306c2b603fb8..865d42c63e23 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1383,6 +1383,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the radix tree. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + goto fallback; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; -- cgit