From bce41d601e58af12cee1398fe836e6b9a8fb5396 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 10 Jan 2012 15:32:29 +0200 Subject: jffs2: do not initialize variable unnecessarily Remove unnecessary initializer for a local variable. Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- fs/jffs2/erase.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a01cdad6aad1..eafb8d37a6fb 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl void *ebuf; uint32_t ofs; size_t retlen; - int ret = -EIO; + int ret; unsigned long *wordebuf; ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, -- cgit From 803ab977618eae2b292cda0a97eed75f42250ddf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 24 Jan 2012 11:39:22 +0300 Subject: cifs: NULL dereference on allocation failure We should just return directly here, the goto causes a NULL dereference. Signed-off-by: Dan Carpenter Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 986709a8d903..026d6464335b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3857,10 +3857,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) struct smb_vol *vol_info; vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); - if (vol_info == NULL) { - tcon = ERR_PTR(-ENOMEM); - goto out; - } + if (vol_info == NULL) + return ERR_PTR(-ENOMEM); vol_info->local_nls = cifs_sb->local_nls; vol_info->linux_uid = fsuid; -- cgit From 4991a5faab7368daac463181e786608b4eb63675 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Jan 2012 11:52:01 +0300 Subject: cifs: check offset in decode_ntlmssp_challenge() We should check that we're not copying memory from beyond the end of the blob. Signed-off-by: Dan Carpenter Reviewed-by: Jeff Layton --- fs/cifs/sess.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index d85efad5765f..eb767412177d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -395,6 +395,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); + if (tioffset > blob_len || tioffset + tilen > blob_len) { + cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen); + return -EINVAL; + } if (tilen) { ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); if (!ses->auth_key.response) { -- cgit From 000f9bb83968ebd6959ff76870f16fc8f766ebd3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Jan 2012 19:50:01 -0800 Subject: cifs: fix printk format warnings Fix printk format warnings for ssize_t variables: fs/cifs/connect.c:2145:3: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' fs/cifs/connect.c:2152:3: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' fs/cifs/connect.c:2160:3: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' fs/cifs/connect.c:2170:3: warning: format '%ld' expects type 'long int', but argument 3 has type 'ssize_t' Signed-off-by: Randy Dunlap Acked-by: Jeff Layton Cc: linux-cifs@vger.kernel.org --- fs/cifs/connect.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 026d6464335b..9c288653e6d6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2142,14 +2142,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = delim - payload; if (len > MAX_USERNAME_SIZE || len <= 0) { - cFYI(1, "Bad value from username search (len=%ld)", len); + cFYI(1, "Bad value from username search (len=%zd)", len); rc = -EINVAL; goto out_key_put; } vol->username = kstrndup(payload, len, GFP_KERNEL); if (!vol->username) { - cFYI(1, "Unable to allocate %ld bytes for username", len); + cFYI(1, "Unable to allocate %zd bytes for username", len); rc = -ENOMEM; goto out_key_put; } @@ -2157,7 +2157,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) len = key->datalen - (len + 1); if (len > MAX_PASSWORD_SIZE || len <= 0) { - cFYI(1, "Bad len for password search (len=%ld)", len); + cFYI(1, "Bad len for password search (len=%zd)", len); rc = -EINVAL; kfree(vol->username); vol->username = NULL; @@ -2167,7 +2167,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) ++delim; vol->password = kstrndup(delim, len, GFP_KERNEL); if (!vol->password) { - cFYI(1, "Unable to allocate %ld bytes for password", len); + cFYI(1, "Unable to allocate %zd bytes for password", len); rc = -ENOMEM; kfree(vol->username); vol->username = NULL; -- cgit From 4505360376637832f79f84f352588b0a045ad113 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Fri, 27 Jan 2012 06:37:26 +0000 Subject: xfs: pass KM_SLEEP flag to kmem_realloc() in xlog_recover_add_to_cnt_trans() The kmem_realloc() in xfs is given KM_* memory allocation flags. And it allocates memory using kmalloc() after they are converted to gfp_mask flags. In xlog_recover_add_to_cont_trans(), 0u is passed to kmem_realloc(), instead of them. I guess it is preferred to use them, and here memory must be allocated but don't have to be done with GFP_ATOMIC. So, this patch changes it to KM_SLEEP. Signed-off-by: Mitsuo Hayasaka Cc: Ben Myers Cc: Alex Elder Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_log_recover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 541a508adea1..15ff5392fb65 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; -- cgit From 2a73ca8208197d03f78d680b3c7953b897e91eb6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 31 Jan 2012 12:51:24 -0600 Subject: [CIFS] Update cifs Kconfig title to match removal of experimental dependency Removed the dependency on CONFIG_EXPERIMENTAL but forgot to update the text description to be consistent. Signed-off-by: Steve French --- fs/cifs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0554b00a7b33..2b243af70aa3 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -139,7 +139,7 @@ config CIFS_DFS_UPCALL points. If unsure, say N. config CIFS_FSCACHE - bool "Provide CIFS client caching support (EXPERIMENTAL)" + bool "Provide CIFS client caching support" depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y help Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data @@ -147,7 +147,7 @@ config CIFS_FSCACHE manager. If unsure, say N. config CIFS_ACL - bool "Provide CIFS ACL support (EXPERIMENTAL)" + bool "Provide CIFS ACL support" depends on CIFS_XATTR && KEYS help Allows to fetch CIFS/NTFS ACL from the server. The DACL blob -- cgit From 15eb77a07c714ac80201abd0a9568888bcee6276 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 17 Jan 2012 11:18:56 -0600 Subject: writeback: fix NULL bdi->dev in trace writeback_single_inode bdi_prune_sb() resets sb->s_bdi to default_backing_dev_info when the tearing down the original bdi. Fix trace_writeback_single_inode to use sb->s_bdi=default_backing_dev_info rather than bdi->dev=NULL for a teared down bdi. Cc: Reported-by: Rabin Vincent Tested-by: Rabin Vincent Signed-off-by: Wu Fengguang --- fs/fs-writeback.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916657ba..5b4a9362d5aa 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,14 +52,6 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; -/* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include - /* * We don't actually have pdflush, but this one is exported though /proc... */ @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { -- cgit From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 1 Feb 2012 11:10:24 -0800 Subject: mtd: fix merge conflict resolution breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes merge conflict resolution breakage introduced by merge d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream"). The commit changed 'mtd_can_have_bb()' function and made it always return zero, which is incorrect. Instead, we need it to return whether the underlying flash device can have bad eraseblocks or not. UBI needs this information because it affects how it handles the underlying flash. E.g., if the underlying flash is NOR, it cannot have bad blocks and any write or erase error is fatal, and all we can do is to switch to R/O mode. We do not need to reserve a pool of good eraseblocks for bad eraseblocks handling, and so on. This patch also removes 'mtd_can_have_bb()' invocations from Logfs to ensure correct Logfs behavior. I've tested that with this patch UBI works on top of NOR and NAND flashes emulated by mtdram and nandsim correspondingly. This patch is based on patch from Linus Torvalds. Signed-off-by: Artem Bityutskiy Acked-by: Jörn Engel Acked-by: Prasad Joshi Acked-by: Brian Norris Signed-off-by: Linus Torvalds --- fs/logfs/dev_mtd.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d611e0..9c501449450d 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; -- cgit From 71879d3cb3dd8f2dfdefb252775c1b3ea04a3dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:38 +0100 Subject: proc: mem_release() should check mm != NULL mem_release() can hit mm == NULL, add the necessary check. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cde9edf9c4d..c3617ea7830b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -822,8 +822,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; - - mmput(mm); + if (mm) + mmput(mm); return 0; } -- cgit From 572d34b946bae070debd42db1143034d9687e13f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:54 +0100 Subject: proc: unify mem_read() and mem_write() No functional changes, cleanup and preparation. mem_read() and mem_write() are very similar. Move this code into the new common helper, mem_rw(), which takes the additional "int write" argument. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 90 +++++++++++++++++++++------------------------------------- 1 file changed, 32 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index c3617ea7830b..be1909041685 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -718,57 +718,13 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int ret; - char *page; - unsigned long src = *ppos; struct mm_struct *mm = file->private_data; - - if (!mm) - return 0; - - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - - ret = 0; - - while (count > 0) { - int this_len, retval; - - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_remote_vm(mm, src, page, this_len, 0); - if (!retval) { - if (!ret) - ret = -EIO; - break; - } - - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; - - free_page((unsigned long) page); - return ret; -} - -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) -{ - int copied; + unsigned long addr = *ppos; + ssize_t copied; char *page; - unsigned long dst = *ppos; - struct mm_struct *mm = file->private_data; if (!mm) return 0; @@ -779,30 +735,48 @@ static ssize_t mem_write(struct file * file, const char __user *buf, copied = 0; while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_remote_vm(mm, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; free_page((unsigned long) page); return copied; } +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} + loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { -- cgit From 6d08f2c7139790c268820a2e590795cb8333181a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:15:11 +0100 Subject: proc: make sure mem_open() doesn't pin the target's memory Once /proc/pid/mem is opened, the memory can't be released until mem_release() even if its owner exits. Change mem_open() to do atomic_inc(mm_count) + mmput(), this only pins mm_struct. Change mem_rw() to do atomic_inc_not_zero(mm_count) before access_remote_vm(), this verifies that this mm is still alive. I am not sure what should mem_rw() return if atomic_inc_not_zero() fails. With this patch it returns zero to match the "mm == NULL" case, may be it should return -EINVAL like it did before e268337d. Perhaps it makes sense to add the additional fatal_signal_pending() check into the main loop, to ensure we do not hold this memory if the target task was oom-killed. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index be1909041685..d9512bd03e6c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -711,6 +711,13 @@ static int mem_open(struct inode* inode, struct file* file) if (IS_ERR(mm)) return PTR_ERR(mm); + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; @@ -734,6 +741,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -761,6 +771,8 @@ static ssize_t mem_rw(struct file *file, char __user *buf, } *ppos = addr; + mmput(mm); +free: free_page((unsigned long) page); return copied; } @@ -797,7 +809,7 @@ static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) - mmput(mm); + mmdrop(mm); return 0; } -- cgit From 114fc47492e23d93653e4a16664833e98d62a563 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 11 Jan 2012 17:41:01 -0800 Subject: ceph: change "ceph.layout" xattr to be "ceph.file.layout" The virtual extended attribute named "ceph.layout" is meaningful only for regular files. Change its name to be "ceph.file.layout" to more directly reflect that in the ceph xattr namespace. Preserve the old "ceph.layout" name for the time being (until we decide it's safe to get rid of it entirely). Add a missing initializer for "readonly" in the terminating entry. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a5e36e4488a7..9e6734e38c12 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr_cb ceph_file_vxattrs[] = { + { true, "ceph.file.layout", ceph_vxattrcb_layout}, + /* The following extended attribute name is deprecated */ { true, "ceph.layout", ceph_vxattrcb_layout}, - { NULL, NULL } + { true, NULL, NULL } }; static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) -- cgit From 32852a81bccd9e3d1953b894966393d1b546576d Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 14 Jan 2012 22:20:59 -0500 Subject: ceph: fix length validation in parse_reply_info() "len" is read from network and thus needs validation. Otherwise, given a bogus "len" value, p+len could be an out-of-bounds pointer, which is used in further parsing. Signed-off-by: Xi Wang Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6203d805eb45..be1415fcaac8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* trace */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_trace(&p, p+len, info, features); if (err < 0) goto out_bad; @@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* extra */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_extra(&p, p+len, info, features); if (err < 0) goto out_bad; -- cgit From d8fb02abdc39f92a1066313e2b17047876afa8f9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 Jan 2012 17:48:10 -0800 Subject: ceph: create a new session lock to avoid lock inversion Lockdep was reporting a possible circular lock dependency in dentry_lease_is_valid(). That function needs to sample the session's s_cap_gen and and s_cap_ttl fields coherently, but needs to do so while holding a dentry lock. The s_cap_lock field was being used to protect the two fields, but that can't be taken while holding a lock on a dentry within the session. In most cases, the s_cap_gen and s_cap_ttl fields only get operated on separately. But in three cases they need to be updated together. Implement a new lock to protect the spots updating both fields atomically is required. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/caps.c | 4 ++-- fs/ceph/dir.c | 4 ++-- fs/ceph/mds_client.c | 8 +++++--- fs/ceph/mds_client.h | 7 +++++-- 4 files changed, 14 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8b53193e4f7c..90d789df9ce0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap) unsigned long ttl; u32 gen; - spin_lock(&cap->session->s_cap_lock); + spin_lock(&cap->session->s_gen_ttl_lock); gen = cap->session->s_cap_gen; ttl = cap->session->s_cap_ttl; - spin_unlock(&cap->session->s_cap_lock); + spin_unlock(&cap->session->s_gen_ttl_lock); if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { dout("__cap_is_valid %p cap %p issued %s " diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 98954003a8d3..63c52f33361b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry) di = ceph_dentry(dentry); if (di && di->lease_session) { s = di->lease_session; - spin_lock(&s->s_cap_lock); + spin_lock(&s->s_gen_ttl_lock); gen = s->s_cap_gen; ttl = s->s_cap_ttl; - spin_unlock(&s->s_cap_lock); + spin_unlock(&s->s_gen_ttl_lock); if (di->lease_gen == gen && time_before(jiffies, dentry->d_time) && diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index be1415fcaac8..a4fdf9397a90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -400,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; s->s_con.peer_name.num = cpu_to_le64(mds); - spin_lock_init(&s->s_cap_lock); + spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; s->s_cap_ttl = 0; + + spin_lock_init(&s->s_cap_lock); s->s_renew_requested = 0; s->s_renew_seq = 0; INIT_LIST_HEAD(&s->s_caps); @@ -2328,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_STALE: pr_info("mds%d caps went stale, renewing\n", session->s_mds); - spin_lock(&session->s_cap_lock); + spin_lock(&session->s_gen_ttl_lock); session->s_cap_gen++; session->s_cap_ttl = 0; - spin_unlock(&session->s_cap_lock); + spin_unlock(&session->s_gen_ttl_lock); send_renew_caps(mdsc, session); break; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a50ca0e39475..8c7c04ebb595 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -117,10 +117,13 @@ struct ceph_mds_session { void *s_authorizer_buf, *s_authorizer_reply_buf; size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; - /* protected by s_cap_lock */ - spinlock_t s_cap_lock; + /* protected by s_gen_ttl_lock */ + spinlock_t s_gen_ttl_lock; u32 s_cap_gen; /* inc each time we get mds stale msg */ unsigned long s_cap_ttl; /* when session caps expire */ + + /* protected by s_cap_lock */ + spinlock_t s_cap_lock; struct list_head s_caps; /* all caps issued by this session */ int s_nr_caps, s_trim_caps; int s_num_cap_releases; -- cgit From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 2 Feb 2012 11:34:09 +1030 Subject: Fix race in process_vm_rw_core This fixes the race in process_vm_core found by Oleg (see http://article.gmane.org/gmane.linux.kernel/1235667/ for details). This has been updated since I last sent it as the creation of the new mm_access() function did almost exactly the same thing as parts of the previous version of this patch did. In order to use mm_access() even when /proc isn't enabled, we move it to kernel/fork.c where other related process mm access functions already are. Signed-off-by: Chris Yeoh Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index d9512bd03e6c..d4548dd49b02 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); -- cgit From de47a4176c532ef5961b8a46a2d541a3517412d3 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Thu, 2 Feb 2012 15:28:28 -0600 Subject: cifs: Fix oops in session setup code for null user mounts For null user mounts, do not invoke string length function during session setup. Cc: Acked-by: Jeff Layton Signed-off-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/sess.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index eb767412177d..551d0c2b9736 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy user */ /* BB what about null user mounts - check that we do this BB */ /* copy user */ - if (ses->user_name != NULL) + if (ses->user_name != NULL) { strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); + bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + } /* else null user mount */ - - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ /* copy domain */ - if (ses->domainName != NULL) { strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); -- cgit From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 4 Feb 2012 10:47:10 +0100 Subject: exec: fix use-after-free bug in setup_new_exec() Setting the task name is done within setup_new_exec() by accessing bprm->filename. However this happens after flush_old_exec(). This may result in a use after free bug, flush_old_exec() may "complete" vfork_done, which will wake up the parent which in turn may free the passed in filename. To fix this add a new tcomm field in struct linux_binprm which contains the now early generated task name until it is used. Fixes this bug on s390: Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000 Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818) Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374) Call Trace: ([<0000000000282e2c>] setup_new_exec+0x38/0x374) [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4 [<0000000000280a42>] search_binary_handler+0x38e/0x5bc [<0000000000282b6c>] do_execve_common+0x410/0x514 [<0000000000282cb6>] do_execve+0x46/0x58 [<00000000005bce58>] kernel_execve+0x28/0x70 [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140 [<00000000005bc8da>] kernel_thread_starter+0x6/0xc [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000002830f0>] setup_new_exec+0x2fc/0x374 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- fs/exec.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5c..92ce83a11e90 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on -- cgit From 11a3122f6cf2d988a77eb8883d0fc49cd013a6d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Feb 2012 07:51:30 +0100 Subject: block: strip out locking optimization in put_io_context() put_io_context() performed a complex trylock dancing to avoid deferring ioc release to workqueue. It was also broken on UP because trylock was always assumed to succeed which resulted in unbalanced preemption count. While there are ways to fix the UP breakage, even the most pathological microbench (forced ioc allocation and tight fork/exit loop) fails to show any appreciable performance benefit of the optimization. Strip it out. If there turns out to be workloads which are affected by this change, simpler optimization from the discussion thread can be applied later. Signed-off-by: Tejun Heo LKML-Reference: <1328514611.21268.66.camel@sli10-conroe> Signed-off-by: Jens Axboe --- fs/ioprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index f84b380d65e5..0f1b9515213b 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc, NULL); + put_io_context(ioc); } return err; -- cgit From 4edc53c1f8cdd99d349165d6c61c45aa4e8e2564 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 7 Feb 2012 06:30:51 -0500 Subject: cifs: fix error handling when cifscreds key payload is an error Reported-by: Dan Carpenter Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9c288653e6d6..940189bd6490 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2125,7 +2125,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) down_read(&key->sem); upayload = key->payload.data; if (IS_ERR_OR_NULL(upayload)) { - rc = PTR_ERR(key); + rc = upayload ? PTR_ERR(upayload) : -EINVAL; goto out_key_put; } -- cgit From 8b0192a5f478da1c1ae906bf3ffff53f26204f56 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 7 Feb 2012 06:30:52 -0500 Subject: cifs: request oplock when doing open on lookup Currently, it's always set to 0 (no oplock requested). Cc: Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index df8fecb5b993..63a196b97d50 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock = 0; + __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; -- cgit From ff4fa4a25a33f92b5653bb43add0c63bea98d464 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 7 Feb 2012 06:31:05 -0500 Subject: cifs: don't return error from standard_receive3 after marking response malformed standard_receive3 will check the validity of the response from the server (via checkSMB). It'll pass the result of that check to handle_mid which will dequeue it and mark it with a status of MID_RESPONSE_MALFORMED if checkSMB returned an error. At that point, standard_receive3 will also return an error, which will make the demultiplex thread skip doing the callback for the mid. This is wrong -- if we were able to identify the request and the response is marked malformed, then we want the demultiplex thread to do the callback. Fix this by making standard_receive3 return 0 in this situation. Cc: stable@vger.kernel.org Reported-and-Tested-by: Mark Moseley Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 940189bd6490..602f77c304c9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); - if (mid) - handle_mid(mid, server, smb_buffer, length); + if (!mid) + return length; - return length; + handle_mid(mid, server, smb_buffer, length); + return 0; } static int -- cgit From 5abebfdd02450fa1349daacf242e70b3736581e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Feb 2012 22:07:18 +0100 Subject: bio: don't overflow in bio_get_nr_vecs() There were two places bio_get_nr_vecs() could overflow: First, it did a left shift to convert from sectors to bytes immediately before dividing by PAGE_SIZE. If PAGE_SIZE ever was less than 512 a great many things would break, so dividing by PAGE_SIZE >> 9 is safe and will generate smaller code too. The nastier overflow was in the DIV_ROUND_UP() (that's what the code was effectively doing, anyways). If n + d overflowed, the whole thing would return 0 which breaks things rather effectively. bio_get_nr_vecs() doesn't claim to give an exact value anyways, so the DIV_ROUND_UP() is silly; we could do a straight divide except if a device's queue_max_sectors was less than PAGE_SIZE we'd return 0. So we just add 1; this should always be safe - things will break badly if bio_get_nr_vecs() returns > BIO_MAX_PAGES (bio_alloc() will suddenly start failing) but it's queue_max_segments that must guard against this, if queue_max_sectors is preventing this from happen things are going to explode on architectures with different PAGE_SIZE. Signed-off-by: Kent Overstreet Cc: Tejun Heo Acked-by: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/bio.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index b1fe82cf88cf..b980ecde026a 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone); int bio_get_nr_vecs(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - int nr_pages; - - nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages > queue_max_segments(q)) - nr_pages = queue_max_segments(q); - - return nr_pages; + return min_t(unsigned, + queue_max_segments(q), + queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1); } EXPORT_SYMBOL(bio_get_nr_vecs); -- cgit From 1ecd3c7ea76488c63b4b0a2561fd7eaf96cc8028 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 8 Feb 2012 17:13:37 -0800 Subject: nilfs2: avoid overflowing segment numbers in nilfs_ioctl_clean_segments() nsegs is read from userspace. Limit its value and avoid overflowing nsegs * sizeof(__u64) in the subsequent call to memdup_user(). This patch complements 481fe17e973fb9 ("nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()"). Signed-off-by: Xi Wang Cc: Haogang Chen Acked-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 886649627c3d..2a70fce70c65 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. We -- cgit From 04da0c8196ac0b12fb6b84f4b7a51ad2fa56d869 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Feb 2012 13:57:20 +0000 Subject: xfs: use a normal shrinker for the dquot freelist Stop reusing dquots from the freelist when allocating new ones directly, and implement a shrinker that actually follows the specifications for the interface. The shrinker implementation is still highly suboptimal at this point, but we can gradually work on it. This also fixes an bug in the previous lock ordering, where we would take the hash and dqlist locks inside of the freelist lock against the normal lock ordering. This is only solvable by introducing the dispose list, and thus not when using direct reclaim of unused dquots for new allocations. As a side-effect the quota upper bound and used to free ratio values in /proc/fs/xfs/xqm are set to 0 as these values don't make any sense in the new world order. Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/kmem.h | 6 -- fs/xfs/xfs_dquot.c | 103 +++++------------- fs/xfs/xfs_qm.c | 291 +++++++++++++++++++------------------------------- fs/xfs/xfs_qm.h | 14 --- fs/xfs/xfs_qm_stats.c | 4 +- fs/xfs/xfs_trace.h | 5 +- 6 files changed, 141 insertions(+), 282 deletions(-) (limited to 'fs') diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 292eff198030..ab7c53fe346e 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4ff40b5f918..cbcb7bea38e2 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -62,82 +62,6 @@ int xfs_dqerror_mod = 33; static struct lock_class_key xfs_dquot_other_class; -/* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. - */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - /* * This is called to free all the memory associated with a dquot */ @@ -567,7 +491,32 @@ xfs_qm_dqread( int error; int cancelflags = 0; - dqp = xfs_qm_dqinit(mp, id, type); + + dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + /* + * Make sure group quotas have a different lock class than user + * quotas. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + atomic_inc(&xfs_Gqm->qm_totaldquots); trace_xfs_dqread(dqp); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 671f37eae1c7..c436def733bf 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -50,7 +50,6 @@ */ struct mutex xfs_Gqm_lock; struct xfs_qm *xfs_Gqm; -uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; @@ -93,7 +92,6 @@ xfs_Gqm_init(void) goto out_free_udqhash; hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm->qm_dqhashmask = hsize - 1; @@ -137,7 +135,6 @@ xfs_Gqm_init(void) xqm->qm_dqtrxzone = qm_dqtrxzone; atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; return xqm; @@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos( return 0; } +STATIC void +xfs_qm_dqfree_one( + struct xfs_dquot *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_quotainfo *qi = mp->m_quotainfo; + mutex_lock(&dqp->q_hash->qh_lock); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + mutex_unlock(&dqp->q_hash->qh_lock); -/* - * Pop the least recently used dquot off the freelist and recycle it. - */ -STATIC struct xfs_dquot * -xfs_qm_dqreclaim_one(void) + mutex_lock(&qi->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); + qi->qi_dquots--; + qi->qi_dqreclaims++; + mutex_unlock(&qi->qi_dqlist_lock); + + xfs_qm_dqdestroy(dqp); +} + +STATIC void +xfs_qm_dqreclaim_one( + struct xfs_dquot *dqp, + struct list_head *dispose_list) { - struct xfs_dquot *dqp; - int restarts = 0; + struct xfs_mount *mp = dqp->q_mount; + int error; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -restart: - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; + if (!xfs_dqlock_nowait(dqp)) + goto out_busy; - if (!xfs_dqlock_nowait(dqp)) - continue; + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); - /* - * This dquot has already been grabbed by dqlookup. - * Remove it from the freelist and try again. - */ - if (dqp->q_nrefs) { - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - restarts++; - goto dqunlock; - } + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + return; + } - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto out_busy; - trace_xfs_dqreclaim_dirty(dqp); + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + trace_xfs_dqreclaim_dirty(dqp); - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; + /* + * We flush it delayed write, so don't bother releasing the + * freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); } - xfs_dqfunlock(dqp); /* - * Prevent lookup now that we are going to reclaim the dquot. - * Once XFS_DQ_FREEING is set lookup won't touch the dquot, - * thus we can drop the lock now. + * Give the dquot another try on the freelist, as the + * flushing will take some time. */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - mutex_lock(&dqp->q_hash->qh_lock); - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - mutex_unlock(&dqp->q_hash->qh_lock); - - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + goto out_busy; + } + xfs_dqfunlock(dqp); - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqp; -dqunlock: - xfs_dqunlock(dqp); - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - goto restart; - } + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_freelist, dispose_list); + xfs_Gqm->qm_dqfrlist_cnt--; - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return NULL; -} + trace_xfs_dqreclaim_done(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + return; -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; +out_busy: + xfs_dqunlock(dqp); - if (howmany <= 0) - return 0; + /* + * Move the dquot to the tail of the list so that we don't spin on it. + */ + list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; + trace_xfs_dqreclaim_busy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); } -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ STATIC int xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) + struct shrinker *shrink, + struct shrink_control *sc) { - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD (dispose_list); + struct xfs_dquot *dqp; - if (nfree <= ndqused && nfree < ndquot) + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) return 0; + if (!nr_to_scan) + goto out; - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. - */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { + if (nr_to_scan-- <= 0) + break; + dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, + q_freelist); + xfs_qm_dqreclaim_one(dqp, &dispose_list); } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; + while (!list_empty(&dispose_list)) { + dqp = list_first_entry(&dispose_list, struct xfs_dquot, + q_freelist); + list_del_init(&dqp->q_freelist); + xfs_qm_dqfree_one(dqp); + } +out: + return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; } - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9b4f3adefbc5..9a9b997e1a0a 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -26,23 +26,11 @@ struct xfs_qm; struct xfs_inode; -extern uint ndquot; extern struct mutex xfs_Gqm_lock; extern struct xfs_qm *xfs_Gqm; extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqtrxzone; -/* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - /* * Dquot hashtable constants/threshold values. */ @@ -74,7 +62,6 @@ typedef struct xfs_qm { int qm_dqfrlist_cnt; atomic_t qm_totaldquots; /* total incore dquots */ uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ } xfs_qm_t; @@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); /* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c index 8671a0b32644..5729ba570877 100644 --- a/fs/xfs/xfs_qm_stats.c +++ b/fs/xfs/xfs_qm_stats.c @@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, + 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + 0, xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); return 0; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6b6df5802e95..bb134a819930 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \ DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_done); DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_get); -DEFINE_DQUOT_EVENT(xfs_dqinit); -DEFINE_DQUOT_EVENT(xfs_dqreuse); DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqread); -- cgit From 4a26620df451ad46151ad21d711ed43e963c004e Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Sat, 5 Nov 2011 13:45:08 -0400 Subject: eCryptfs: Improve statfs reporting statfs() calls on eCryptfs files returned the wrong filesystem type and, when using filename encryption, the wrong maximum filename length. If mount-wide filename encryption is enabled, the cipher block size and the lower filesystem's max filename length will determine the max eCryptfs filename length. Pre-tested, known good lengths are used when the lower filesystem's namelen is 255 and a cipher with 8 or 16 byte block sizes is used. In other, less common cases, we fall back to a safe rounded-down estimate when determining the eCryptfs namelen. https://launchpad.net/bugs/885744 Signed-off-by: Tyler Hicks Reported-by: Kees Cook Reviewed-by: Kees Cook Reviewed-by: John Johansen --- fs/ecryptfs/crypto.c | 68 ++++++++++++++++++++++++++++++++++++++----- fs/ecryptfs/ecryptfs_kernel.h | 6 ++++ fs/ecryptfs/keystore.c | 9 ++---- fs/ecryptfs/super.c | 14 ++++++++- 4 files changed, 83 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 63ab24510649..ea9931281557 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1990,6 +1990,17 @@ out: return; } +static size_t ecryptfs_max_decoded_size(size_t encoded_size) +{ + /* Not exact; conservatively long. Every block of 4 + * encoded characters decodes into a block of 3 + * decoded characters. This segment of code provides + * the caller with the maximum amount of allocated + * space that @dst will need to point to in a + * subsequent call. */ + return ((encoded_size + 1) * 3) / 4; +} + /** * ecryptfs_decode_from_filename * @dst: If NULL, this function only sets @dst_size and returns. If @@ -2008,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, size_t dst_byte_offset = 0; if (dst == NULL) { - /* Not exact; conservatively long. Every block of 4 - * encoded characters decodes into a block of 3 - * decoded characters. This segment of code provides - * the caller with the maximum amount of allocated - * space that @dst will need to point to in a - * subsequent call. */ - (*dst_size) = (((src_size + 1) * 3) / 4); + (*dst_size) = ecryptfs_max_decoded_size(src_size); goto out; } while (src_byte_offset < src_size) { @@ -2239,3 +2244,52 @@ out_free: out: return rc; } + +#define ENC_NAME_MAX_BLOCKLEN_8_OR_16 143 + +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + struct blkcipher_desc desc; + struct mutex *tfm_mutex; + size_t cipher_blocksize; + int rc; + + if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { + (*namelen) = lower_namelen; + return 0; + } + + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex, + mount_crypt_stat->global_default_fn_cipher_name); + if (unlikely(rc)) { + (*namelen) = 0; + return rc; + } + + mutex_lock(tfm_mutex); + cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm); + mutex_unlock(tfm_mutex); + + /* Return an exact amount for the common cases */ + if (lower_namelen == NAME_MAX + && (cipher_blocksize == 8 || cipher_blocksize == 16)) { + (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16; + return 0; + } + + /* Return a safe estimate for the uncommon cases */ + (*namelen) = lower_namelen; + (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; + /* Since this is the max decoded size, subtract 1 "decoded block" len */ + (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3; + (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE; + (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES; + /* Worst case is that the filename is padded nearly a full block size */ + (*namelen) -= cipher_blocksize - 1; + + if ((*namelen) < 0) + (*namelen) = 0; + + return 0; +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index a2362df58ae8..867b64c5d84f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -162,6 +162,10 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */ #define MD5_DIGEST_SIZE 16 #define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE +#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) +#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED." #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23 #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED." @@ -701,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, size_t *packet_size, struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *data, size_t max_packet_size); +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat); int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, loff_t offset); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 8e3b943e330f..2333203a120b 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -679,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, * Octets N3-N4: Block-aligned encrypted filename * - Consists of a minimum number of random characters, a \0 * separator, and then the filename */ - s->max_packet_size = (1 /* Tag 70 identifier */ - + 3 /* Max Tag 70 packet size */ - + ECRYPTFS_SIG_SIZE /* FNEK sig */ - + 1 /* Cipher identifier */ + s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE + s->block_aligned_filename_size); if (dest == NULL) { (*packet_size) = s->max_packet_size; @@ -934,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, goto out; } s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) { + if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) { printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " "at least [%d]\n", __func__, max_packet_size, - (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)); + ECRYPTFS_TAG_70_MIN_METADATA_SIZE); rc = -EINVAL; goto out; } diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 9df7fd6e0c39..cf152823bbf4 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "ecryptfs_kernel.h" struct kmem_cache *ecryptfs_inode_info_cache; @@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode) static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + int rc; if (!lower_dentry->d_sb->s_op->statfs) return -ENOSYS; - return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + + rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + if (rc) + return rc; + + buf->f_type = ECRYPTFS_SUPER_MAGIC; + rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen, + &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat); + + return rc; } /** -- cgit From 545d680938be1e86a6c5250701ce9abaf360c495 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 7 Feb 2012 17:55:40 -0600 Subject: eCryptfs: Copy up lower inode attrs after setting lower xattr After passing through a ->setxattr() call, eCryptfs needs to copy the inode attributes from the lower inode to the eCryptfs inode, as they may have changed in the lower filesystem's ->setxattr() path. One example is if an extended attribute containing a POSIX Access Control List is being set. The new ACL may cause the lower filesystem to modify the mode of the lower inode and the eCryptfs inode would need to be updated to reflect the new mode. https://launchpad.net/bugs/926292 Signed-off-by: Tyler Hicks Reported-by: Sebastien Bacher Cc: John Johansen Cc: --- fs/ecryptfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 19892d7d2ed1..ab35b113003b 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1085,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); + if (!rc) + fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; } -- cgit From 465c9343c5b746ec2325a220fa3e50cc647d2db7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 10 Feb 2012 13:39:50 +0800 Subject: ecryptfs: remove the second argument of k[un]map_atomic() Signed-off-by: Cong Wang Signed-off-by: Tyler Hicks --- fs/ecryptfs/mmap.c | 4 ++-- fs/ecryptfs/read_write.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 10ec695ccd68..a46b3a8fee1e 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -150,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is a header extent */ char *page_virt; - page_virt = kmap_atomic(page, KM_USER0); + page_virt = kmap_atomic(page); memset(page_virt, 0, PAGE_CACHE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { @@ -163,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, crypt_stat, &written); } - kunmap_atomic(page_virt, KM_USER0); + kunmap_atomic(page_virt); flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 5c0106f75775..b2a34a192f4f 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -156,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); + ecryptfs_page_virt = kmap_atomic(ecryptfs_page); /* * pos: where we're now writing, offset: where the request was @@ -179,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, (data + data_offset), num_bytes); data_offset += num_bytes; } - kunmap_atomic(ecryptfs_page_virt, KM_USER0); + kunmap_atomic(ecryptfs_page_virt); flush_dcache_page(ecryptfs_page); SetPageUptodate(ecryptfs_page); unlock_page(ecryptfs_page); -- cgit