From 32b4560b04af6e4fee241ea6de6db780eaf354f2 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 30 Jul 2012 14:39:10 -0700 Subject: ntfs: remove references to long gone super operations and unimplemented methods ->delete_inode(), ->write_super_lockfs(), ->unlockfs() are gone so remove refereces to them in the NTFS code. Remove unnecessary comments about unimplemented methods while at it (suggested by Christoph Hellwig). Noticed while cleaning up the fsfreeze mess. Signed-off-by: Fernando Luis Vazquez Cao Cc: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/super.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index b341492542ca..2bc149d6a784 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2660,31 +2660,14 @@ static const struct super_operations ntfs_sops = { .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ #ifdef NTFS_RW - //.dirty_inode = NULL, /* VFS: Called from - // __mark_inode_dirty(). */ .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to disk. */ - //.drop_inode = NULL, /* VFS: Called just after the - // inode reference count has - // been decreased to zero. - // NOTE: The inode lock is - // held. See fs/inode.c:: - // generic_drop_inode(). */ - //.delete_inode = NULL, /* VFS: Delete inode from disk. - // Called when i_count becomes - // 0 and i_nlink is also 0. */ - //.write_super = NULL, /* Flush dirty super block to - // disk. */ - //.sync_fs = NULL, /* ? */ - //.write_super_lockfs = NULL, /* ? */ - //.unlockfs = NULL, /* ? */ #endif /* NTFS_RW */ .put_super = ntfs_put_super, /* Syscall: umount. */ .statfs = ntfs_statfs, /* Syscall: statfs */ .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is removed from memory. */ - //.umount_begin = NULL, /* Forced umount. */ .show_options = ntfs_show_options, /* Show mount options in proc. */ }; -- cgit From 779302e67835fe9a6b74327e54969ba59cb3478a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 30 Jul 2012 14:39:13 -0700 Subject: fs/xattr.c:getxattr(): improve handling of allocation failures This allocation can be as large as 64k. - Add __GFP_NOWARN so the falied kmalloc() is silent - Fall back to vmalloc() if the kmalloc() failed Signed-off-by: Sasha Levin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xattr.c b/fs/xattr.c index 1d7ac3790458..4d45b7189e7e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -427,6 +427,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, { ssize_t error; void *kvalue = NULL; + void *vvalue = NULL; char kname[XATTR_NAME_MAX + 1]; error = strncpy_from_user(kname, name, sizeof(kname)); @@ -438,9 +439,13 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (size) { if (size > XATTR_SIZE_MAX) size = XATTR_SIZE_MAX; - kvalue = kzalloc(size, GFP_KERNEL); - if (!kvalue) - return -ENOMEM; + kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!kvalue) { + vvalue = vmalloc(size); + if (!vvalue) + return -ENOMEM; + kvalue = vvalue; + } } error = vfs_getxattr(d, kname, kvalue, size); @@ -452,7 +457,10 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, than XATTR_SIZE_MAX bytes. Not possible. */ error = -E2BIG; } - kfree(kvalue); + if (vvalue) + vfree(vvalue); + else + kfree(kvalue); return error; } -- cgit From 9520628e8ceb69fa9a4aee6b57f22675d9e1b709 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:15 -0700 Subject: fs: make dumpable=2 require fully qualified path When the suid_dumpable sysctl is set to "2", and there is no core dump pipe defined in the core_pattern sysctl, a local user can cause core files to be written to root-writable directories, potentially with user-controlled content. This means an admin can unknowningly reintroduce a variation of CVE-2006-2451, allowing local users to gain root privileges. $ cat /proc/sys/fs/suid_dumpable 2 $ cat /proc/sys/kernel/core_pattern core $ ulimit -c unlimited $ cd / $ ls -l core ls: cannot access core: No such file or directory $ touch core touch: cannot touch `core': Permission denied $ OHAI="evil-string-here" ping localhost >/dev/null 2>&1 & $ pid=$! $ sleep 1 $ kill -SEGV $pid $ ls -l core -rw------- 1 root kees 458752 Jun 21 11:35 core $ sudo strings core | grep evil OHAI=evil-string-here While cron has been fixed to abort reading a file when there is any parse error, there are still other sensitive directories that will read any file present and skip unparsable lines. Instead of introducing a suid_dumpable=3 mode and breaking all users of mode 2, this only disables the unsafe portion of mode 2 (writing to disk via relative path). Most users of mode 2 (e.g. Chrome OS) already use a core dump pipe handler, so this change will not break them. For the situations where a pipe handler is not defined but mode 2 is still active, crash dumps will only be written to fully qualified paths. If a relative path is defined (e.g. the default "core" pattern), dump attempts will trigger a printk yelling about the lack of a fully qualified path. Signed-off-by: Kees Cook Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index e95aeeddd25c..95aae3f9c036 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2111,6 +2111,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe; + bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .signr = signr, @@ -2136,14 +2137,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (!cred) goto fail; /* - * We cannot trust fsuid as being the "true" uid of the - * process nor do we know its entire history. We only know it - * was tainted so we dump it as root in mode 2. + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). */ if (__get_dumpable(cprm.mm_flags) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + need_nonrelative = true; } retval = coredump_wait(exit_code, &core_state); @@ -2223,6 +2226,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; + if (need_nonrelative && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + cprm.file = filp_open(cn.corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); -- cgit From 54b501992dd2a839e94e76aa392c392b55080ce8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:18 -0700 Subject: coredump: warn about unsafe suid_dumpable / core_pattern combo When suid_dumpable=2, detect unsafe core_pattern settings and warn when they are seen. Signed-off-by: Kees Cook Suggested-by: Andrew Morton Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 95aae3f9c036..5af8390e0fae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2002,17 +2002,17 @@ static void coredump_finish(struct mm_struct *mm) void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case 0: + case SUID_DUMPABLE_DISABLED: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 1: + case SUID_DUMPABLE_ENABLED: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 2: + case SUID_DUMPABLE_SAFE: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -2025,7 +2025,7 @@ static int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret >= 2) ? 2 : ret; + return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; } int get_dumpable(struct mm_struct *mm) @@ -2142,7 +2142,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == 2) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ -- cgit From 533574c6bc30cf526cc1c41bde050c854a945efb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 30 Jul 2012 14:40:13 -0700 Subject: btrfs: use printk_get_level and printk_skip_level, add __printf, fix fallout Use the generic printk_get_level() to search a message for a kern_level. Add __printf to verify format and arguments. Fix a few messages that had mismatches in format and arguments. Add #ifdef CONFIG_PRINTK blocks to shrink the object size a bit when not using printk. [akpm@linux-foundation.org: whitespace tweak] Signed-off-by: Joe Perches Cc: Kay Sievers Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/ctree.h | 13 +++++++++++++ fs/btrfs/disk-io.c | 2 +- fs/btrfs/relocation.c | 2 +- fs/btrfs/super.c | 41 +++++++++++++++++++++++++++++++++++------ 4 files changed, 50 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index adb1cd7ceb9b..4bab807227ad 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3342,10 +3342,22 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); + +#ifdef CONFIG_PRINTK +__printf(2, 3) void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...); +#else +static inline __printf(2, 3) +void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) +{ +} +#endif + +__printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); + void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, unsigned int line, int errno); @@ -3386,6 +3398,7 @@ do { \ (errno), fmt, ##args); \ } while (0) +__printf(5, 6) void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 502b20c56e84..fadeba6a5db9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1114,7 +1114,7 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, spin_unlock(&root->fs_info->delalloc_lock); btrfs_panic(root->fs_info, -EOVERFLOW, "Can't clear %lu bytes from " - " dirty_mdatadata_bytes (%lu)", + " dirty_mdatadata_bytes (%llu)", buf->len, root->fs_info->dirty_metadata_bytes); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c5dbd9149679..4da08652004d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1241,7 +1241,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (rb_node) { btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " - "tree\n"); + "tree\n", node->bytenr); kfree(node); return -EEXIST; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fa61ef59cd61..8c6e61d6eed5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -125,6 +125,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) } } +#ifdef CONFIG_PRINTK /* * __btrfs_std_error decodes expected errors from the caller and * invokes the approciate error response. @@ -167,7 +168,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, va_end(args); } -const char *logtypes[] = { +static const char * const logtypes[] = { "emergency", "alert", "critical", @@ -185,22 +186,50 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) struct va_format vaf; va_list args; const char *type = logtypes[4]; + int kern_level; va_start(args, fmt); - if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { - memcpy(lvl, fmt, 3); - lvl[3] = '\0'; - fmt += 3; - type = logtypes[fmt[1] - '0']; + kern_level = printk_get_level(fmt); + if (kern_level) { + size_t size = printk_skip_level(fmt) - fmt; + memcpy(lvl, fmt, size); + lvl[size] = '\0'; + fmt += size; + type = logtypes[kern_level - '0']; } else *lvl = '\0'; vaf.fmt = fmt; vaf.va = &args; + printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf); + + va_end(args); } +#else + +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno, const char *fmt, ...) +{ + struct super_block *sb = fs_info->sb; + + /* + * Special case: if the error is EROFS, and we're already + * under MS_RDONLY, then it is safe here. + */ + if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) + return; + + /* Don't go through full error handling during mount */ + if (sb->s_flags & MS_BORN) { + save_error_info(fs_info); + btrfs_handle_error(fs_info); + } +} +#endif + /* * We only mark the transaction aborted and then set the file system read-only. * This will prevent new transactions from starting or trying to join this -- cgit From 9b58f6d4aaef070bf6e0744713b6d2b6fc1b3578 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:40:57 -0700 Subject: qnx4fs: use memweight() Use memweight() to count the total number of bits clear in memory area. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. Signed-off-by: Akinobu Mita Acked-by: Anders Larsen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/qnx4/bitmap.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 22e0d60e53ef..76a7a697b778 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -17,23 +17,6 @@ #include #include "qnx4.h" -static void count_bits(register const char *bmPart, register int size, - int *const tf) -{ - char b; - int tot = *tf; - - if (size > QNX4_BLOCK_SIZE) { - size = QNX4_BLOCK_SIZE; - } - do { - b = *bmPart++; - tot += 8 - hweight8(b); - size--; - } while (size != 0); - *tf = tot; -} - unsigned long qnx4_count_free_blocks(struct super_block *sb) { int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; @@ -44,13 +27,16 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb) struct buffer_head *bh; while (total < size) { + int bytes = min(size - total, QNX4_BLOCK_SIZE); + if ((bh = sb_bread(sb, start + offset)) == NULL) { printk(KERN_ERR "qnx4: I/O error in counting free blocks\n"); break; } - count_bits(bh->b_data, size - total, &total_free); + total_free += bytes * BITS_PER_BYTE - + memweight(bh->b_data, bytes); brelse(bh); - total += QNX4_BLOCK_SIZE; + total += bytes; offset++; } -- cgit From 0121ad62c20ed779e38ad689071da2805f03249f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:00 -0700 Subject: affs: use memweight() Use memweight() to count the total number of bits set in memory area. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/bitmap.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 6e0be43ef6ef..a32246b8359e 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -10,30 +10,6 @@ #include #include "affs.h" -/* This is, of course, shamelessly stolen from fs/minix */ - -static const int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; - -static u32 -affs_count_free_bits(u32 blocksize, const void *data) -{ - const u32 *map; - u32 free; - u32 tmp; - - map = data; - free = 0; - for (blocksize /= 4; blocksize > 0; blocksize--) { - tmp = *map++; - while (tmp) { - free += nibblemap[tmp & 0xf]; - tmp >>= 4; - } - } - - return free; -} - u32 affs_count_free_blocks(struct super_block *sb) { @@ -317,7 +293,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) goto out; } pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key); - bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4); + bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); /* Don't try read the extension if this is the last block, * but we also need the right bm pointer below @@ -367,7 +343,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) /* recalculate bitmap count for last block */ bm--; - bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4); + bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); out: affs_brelse(bh); -- cgit From a75613ec73ec87726a81fe421385a13c25fdcfc4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:03 -0700 Subject: ocfs2: use memweight() Use memweight to count the total number of bits set in memory area. Signed-off-by: Akinobu Mita Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/localalloc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 210c35237548..a9f78c74d687 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -784,14 +784,10 @@ bail: static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) { - int i; - u8 *buffer; - u32 count = 0; + u32 count; struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); - buffer = la->la_bitmap; - for (i = 0; i < le16_to_cpu(la->la_size); i++) - count += hweight8(buffer[i]); + count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); trace_ocfs2_local_alloc_count_bits(count); return count; -- cgit From ecd0afa3ced0ebf36901b53fd9ee431f8a34a161 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:05 -0700 Subject: ext2: use memweight() Convert ext2_count_free() to use memweight() instead of table lookup based counting clear bits implementation. This change only affects the code segments enabled by EXT2FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following changes. - Remove unnecessary map == NULL check in ext2_count_free() which always takes non-null pointer as the memory area. - Fix printk format warning that only reveals with EXT2FS_DEBUG. Signed-off-by: Akinobu Mita Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/balloc.c | 14 ++------------ fs/ext2/ialloc.c | 1 + 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1c3613998862..376aa77f3ca7 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1444,19 +1444,9 @@ ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp) #ifdef EXT2FS_DEBUG -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - -unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars) { - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); } #endif /* EXT2FS_DEBUG */ diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c13eb7b91a11..8f370e012e61 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -644,6 +644,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) } brelse(bitmap_bh); printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + (unsigned long) percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), desc_count, bitmap_count); return desc_count; -- cgit From 10d470849a7c6dd360e8ad4770160ad7af9adb4b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:06 -0700 Subject: ext3: use memweight() Convert ext3_count_free() to use memweight() instead of table lookup based counting clear bits implementation. This change only affects the code segments enabled by EXT3FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following changes. - Remove unnecessary map == NULL check in ext3_count_free() which always takes non-null pointer as the memory area. - Fix printk format warning that only reveals with EXT3FS_DEBUG. Signed-off-by: Akinobu Mita Acked-by: Jan Kara Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 2 +- fs/ext3/bitmap.c | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 25cd60892116..90d901f0486b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1813,7 +1813,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) brelse(bitmap_bh); printk("ext3_count_free_blocks: stored = "E3FSBLK ", computed = "E3FSBLK", "E3FSBLK"\n", - le32_to_cpu(es->s_free_blocks_count), + (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c index 909d13e26560..ef9c643e8e9d 100644 --- a/fs/ext3/bitmap.c +++ b/fs/ext3/bitmap.c @@ -11,19 +11,9 @@ #ifdef EXT3FS_DEBUG -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) { - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); } #endif /* EXT3FS_DEBUG */ -- cgit From 6017b485caeae5915956190b4f3d8307021e785d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:41:08 -0700 Subject: ext4: use memweight() Convert ext4_count_free() to use memweight() instead of table lookup based counting clear bits implementation. This change only affects the code segments enabled by EXT4FS_DEBUG. Note that this memweight() call can't be replaced with a single bitmap_weight() call, although the pointer to the memory area is aligned to long-word boundary. Because the size of the memory area may not be a multiple of BITS_PER_LONG, then it returns wrong value on big-endian architecture. This also includes the following change. - Remove unnecessary map == NULL check in ext4_count_free() which always takes non-null pointer as the memory area. Signed-off-by: Akinobu Mita Cc: "Theodore Ts'o" Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/bitmap.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index a94b9c63ee5c..f8716eab9995 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -11,16 +11,9 @@ #include #include "ext4.h" -static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - unsigned int ext4_count_free(char *bitmap, unsigned int numchars) { - unsigned int i, sum = 0; - - for (i = 0; i < numchars; i++) - sum += nibblemap[bitmap[i] & 0xf] + - nibblemap[(bitmap[i] >> 4) & 0xf]; - return sum; + return numchars * BITS_PER_BYTE - memweight(bitmap, numchars); } int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, -- cgit From 6ed6a722f9abac25b0549e7507a2b745ede4475c Mon Sep 17 00:00:00 2001 From: Vladimir Serbinenko Date: Mon, 30 Jul 2012 14:42:00 -0700 Subject: minixfs: fix block limit check On minix2 and minix3 usually max_size is 7fffffff and the check in question prohibits creation of last block spanning right before 7fffffff, due to downward rounding during the division. Fix it by using multiplication instead. [akpm@linux-foundation.org: fix up code layout, use local `sb'] Signed-off-by: Vladimir Serbinenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/itree_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index 13487ad16894..78e2d93e5c83 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,7 +32,8 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n", block, bdevname(sb->s_bdev, b)); - } else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) { + } else if ((u64)block * (u64)sb->s_blocksize >= + minix_sb(sb)->s_max_size) { if (printk_ratelimit()) printk("MINIX-fs: block_to_path: " "block %ld too big on dev %s\n", -- cgit From 6b0f3393e38584ec22bab62fe01df58ae5a73ee7 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 30 Jul 2012 14:42:02 -0700 Subject: nilfs2: add omitted comment for ns_mount_state field of the_nilfs structure Add omitted comment for ns_mount_state field of the_nilfs structure. Signed-off-by: Vyacheslav Dubeyko Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/the_nilfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 9992b11312ff..ef40a510e2f3 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -52,6 +52,7 @@ enum { * @ns_sbwtime: previous write time of super block * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block + * @ns_mount_state: file system state * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next -- cgit From 278038ac53c6c4f53d1d34f978beb9aba1410b2c Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 30 Jul 2012 14:42:03 -0700 Subject: nilfs2: remove references to long gone super operations ->delete_inode(), ->write_super_lockfs(), ->unlockfs() are gone so remove references to them in the NTFS code. Noticed while cleaning up the fsfreeze mess. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index d57c42f974ea..a76d6ea51ffb 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -677,7 +677,6 @@ static const struct super_operations nilfs_sops = { .destroy_inode = nilfs_destroy_inode, .dirty_inode = nilfs_dirty_inode, /* .write_inode = nilfs_write_inode, */ - /* .put_inode = nilfs_put_inode, */ /* .drop_inode = nilfs_drop_inode, */ .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, @@ -685,8 +684,6 @@ static const struct super_operations nilfs_sops = { .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, - /* .write_super_lockfs */ - /* .unlockfs */ .statfs = nilfs_statfs, .remount_fs = nilfs_remount, /* .umount_begin */ -- cgit From fe0627e7b3d32a41c16fac6e0af091991545865e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 30 Jul 2012 14:42:05 -0700 Subject: nilfs2: fix timing issue between rmcp and chcp ioctls The checkpoint deletion ioctl (rmcp ioctl) has potential for breaking snapshot because it is not fully exclusive with checkpoint mode change ioctl (chcp ioctl). The rmcp ioctl first tests if the specified checkpoint is a snapshot or not within nilfs_cpfile_delete_checkpoint function, and then calls nilfs_cpfile_delete_checkpoints function to actually invalidate the checkpoint only if it's not a snapshot. However, the checkpoint can be changed into a snapshot by the chcp ioctl between these two operations. In that case, calling nilfs_cpfile_delete_checkpoints() wrongly invalidates the snapshot, which leads to snapshot list corruption and snapshot count mismatch. This fixes the issue by changing nilfs_cpfile_delete_checkpoints() so that it reconfirms the target checkpoints are snapshot or not. This second check is exclusive with the chcp operation since it is protected by an existing semaphore. Signed-off-by: Ryusuke Konishi Cc: Fernando Luis Vazquez Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index dab5c4c6dfaf..deaa3d33a0aa 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -286,7 +286,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, __u64 cno; void *kaddr; unsigned long tnicps; - int ret, ncps, nicps, count, i; + int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { printk(KERN_ERR "%s: invalid range of checkpoint numbers: " @@ -301,6 +301,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, if (ret < 0) goto out_sem; tnicps = 0; + nss = 0; for (cno = start; cno < end; cno += ncps) { ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); @@ -318,8 +319,9 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - WARN_ON(nilfs_checkpoint_snapshot(cp)); - if (!nilfs_checkpoint_invalid(cp)) { + if (nilfs_checkpoint_snapshot(cp)) { + nss++; + } else if (!nilfs_checkpoint_invalid(cp)) { nilfs_checkpoint_set_invalid(cp); nicps++; } @@ -364,6 +366,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } brelse(header_bh); + if (nss > 0) + ret = -EBUSY; out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); -- cgit From 572d8b3945a31bee7c40d21556803e4807fd9141 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 30 Jul 2012 14:42:07 -0700 Subject: nilfs2: fix deadlock issue between chcp and thaw ioctls An fs-thaw ioctl causes deadlock with a chcp or mkcp -s command: chcp D ffff88013870f3d0 0 1325 1324 0x00000004 ... Call Trace: nilfs_transaction_begin+0x11c/0x1a0 [nilfs2] wake_up_bit+0x20/0x20 copy_from_user+0x18/0x30 [nilfs2] nilfs_ioctl_change_cpmode+0x7d/0xcf [nilfs2] nilfs_ioctl+0x252/0x61a [nilfs2] do_page_fault+0x311/0x34c get_unmapped_area+0x132/0x14e do_vfs_ioctl+0x44b/0x490 __set_task_blocked+0x5a/0x61 vm_mmap_pgoff+0x76/0x87 __set_current_blocked+0x30/0x4a sys_ioctl+0x4b/0x6f system_call_fastpath+0x16/0x1b thaw D ffff88013870d890 0 1352 1351 0x00000004 ... Call Trace: rwsem_down_failed_common+0xdb/0x10f call_rwsem_down_write_failed+0x13/0x20 down_write+0x25/0x27 thaw_super+0x13/0x9e do_vfs_ioctl+0x1f5/0x490 vm_mmap_pgoff+0x76/0x87 sys_ioctl+0x4b/0x6f filp_close+0x64/0x6c system_call_fastpath+0x16/0x1b where the thaw ioctl deadlocked at thaw_super() when called while chcp was waiting at nilfs_transaction_begin() called from nilfs_ioctl_change_cpmode(). This deadlock is 100% reproducible. This is because nilfs_ioctl_change_cpmode() first locks sb->s_umount in read mode and then waits for unfreezing in nilfs_transaction_begin(), whereas thaw_super() locks sb->s_umount in write mode. The locking of sb->s_umount here was intended to make snapshot mounts and the downgrade of snapshots to checkpoints exclusive. This fixes the deadlock issue by replacing the sb->s_umount usage in nilfs_ioctl_change_cpmode() with a dedicated mutex which protects snapshot mounts. Signed-off-by: Ryusuke Konishi Cc: Fernando Luis Vazquez Cao Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 4 ++-- fs/nilfs2/super.c | 3 +++ fs/nilfs2/the_nilfs.c | 1 + fs/nilfs2/the_nilfs.h | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 06658caa18bd..0b6387c67e6c 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - down_read(&inode->i_sb->s_umount); + mutex_lock(&nilfs->ns_snapshot_mount_mutex); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( @@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ - up_read(&inode->i_sb->s_umount); + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); out: mnt_drop_write_file(filp); return ret; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index a76d6ea51ffb..6522cac6057c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -945,6 +945,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct nilfs_root *root; int ret; + mutex_lock(&nilfs->ns_snapshot_mount_mutex); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); @@ -969,6 +971,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 501b7f8b739f..41e6a04a561f 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_bdev = bdev; atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_snapshot_mount_mutex); INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index ef40a510e2f3..2558f320b821 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -47,6 +47,7 @@ enum { * @ns_flags: flags * @ns_bdev: block device * @ns_sem: semaphore for shared states + * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block @@ -100,6 +101,7 @@ struct the_nilfs { struct block_device *ns_bdev; struct rw_semaphore ns_sem; + struct mutex ns_snapshot_mount_mutex; /* * used for -- cgit From f5974c8f8cf431baf44e7127b669e3b1960f184f Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 30 Jul 2012 14:42:10 -0700 Subject: nilfs2: add omitted comments for different structures in driver implementation Add omitted comments for different structures in driver implementation. Signed-off-by: Vyacheslav Dubeyko Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.h | 14 +++++++++++--- fs/nilfs2/bmap.h | 7 +++++++ fs/nilfs2/btnode.h | 8 +++++++- fs/nilfs2/dat.c | 6 ++++++ fs/nilfs2/export.h | 8 ++++++++ fs/nilfs2/ifile.c | 6 +++++- fs/nilfs2/inode.c | 7 +++++++ fs/nilfs2/mdt.h | 7 +++++++ fs/nilfs2/nilfs.h | 17 +++++++++++++++-- fs/nilfs2/sufile.c | 8 +++++++- fs/nilfs2/the_nilfs.h | 3 +-- 11 files changed, 81 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index f5fde36b9e28..fb7238100548 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -76,15 +76,23 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit find_next_zero_bit_le -/* - * persistent object allocator cache +/** + * struct nilfs_bh_assoc - block offset and buffer head association + * @blkoff: block offset + * @bh: buffer head */ - struct nilfs_bh_assoc { unsigned long blkoff; struct buffer_head *bh; }; +/** + * struct nilfs_palloc_cache - persistent object allocator cache + * @lock: cache protecting lock + * @prev_desc: blockgroup descriptors cache + * @prev_bitmap: blockgroup bitmap cache + * @prev_entry: translation entries cache + */ struct nilfs_palloc_cache { spinlock_t lock; struct nilfs_bh_assoc prev_desc; diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 40d9f453d31c..b89e68076adc 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -135,6 +135,13 @@ struct nilfs_bmap { /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +/** + * struct nilfs_bmap_store - shadow copy of bmap state + * @data: cached raw block mapping of on-disk inode + * @last_allocated_key: cached value of last allocated key for data block + * @last_allocated_ptr: cached value of last allocated ptr for data block + * @state: cached value of state field of bmap structure + */ struct nilfs_bmap_store { __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; __u64 last_allocated_key; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 3a4dd2d8d3fc..d876b565ce64 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -29,7 +29,13 @@ #include #include - +/** + * struct nilfs_btnode_chkey_ctxt - change key context + * @oldkey: old key of block's moving content + * @newkey: new key for block's content + * @bh: buffer head of old buffer + * @newbh: buffer head of new buffer + */ struct nilfs_btnode_chkey_ctxt { __u64 oldkey; __u64 newkey; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index b5c13f3576b9..fa0f80308c2d 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,12 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +/** + * struct nilfs_dat_info - on-memory private data of DAT file + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of DAT file + * @shadow: shadow map of DAT file + */ struct nilfs_dat_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h index a71cc412b651..19ccbf9522ab 100644 --- a/fs/nilfs2/export.h +++ b/fs/nilfs2/export.h @@ -5,6 +5,14 @@ extern const struct export_operations nilfs_export_ops; +/** + * struct nilfs_fid - NILFS file id type + * @cno: checkpoint number + * @ino: inode number + * @gen: file generation (version) for NFS + * @parent_gen: parent generation (version) for NFS + * @parent_ino: parent inode number + */ struct nilfs_fid { u64 cno; u64 ino; diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 5a48df79d674..d8e65bde083c 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -29,7 +29,11 @@ #include "alloc.h" #include "ifile.h" - +/** + * struct nilfs_ifile_info - on-memory private data of ifile + * @mi: on-memory private data of metadata file + * @palloc_cache: persistent object allocator cache of ifile + */ struct nilfs_ifile_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7cc64465ec26..6e2c3db976b2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -34,6 +34,13 @@ #include "cpfile.h" #include "ifile.h" +/** + * struct nilfs_iget_args - arguments used during comparison between inodes + * @ino: inode number + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag + */ struct nilfs_iget_args { u64 ino; __u64 cno; diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index ab20a4baa50f..ab172e8549c5 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -28,6 +28,13 @@ #include "nilfs.h" #include "page.h" +/** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state + * @frozen_data: shadowed dirty data pages + * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @frozen_buffers: list of frozen buffers + */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; struct address_space frozen_data; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 250add84da76..74cece80e9a3 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -32,8 +32,21 @@ #include "the_nilfs.h" #include "bmap.h" -/* - * nilfs inode data in memory +/** + * struct nilfs_inode_info - nilfs inode data in memory + * @i_flags: inode flags + * @i_state: dynamic state flags + * @i_bmap: pointer on i_bmap_data + * @i_bmap_data: raw block mapping + * @i_xattr: + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode + * @i_btnode_cache: cached pages of b-tree nodes + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode + * @i_root: root object of the current filesystem tree + * @vfs_inode: VFS inode object */ struct nilfs_inode_info { __u32 i_flags; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c5b7653a4391..3127e9f438a7 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -30,7 +30,13 @@ #include "mdt.h" #include "sufile.h" - +/** + * struct nilfs_sufile_info - on-memory private data of sufile + * @mi: on-memory private data of metadata file + * @ncleansegs: number of clean segments + * @allocmin: lower limit of allocatable segment range + * @allocmax: upper limit of allocatable segment range + */ struct nilfs_sufile_info { struct nilfs_mdt_info mi; unsigned long ncleansegs;/* number of clean segments */ diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 2558f320b821..6eee4177807b 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -232,9 +232,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) * @count: refcount of this structure * @nilfs: nilfs object * @ifile: inode file - * @root: root inode * @inodes_count: number of inodes - * @blocks_count: number of blocks (Reserved) + * @blocks_count: number of blocks */ struct nilfs_root { __u64 cno; -- cgit From 497d48bd27ec1c44b4600e8e98a776188f2e11f2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 30 Jul 2012 14:42:11 -0700 Subject: hfsplus: use -ENOMEM when kzalloc() fails Use -ENOMEM return value instead of -EINVAL when kzalloc() fails. Signed-off-by: Namjae Jeon Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 473332098013..fdafb2d71654 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -365,7 +365,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) u64 last_fs_block, last_fs_page; int err; - err = -EINVAL; + err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto out; -- cgit From a943ed71c9171fb5e3b256e8022bbedff95cc826 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Mon, 30 Jul 2012 14:42:13 -0700 Subject: fat: accessors for msdos_dir_entry 'start' fields Simplify code by providing accessor functions for the directory entry start cluster fields. Signed-off-by: Steven J. Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 15 +++++++++++++++ fs/fat/inode.c | 12 +++--------- fs/fat/namei_msdos.c | 11 +++-------- fs/fat/namei_vfat.c | 11 +++-------- 4 files changed, 24 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index fc35c5c69136..2deeeb86f331 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -217,6 +217,21 @@ static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) #endif } +static inline int fat_get_start(const struct msdos_sb_info *sbi, + const struct msdos_dir_entry *de) +{ + int cluster = le16_to_cpu(de->start); + if (sbi->fat_bits == 32) + cluster |= (le16_to_cpu(de->starthi) << 16); + return cluster; +} + +static inline void fat_set_start(struct msdos_dir_entry *de, int cluster) +{ + de->start = cpu_to_le16(cluster); + de->starthi = cpu_to_le16(cluster >> 16); +} + static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) { #ifdef __BIG_ENDIAN diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0038b32cb362..05e897fe9866 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -369,10 +369,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; - MSDOS_I(inode)->i_start = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) - MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); - + MSDOS_I(inode)->i_start = fat_get_start(sbi, de); MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start; error = fat_calc_dir_size(inode); if (error < 0) @@ -385,9 +382,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_mode = fat_make_mode(sbi, de->attr, ((sbi->options.showexec && !is_exec(de->name + 8)) ? S_IRUGO|S_IWUGO : S_IRWXUGO)); - MSDOS_I(inode)->i_start = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) - MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); + MSDOS_I(inode)->i_start = fat_get_start(sbi, de); MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start; inode->i_size = le32_to_cpu(de->size); @@ -613,8 +608,7 @@ retry: else raw_entry->size = cpu_to_le32(inode->i_size); raw_entry->attr = fat_make_attrs(inode); - raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); - raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); + fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, &raw_entry->date, NULL); if (sbi->options.isvfat) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 70d993a93805..b0e12bf9f4a1 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -246,8 +246,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, de.ctime_cs = 0; de.time = time; de.date = date; - de.start = cpu_to_le16(cluster); - de.starthi = cpu_to_le16(cluster >> 16); + fat_set_start(&de, cluster); de.size = 0; err = fat_add_entries(dir, &de, 1, sinfo); @@ -530,9 +529,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, mark_inode_dirty(old_inode); if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); @@ -572,9 +569,7 @@ error_dotdot: corrupt = 1; if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6cc480652433..6a6d8c0715a1 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -651,8 +651,7 @@ shortname: de->time = de->ctime = time; de->date = de->cdate = de->adate = date; de->ctime_cs = time_cs; - de->start = cpu_to_le16(cluster); - de->starthi = cpu_to_le16(cluster >> 16); + fat_set_start(de, cluster); de->size = 0; out_free: __putname(uname); @@ -965,9 +964,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_inode); if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); @@ -1009,9 +1006,7 @@ error_dotdot: corrupt = 1; if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); + fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } -- cgit From deb8274a0cf44827ec260330cc1d94d0f3dcfb94 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Mon, 30 Jul 2012 14:42:16 -0700 Subject: fat: refactor shortname parsing Nearly identical shortname parsing is performed in fat_search_long() and __fat_readdir(). Extract this code into a function that may be called by both. Signed-off-by: Steven J. Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 255 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 137 insertions(+), 118 deletions(-) (limited to 'fs') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 6eaa28c98ad1..dc49ed2cbffa 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -35,6 +35,11 @@ #define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) #define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) +static inline unsigned char fat_tolower(unsigned char c) +{ + return ((c >= 'A') && (c <= 'Z')) ? c+32 : c; +} + static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, struct msdos_dir_entry *de) @@ -333,6 +338,124 @@ parse_long: return 0; } +/** + * fat_parse_short - Parse MS-DOS (short) directory entry. + * @sb: superblock + * @de: directory entry to parse + * @name: FAT_MAX_SHORT_SIZE array in which to place extracted name + * @dot_hidden: Nonzero == prepend '.' to names with ATTR_HIDDEN + * + * Returns the number of characters extracted into 'name'. + */ +static int fat_parse_short(struct super_block *sb, + const struct msdos_dir_entry *de, + unsigned char *name, int dot_hidden) +{ + const struct msdos_sb_info *sbi = MSDOS_SB(sb); + int isvfat = sbi->options.isvfat; + int nocase = sbi->options.nocase; + unsigned short opt_shortname = sbi->options.shortname; + struct nls_table *nls_disk = sbi->nls_disk; + wchar_t uni_name[14]; + unsigned char c, work[MSDOS_NAME]; + unsigned char *ptname = name; + int chi, chl, i, j, k; + int dotoffset = 0; + int name_len = 0, uni_len = 0; + + if (!isvfat && dot_hidden && (de->attr & ATTR_HIDDEN)) { + *ptname++ = '.'; + dotoffset = 1; + } + + memcpy(work, de->name, sizeof(work)); + /* see namei.c, msdos_format_name */ + if (work[0] == 0x05) + work[0] = 0xE5; + + /* Filename */ + for (i = 0, j = 0; i < 8;) { + c = work[i]; + if (!c) + break; + chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, + &uni_name[j++], opt_shortname, + de->lcase & CASE_LOWER_BASE); + if (chl <= 1) { + if (!isvfat) + ptname[i] = nocase ? c : fat_tolower(c); + i++; + if (c != ' ') { + name_len = i; + uni_len = j; + } + } else { + uni_len = j; + if (isvfat) + i += min(chl, 8-i); + else { + for (chi = 0; chi < chl && i < 8; chi++, i++) + ptname[i] = work[i]; + } + if (chl) + name_len = i; + } + } + + i = name_len; + j = uni_len; + fat_short2uni(nls_disk, ".", 1, &uni_name[j++]); + if (!isvfat) + ptname[i] = '.'; + i++; + + /* Extension */ + for (k = 8; k < MSDOS_NAME;) { + c = work[k]; + if (!c) + break; + chl = fat_shortname2uni(nls_disk, &work[k], MSDOS_NAME - k, + &uni_name[j++], opt_shortname, + de->lcase & CASE_LOWER_EXT); + if (chl <= 1) { + k++; + if (!isvfat) + ptname[i] = nocase ? c : fat_tolower(c); + i++; + if (c != ' ') { + name_len = i; + uni_len = j; + } + } else { + uni_len = j; + if (isvfat) { + int offset = min(chl, MSDOS_NAME-k); + k += offset; + i += offset; + } else { + for (chi = 0; chi < chl && k < MSDOS_NAME; + chi++, i++, k++) { + ptname[i] = work[k]; + } + } + if (chl) + name_len = i; + } + } + + if (name_len > 0) { + name_len += dotoffset; + + if (sbi->options.isvfat) { + uni_name[uni_len] = 0x0000; + name_len = fat_uni_to_x8(sb, uni_name, name, + FAT_MAX_SHORT_SIZE); + } + } + + return name_len; +} + /* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. @@ -344,15 +467,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh = NULL; struct msdos_dir_entry *de; - struct nls_table *nls_disk = sbi->nls_disk; unsigned char nr_slots; - wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char work[MSDOS_NAME]; unsigned char bufname[FAT_MAX_SHORT_SIZE]; - unsigned short opt_shortname = sbi->options.shortname; loff_t cpos = 0; - int chl, i, j, last_u, err, len; + int err, len; err = -ENOENT; while (1) { @@ -380,47 +499,16 @@ parse_record: goto end_of_dir; } - memcpy(work, de->name, sizeof(de->name)); - /* see namei.c, msdos_format_name */ - if (work[0] == 0x05) - work[0] = 0xE5; - for (i = 0, j = 0, last_u = 0; i < 8;) { - if (!work[i]) - break; - chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_BASE); - if (chl <= 1) { - if (work[i] != ' ') - last_u = j; - } else { - last_u = j; - } - i += chl; - } - j = last_u; - fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); - for (i = 8; i < MSDOS_NAME;) { - if (!work[i]) - break; - chl = fat_shortname2uni(nls_disk, &work[i], - MSDOS_NAME - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_EXT); - if (chl <= 1) { - if (work[i] != ' ') - last_u = j; - } else { - last_u = j; - } - i += chl; - } - if (!last_u) + /* Never prepend '.' to hidden files here. + * That is done only for msdos mounts (and only when + * 'dotsOK=yes'); if we are executing here, it is in the + * context of a vfat mount. + */ + len = fat_parse_short(sb, de, bufname, 0); + if (len == 0) continue; /* Compare shortname */ - bufuname[last_u] = 0x0000; - len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); if (fat_name_match(sbi, name, name_len, bufname, len)) goto found; @@ -469,20 +557,15 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; struct msdos_dir_entry *de; - struct nls_table *nls_disk = sbi->nls_disk; unsigned char nr_slots; - wchar_t bufuname[14]; wchar_t *unicode = NULL; - unsigned char c, work[MSDOS_NAME]; - unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname; - unsigned short opt_shortname = sbi->options.shortname; + unsigned char bufname[FAT_MAX_SHORT_SIZE]; int isvfat = sbi->options.isvfat; - int nocase = sbi->options.nocase; const char *fill_name = NULL; unsigned long inum; unsigned long lpos, dummy, *furrfu = &lpos; loff_t cpos; - int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0; + int short_len = 0, fill_len = 0; int ret = 0; lock_super(sb); @@ -556,74 +639,10 @@ parse_record: } } - if (sbi->options.dotsOK) { - ptname = bufname; - dotoffset = 0; - if (de->attr & ATTR_HIDDEN) { - *ptname++ = '.'; - dotoffset = 1; - } - } - - memcpy(work, de->name, sizeof(de->name)); - /* see namei.c, msdos_format_name */ - if (work[0] == 0x05) - work[0] = 0xE5; - for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) { - if (!(c = work[i])) - break; - chl = fat_shortname2uni(nls_disk, &work[i], 8 - i, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_BASE); - if (chl <= 1) { - ptname[i++] = (!nocase && c>='A' && c<='Z') ? c+32 : c; - if (c != ' ') { - last = i; - last_u = j; - } - } else { - last_u = j; - for (chi = 0; chi < chl && i < 8; chi++) { - ptname[i] = work[i]; - i++; last = i; - } - } - } - i = last; - j = last_u; - fat_short2uni(nls_disk, ".", 1, &bufuname[j++]); - ptname[i++] = '.'; - for (i2 = 8; i2 < MSDOS_NAME;) { - if (!(c = work[i2])) - break; - chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2, - &bufuname[j++], opt_shortname, - de->lcase & CASE_LOWER_EXT); - if (chl <= 1) { - i2++; - ptname[i++] = (!nocase && c>='A' && c<='Z') ? c+32 : c; - if (c != ' ') { - last = i; - last_u = j; - } - } else { - last_u = j; - for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) { - ptname[i++] = work[i2++]; - last = i; - } - } - } - if (!last) + short_len = fat_parse_short(sb, de, bufname, sbi->options.dotsOK); + if (short_len == 0) goto record_end; - i = last + dotoffset; - j = last_u; - - if (isvfat) { - bufuname[j] = 0x0000; - i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); - } if (nr_slots) { /* hack for fat_ioctl_filldir() */ struct fat_ioctl_filldir_callback *p = dirent; @@ -631,12 +650,12 @@ parse_record: p->longname = fill_name; p->long_len = fill_len; p->shortname = bufname; - p->short_len = i; + p->short_len = short_len; fill_name = NULL; fill_len = 0; } else { fill_name = bufname; - fill_len = i; + fill_len = short_len; } start_filldir: -- cgit From 108ceeb020bb3558fe175a3fc8b60fd6c1a2a279 Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Mon, 30 Jul 2012 14:42:23 -0700 Subject: coredump: fix wrong comments on core limits of pipe coredump case In commit 898b374af6f7 ("exec: replace call_usermodehelper_pipe with use of umh init function and resolve limit"), the core limits recursive check value was changed from 0 to 1, but the corresponding comments were not updated. Signed-off-by: Jovi Zhang Cc: Oleg Nesterov Cc: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 5af8390e0fae..3684353ebd5f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2174,15 +2174,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) } if (cprm.limit == 1) { - /* + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use - * cprm.limit of 1 here as a speacial value. Any - * non-1 limit gets set to RLIM_INFINITY below, but - * a limit of 0 skips the dump. This is a consistent - * way to catch recursive crashes. We can still crash - * if the core_pattern binary sets RLIM_CORE = !1 - * but it runs as root, and can do lots of stupid things + * cprm.limit of 1 here as a speacial value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * * Note that we use task_tgid_vnr here to grab the pid * of the process group leader. That way we get the * right pid if a thread in a multi-threaded -- cgit From e8905ec27e2f4ea1b9f7e03df68a060b3ae6fca8 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Mon, 30 Jul 2012 14:42:26 -0700 Subject: proc: environ_read() make sure offset points to environment address range Currently the following offset and environment address range check in environ_read() of /proc//environ is buggy: int this_len = mm->env_end - (mm->env_start + src); if (this_len <= 0) break; Large or negative offsets on /proc//environ converted to 'unsigned long' may pass this check since '(mm->env_start + src)' can overflow and 'this_len' will be positive. This can turn /proc//environ to act like /proc//mem since (mm->env_start + src) will point and read from another VMA. There are two fixes here plus some code cleaning: 1) Fix the overflow by checking if the offset that was converted to unsigned long will always point to the [mm->env_start, mm->env_end] address range. 2) Remove the truncation that was made to the result of the check, storing the result in 'int this_len' will alter its value and we can not depend on it. For kernels that have commit b409e578d ("proc: clean up /proc//environ handling") which adds the appropriate ptrace check and saves the 'mm' at ->open() time, this is not a security issue. This patch is taken from the grsecurity patch since it was just made available. Signed-off-by: Djalal Harouni Cc: Oleg Nesterov Cc: Brad Spengler Acked-by: Kees Cook Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 2772208338f8..39ee093b5e96 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -827,15 +827,16 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; while (count > 0) { - int this_len, retval, max_len; + size_t this_len, max_len; + int retval; - this_len = mm->env_end - (mm->env_start + src); - - if (this_len <= 0) + if (src >= (mm->env_end - mm->env_start)) break; - max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - this_len = (this_len > max_len) ? max_len : this_len; + this_len = mm->env_end - (mm->env_start + src); + + max_len = min_t(size_t, PAGE_SIZE, count); + this_len = min(max_len, this_len); retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); -- cgit From bc452b4b65bd589083a7a7ba4f14f85dfc8454fa Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Mon, 30 Jul 2012 14:42:28 -0700 Subject: proc: do not allow negative offsets on /proc//environ __mem_open() which is called by both /proc//environ and /proc//mem ->open() handlers will allow the use of negative offsets. /proc//mem has negative offsets but not /proc//environ. Clean this by moving the 'force FMODE_UNSIGNED_OFFSET flag' to mem_open() to allow negative offsets only on /proc//mem. Signed-off-by: Djalal Harouni Cc: Oleg Nesterov Cc: Brad Spengler Acked-by: Kees Cook Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 39ee093b5e96..1b6c84cbdb73 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -695,8 +695,6 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) mmput(mm); } - /* OK to pass negative loff_t, we can catch out-of-range */ - file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; return 0; @@ -704,7 +702,12 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) static int mem_open(struct inode *inode, struct file *file) { - return __mem_open(inode, file, PTRACE_MODE_ATTACH); + int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); + + /* OK to pass negative loff_t, we can catch out-of-range */ + file->f_mode |= FMODE_UNSIGNED_OFFSET; + + return ret; } static ssize_t mem_rw(struct file *file, char __user *buf, -- cgit From 98c350cda2c14a343d34ea01a3d9c24fea5ec66d Mon Sep 17 00:00:00 2001 From: Justin Lecher Date: Mon, 30 Jul 2012 14:42:53 -0700 Subject: fs: cachefiles: add support for large files in filesystem caching Support the caching of large files. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=31182 Signed-off-by: Justin Lecher Signed-off-by: Suresh Jayaraman Tested-by: Suresh Jayaraman Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cachefiles/rdwr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c0353dfac51f..c994691d9445 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -919,7 +919,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) * own time */ path.mnt = cache->mnt; path.dentry = object->backer; - file = dentry_open(&path, O_RDWR, cache->cache_cred); + file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); } else { -- cgit From 1d151c337d79fa3de88654d2514f58fbd916a8e0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 30 Jul 2012 14:43:00 -0700 Subject: c/r: fcntl: add F_GETOWNER_UIDS option When we restore file descriptors we would like them to look exactly as they were at dumping time. With help of fcntl it's almost possible, the missing snippet is file owners UIDs. To be able to read their values the F_GETOWNER_UIDS is introduced. This option is valid iif CONFIG_CHECKPOINT_RESTORE is turned on, otherwise returning -EINVAL. Signed-off-by: Cyrill Gorcunov Acked-by: "Eric W. Biederman" Cc: "Serge E. Hallyn" Cc: Oleg Nesterov Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 81b70e665bf0..887b5ba8c9b5 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -340,6 +341,31 @@ static int f_getown_ex(struct file *filp, unsigned long arg) return ret; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static int f_getowner_uids(struct file *filp, unsigned long arg) +{ + struct user_namespace *user_ns = current_user_ns(); + uid_t * __user dst = (void * __user)arg; + uid_t src[2]; + int err; + + read_lock(&filp->f_owner.lock); + src[0] = from_kuid(user_ns, filp->f_owner.uid); + src[1] = from_kuid(user_ns, filp->f_owner.euid); + read_unlock(&filp->f_owner.lock); + + err = put_user(src[0], &dst[0]); + err |= put_user(src[1], &dst[1]); + + return err; +} +#else +static int f_getowner_uids(struct file *filp, unsigned long arg) +{ + return -EINVAL; +} +#endif + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -396,6 +422,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SETOWN_EX: err = f_setown_ex(filp, arg); break; + case F_GETOWNER_UIDS: + err = f_getowner_uids(filp, arg); + break; case F_GETSIG: err = filp->f_owner.signum; break; -- cgit