From 8c4d5a4387161c58a0f3d4c6c849b59bbb6c098a Mon Sep 17 00:00:00 2001 From: Gang He Date: Thu, 6 Jul 2017 15:36:07 -0700 Subject: ocfs2: fix a static checker warning Fix a static code checker warning: fs/ocfs2/inode.c:179 ocfs2_iget() warn: passing zero to 'ERR_PTR' Fixes: d56a8f32e4c6 ("ocfs2: check/fix inode block for online file check") Link: http://lkml.kernel.org/r/1495516634-1952-1-git-send-email-ghe@suse.com Signed-off-by: Gang He Reviewed-by: Joseph Qi Reviewed-by: Eric Ren Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 382401d3e88f..1a1e0078ab38 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -136,7 +136,7 @@ struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, int sysfile_type) { - int rc = 0; + int rc = -ESTALE; struct inode *inode = NULL; struct super_block *sb = osb->sb; struct ocfs2_find_inode_args args; -- cgit From 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 6 Jul 2017 15:36:10 -0700 Subject: ocfs2: use magic.h Filesystems generally use SUPER_MAGIC values from magic.h instead of a local definition. Link: http://lkml.kernel.org/r/20170521154217.27917-1-fabf@skynet.be Signed-off-by: Fabian Frederick Reviewed-by: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/ocfs2_fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 44d178b8d1aa..5bb4a89f9045 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -25,6 +25,8 @@ #ifndef _OCFS2_FS_H #define _OCFS2_FS_H +#include + /* Version */ #define OCFS2_MAJOR_REV_LEVEL 0 #define OCFS2_MINOR_REV_LEVEL 90 @@ -56,9 +58,6 @@ #define OCFS2_MIN_BLOCKSIZE 512 #define OCFS2_MAX_BLOCKSIZE OCFS2_MIN_CLUSTERSIZE -/* Filesystem magic number */ -#define OCFS2_SUPER_MAGIC 0x7461636f - /* Object signatures */ #define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2" #define OCFS2_INODE_SIGNATURE "INODE01" -- cgit From 25b1c72e15b89bfcdcce11c5f61d729d87afe8c5 Mon Sep 17 00:00:00 2001 From: piaojun Date: Thu, 6 Jul 2017 15:36:13 -0700 Subject: ocfs2: free 'dummy_sc' in sc_fop_release() to prevent memory leak 'sd->dbg_sock' is malloced in sc_common_open(), but not freed at the end of sc_fop_release(). Link: http://lkml.kernel.org/r/594FB0A4.2050105@huawei.com Signed-off-by: Jun Piao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/netdebug.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 564c504d6efd..74a21f6695c8 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -426,6 +426,7 @@ static int sc_fop_release(struct inode *inode, struct file *file) struct o2net_sock_container *dummy_sc = sd->dbg_sock; o2net_debug_del_sc(dummy_sc); + kfree(dummy_sc); return seq_release_private(inode, file); } -- cgit From b74271e40e5de75c4316a68ebbb43150b509e708 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 6 Jul 2017 15:36:16 -0700 Subject: ocfs2: constify attribute_group structures attribute_groups are not supposed to change at runtime. All functions working with attribute_groups provided by work with const attribute_group. So mark the non-const structs as const. File size before: text data bss dec hex filename 4402 1088 38 5528 1598 fs/ocfs2/stackglue.o File size After adding 'const': text data bss dec hex filename 4442 1024 38 5504 1580 fs/ocfs2/stackglue.o Link: http://lkml.kernel.org/r/cab4e59b4918db3ed2ec77073a4cb310c4429ef5.1498808026.git.arvind.yadav.cs@gmail.com Signed-off-by: Arvind Yadav Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/stackglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 820359096c7a..d6c350ba25b9 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -631,7 +631,7 @@ static struct attribute *ocfs2_attrs[] = { NULL, }; -static struct attribute_group ocfs2_attr_group = { +static const struct attribute_group ocfs2_attr_group = { .attrs = ocfs2_attrs, }; -- cgit From c823bd9244337aef3699c546eb521c71fd60012e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:36:19 -0700 Subject: fs/file.c: replace alloc_fdmem() with kvmalloc() alternative There is no real reason to duplicate kvmalloc* helpers so drop alloc_fdmem and replace it with the appropriate library function. Link: http://lkml.kernel.org/r/20170531155145.17111-2-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 1c2972e3a405..1fc7fbbb4510 100644 --- a/fs/file.c +++ b/fs/file.c @@ -30,21 +30,6 @@ unsigned int sysctl_nr_open_min = BITS_PER_LONG; unsigned int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; -static void *alloc_fdmem(size_t size) -{ - /* - * Very large allocations can stress page reclaim, so fall back to - * vmalloc() if the allocation size will be considered "large" by the VM. - */ - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - void *data = kmalloc(size, GFP_KERNEL_ACCOUNT | - __GFP_NOWARN | __GFP_NORETRY); - if (data != NULL) - return data; - } - return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); -} - static void __free_fdtable(struct fdtable *fdt) { kvfree(fdt->fd); @@ -131,13 +116,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr) if (!fdt) goto out; fdt->max_fds = nr; - data = alloc_fdmem(nr * sizeof(struct file *)); + data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); if (!data) goto out_fdt; fdt->fd = data; - data = alloc_fdmem(max_t(size_t, - 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES)); + data = kvmalloc(max_t(size_t, + 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), + GFP_KERNEL_ACCOUNT); if (!data) goto out_arr; fdt->open_fds = data; -- cgit From f93ae36462091eca8c8d70ced64b97056a8ecbd6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 6 Jul 2017 15:38:35 -0700 Subject: fs/userfaultfd.c: drop dead code Calculation of start end end in __wake_userfault function are not used and can be removed. Link: http://lkml.kernel.org/r/1494930917-3134-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 6148ccd6cccf..3d0dd082337a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1114,11 +1114,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - unsigned long start, end; - - start = range->start; - end = range->start + range->len; - spin_lock(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) -- cgit From 3d375d78593cd5daeead34ed3279c4ff63dd04f2 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 6 Jul 2017 15:39:11 -0700 Subject: mm: update callers to use HASH_ZERO flag Update dcache, inode, pid, mountpoint, and mount hash tables to use HASH_ZERO, and remove initialization after allocations. In case of places where HASH_EARLY was used such as in __pv_init_lock_hash the zeroed hash table was already assumed, because memblock zeroes the memory. CPU: SPARC M6, Memory: 7T Before fix: Dentry cache hash table entries: 1073741824 Inode-cache hash table entries: 536870912 Mount-cache hash table entries: 16777216 Mountpoint-cache hash table entries: 16777216 ftrace: allocating 20414 entries in 40 pages Total time: 11.798s After fix: Dentry cache hash table entries: 1073741824 Inode-cache hash table entries: 536870912 Mount-cache hash table entries: 16777216 Mountpoint-cache hash table entries: 16777216 ftrace: allocating 20414 entries in 40 pages Total time: 3.198s CPU: Intel Xeon E5-2630, Memory: 2.2T: Before fix: Dentry cache hash table entries: 536870912 Inode-cache hash table entries: 268435456 Mount-cache hash table entries: 8388608 Mountpoint-cache hash table entries: 8388608 CPU: Physical Processor ID: 0 Total time: 3.245s After fix: Dentry cache hash table entries: 536870912 Inode-cache hash table entries: 268435456 Mount-cache hash table entries: 8388608 Mountpoint-cache hash table entries: 8388608 CPU: Physical Processor ID: 0 Total time: 3.244s Link: http://lkml.kernel.org/r/1488432825-92126-4-git-send-email-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Babu Moger Cc: David Miller Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 18 ++++-------------- fs/inode.c | 14 ++------------ fs/namespace.c | 10 ++-------- 3 files changed, 8 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index a9f995f6859e..a140fe1dbb1a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3546,8 +3546,6 @@ __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { - unsigned int loop; - /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ @@ -3559,24 +3557,19 @@ static void __init dcache_init_early(void) sizeof(struct hlist_bl_head), dhash_entries, 13, - HASH_EARLY, + HASH_EARLY | HASH_ZERO, &d_hash_shift, &d_hash_mask, 0, 0); - - for (loop = 0; loop < (1U << d_hash_shift); loop++) - INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } static void __init dcache_init(void) { - unsigned int loop; - - /* + /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature - * of the dcache. + * of the dcache. */ dentry_cache = KMEM_CACHE(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT); @@ -3590,14 +3583,11 @@ static void __init dcache_init(void) sizeof(struct hlist_bl_head), dhash_entries, 13, - 0, + HASH_ZERO, &d_hash_shift, &d_hash_mask, 0, 0); - - for (loop = 0; loop < (1U << d_hash_shift); loop++) - INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } /* SLAB cache for __getname() consumers */ diff --git a/fs/inode.c b/fs/inode.c index ab3b9a795c0b..5cbc8e6e9390 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1915,8 +1915,6 @@ __setup("ihash_entries=", set_ihash_entries); */ void __init inode_init_early(void) { - unsigned int loop; - /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ @@ -1928,20 +1926,15 @@ void __init inode_init_early(void) sizeof(struct hlist_head), ihash_entries, 14, - HASH_EARLY, + HASH_EARLY | HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); - - for (loop = 0; loop < (1U << i_hash_shift); loop++) - INIT_HLIST_HEAD(&inode_hashtable[loop]); } void __init inode_init(void) { - unsigned int loop; - /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), @@ -1959,14 +1952,11 @@ void __init inode_init(void) sizeof(struct hlist_head), ihash_entries, 14, - 0, + HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); - - for (loop = 0; loop < (1U << i_hash_shift); loop++) - INIT_HLIST_HEAD(&inode_hashtable[loop]); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) diff --git a/fs/namespace.c b/fs/namespace.c index f70914a859a4..81f934b5d571 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3239,7 +3239,6 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { - unsigned u; int err; mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), @@ -3248,22 +3247,17 @@ void __init mnt_init(void) mount_hashtable = alloc_large_system_hash("Mount-cache", sizeof(struct hlist_head), mhash_entries, 19, - 0, + HASH_ZERO, &m_hash_shift, &m_hash_mask, 0, 0); mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", sizeof(struct hlist_head), mphash_entries, 19, - 0, + HASH_ZERO, &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - for (u = 0; u <= m_hash_mask; u++) - INIT_HLIST_HEAD(&mount_hashtable[u]); - for (u = 0; u <= mp_hash_mask; u++) - INIT_HLIST_HEAD(&mountpoint_hashtable[u]); - kernfs_init(); err = sysfs_init(); -- cgit From 7868a2087ec13ec4a5df0c5e00999863be132ba8 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 6 Jul 2017 15:39:42 -0700 Subject: mm/hugetlb: add size parameter to huge_pte_offset() A poisoned or migrated hugepage is stored as a swap entry in the page tables. On architectures that support hugepages consisting of contiguous page table entries (such as on arm64) this leads to ambiguity in determining the page table entry to return in huge_pte_offset() when a poisoned entry is encountered. Let's remove the ambiguity by adding a size parameter to convey additional information about the requested address. Also fixup the definition/usage of huge_pte_offset() throughout the tree. Link: http://lkml.kernel.org/r/20170522133604.11392-4-punit.agrawal@arm.com Signed-off-by: Punit Agrawal Acked-by: Steve Capper Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: James Hogan (odd fixer:METAG ARCHITECTURE) Cc: Ralf Baechle (supporter:MIPS) Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alexander Viro Cc: Michal Hocko Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Hillf Danton Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3d0dd082337a..cadcd12a3d35 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address, * hugepmd ranges. */ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + struct vm_area_struct *vma, unsigned long address, unsigned long flags, unsigned long reason) @@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - pte = huge_pte_offset(mm, address); + pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); if (!pte) goto out; @@ -243,6 +244,7 @@ out: } #else static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, + struct vm_area_struct *vma, unsigned long address, unsigned long flags, unsigned long reason) @@ -448,7 +450,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, reason); else - must_wait = userfaultfd_huge_must_wait(ctx, vmf->address, + must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma, + vmf->address, vmf->flags, reason); up_read(&mm->mmap_sem); -- cgit From 2262185c5b287f2758afda79c149b7cf6bee165c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 6 Jul 2017 15:40:25 -0700 Subject: mm: per-cgroup memory reclaim stats Track the following reclaim counters for every memory cgroup: PGREFILL, PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED. These values are exposed using the memory.stats interface of cgroup v2. The meaning of each value is the same as for global counters, available using /proc/vmstat. Also, for consistency, rename mem_cgroup_count_vm_event() to count_memcg_event_mm(). Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com Signed-off-by: Roman Gushchin Suggested-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 2 +- fs/ncpfs/mmap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 33d05aa02aad..cd8fced592d0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1213,7 +1213,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 0c3905e0542e..6719c0be674d 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf) * -- nyc */ count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); + count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); return VM_FAULT_MAJOR; } -- cgit