From 523334ba508a8baaf5fc9f15fbad9ed04f334f48 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Thu, 24 Mar 2011 08:48:39 +0800 Subject: ext3: Fix writepage credits computation for ordered mode Original computation forgets to count writes of indirect block themselves (it only counts with blocks necessary for their allocation) in ordered mode. Acked-by: Amir Goldstein Signed-off-by:Yongqiang Yang Signed-off-by: Jan Kara --- fs/ext3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ae94f6d949f5..7f5db46308a5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3294,7 +3294,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else - ret = 2 * (bpp + indirects) + 2; + ret = 2 * (bpp + indirects) + indirects + 2; #ifdef CONFIG_QUOTA /* We know that structure was already allocated during dquot_initialize so -- cgit From 954032d2527f2fce7355ba70709b5e143d6b686f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Mar 2011 22:51:14 -0400 Subject: nfsd: fix auth_domain reference leak on nlm operations This was noticed by users who performed more than 2^32 lock operations and hence made this counter overflow (eventually leading to use-after-free's). Setting rq_client to NULL here means that it won't later get auth_domain_put() when it should be. Appears to have been introduced in 2.5.42 by "[PATCH] kNFSd: Move auth domain lookup into svcauth" which moved most of the rq_client handling to common svcauth code, but left behind this one line. Cc: Neil Brown Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/lockd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 0c6d81670137..7c831a2731fa 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) exp_readlock(); nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); - rqstp->rq_client = NULL; exp_readunlock(); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. -- cgit From ef550f6f4f6c9345a27ec85d98f4f7de1adce79c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 25 Mar 2011 13:27:48 -0700 Subject: ceph: flush msgr_wq during mds_client shutdown The release method for mds connections uses a backpointer to the mds_client, so we need to flush the workqueue of any pending work (and ceph_connection references) prior to freeing the mds_client. This fixes an oops easily triggered under UML by while true ; do mount ... ; umount ... ; done Also fix an outdated comment: the flush in ceph_destroy_client only flushes OSD connections out. This bug is basically an artifact of the ceph -> ceph+libceph conversion. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a1ee8fa3a8e7..f60b07b0feb0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3215,9 +3215,15 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) { struct ceph_mds_client *mdsc = fsc->mdsc; + dout("mdsc_destroy %p\n", mdsc); ceph_mdsc_stop(mdsc); + + /* flush out any connection work with references to us */ + ceph_msgr_flush(); + fsc->mdsc = NULL; kfree(mdsc); + dout("mdsc_destroy %p done\n", mdsc); } -- cgit From 8323c3aa74cd92465350294567142d12ffdcc963 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Fri, 25 Mar 2011 16:32:57 -0700 Subject: ceph: Move secret key parsing earlier. This makes the base64 logic be contained in mount option parsing, and prepares us for replacing the homebew key management with the kernel key retention service. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a9e78b4a258c..f2f77fd3c14c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -353,7 +353,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) if (opt->name) seq_printf(m, ",name=%s", opt->name); - if (opt->secret) + if (opt->key) seq_puts(m, ",secret="); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) -- cgit From 34094537943113467faee98fe67c8a3d3f9a0a8b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 27 Mar 2011 22:50:49 +0900 Subject: nilfs2: fix data loss in mmap page write for hole blocks From the result of a function test of mmap, mmap write to shared pages turned out to be broken for hole blocks. It doesn't write out filled blocks and the data will be lost after umount. This is due to a bug that the target file is not queued for log writer when filling hole blocks. Also, nilfs_page_mkwrite function exits normal code path even after successfully filled hole blocks due to a change of block_page_mkwrite function; just after nilfs was merged into the mainline, block_page_mkwrite() started to return VM_FAULT_LOCKED instead of zero by the patch "mm: close page_mkwrite races" (commit: b827e496c893de0c). The current nilfs_page_mkwrite() is not handling this value properly. This corrects nilfs_page_mkwrite() and will resolve the data loss problem in mmap write. [This should be applied to every kernel since 2.6.30 but a fix is needed for 2.6.37 and prior kernels] Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: stable [2.6.38] --- fs/nilfs2/file.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 93589fccdd97..397e73258631 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -72,10 +72,9 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) { - unlock_page(page); + if (PageMappedToDisk(page)) goto mapped; - } + if (page_has_buffers(page)) { struct buffer_head *bh, *head; int fully_mapped = 1; @@ -90,7 +89,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (fully_mapped) { SetPageMappedToDisk(page); - unlock_page(page); goto mapped; } } @@ -105,16 +103,17 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; ret = block_page_mkwrite(vma, vmf, nilfs_get_block); - if (unlikely(ret)) { + if (ret != VM_FAULT_LOCKED) { nilfs_transaction_abort(inode->i_sb); return ret; } + nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); nilfs_transaction_commit(inode->i_sb); mapped: SetPageChecked(page); wait_on_page_writeback(page); - return 0; + return VM_FAULT_LOCKED; } static const struct vm_operations_struct nilfs_file_vm_ops = { -- cgit From d611b22f1a5ddd0823e9d6a30bac91219f800e41 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 30 Mar 2011 11:49:20 +0900 Subject: nilfs2: fix oops due to a bad aops initialization Nilfs in 2.6.39-rc1 hit the following oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] try_to_release_page+0x2a/0x3d PGD 234cb6067 PUD 234c72067 PMD 0 Oops: 0000 [#1] SMP Process truncate (pid: 10995, threadinfo ffff8802353c2000, task ffff880234cfa000) Stack: ffff8802333c77b8 ffffffff810b64b0 0000000000003802 ffffffffa0052cca 0000000000000000 ffff8802353c3b58 0000000000000000 ffff8802353c3b58 0000000000000001 0000000000000000 ffffea0007b92308 ffffea0007b92308 Call Trace: [] ? invalidate_inode_pages2_range+0x15f/0x273 [] ? nilfs_palloc_get_block+0x2d/0xaf [nilfs2] [] ? bit_waitqueue+0x14/0xa1 [] ? wake_up_bit+0x10/0x20 [] ? nilfs_forget_buffer+0x66/0x7a [nilfs2] [] ? nilfs_btree_concat_left+0x5c/0x77 [nilfs2] [] ? nilfs_btree_delete+0x395/0x3cf [nilfs2] [] ? nilfs_bmap_do_delete+0x6e/0x79 [nilfs2] [] ? nilfs_btree_last_key+0x14b/0x15e [nilfs2] [] ? nilfs_bmap_truncate+0x2f/0x83 [nilfs2] [] ? nilfs_bmap_last_key+0x35/0x62 [nilfs2] [] ? nilfs_truncate_bmap+0x6b/0xc7 [nilfs2] [] ? nilfs_truncate+0x79/0xe4 [nilfs2] [] ? vmtruncate+0x33/0x3b [] ? nilfs_setattr+0x4d/0x8c [nilfs2] [] ? do_page_fault+0x31b/0x356 [] ? notify_change+0x17d/0x262 [] ? do_truncate+0x65/0x80 [] ? sys_ftruncate+0xf1/0xf6 [] ? system_call_fastpath+0x16/0x1b Code: c3 48 83 ec 08 48 8b 17 48 8b 47 18 80 e2 01 75 04 0f 0b eb fe 48 8b 17 80 e6 20 74 05 31 c0 41 59 c3 48 85 c0 74 11 48 8b 40 58 8b 40 48 48 85 c0 74 04 41 58 ff e0 59 e9 b1 b5 05 00 41 54 RIP [] try_to_release_page+0x2a/0x3d RSP CR2: 0000000000000048 This oops was brought in by the change "block: remove per-queue plugging" (commit: 7eaceaccab5f40bb). It initializes mapping->a_ops with a NULL pointer for some pages in nilfs (e.g. btree node pages), but mm code doesn't NULL pointer checks against mapping->a_ops. (the check is done for each callback function) This corrects the aops initialization and fixes the oops. Signed-off-by: Ryusuke Konishi Acked-by: Jens Axboe --- fs/nilfs2/page.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 4d2a1ee0eb47..9d2dc6b4348e 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, void nilfs_mapping_init(struct address_space *mapping, struct backing_dev_info *bdi) { + static const struct address_space_operations empty_aops; + mapping->host = NULL; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->assoc_mapping = NULL; mapping->backing_dev_info = bdi; - mapping->a_ops = NULL; + mapping->a_ops = &empty_aops; } /* -- cgit From af71dda0b8e667d03eec21d25f90265eec5efea1 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Sat, 19 Mar 2011 16:47:54 +0100 Subject: nilfs2: fix whitespace coding style issues Fixes whitespace coding style issues. Signed-off-by: Nicolas Kaiser Signed-off-by: Ryusuke Konishi --- fs/nilfs2/nilfs.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 856e8e4e0b74..a8dd344303cb 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -114,19 +114,19 @@ enum { * Macros to check inode numbers */ #define NILFS_MDT_INO_BITS \ - ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ - 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ - 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) + ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ + 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ + 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) #define NILFS_SYS_INO_BITS \ - ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) + ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) + ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) + ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) /** * struct nilfs_transaction_info: context information for synchronization @@ -285,7 +285,7 @@ extern void nilfs_destroy_inode(struct inode *); extern void nilfs_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void nilfs_warning(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); + __attribute__ ((format (printf, 3, 4))); extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, -- cgit From 89b3600ccfb01aed6873bc499442fc0bed00bbdd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 29 Mar 2011 07:09:20 +0000 Subject: xfs: fix unreferenced var error in xfs_buf.c Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 596bb2c9de42..d917146c043b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -657,8 +657,6 @@ xfs_buf_readahead( xfs_off_t ioff, size_t isize) { - struct backing_dev_info *bdi; - if (bdi_read_congested(target->bt_bdi)) return; -- cgit From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- fs/adfs/map.c | 2 +- fs/afs/cache.c | 12 ++++++------ fs/afs/cell.c | 2 +- fs/attr.c | 2 +- fs/autofs4/root.c | 2 +- fs/befs/ChangeLog | 10 +++++----- fs/befs/befs_fs_types.h | 2 +- fs/befs/btree.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/binfmt_flat.c | 2 +- fs/bio.c | 2 +- fs/block_dev.c | 2 +- fs/btrfs/extent_map.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/relocation.c | 2 +- fs/cachefiles/interface.c | 2 +- fs/ceph/addr.c | 2 +- fs/ceph/caps.c | 2 +- fs/ceph/snap.c | 2 +- fs/cifs/AUTHORS | 2 +- fs/cifs/cifs_dfs_ref.c | 2 +- fs/cifs/cifssmb.c | 2 +- fs/cifs/connect.c | 4 ++-- fs/cifs/dir.c | 2 +- fs/configfs/dir.c | 2 +- fs/dlm/lock.c | 2 +- fs/dlm/lowcomms.c | 2 +- fs/dlm/recover.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/eventpoll.c | 8 ++++---- fs/exofs/common.h | 4 ++-- fs/ext2/balloc.c | 6 +++--- fs/ext2/inode.c | 8 ++++---- fs/ext2/super.c | 2 +- fs/ext2/xattr.c | 2 +- fs/ext3/balloc.c | 10 +++++----- fs/ext3/inode.c | 6 +++--- fs/ext3/resize.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/balloc.c | 2 +- fs/ext4/extents.c | 10 +++++----- fs/ext4/fsync.c | 2 +- fs/ext4/inode.c | 20 ++++++++++---------- fs/ext4/mballoc.c | 2 +- fs/ext4/migrate.c | 2 +- fs/ext4/super.c | 4 ++-- fs/freevxfs/vxfs_fshead.c | 2 +- fs/freevxfs/vxfs_lookup.c | 2 +- fs/freevxfs/vxfs_olt.h | 2 +- fs/fs-writeback.c | 2 +- fs/fuse/file.c | 2 +- fs/gfs2/bmap.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/super.c | 2 +- fs/jbd/commit.c | 2 +- fs/jbd/journal.c | 4 ++-- fs/jbd/revoke.c | 2 +- fs/jbd/transaction.c | 2 +- fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 4 ++-- fs/jbd2/revoke.c | 2 +- fs/jbd2/transaction.c | 2 +- fs/jffs2/TODO | 2 +- fs/jffs2/readinode.c | 2 +- fs/jffs2/summary.c | 4 ++-- fs/jffs2/wbuf.c | 2 +- fs/jfs/jfs_dmap.c | 4 ++-- fs/jfs/jfs_extent.c | 6 +++--- fs/jfs/jfs_imap.c | 14 +++++++------- fs/jfs/jfs_logmgr.h | 2 +- fs/jfs/jfs_metapage.h | 2 +- fs/jfs/jfs_txnmgr.c | 2 +- fs/jfs/resize.c | 4 ++-- fs/jfs/super.c | 2 +- fs/logfs/dev_mtd.c | 2 +- fs/logfs/dir.c | 2 +- fs/logfs/readwrite.c | 2 +- fs/mbcache.c | 2 +- fs/namei.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/nfs/callback_xdr.c | 2 +- fs/nfs/file.c | 2 +- fs/nfs/nfs4filelayout.h | 2 +- fs/nfs_common/nfsacl.c | 2 +- fs/nfsd/nfs3xdr.c | 2 +- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/nfsxdr.c | 2 +- fs/notify/fanotify/fanotify_user.c | 2 +- fs/notify/inotify/inotify_fsnotify.c | 2 +- fs/notify/mark.c | 2 +- fs/ntfs/attrib.c | 4 ++-- fs/ntfs/compress.c | 2 +- fs/ntfs/inode.c | 4 ++-- fs/ntfs/layout.h | 12 ++++++------ fs/ntfs/logfile.c | 2 +- fs/ntfs/logfile.h | 2 +- fs/ntfs/mft.c | 8 ++++---- fs/ntfs/runlist.c | 2 +- fs/ntfs/super.c | 14 +++++++------- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/aops.h | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/cluster/quorum.c | 4 ++-- fs/ocfs2/cluster/tcp.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 4 ++-- fs/ocfs2/inode.c | 4 ++-- fs/ocfs2/journal.c | 2 +- fs/ocfs2/journal.h | 2 +- fs/ocfs2/namei.c | 2 +- fs/ocfs2/ocfs2_fs.h | 4 ++-- fs/ocfs2/quota_global.c | 2 +- fs/ocfs2/reservations.h | 2 +- fs/ocfs2/stackglue.h | 2 +- fs/ocfs2/suballoc.c | 4 ++-- fs/ocfs2/super.c | 2 +- fs/ocfs2/xattr.c | 4 ++-- fs/partitions/check.c | 4 ++-- fs/proc/base.c | 2 +- fs/pstore/Kconfig | 2 +- fs/quota/dquot.c | 2 +- fs/reiserfs/journal.c | 4 ++-- fs/reiserfs/lock.c | 2 +- fs/reiserfs/super.c | 4 ++-- fs/reiserfs/xattr.c | 2 +- fs/squashfs/cache.c | 4 ++-- fs/ubifs/budget.c | 2 +- fs/ufs/inode.c | 2 +- fs/ufs/super.c | 6 +++--- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- fs/xfs/linux-2.6/xfs_file.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/quota/xfs_dquot.c | 2 +- fs/xfs/quota/xfs_qm_bhv.c | 2 +- fs/xfs/quota/xfs_qm_syscalls.c | 4 ++-- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode.h | 4 ++-- fs/xfs/xfs_log_priv.h | 2 +- fs/xfs/xfs_log_recover.c | 4 ++-- fs/xfs/xfs_trans_inode.c | 2 +- fs/xfs/xfs_vnodeops.c | 4 ++-- 143 files changed, 230 insertions(+), 230 deletions(-) (limited to 'fs') diff --git a/fs/adfs/map.c b/fs/adfs/map.c index d1a5932bb0f1..6935f05202ac 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -51,7 +51,7 @@ static DEFINE_RWLOCK(adfs_map_lock); /* * This is fun. We need to load up to 19 bits from the map at an - * arbitary bit alignment. (We're limited to 19 bits by F+ version 2). + * arbitrary bit alignment. (We're limited to 19 bits by F+ version 2). */ #define GET_FRAG_ID(_map,_start,_idmask) \ ({ \ diff --git a/fs/afs/cache.c b/fs/afs/cache.c index 0fb315dd4d2a..577763c3d88b 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -98,7 +98,7 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, } /* - * provide new auxilliary cache data + * provide new auxiliary cache data */ static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) @@ -117,7 +117,7 @@ static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, } /* - * check that the auxilliary data indicates that the entry is still valid + * check that the auxiliary data indicates that the entry is still valid */ static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, const void *buffer, @@ -150,7 +150,7 @@ static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, } /* - * provide new auxilliary cache data + * provide new auxiliary cache data */ static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) @@ -172,7 +172,7 @@ static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, } /* - * check that the auxilliary data indicates that the entry is still valid + * check that the auxiliary data indicates that the entry is still valid */ static enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, @@ -283,7 +283,7 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, } /* - * provide new auxilliary cache data + * provide new auxiliary cache data */ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) @@ -309,7 +309,7 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, } /* - * check that the auxilliary data indicates that the entry is still valid + * check that the auxiliary data indicates that the entry is still valid */ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, const void *buffer, diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 0d5eeadf6121..3c090b7555ea 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -293,7 +293,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, if (!cell) { /* this should not happen unless user tries to mount * when root cell is not set. Return an impossibly - * bizzare errno to alert the user. Things like + * bizarre errno to alert the user. Things like * ENOENT might be "more appropriate" but they happen * for other reasons. */ diff --git a/fs/attr.c b/fs/attr.c index 1007ed616314..91dbe2a107f2 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -128,7 +128,7 @@ EXPORT_SYMBOL(inode_newsize_ok); * setattr_copy must be called with i_mutex held. * * setattr_copy updates the inode's metadata with that specified - * in attr. Noticably missing is inode size update, which is more complex + * in attr. Noticeably missing is inode size update, which is more complex * as it requires pagecache updates. * * The inode is not marked as dirty after this operation. The rationale is diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 96804a17bbd0..f55ae23b137e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -612,7 +612,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves * of the directory tree. There is no need to clear the automount flag * following a mount or restore it after an expire because these mounts - * are always covered. However, it is neccessary to ensure that these + * are always covered. However, it is necessary to ensure that these * flags are clear on non-empty directories to avoid unnecessary calls * during path walks. */ diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index ce8c787916be..75a461cfaca6 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -24,7 +24,7 @@ Version 0.9 (2002-03-14) Version 0.64 (2002-02-07) ========== -* Did the string comparision really right this time (btree.c) [WD] +* Did the string comparison really right this time (btree.c) [WD] * Fixed up some places where I assumed that a long int could hold a pointer value. (btree.c) [WD] @@ -114,7 +114,7 @@ Version 0.6 (2001-12-15) More flexible. Will soon be controllable at mount time (see TODO). [WD] -* Rewrote datastream positon lookups. +* Rewrote datastream position lookups. (datastream.c) [WD] * Moved the TODO list to its own file. @@ -150,7 +150,7 @@ Version 0.50 (2001-11-13) * Anton also told me that the blocksize is not allowed to be larger than the page size in linux, which is 4k i386. Oops. Added a test for (blocksize > PAGE_SIZE), and refuse to mount in that case. What this - practicaly means is that 8k blocksize volumes won't work without a major + practically means is that 8k blocksize volumes won't work without a major restructuring of the driver (or an alpha or other 64bit hardware). [WD] * Cleaned up the befs_count_blocks() function. Much smarter now. @@ -183,7 +183,7 @@ Version 0.45 (2001-10-29) structures into the generic pointer fields of the public structures with kmalloc(). put_super and put_inode free them. This allows us not to have to touch the definitions of the public structures in - include/linux/fs.h. Also, befs_inode_info is huge (becuase of the + include/linux/fs.h. Also, befs_inode_info is huge (because of the symlink string). (super.c, inode.c, befs_fs.h) [WD] * Fixed a thinko that was corrupting file reads after the first block_run @@ -404,7 +404,7 @@ Version 0.4 (2001-10-28) * Fixed compile errors on 2.4.1 kernel (WD) Resolve rejected patches - Accomodate changed NLS interface (util.h) + Accommodate changed NLS interface (util.h) Needed to include in most files Makefile changes fs/Config.in changes diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index 7893eaa1e58c..eb557d9dc8be 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h @@ -234,7 +234,7 @@ typedef struct { } PACKED befs_btree_super; /* - * Header stucture of each btree node + * Header structure of each btree node */ typedef struct { fs64 left; diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 4202db7496cb..a66c9b1136e0 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -5,7 +5,7 @@ * * Licensed under the GNU GPL. See the file COPYING for details. * - * 2002-02-05: Sergey S. Kostyliov added binary search withing + * 2002-02-05: Sergey S. Kostyliov added binary search within * btree nodes. * * Many thanks to: diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 06457ed8f3e7..54b8c28bebc8 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -734,7 +734,7 @@ parse_options(char *options, befs_mount_options * opts) /* This function has the responsibiltiy of getting the * filesystem ready for unmounting. - * Basicly, we free everything that we allocated in + * Basically, we free everything that we allocated in * befs_read_inode */ static void diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 811384bec8de..397d3057d336 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -717,7 +717,7 @@ static int load_flat_file(struct linux_binprm * bprm, * help simplify all this mumbo jumbo * * We've got two different sections of relocation entries. - * The first is the GOT which resides at the begining of the data segment + * The first is the GOT which resides at the beginning of the data segment * and is terminated with a -1. This one can be relocated in place. * The second is the extra relocation entries tacked after the image's * data segment. These require a little more processing as the entry is diff --git a/fs/bio.c b/fs/bio.c index 4d6d4b6c2bf1..840a0d755248 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1436,7 +1436,7 @@ EXPORT_SYMBOL(bio_flush_dcache_pages); * preferred way to end I/O on a bio, it takes care of clearing * BIO_UPTODATE on error. @error is 0 on success, and and one of the * established -Exxxx (-EIO, for instance) error values in case - * something went wrong. Noone should call bi_end_io() directly on a + * something went wrong. No one should call bi_end_io() directly on a * bio unless they own it and thus know that it has an end_io * function. **/ diff --git a/fs/block_dev.c b/fs/block_dev.c index c1511c674f53..5147bdd3b8e1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -653,7 +653,7 @@ void bd_forget(struct inode *inode) * @whole: whole block device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * - * Test whther @bdev can be claimed by @holder. + * Test whether @bdev can be claimed by @holder. * * CONTEXT: * spin_lock(&bdev_lock). diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2b6c12e983b3..a24a3f2fa13e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -243,7 +243,7 @@ out: * Insert @em into @tree or perform a simple forward/backward merge with * existing mappings. The extent_map struct passed in will be inserted * into the tree directly, with an additional reference taken, or a - * reference dropped if the merge attempt was successfull. + * reference dropped if the merge attempt was successful. */ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 93c28a1d6bdc..80920bce01ab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2324,7 +2324,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) /* * if ret == 0 means we found what we were searching for, which - * is weird, but possible, so only screw with path if we didnt + * is weird, but possible, so only screw with path if we didn't * find the key and see if we have stuff that matches */ if (ret > 0) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..199a80134312 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2346,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, root = next->root; BUG_ON(!root); - /* no other choice for non-refernce counted tree */ + /* no other choice for non-references counted tree */ if (!root->ref_cows) return root; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 37fe101a4e0d..1064805e653b 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -197,7 +197,7 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) } /* - * update the auxilliary data for an object object on disk + * update the auxiliary data for an object object on disk */ static void cachefiles_update_object(struct fscache_object *_object) { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 37368ba2e67c..e159c529fd2b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -24,7 +24,7 @@ * context needs to be associated with the osd write during writeback. * * Similarly, struct ceph_inode_info maintains a set of counters to - * count dirty pages on the inode. In the absense of snapshots, + * count dirty pages on the inode. In the absence of snapshots, * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count. * * When a snapshot is taken (that is, when the client receives diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6b61ded701e1..5323c330bbf3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -765,7 +765,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) if (touch) { struct rb_node *q; - /* touch this + preceeding caps */ + /* touch this + preceding caps */ __touch_cap(cap); for (q = rb_first(&ci->i_caps); q != p; q = rb_next(q)) { diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 0aee66b92af3..e86ec1155f8f 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -342,7 +342,7 @@ static int build_snap_context(struct ceph_snap_realm *realm) num = 0; snapc->seq = realm->seq; if (parent) { - /* include any of parent's snaps occuring _after_ my + /* include any of parent's snaps occurring _after_ my parent became my parent */ for (i = 0; i < parent->cached_context->num_snaps; i++) if (parent->cached_context->snaps[i] >= diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS index 7f7fa3c302af..ea940b1db77b 100644 --- a/fs/cifs/AUTHORS +++ b/fs/cifs/AUTHORS @@ -35,7 +35,7 @@ Adrian Bunk (kcalloc cleanups) Miklos Szeredi Kazeon team for various fixes especially for 2.4 version. Asser Ferno (Change Notify support) -Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup +Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup Gunter Kukkukk (testing and suggestions for support of old servers) Igor Mammedov (DFS support) Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 0a265ad9e426..2b68ac57d97d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -53,7 +53,7 @@ void cifs_dfs_release_automount_timer(void) * * Extracts sharename form full UNC. * i.e. strips from UNC trailing path that is not part of share - * name and fixup missing '\' in the begining of DFS node refferal + * name and fixup missing '\' in the beginning of DFS node refferal * if necessary. * Returns pointer to share name on success or ERR_PTR on error. * Caller is responsible for freeing returned string. diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 904aa47e3515..2644a5d6cc67 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -5247,7 +5247,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, * Samba server ignores set of file size to zero due to bugs in some * older clients, but we should be precise - we use SetFileSize to * set file size and do not want to truncate file size to zero - * accidently as happened on one Samba server beta by putting + * accidentally as happened on one Samba server beta by putting * zero instead of -1 here */ data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d6c17ab593d..6e2b2addfc78 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1572,7 +1572,7 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol) return false; } - /* now check if signing mode is acceptible */ + /* now check if signing mode is acceptable */ if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && (server->secMode & SECMODE_SIGN_REQUIRED)) return false; @@ -2933,7 +2933,7 @@ mount_fail_check: if (mount_data != mount_data_global) kfree(mount_data); /* If find_unc succeeded then rc == 0 so we can not end */ - /* up accidently freeing someone elses tcon struct */ + /* up accidentally freeing someone elses tcon struct */ if (tcon) cifs_put_tcon(tcon); else if (pSesInfo) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index dd5f22918c33..9ea65cf36714 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -189,7 +189,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); /* EIO could indicate that (posix open) operation is not supported, despite what server claimed in capability - negotation. EREMOTE indicates DFS junction, which is not + negotiation. EREMOTE indicates DFS junction, which is not handled in posix open */ if (rc == 0) { diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 90ff3cb10de3..3313dd19f543 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -990,7 +990,7 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * This describes these functions and their helpers. * * Allow another kernel system to depend on a config_item. If this - * happens, the item cannot go away until the dependant can live without + * happens, the item cannot go away until the dependent can live without * it. The idea is to give client modules as simple an interface as * possible. When a system asks them to depend on an item, they just * call configfs_depend_item(). If the item is live and the client diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 04b8c449303f..56d6bfcc1e48 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -519,7 +519,7 @@ static void toss_rsb(struct kref *kref) } } -/* When all references to the rsb are gone it's transfered to +/* When all references to the rsb are gone it's transferred to the tossed list for later disposal. */ static void put_rsb(struct dlm_rsb *r) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index bffa1e73b9a9..5e2c71f05e46 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -810,7 +810,7 @@ static int tcp_accept_from_sock(struct connection *con) /* * Add it to the active queue in case we got data - * beween processing the accept adding the socket + * between processing the accept adding the socket * to the read_sockets list */ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index eda43f362616..14638235f7b2 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -304,7 +304,7 @@ static void set_master_lkbs(struct dlm_rsb *r) } /* - * Propogate the new master nodeid to locks + * Propagate the new master nodeid to locks * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which * rsb's to consider. diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index c27c0ecf90bc..fdb2eb0ad09e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -276,7 +276,7 @@ static void ecryptfs_init_mount_crypt_stat( /** * ecryptfs_parse_options * @sb: The ecryptfs super block - * @options: The options pased to the kernel + * @options: The options passed to the kernel * * Parse mount options: * debug=N - ecryptfs_verbosity level for debug output @@ -840,7 +840,7 @@ static int __init ecryptfs_init(void) } rc = ecryptfs_init_messaging(); if (rc) { - printk(KERN_ERR "Failure occured while attempting to " + printk(KERN_ERR "Failure occurred while attempting to " "initialize the communications channel to " "ecryptfsd\n"); goto out_destroy_kthread; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ed38801b57a7..f9cfd168fbe2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -181,7 +181,7 @@ struct eventpoll { /* * This is a single linked list that chains all the "struct epitem" that - * happened while transfering ready events to userspace w/out + * happened while transferring ready events to userspace w/out * holding ->lock. */ struct epitem *ovflist; @@ -606,7 +606,7 @@ static void ep_free(struct eventpoll *ep) * We do not need to hold "ep->mtx" here because the epoll file * is on the way to be removed and no one has references to it * anymore. The only hit might come from eventpoll_release_file() but - * holding "epmutex" is sufficent here. + * holding "epmutex" is sufficient here. */ mutex_lock(&epmutex); @@ -720,7 +720,7 @@ void eventpoll_release_file(struct file *file) /* * We don't want to get "file->f_lock" because it is not * necessary. It is not necessary because we're in the "struct file" - * cleanup path, and this means that noone is using this file anymore. + * cleanup path, and this means that no one is using this file anymore. * So, for example, epoll_ctl() cannot hit here since if we reach this * point, the file counter already went to zero and fget() would fail. * The only hit might come from ep_free() but by holding the mutex @@ -1112,7 +1112,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * Trigger mode, we need to insert back inside * the ready list, so that the next call to * epoll_wait() will check again the events - * availability. At this point, noone can insert + * availability. At this point, no one can insert * into ep->rdllist besides us. The epoll_ctl() * callers are locked out by * ep_scan_ready_list() holding "mtx" and the diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 5e74ad3d4009..3bbd46956d77 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -115,7 +115,7 @@ struct exofs_sb_stats { * Describes the raid used in the FS. It is part of the device table. * This here is taken from the pNFS-objects definition. In exofs we * use one raid policy through-out the filesystem. (NOTE: the funny - * alignment at begining. We take care of it at exofs_device_table. + * alignment at beginning. We take care of it at exofs_device_table. */ struct exofs_dt_data_map { __le32 cb_num_comps; @@ -136,7 +136,7 @@ struct exofs_dt_device_info { u8 systemid[OSD_SYSTEMID_LEN]; __le64 long_name_offset; /* If !0 then offset-in-file */ __le32 osdname_len; /* */ - u8 osdname[44]; /* Embbeded, Ususally an asci uuid */ + u8 osdname[44]; /* Embbeded, Usually an asci uuid */ } __packed; /* diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 0d06f4e75699..8f44cef1b3ef 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -850,7 +850,7 @@ static int find_next_reservable_window( rsv_window_remove(sb, my_rsv); /* - * Let's book the whole avaliable window for now. We will check the + * Let's book the whole available window for now. We will check the * disk bitmap later and then, if there are free blocks then we adjust * the window size if it's larger than requested. * Otherwise, we will remove this node from the tree next time @@ -1357,9 +1357,9 @@ retry_alloc: goto allocated; } /* - * We may end up a bogus ealier ENOSPC error due to + * We may end up a bogus earlier ENOSPC error due to * filesystem is "full" of reservations, but - * there maybe indeed free blocks avaliable on disk + * there maybe indeed free blocks available on disk * In this case, we just forget about the reservations * just do block allocation as without reservations. */ diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c47f706878b5..788e09a07f7e 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -305,7 +305,7 @@ static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind) return ind->bh->b_blocknr; /* - * It is going to be refered from inode itself? OK, just put it into + * It is going to be referred from inode itself? OK, just put it into * the same cylinder group then. */ bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); @@ -913,7 +913,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * * When we do truncate() we may have to clean the ends of several indirect * blocks but leave the blocks themselves alive. Block is partially - * truncated if some data below the new i_size is refered from it (and + * truncated if some data below the new i_size is referred from it (and * it is on the path to the first completely truncated data block, indeed). * We have to free the top of that path along with everything to the right * of the path. Since no allocation past the truncation point is possible @@ -990,7 +990,7 @@ no_top: * @p: array of block numbers * @q: points immediately past the end of array * - * We are freeing all blocks refered from that array (numbers are + * We are freeing all blocks referred from that array (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ @@ -1030,7 +1030,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) * @q: pointer immediately past the end of array * @depth: depth of the branches to free * - * We are freeing all blocks refered from these branches (numbers are + * We are freeing all blocks referred from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7731695e65d9..0a78dae7e2cb 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1382,7 +1382,7 @@ static struct dentry *ext2_mount(struct file_system_type *fs_type, /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index c2e4dce984d2..529970617a21 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -35,7 +35,7 @@ * +------------------+ * * The block header is followed by multiple entry descriptors. These entry - * descriptors are variable in size, and alligned to EXT2_XATTR_PAD + * descriptors are variable in size, and aligned to EXT2_XATTR_PAD * byte boundaries. The entry descriptors are sorted by attribute name, * so that two extended attribute blocks can be compared efficiently. * diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 153242187fce..fe52297e31ad 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -590,7 +590,7 @@ do_more: BUFFER_TRACE(debug_bh, "Deleted!"); if (!bh2jh(bitmap_bh)->b_committed_data) BUFFER_TRACE(debug_bh, - "No commited data in bitmap"); + "No committed data in bitmap"); BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); __brelse(debug_bh); } @@ -1063,7 +1063,7 @@ static int find_next_reservable_window( rsv_window_remove(sb, my_rsv); /* - * Let's book the whole avaliable window for now. We will check the + * Let's book the whole available window for now. We will check the * disk bitmap later and then, if there are free blocks then we adjust * the window size if it's larger than requested. * Otherwise, we will remove this node from the tree next time @@ -1456,7 +1456,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi) * * ext3_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait - * for the current or commiting transaction to complete, and then + * for the current or committing transaction to complete, and then * return TRUE. * * if the total number of retries exceed three times, return FALSE. @@ -1632,9 +1632,9 @@ retry_alloc: goto allocated; } /* - * We may end up a bogus ealier ENOSPC error due to + * We may end up a bogus earlier ENOSPC error due to * filesystem is "full" of reservations, but - * there maybe indeed free blocks avaliable on disk + * there maybe indeed free blocks available on disk * In this case, we just forget about the reservations * just do block allocation as without reservations. */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fe2541d250e4..b5c2f3c97d71 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2055,7 +2055,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * * When we do truncate() we may have to clean the ends of several * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is refered + * partially truncated if some data below the new i_size is referred * from it (and it is on the path to the first completely truncated * data block, indeed). We have to free the top of that path along * with everything to the right of the path. Since no allocation @@ -2184,7 +2184,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, * @first: array of block numbers * @last: points immediately past the end of array * - * We are freeing all blocks refered from that array (numbers are stored as + * We are freeing all blocks referred from that array (numbers are stored as * little-endian 32-bit) and updating @inode->i_blocks appropriately. * * We accumulate contiguous runs of blocks to free. Conveniently, if these @@ -2272,7 +2272,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, * @last: pointer immediately past the end of array * @depth: depth of the branches to free * - * We are freeing all blocks refered from these branches (numbers are + * We are freeing all blocks referred from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 108b142e11ed..7916e4ce166a 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -1009,7 +1009,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK - " upto "E3FSBLK" blocks\n", + " up to "E3FSBLK" blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 071689f86e18..3c6a9e0eadc1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2925,7 +2925,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 97b970e7dd13..1c67139ad4b4 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -547,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, * * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait - * for the current or commiting transaction to complete, and then + * for the current or committing transaction to complete, and then * return TRUE. * * if the total number of retries exceed three times, return FALSE. diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index dd2cb5076ff9..4890d6f3ad15 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1729,7 +1729,7 @@ repeat: BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isnt full(%d)\n", + ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; goto repeat; @@ -2533,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) /* * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized - * extent into multiple extents (upto three - one initialized and two + * extent into multiple extents (up to three - one initialized and two * uninitialized). * There are three possibilities: * a> There is no split required: Entire extent should be initialized @@ -3174,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path, flags); /* * Flag the inode(non aio case) or end_io struct (aio case) - * that this IO needs to convertion to written when IO is + * that this IO needs to conversion to written when IO is * completed */ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { @@ -3460,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(&newex); /* * io_end structure was created for every IO write to an - * uninitialized extent. To avoid unecessary conversion, + * uninitialized extent. To avoid unnecessary conversion, * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state - * that we need to perform convertion when IO is done. + * that we need to perform conversion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 7f74019d6d77..4673bc05274f 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode) * to the work-to-be schedule is freed. * * Thus we need to keep the io structure still valid here after - * convertion finished. The io structure has a flag to + * conversion finished. The io structure has a flag to * avoid double converting from both fsync and background work * queue work. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1a86282b9024..ad8e303c0d29 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2588,7 +2588,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); * because we should have holes filled from ext4_page_mkwrite(). We even don't * need to file the inode to the transaction's list in ordered mode because if * we are writing back data added by write(), the inode is already there and if - * we are writing back data modified via mmap(), noone guarantees in which + * we are writing back data modified via mmap(), no one guarantees in which * transaction the data will hit the disk. In case we are journaling data, we * cannot start transaction directly because transaction start ranks above page * lock so we have to do some magic. @@ -2690,7 +2690,7 @@ static int ext4_writepage(struct page *page, /* * This is called via ext4_da_writepages() to - * calulate the total number of credits to reserve to fit + * calculate the total number of credits to reserve to fit * a single extent allocation into a single transaction, * ext4_da_writpeages() will loop calling this before * the block allocation. @@ -3304,7 +3304,7 @@ int ext4_alloc_da_blocks(struct inode *inode) * the pages by calling redirty_page_for_writepage() but that * would be ugly in the extreme. So instead we would need to * replicate parts of the code in the above functions, - * simplifying them becuase we wouldn't actually intend to + * simplifying them because we wouldn't actually intend to * write out the pages, but rather only collect contiguous * logical block extents, call the multi-block allocator, and * then update the buffer heads with the block allocations. @@ -3694,7 +3694,7 @@ retry: * * The unwrritten extents will be converted to written when DIO is completed. * For async direct IO, since the IO may still pending when return, we - * set up an end_io call back function, which will do the convertion + * set up an end_io call back function, which will do the conversion * when async direct IO completed. * * If the O_DIRECT write will extend the file then add this inode to the @@ -3717,7 +3717,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * We could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as uninitialized - * to prevent paralel buffered read to expose the stale data + * to prevent parallel buffered read to expose the stale data * before DIO complete the data IO. * * As to previously fallocated extents, ext4 get_block @@ -3778,7 +3778,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, int err; /* * for non AIO case, since the IO is already - * completed, we could do the convertion right here + * completed, we could do the conversion right here */ err = ext4_convert_unwritten_extents(inode, offset, ret); @@ -4025,7 +4025,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * * When we do truncate() we may have to clean the ends of several * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is refered + * partially truncated if some data below the new i_size is referred * from it (and it is on the path to the first completely truncated * data block, indeed). We have to free the top of that path along * with everything to the right of the path. Since no allocation @@ -4169,7 +4169,7 @@ out_err: * @first: array of block numbers * @last: points immediately past the end of array * - * We are freeing all blocks refered from that array (numbers are stored as + * We are freeing all blocks referred from that array (numbers are stored as * little-endian 32-bit) and updating @inode->i_blocks appropriately. * * We accumulate contiguous runs of blocks to free. Conveniently, if these @@ -4261,7 +4261,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * @last: pointer immediately past the end of array * @depth: depth of the branches to free * - * We are freeing all blocks refered from these branches (numbers are + * We are freeing all blocks referred from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ @@ -5478,7 +5478,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) } /* - * Calulate the total number of credits to reserve to fit + * Calculate the total number of credits to reserve to fit * the modification of a single pages into a single transaction, * which may include multiple chunks of block allocations. * diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a5837a837a8b..d8a16eecf1d5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -92,7 +92,7 @@ * between CPUs. It is possible to get scheduled at this point. * * The locality group prealloc space is used looking at whether we have - * enough free space (pa_free) withing the prealloc space. + * enough free space (pa_free) within the prealloc space. * * If we can't allocate blocks via inode prealloc or/and locality group * prealloc then we look at the buddy cache. The buddy cache is represented diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d1bafa57f483..92816b4e0f16 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode) * start with one credit accounted for * superblock modification. * - * For the tmp_inode we already have commited the + * For the tmp_inode we already have committed the * trascation that created the inode. Later as and * when we add extents we extent the journal */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 22546ad7f0ae..056474b7b8e0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -617,7 +617,7 @@ __acquires(bitlock) * filesystem will have already been marked read/only and the * journal has been aborted. We return 1 as a hint to callers * who might what to use the return value from - * ext4_grp_locked_error() to distinguish beween the + * ext4_grp_locked_error() to distinguish between the * ERRORS_CONT and ERRORS_RO case, and perhaps return more * aggressively from the ext4 function in question, with a * more appropriate error code. @@ -4624,7 +4624,7 @@ static int ext4_quota_off(struct super_block *sb, int type) /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index 78948b4b1894..c9a6a94e58e9 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -164,7 +164,7 @@ vxfs_read_fshead(struct super_block *sbp) goto out_free_pfp; } if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { - printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n", + printk(KERN_ERR "vxfs: structural list inode is of wrong type (%x)\n", VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); goto out_iput_stilist; } diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 6c5131d592f0..3360f1e678ad 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -162,7 +162,7 @@ vxfs_find_entry(struct inode *ip, struct dentry *dp, struct page **ppp) /** * vxfs_inode_by_name - find inode number for dentry * @dip: directory to search in - * @dp: dentry we seach for + * @dp: dentry we search for * * Description: * vxfs_inode_by_name finds out the inode number of diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h index d8324296486f..b7b3af502615 100644 --- a/fs/freevxfs/vxfs_olt.h +++ b/fs/freevxfs/vxfs_olt.h @@ -60,7 +60,7 @@ enum { * * The Object Location Table header is placed at the beginning of each * OLT extent. It is used to fing certain filesystem-wide metadata, e.g. - * the inital inode list, the fileset header or the device configuration. + * the initial inode list, the fileset header or the device configuration. */ struct vxfs_olt { u_int32_t olt_magic; /* magic number */ diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b5ed541fb137..34591ee804b5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -144,7 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only - * started when this function returns, we make no guarentees on + * started when this function returns, we make no guarantees on * completion. Caller need not hold sb s_umount semaphore. * */ diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6ea00734984e..82a66466a24c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -523,7 +523,7 @@ static int fuse_readpage(struct file *file, struct page *page) goto out; /* - * Page writeback can extend beyond the liftime of the + * Page writeback can extend beyond the lifetime of the * page-cache page, so make sure we read a properly synced * page. */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index ef3dc4b9fae2..74add2ddcc3f 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1136,7 +1136,7 @@ void gfs2_trim_blocks(struct inode *inode) * earlier versions of GFS2 have a bug in the stuffed file reading * code which will result in a buffer overrun if the size is larger * than the max stuffed file size. In order to prevent this from - * occuring, such files are unstuffed, but in other cases we can + * occurring, such files are unstuffed, but in other cases we can * just update the inode size directly. * * Returns: 0 on success, or -ve on error diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e2431313491f..f07643e21bfa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1123,7 +1123,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh) * @number: the lock number * @glops: the glock operations for the type of glock * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition + * @flags: modifier flags for the acquisition * @gh: the struct gfs2_holder * * Returns: errno diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ec73ed70bae1..a4e23d68a398 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -657,7 +657,7 @@ out: * @sdp: the file system * * This function flushes data and meta data for all machines by - * aquiring the transaction log exclusively. All journals are + * acquiring the transaction log exclusively. All journals are * ensured to be in a clean state as well. * * Returns: errno diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index da871ee084d3..69b180459463 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -362,7 +362,7 @@ void journal_commit_transaction(journal_t *journal) * we do not require it to remember exactly which old buffers it * has reserved. This is consistent with the existing behaviour * that multiple journal_get_write_access() calls to the same - * buffer are perfectly permissable. + * buffer are perfectly permissible. */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index eb11601f2e00..b3713afaaa9e 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -770,7 +770,7 @@ journal_t * journal_init_dev(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } @@ -831,7 +831,7 @@ journal_t * journal_init_inode (struct inode *inode) journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index d29018307e2e..305a90763154 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -71,7 +71,7 @@ * switching hash tables under them. For operations on the lists of entries in * the hash table j_revoke_lock is used. * - * Finally, also replay code uses the hash tables but at this moment noone else + * Finally, also replay code uses the hash tables but at this moment no one else * can touch them (filesystem isn't mounted yet) and hence no locking is * needed. */ diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 5b2e4c30a2a1..60d2319651b2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1392,7 +1392,7 @@ int journal_stop(handle_t *handle) * by 30x or more... * * We try and optimize the sleep time against what the underlying disk - * can do, instead of having a static sleep time. This is usefull for + * can do, instead of having a static sleep time. This is useful for * the case where our storage is so fast that it is more optimal to go * ahead and force a flush and wait for the transaction to be committed * than it is to wait for an arbitrary amount of time for new writers to diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index fa36d7662b21..20af62f4304b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -403,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * we do not require it to remember exactly which old buffers it * has reserved. This is consistent with the existing behaviour * that multiple jbd2_journal_get_write_access() calls to the same - * buffer are perfectly permissable. + * buffer are perfectly permissible. */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 90407b8fece7..aba8ebaec25c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -917,7 +917,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } @@ -983,7 +983,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", __func__); goto out_err; } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 9ad321fd63fd..69fd93588118 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -71,7 +71,7 @@ * switching hash tables under them. For operations on the lists of entries in * the hash table j_revoke_lock is used. * - * Finally, also replay code uses the hash tables but at this moment noone else + * Finally, also replay code uses the hash tables but at this moment no one else * can touch them (filesystem isn't mounted yet) and hence no locking is * needed. */ diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 1d1191050f99..05fa77a23711 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1403,7 +1403,7 @@ int jbd2_journal_stop(handle_t *handle) /* * Once we drop t_updates, if it goes to zero the transaction - * could start commiting on us and eventually disappear. So + * could start committing on us and eventually disappear. So * once we do this, we must not dereference transaction * pointer again. */ diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO index 5d3ea4070f01..ca28964abd4b 100644 --- a/fs/jffs2/TODO +++ b/fs/jffs2/TODO @@ -11,7 +11,7 @@ - checkpointing (do we need this? scan is quite fast) - make the scan code populate real inodes so read_inode just after mount doesn't have to read the flash twice for large files. - Make this a per-inode option, changable with chattr, so you can + Make this a per-inode option, changeable with chattr, so you can decide which inodes should be in-core immediately after mount. - test, test, test diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index d32ee9412cb9..2ab1a0d91210 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -24,7 +24,7 @@ * * Returns: 0 if the data CRC is correct; * 1 - if incorrect; - * error code if an error occured. + * error code if an error occurred. */ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn) { diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index 800171dca53b..e537fb0e0184 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -121,7 +121,7 @@ int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri, temp->nodetype = ri->nodetype; temp->inode = ri->ino; temp->version = ri->version; - temp->offset = cpu_to_je32(ofs); /* relative offset from the begining of the jeb */ + temp->offset = cpu_to_je32(ofs); /* relative offset from the beginning of the jeb */ temp->totlen = ri->totlen; temp->next = NULL; @@ -139,7 +139,7 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r temp->nodetype = rd->nodetype; temp->totlen = rd->totlen; - temp->offset = cpu_to_je32(ofs); /* relative from the begining of the jeb */ + temp->offset = cpu_to_je32(ofs); /* relative from the beginning of the jeb */ temp->pino = rd->pino; temp->version = rd->version; temp->ino = rd->ino; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 07ee1546b2fa..4515bea0268f 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1116,7 +1116,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, /* * On NAND we try to mark this block bad. If the block was erased more - * than MAX_ERASE_FAILURES we mark it finaly bad. + * than MAX_ERASE_FAILURES we mark it finally bad. * Don't care about failures. This block remains on the erase-pending * or badblock list as long as nobody manipulates the flash with * a bootloader or something like that. diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index c92ea3b3ea5e..4496872cf4e7 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1649,7 +1649,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) } /* search the tree within the dmap control page for - * sufficent free space. if sufficient free space is found, + * sufficient free space. if sufficient free space is found, * dbFindLeaf() returns the index of the leaf at which * free space was found. */ @@ -2744,7 +2744,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* check which (leafno or buddy) is the left buddy. * the left buddy gets to claim the blocks resulting * from the join while the right gets to claim none. - * the left buddy is also eligable to participate in + * the left buddy is also eligible to participate in * a join at the next higher level while the right * is not. * diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 5d3bbd10f8db..e5fe8506ed16 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) /* allocate the disk blocks for the extent. initially, extBalloc() * will try to allocate disk blocks for the requested size (xlen). - * if this fails (xlen contiguous free blocks not avaliable), it'll + * if this fails (xlen contiguous free blocks not available), it'll * try to allocate a smaller number of blocks (producing a smaller * extent), with this smaller number of blocks consisting of the * requested number of blocks rounded down to the next smaller @@ -481,7 +481,7 @@ int extFill(struct inode *ip, xad_t * xp) * * initially, we will try to allocate disk blocks for the * requested size (nblocks). if this fails (nblocks - * contiguous free blocks not avaliable), we'll try to allocate + * contiguous free blocks not available), we'll try to allocate * a smaller number of blocks (producing a smaller extent), with * this smaller number of blocks consisting of the requested * number of blocks rounded down to the next smaller power of 2 @@ -575,7 +575,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) * to a new set of blocks. If moving the extent, we initially * will try to allocate disk blocks for the requested size * (newnblks). if this fails (new contiguous free blocks not - * avaliable), we'll try to allocate a smaller number of + * available), we'll try to allocate a smaller number of * blocks (producing a smaller extent), with this smaller * number of blocks consisting of the requested number of * blocks rounded down to the next smaller power of 2 diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 3a09423b6c22..ed53a4740168 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -1069,7 +1069,7 @@ int diFree(struct inode *ip) */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { /* in preparation for removing the iag from the - * ag extent free list, read the iags preceeding + * ag extent free list, read the iags preceding * and following the iag on the ag extent free * list. */ @@ -1095,7 +1095,7 @@ int diFree(struct inode *ip) int inofreefwd = le32_to_cpu(iagp->inofreefwd); /* in preparation for removing the iag from the - * ag inode free list, read the iags preceeding + * ag inode free list, read the iags preceding * and following the iag on the ag inode free * list. before reading these iags, we must make * sure that we already don't have them in hand @@ -1681,7 +1681,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) * try to allocate a new extent of free inodes. */ if (addext) { - /* if free space is not avaliable for this new extent, try + /* if free space is not available for this new extent, try * below to allocate a free and existing (already backed) * inode from the ag. */ @@ -2036,7 +2036,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) /* check if this is the last free inode within the iag. * if so, it will have to be removed from the ag free - * inode list, so get the iags preceeding and following + * inode list, so get the iags preceding and following * it on the list. */ if (iagp->nfreeinos == cpu_to_le32(1)) { @@ -2208,7 +2208,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* check if this is the last free extent within the * iag. if so, the iag must be removed from the ag - * free extent list, so get the iags preceeding and + * free extent list, so get the iags preceding and * following the iag on this list. */ if (iagp->nfreeexts == cpu_to_le32(1)) { @@ -2504,7 +2504,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) } - /* get the next avaliable iag number */ + /* get the next available iag number */ iagno = imap->im_nextiag; /* make sure that we have not exceeded the maximum inode @@ -2615,7 +2615,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) duplicateIXtree(sb, blkno, xlen, &xaddr); - /* update the next avaliable iag number */ + /* update the next available iag number */ imap->im_nextiag += 1; /* Add the iag to the iag free list so we don't lose the iag diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 9236bc49ae7f..e38c21598850 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -288,7 +288,7 @@ struct lrd { /* * SYNCPT: log sync point * - * replay log upto syncpt address specified; + * replay log up to syncpt address specified; */ struct { __le32 sync; /* 4: syncpt address (0 = here) */ diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index d94f8d9e87d7..a78beda85f68 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -75,7 +75,7 @@ extern void grab_metapage(struct metapage *); extern void force_metapage(struct metapage *); /* - * hold_metapage and put_metapage are used in conjuction. The page lock + * hold_metapage and put_metapage are used in conjunction. The page lock * is not dropped between the two, so no other threads can get or release * the metapage */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 9466957ec841..f6cc0c09ec63 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -636,7 +636,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, * the inode of the page and available to all anonymous * transactions until txCommit() time at which point * they are transferred to the transaction tlock list of - * the commiting transaction of the inode) + * the committing transaction of the inode) */ if (xtid == 0) { tlck->tid = tid; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 1aba0039f1c9..8ea5efb5a34e 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -57,7 +57,7 @@ * 2. compute new FSCKSize from new LVSize; * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where * assert(new FSSize >= old FSSize), - * i.e., file system must not be shrinked; + * i.e., file system must not be shrunk; */ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) { @@ -182,7 +182,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ newFSSize = newLVSize - newLogSize - newFSCKSize; - /* file system cannot be shrinked */ + /* file system cannot be shrunk */ if (newFSSize < bmp->db_mapsize) { rc = -EINVAL; goto out; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index eeca48a031ab..06c8a67cbe76 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -644,7 +644,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 7466e9dcc8c5..339e17e9133d 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -60,7 +60,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) * asynchronous properties. So just to prevent the first implementor of such * a thing from breaking logfs in 2350, we do the usual pointless dance to * declare a completion variable and wait for completion before returning - * from mtd_erase(). What an excercise in futility! + * from mtd_erase(). What an exercise in futility! */ static void logfs_erase_callback(struct erase_info *ei) { diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index f9ddf0c388c8..9ed89d1663f8 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -92,7 +92,7 @@ static int beyond_eof(struct inode *inode, loff_t bix) * so short names (len <= 9) don't even occupy the complete 32bit name * space. A prime >256 ensures short names quickly spread the 32bit * name space. Add about 26 for the estimated amount of information - * of each character and pick a prime nearby, preferrably a bit-sparse + * of each character and pick a prime nearby, preferably a bit-sparse * one. */ static u32 hash_32(const char *s, int len, u32 seed) diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index ee99a9f5dfd3..9e22085231b3 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1616,7 +1616,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, err = logfs_write_buf(inode, page, flags); if (!err && shrink_level(gc_level) == 0) { /* Rewrite cannot mark the inode dirty but has to - * write it immediatly. + * write it immediately. * Q: Can't we just create an alias for the inode * instead? And if not, why not? */ diff --git a/fs/mbcache.c b/fs/mbcache.c index a25444ab2baf..2f174be06555 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -542,7 +542,7 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head, * mb_cache_entry_find_first() * * Find the first cache entry on a given device with a certain key in - * an additional index. Additonal matches can be found with + * an additional index. Additional matches can be found with * mb_cache_entry_find_next(). Returns NULL if no match was found. The * returned cache entry is locked for shared access ("multiple readers"). * diff --git a/fs/namei.c b/fs/namei.c index 3cb616d38d9c..e6cd6113872c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -70,7 +70,7 @@ * name indicated by the symlink. The old code always complained that the * name already exists, due to not following the symlink even if its target * is nonexistent. The new semantics affects also mknod() and link() when - * the name is a symlink pointing to a non-existant name. + * the name is a symlink pointing to a non-existent name. * * I don't know which semantics is the right one, since I have no access * to standards. But I found by trial that HP-UX 9.0 has the full "new" diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 00a1d1c3d3a4..0250e4ce4893 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -596,7 +596,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) /* server->priv.data = NULL; */ server->m = data; - /* Althought anything producing this is buggy, it happens + /* Although anything producing this is buggy, it happens now because of PATH_MAX changes.. */ if (server->m.time_out < 1) { server->m.time_out = 10; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 14e0f9371d14..00ecf62ce7c1 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -241,7 +241,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, args->cbl_layout_type = ntohl(*p++); /* Depite the spec's xdr, iomode really belongs in the FILE switch, - * as it is unuseable and ignored with the other types. + * as it is unusable and ignored with the other types. */ iomode = ntohl(*p++); args->cbl_layoutchanged = ntohl(*p++); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3ac5bd695e5e..2f093ed16980 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -301,7 +301,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * disk, but it retrieves and clears ctx->error after synching, despite * the two being set at the same time in nfs_context_set_write_error(). * This is because the former is used to notify the _next_ call to - * nfs_file_write() that a write error occured, and hence cause it to + * nfs_file_write() that a write error occurred, and hence cause it to * fall back to doing a synchronous write. */ static int diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 085a354e0f08..7c44579f5832 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -33,7 +33,7 @@ #include "pnfs.h" /* - * Field testing shows we need to support upto 4096 stripe indices. + * Field testing shows we need to support up to 4096 stripe indices. * We store each index as a u8 (u32 on the wire) to keep the memory footprint * reasonable. This in turn means we support a maximum of 256 * RFC 5661 multipath_list4 structures. diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index ec0f277be7f5..6940439bd609 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -173,7 +173,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem) return -EINVAL; break; case ACL_MASK: - /* Solaris sometimes sets additonal bits in the mask */ + /* Solaris sometimes sets additional bits in the mask */ entry->e_perm &= S_IRWXO; break; default: diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7e84a852cdae..ad48faca20fc 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -702,7 +702,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p, *p++ = htonl(resp->eof); *p++ = htonl(resp->count); /* xdr opaque count */ xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data aswell */ + /* now update rqstp->rq_res to reflect data as well */ rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fbde6f79922e..4b36ec3eb8ea 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3055,7 +3055,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; else if (locks_in_grace()) { - /* Answer in remaining cases depends on existance of + /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; } else if (flags & WR_STATE) @@ -3675,7 +3675,7 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid, /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has - * occured. + * occurred. * * strhashval = lock_ownerstr_hashval */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 4ce005dbf3e6..65ec595e2226 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -451,7 +451,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p, *p++ = htonl(resp->count); xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data aswell */ + /* now update rqstp->rq_res to reflect data as well */ rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6b1305dc26c0..9fde1c00a296 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -164,7 +164,7 @@ static int process_access_response(struct fsnotify_group *group, fd, response); /* * make sure the response is valid, if invalid we do nothing and either - * userspace can send a valid responce or we will clean it up after the + * userspace can send a valid response or we will clean it up after the * timeout */ switch (response) { diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index a91b69a6a291..7400898c0339 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -194,7 +194,7 @@ static int idr_callback(int id, void *p, void *data) static void inotify_free_group_priv(struct fsnotify_group *group) { - /* ideally the idr is empty and we won't hit the BUG in teh callback */ + /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 50c00856f730..252ab1f6452b 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -24,7 +24,7 @@ * referencing this object. The object typically will live inside the kernel * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task * which can find this object holding the appropriete locks, can take a reference - * and the object itself is guarenteed to survive until the reference is dropped. + * and the object itself is guaranteed to survive until the reference is dropped. * * LOCKING: * There are 3 spinlocks involved with fsnotify inode marks and they MUST diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index f5094ee224c1..f14fde2b03d6 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -197,7 +197,7 @@ err_out: } else if (ctx_needs_reset) { /* * If there is no attribute list, restoring the search context - * is acomplished simply by copying the saved context back over + * is accomplished simply by copying the saved context back over * the caller supplied context. If there is an attribute list, * things are more complicated as we need to deal with mapping * of mft records and resulting potential changes in pointers. @@ -1181,7 +1181,7 @@ not_found: * for, i.e. if one wants to add the attribute to the mft record this is the * correct place to insert its attribute list entry into. * - * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is + * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is * then undefined and in particular you should not rely on it not changing. */ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index ef9ed854255c..ee4144ce5d7c 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -501,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page) VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> vol->cluster_size_bits; /* - * The first vcn after the last wanted vcn (minumum alignment is again + * The first vcn after the last wanted vcn (minimum alignment is again * PAGE_CACHE_SIZE. */ VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 0b56c6b7ec01..c05d6dcf77a4 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -622,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi) */ /* Everyone gets all permissions. */ vi->i_mode |= S_IRWXUGO; - /* If read-only, noone gets write permissions. */ + /* If read-only, no one gets write permissions. */ if (IS_RDONLY(vi)) vi->i_mode &= ~S_IWUGO; if (m->flags & MFT_RECORD_IS_DIRECTORY) { @@ -2529,7 +2529,7 @@ retry_truncate: * specifies that the behaviour is unspecified thus we do not * have to do anything. This means that in our implementation * in the rare case that the file is mmap()ped and a write - * occured into the mmap()ped region just beyond the file size + * occurred into the mmap()ped region just beyond the file size * and writepage has not yet been called to write out the page * (which would clear the area beyond the file size) and we now * extend the file size to incorporate this dirty region diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 8b2549f672bf..faece7190866 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -286,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS; * fragmented. Volume free space includes the empty part of the mft zone and * when the volume's free 88% are used up, the mft zone is shrunk by a factor * of 2, thus making more space available for more files/data. This process is - * repeated everytime there is no more free space except for the mft zone until + * repeated every time there is no more free space except for the mft zone until * there really is no more free space. */ @@ -1657,13 +1657,13 @@ typedef enum { * pointed to by the Owner field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the SID with - * respect to inheritence of an owner. + * respect to inheritance of an owner. * * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in * the Group field was provided by a defaulting mechanism rather than * explicitly provided by the original provider of the security * descriptor. This may affect the treatment of the SID with respect to - * inheritence of a primary group. + * inheritance of a primary group. * * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security * descriptor contains a discretionary ACL. If this flag is set and the @@ -1674,7 +1674,7 @@ typedef enum { * pointed to by the Dacl field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the ACL with - * respect to inheritence of an ACL. This flag is ignored if the + * respect to inheritance of an ACL. This flag is ignored if the * DaclPresent flag is not set. * * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security @@ -1686,7 +1686,7 @@ typedef enum { * pointed to by the Sacl field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the ACL with - * respect to inheritence of an ACL. This flag is ignored if the + * respect to inheritance of an ACL. This flag is ignored if the * SaclPresent flag is not set. * * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security @@ -2283,7 +2283,7 @@ typedef struct { // the key_length is zero, then the vcn immediately // follows the INDEX_ENTRY_HEADER. Regardless of // key_length, the address of the 8-byte boundary - // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by + // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN), // where sizeof(VCN) can be hardcoded as 8 if wanted. */ } __attribute__ ((__packed__)) INDEX_ENTRY; diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 4dadcdf3d451..c71de292c5ad 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -669,7 +669,7 @@ err_out: * of cases where we think that a volume is dirty when in fact it is clean. * This should only affect volumes that have not been shutdown cleanly but did * not have any pending, non-check-pointed i/o, i.e. they were completely idle - * at least for the five seconds preceeding the unclean shutdown. + * at least for the five seconds preceding the unclean shutdown. * * This function assumes that the $LogFile journal has already been consistency * checked by a call to ntfs_check_logfile() and in particular if the $LogFile diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index b5a6f08bd35c..aa2b6ac3f0a4 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -222,7 +222,7 @@ typedef struct { /* 24*/ sle64 file_size; /* Usable byte size of the log file. If the restart_area_offset + the offset of the file_size are > 510 then corruption has - occured. This is the very first check when + occurred. This is the very first check when starting with the restart_area as if it fails it means that some of the above values will be corrupted by the multi sector diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 326e7475a22a..382857f9c7db 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -73,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + vol->mft_record_size) { page = ERR_PTR(-ENOENT); - ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " + ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " "which is beyond the end of the mft. " "This is probably a bug in the ntfs " "driver.", ni->mft_no); @@ -1442,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) // Note: It will need to be a special mft record and if none of // those are available it gets rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " - "accomodate extended mft bitmap attribute " + "accommodate extended mft bitmap attribute " "extent. Cannot handle this yet."); ret = -EOPNOTSUPP; goto undo_alloc; @@ -1879,7 +1879,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) // and we would then need to update all references to this mft // record appropriately. This is rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " - "accomodate extended mft data attribute " + "accommodate extended mft data attribute " "extent. Cannot handle this yet."); ret = -EOPNOTSUPP; goto undo_alloc; @@ -2357,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, } #ifdef DEBUG read_lock_irqsave(&mftbmp_ni->size_lock, flags); - ntfs_debug("Status of mftbmp after initialized extention: " + ntfs_debug("Status of mftbmp after initialized extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 56a9a6d25a2a..eac7d6788a10 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -1243,7 +1243,7 @@ err_out: * write. * * This is used when building the mapping pairs array of a runlist to compress - * a given logical cluster number (lcn) or a specific run length to the minumum + * a given logical cluster number (lcn) or a specific run length to the minimum * size possible. * * Return the number of bytes written on success. On error, i.e. the diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 29099a07b9fe..b52706da4645 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -458,7 +458,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * the volume on boot and updates them. * * When remounting read-only, mark the volume clean if no volume errors - * have occured. + * have occurred. */ if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { static const char *es = ". Cannot remount read-write."; @@ -1269,7 +1269,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol) "hibernated on the volume."); return 0; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for " "hiberfil.sys."); return ret; @@ -1370,7 +1370,7 @@ static bool load_and_init_quota(ntfs_volume *vol) NVolSetQuotaOutOfDate(vol); return true; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for $Quota."); return false; } @@ -1454,7 +1454,7 @@ not_enabled: NVolSetUsnJrnlStamped(vol); return true; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for " "$UsnJrnl."); return false; @@ -2292,7 +2292,7 @@ static void ntfs_put_super(struct super_block *sb) ntfs_commit_inode(vol->mft_ino); /* - * If a read-write mount and no volume errors have occured, mark the + * If a read-write mount and no volume errors have occurred, mark the * volume clean. Also, re-commit all affected inodes. */ if (!(sb->s_flags & MS_RDONLY)) { @@ -2496,7 +2496,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) if (vol->nr_clusters & 63) nr_free += 64 - (vol->nr_clusters & 63); up_read(&vol->lcnbmp_lock); - /* If errors occured we may well have gone below zero, fix this. */ + /* If errors occurred we may well have gone below zero, fix this. */ if (nr_free < 0) nr_free = 0; ntfs_debug("Exiting."); @@ -2561,7 +2561,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, } ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", index - 1); - /* If errors occured we may well have gone below zero, fix this. */ + /* If errors occurred we may well have gone below zero, fix this. */ if (nr_free < 0) nr_free = 0; ntfs_debug("Exiting."); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b27a0d86f8c5..48aa9c7401c7 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4519,7 +4519,7 @@ set_tail_append: } /* - * Helper function called at the begining of an insert. + * Helper function called at the beginning of an insert. * * This computes a few things that are commonly used in the process of * inserting into the btree: diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index eceb456037c1..75cf3ad987a6 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -71,7 +71,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) /* * Using a named enum representing lock types in terms of #N bit stored in - * iocb->private, which is going to be used for communication bewteen + * iocb->private, which is going to be used for communication between * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). */ enum ocfs2_iocb_lock_bits { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 2461eb3272ed..643720209a98 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2275,7 +2275,7 @@ void o2hb_free_hb_set(struct config_group *group) kfree(hs); } -/* hb callback registration and issueing */ +/* hb callback registration and issuing */ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type) { diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index a87366750f23..8f9cea1597af 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -89,7 +89,7 @@ static void o2quo_fence_self(void) }; } -/* Indicate that a timeout occured on a hearbeat region write. The +/* Indicate that a timeout occurred on a hearbeat region write. The * other nodes in the cluster may consider us dead at that time so we * want to "fence" ourselves so that we don't scribble on the disk * after they think they've recovered us. This can't solve all @@ -261,7 +261,7 @@ void o2quo_hb_still_up(u8 node) spin_unlock(&qs->qs_lock); } -/* This is analagous to hb_up. as a node's connection comes up we delay the +/* This is analogous to hb_up. as a node's connection comes up we delay the * quorum decision until we see it heartbeating. the hold will be droped in * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if * it's already heartbeating we we might be dropping a hold that conn_up got. diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ee04ff5ee603..db5ee4b4f47a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -565,7 +565,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, * the work queue actually being up. */ if (!valid && o2net_wq) { unsigned long delay; - /* delay if we're withing a RECONNECT_DELAY of the + /* delay if we're within a RECONNECT_DELAY of the * last attempt */ delay = (nn->nn_last_connect_attempt + msecs_to_jiffies(o2net_reconnect_delay())) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9d67610dfc74..fede57ed005f 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -808,7 +808,7 @@ lookup: dlm_mle_detach_hb_events(dlm, mle); dlm_put_mle(mle); mle = NULL; - /* this is lame, but we cant wait on either + /* this is lame, but we can't wait on either * the mle or lockres waitqueue here */ if (mig) msleep(100); @@ -843,7 +843,7 @@ lookup: /* finally add the lockres to its hash bucket */ __dlm_insert_lockres(dlm, res); - /* since this lockres is new it doesnt not require the spinlock */ + /* since this lockres is new it doesn't not require the spinlock */ dlm_lockres_grab_inflight_ref_new(dlm, res); /* if this node does not become the master make sure to drop diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 177d3a6c2a5f..b4c8bb6b8d28 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -835,7 +835,7 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) /* If we have allowd wipe of this inode for another node, it * will be marked here so we can safely skip it. Recovery will - * cleanup any inodes we might inadvertantly skip here. */ + * cleanup any inodes we might inadvertently skip here. */ if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) goto bail_unlock; @@ -917,7 +917,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode, * the inode open lock in ocfs2_read_locked_inode(). When we * get to ->delete_inode(), each node tries to convert it's * lock to an exclusive. Trylocks are serialized by the inode - * meta data lock. If the upconvert suceeds, we know the inode + * meta data lock. If the upconvert succeeds, we know the inode * is no longer live and can be deleted. * * Though we call this with the meta data lock held, the diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index dcc2d9327150..b141a44605ca 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1368,7 +1368,7 @@ skip_recovery: mlog_errno(status); /* Now it is right time to recover quotas... We have to do this under - * superblock lock so that noone can start using the slot (and crash) + * superblock lock so that no one can start using the slot (and crash) * before we recover it */ for (i = 0; i < rm_quota_used; i++) { qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 6180da1e37e6..68cf2f6d3c6a 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -215,7 +215,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) /* WARNING: This only kicks off a single * checkpoint. If someone races you and adds more * metadata to the journal, you won't know, and will - * wind up waiting *alot* longer than necessary. Right + * wind up waiting *a lot* longer than necessary. Right * now we only use this in clear_inode so that's * OK. */ ocfs2_start_checkpoint(osb); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 28f2cc1080d8..e5d738cd9cc0 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2128,7 +2128,7 @@ leave: } /** - * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly + * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly * allocated file. This is different from the typical 'add to orphan dir' * operation in that the inode does not yet exist. This is a problem because * the orphan dir stringifies the inode block number to come up with it's diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index bf2e7764920e..b68f87a83924 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -441,7 +441,7 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { struct ocfs2_block_check { /*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */ __le16 bc_ecc; /* Single-error-correction parity vector. - This is a simple Hamming code dependant + This is a simple Hamming code dependent on the blocksize. OCFS2's maximum blocksize, 4K, requires 16 parity bits, so we fit in __le16. */ @@ -750,7 +750,7 @@ struct ocfs2_dinode { after an unclean shutdown */ } journal1; - } id1; /* Inode type dependant 1 */ + } id1; /* Inode type dependent 1 */ /*C0*/ union { struct ocfs2_super_block i_super; struct ocfs2_local_alloc i_lab; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 279aef68025b..92fcd575775a 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -556,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) spin_unlock(&dq_data_lock); err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { - mlog(ML_ERROR, "Failed to lock quota info, loosing quota write" + mlog(ML_ERROR, "Failed to lock quota info, losing quota write" " (type=%d, id=%u)\n", dquot->dq_type, (unsigned)dquot->dq_id); goto out; diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h index 1e49cc29d06c..42c2b804f3fd 100644 --- a/fs/ocfs2/reservations.h +++ b/fs/ocfs2/reservations.h @@ -29,7 +29,7 @@ struct ocfs2_alloc_reservation { struct rb_node r_node; - unsigned int r_start; /* Begining of current window */ + unsigned int r_start; /* Beginning of current window */ unsigned int r_len; /* Length of the window */ unsigned int r_last_len; /* Length of most recent alloc */ diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 8ce7398ae1d2..1ec56fdb8d0d 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -126,7 +126,7 @@ struct ocfs2_stack_operations { * * ->connect() must not return until it is guaranteed that * - * - Node down notifications for the filesystem will be recieved + * - Node down notifications for the filesystem will be received * and passed to conn->cc_recovery_handler(). * - Locking requests for the filesystem will be processed. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ab6e2061074f..ba5d97e4a73e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1511,7 +1511,7 @@ static int ocfs2_cluster_group_search(struct inode *inode, max_bits = le16_to_cpu(gd->bg_bits); /* Tail groups in cluster bitmaps which aren't cpg - * aligned are prone to partial extention by a failed + * aligned are prone to partial extension by a failed * fs resize. If the file system resize never got to * update the dinode cluster count, then we don't want * to trust any clusters past it, regardless of what @@ -2459,7 +2459,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, /* The alloc_bh comes from ocfs2_free_dinode() or * ocfs2_free_clusters(). The callers have all locked the * allocator and gotten alloc_bh from the lock call. This - * validates the dinode buffer. Any corruption that has happended + * validates the dinode buffer. Any corruption that has happened * is a code bug. */ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 69fa11b35aa4..5a521c748859 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -78,7 +78,7 @@ static struct kmem_cache *ocfs2_inode_cachep = NULL; struct kmem_cache *ocfs2_dquot_cachep; struct kmem_cache *ocfs2_qf_chunk_cachep; -/* OCFS2 needs to schedule several differnt types of work which +/* OCFS2 needs to schedule several different types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these * types of work tend to be heavy we avoid using the kernel events * workqueue and schedule on our own. */ diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 57a215dc2d9b..81ecf9c0bf0a 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3554,7 +3554,7 @@ int ocfs2_xattr_set(struct inode *inode, down_write(&OCFS2_I(inode)->ip_xattr_sem); /* * Scan inode and external block to find the same name - * extended attribute and collect search infomation. + * extended attribute and collect search information. */ ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); if (ret) @@ -3578,7 +3578,7 @@ int ocfs2_xattr_set(struct inode *inode, goto cleanup; } - /* Check whether the value is refcounted and do some prepartion. */ + /* Check whether the value is refcounted and do some preparation. */ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && (!xis.not_found || !xbs.not_found)) { ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, diff --git a/fs/partitions/check.c b/fs/partitions/check.c index ac546975031f..d545e97d99c3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -500,7 +500,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, /* everything is up and running, commence */ rcu_assign_pointer(ptbl->part[partno], p); - /* suppress uevent if the disk supresses it */ + /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); @@ -585,7 +585,7 @@ rescan: /* * If any partition code tried to read beyond EOD, try * unlocking native capacity even if partition table is - * sucessfully read as we could be missing some partitions. + * successfully read as we could be missing some partitions. */ if (state->access_beyond_eod) { printk(KERN_WARNING diff --git a/fs/proc/base.c b/fs/proc/base.c index 5a670c11aeac..dd6628d3ba42 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -220,7 +220,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task) } /* - * Noone else is allowed. + * No one else is allowed. */ mmput(mm); return ERR_PTR(-EPERM); diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 867d0ac026ce..8007ae7c0d8c 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -1,5 +1,5 @@ config PSTORE - bool "Persistant store support" + bool "Persistent store support" default n help This option enables generic access to platform level diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index fcc8ae75d874..a925bf205497 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -956,7 +956,7 @@ static inline int dqput_blocks(struct dquot *dquot) /* * Remove references to dquots from inode and add dquot to list for freeing - * if we have the last referece to dquot + * if we have the last reference to dquot * We can't race with anybody because we hold dqptr_sem for writing... */ static int remove_inode_dquot_ref(struct inode *inode, int type, diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c77514bd5776..c5e82ece7c6c 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1,7 +1,7 @@ /* ** Write ahead logging implementation copyright Chris Mason 2000 ** -** The background commits make this code very interelated, and +** The background commits make this code very interrelated, and ** overly complex. I need to rethink things a bit....The major players: ** ** journal_begin -- call with the number of blocks you expect to log. @@ -2725,7 +2725,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, REISERFS_DISK_OFFSET_IN_BYTES / sb->s_blocksize + 2); - /* Sanity check to see is the standard journal fitting withing first bitmap + /* Sanity check to see is the standard journal fitting within first bitmap (actual for small blocksizes) */ if (!SB_ONDISK_JOURNAL_DEVICE(sb) && (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index b87aa2c1afc1..7df1ce48203a 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -15,7 +15,7 @@ * for this mutex, no need for a system wide mutex facility. * * Also this lock is often released before a call that could block because - * reiserfs performances were partialy based on the release while schedule() + * reiserfs performances were partially based on the release while schedule() * property of the Bkl. */ void reiserfs_write_lock(struct super_block *s) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0aab04f46827..b216ff6be1c9 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -393,7 +393,7 @@ void add_save_link(struct reiserfs_transaction_handle *th, /* body of "save" link */ link = INODE_PKEY(inode)->k_dir_id; - /* put "save" link inot tree, don't charge quota to anyone */ + /* put "save" link into tree, don't charge quota to anyone */ retval = reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); if (retval) { @@ -2104,7 +2104,7 @@ out: /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5c11ca82b782..47d2a4498b03 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -396,7 +396,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n) struct address_space *mapping = dir->i_mapping; struct page *page; /* We can deadlock if we try to free dentries, - and an unlink/rmdir has just occured - GFP_NOFS avoids this */ + and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ mapping_set_gfp_mask(mapping, GFP_NOFS); page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); if (!IS_ERR(page)) { diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 26b15ae34d6f..c37b520132ff 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -104,7 +104,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry = &cache->entry[i]; /* - * Initialise choosen cache entry, and fill it in from + * Initialise chosen cache entry, and fill it in from * disk. */ cache->unused--; @@ -286,7 +286,7 @@ cleanup: /* - * Copy upto length bytes from cache entry to buffer starting at offset bytes + * Copy up to length bytes from cache entry to buffer starting at offset bytes * into the cache entry. If there's not length bytes then copy the number of * bytes available. In all cases return the number of bytes copied. */ diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index c8ff0d1ae5d3..8b3a7da531eb 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c) if (liab2 < liab1) return -EAGAIN; - dbg_budg("new liability %lld (not shrinked)", liab2); + dbg_budg("new liability %lld (not shrunk)", liab2); /* Liability did not shrink again, try GC */ dbg_budg("Run GC"); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 27a4babe7df0..e765743cf9f3 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -78,7 +78,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off /* * Returns the location of the fragment from - * the begining of the filesystem. + * the beginning of the filesystem. */ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 7693d6293404..3915ade6f9a8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -483,9 +483,9 @@ static int ufs_parse_options (char * options, unsigned * mount_options) } /* - * Diffrent types of UFS hold fs_cstotal in different - * places, and use diffrent data structure for it. - * To make things simplier we just copy fs_cstotal to ufs_sb_private_info + * Different types of UFS hold fs_cstotal in different + * places, and use different data structure for it. + * To make things simpler we just copy fs_cstotal to ufs_sb_private_info */ static void ufs_setup_cstotal(struct super_block *sb) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 52dbd14260ba..79ce38be15a1 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1295,7 +1295,7 @@ xfs_get_blocks_direct( * If the private argument is non-NULL __xfs_get_blocks signals us that we * need to issue a transaction to convert the range from unwritten to written * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successfull AIO + * to do this and we are done. But in case this was a successful AIO * request this handler is called from interrupt context, from which we * can't start transactions. In that case offload the I/O completion to * the workqueues we also use for buffered I/O completion. diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 596bb2c9de42..5ea402023ebd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -120,7 +120,7 @@ xfs_buf_lru_add( * The unlocked check is safe here because it only occurs when there are not * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unneccessary round trip on the + * xfs_buf_free(). i.e. it removes an unnecessary round trip on the * bt_lru_lock. */ STATIC void @@ -380,7 +380,7 @@ out_free_pages: } /* - * Map buffer into kernel address-space if nessecary. + * Map buffer into kernel address-space if necessary. */ STATIC int _xfs_buf_map_pages( diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 52aadfbed132..f4213ba1ff85 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -381,7 +381,7 @@ xfs_aio_write_isize_update( /* * If this was a direct or synchronous I/O that failed (such as ENOSPC) then - * part of the I/O may have been written to disk before the error occured. In + * part of the I/O may have been written to disk before the error occurred. In * this case the on-disk file size may have been adjusted beyond the in-memory * file size and now needs to be truncated back. */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 9ff7fc603d2f..dd21784525a8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -70,7 +70,7 @@ xfs_synchronize_times( /* * If the linux inode is valid, mark it dirty. - * Used when commiting a dirty inode into a transaction so that + * Used when committing a dirty inode into a transaction so that * the inode will get written back by the linux code */ void diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 594cd822d84d..9cf35a688f53 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -401,7 +401,7 @@ xfs_quiesce_fs( /* * Second stage of a quiesce. The data is already synced, now we have to take * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceding. + * wait for any remaining transactions to drain out before proceeding. */ void xfs_quiesce_attr( diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 7e2416478503..6fa214603819 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -600,7 +600,7 @@ xfs_qm_dqread( /* * Reservation counters are defined as reservation plus current usage - * to avoid having to add everytime. + * to avoid having to add every time. */ dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 774d7ec6df8e..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -134,7 +134,7 @@ xfs_qm_newmount( */ if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { /* - * If an error occured, qm_mount_quotas code + * If an error occurred, qm_mount_quotas code * has already disabled quotas. So, just finish * mounting, and get on with the boring life * without disk quotas. diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c82f06778a27..0d62a07b7fd8 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -172,7 +172,7 @@ xfs_qm_scall_quotaoff( /* * Next we make the changes in the quota flag in the mount struct. * This isn't protected by a particular lock directly, because we - * don't want to take a mrlock everytime we depend on quotas being on. + * don't want to take a mrlock every time we depend on quotas being on. */ mp->m_qflags &= ~(flags); @@ -354,7 +354,7 @@ xfs_qm_scall_quotaon( return XFS_ERROR(EINVAL); } /* - * If everything's upto-date incore, then don't waste time. + * If everything's up to-date incore, then don't waste time. */ if ((mp->m_qflags & flags) == flags) return XFS_ERROR(EEXIST); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e5413d96f1af..7b7e005e3dcc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -992,7 +992,7 @@ xfs_buf_iodone_callbacks( lasttarg = XFS_BUF_TARGET(bp); /* - * If the write was asynchronous then noone will be looking for the + * If the write was asynchronous then no one will be looking for the * error. Clear the error state and write the buffer out again. * * During sync or umount we'll write all pending buffers again diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 742c8330994a..a37480a6e023 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2789,7 +2789,7 @@ xfs_iflush( /* * We can't flush the inode until it is unpinned, so wait for it if we - * are allowed to block. We know noone new can pin it, because we are + * are allowed to block. We know no one new can pin it, because we are * holding the inode lock shared and you need to hold it exclusively to * pin the inode. * diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f753200cef8d..ff4e2a30227d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -111,7 +111,7 @@ struct xfs_imap { * Generally, we do not want to hold the i_rlock while holding the * i_ilock. Hierarchy is i_iolock followed by i_rlock. * - * xfs_iptr_t contains all the inode fields upto and including the + * xfs_iptr_t contains all the inode fields up to and including the * i_mnext and i_mprev fields, it is used as a marker in the inode * chain off the mount structure by xfs_sync calls. */ @@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) /* * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen + * and using two 16bit values to hold new 32bit projid was chosen * to retain compatibility with "old" filesystems). */ static inline prid_t diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 15dbf1f9c2be..ffae692c9832 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -570,7 +570,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, * When we crack an atomic LSN, we sample it first so that the value will not * change while we are cracking it into the component values. This means we * will always get consistent component values to work from. This should always - * be used to smaple and crack LSNs taht are stored and updated in atomic + * be used to sample and crack LSNs that are stored and updated in atomic * variables. */ static inline void diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0c4a5618e7af..5cc464a17c93 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -101,7 +101,7 @@ xlog_get_bp( /* * We do log I/O in units of log sectors (a power-of-2 * multiple of the basic block size), so we round up the - * requested size to acommodate the basic blocks required + * requested size to accommodate the basic blocks required * for complete log sectors. * * In addition, the buffer may be used for a non-sector- @@ -112,7 +112,7 @@ xlog_get_bp( * an issue. Nor will this be a problem if the log I/O is * done in basic blocks (sector size 1). But otherwise we * extend the buffer by one extra log sector to ensure - * there's space to accomodate this possiblility. + * there's space to accommodate this possibility. */ if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 16084d8ea231..048b0c689d3e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -81,7 +81,7 @@ xfs_trans_ijoin( * * * Grabs a reference to the inode which will be dropped when the transaction - * is commited. The inode will also be unlocked at that point. The inode + * is committed. The inode will also be unlocked at that point. The inode * must be locked, and it cannot be associated with any transaction. */ void diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c48b4217ec47..b7a5fe7c52c8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -953,7 +953,7 @@ xfs_release( * If we previously truncated this file and removed old data * in the process, we want to initiate "early" writeout on * the last close. This is an attempt to combat the notorious - * NULL files problem which is particularly noticable from a + * NULL files problem which is particularly noticeable from a * truncate down, buffered (re-)write (delalloc), followed by * a crash. What we are effectively doing here is * significantly reducing the time window where we'd otherwise @@ -982,7 +982,7 @@ xfs_release( * * Further, check if the inode is being opened, written and * closed frequently and we have delayed allocation blocks - * oustanding (e.g. streaming writes from the NFS server), + * outstanding (e.g. streaming writes from the NFS server), * truncating the blocks past EOF will cause fragmentation to * occur. * -- cgit From b03f24567ce7caf2420b8be4c6eb74c191d59a91 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 31 Mar 2011 18:36:52 +0200 Subject: quota: Don't write quota info in dquot_commit() There's no reason to write quota info in dquot_commit(). The writing is a relict from the old days when we didn't have dquot_acquire() and dquot_release() and thus dquot_commit() could have created / removed quota structures from the file. These days dquot_commit() only updates usage counters / limits in quota structure and thus there's no need to write quota info. This also fixes an issue with journaling filesystem which didn't reserve enough space in the transaction for write of quota info (it could have been dirty at the time of dquot_commit() because of a race with other operation changing it). CC: stable@kernel.org Reported-and-tested-by: Lukas Czerner Signed-off-by: Jan Kara --- fs/quota/dquot.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a2a622e079f0..b59ee61f4b9a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -442,7 +442,7 @@ EXPORT_SYMBOL(dquot_acquire); */ int dquot_commit(struct dquot *dquot) { - int ret = 0, ret2 = 0; + int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dqopt->dqio_mutex); @@ -454,15 +454,10 @@ int dquot_commit(struct dquot *dquot) spin_unlock(&dq_list_lock); /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ - if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); - if (info_dirty(&dqopt->info[dquot->dq_type])) { - ret2 = dqopt->ops[dquot->dq_type]->write_file_info( - dquot->dq_sb, dquot->dq_type); - } - if (ret >= 0) - ret = ret2; - } + else + ret = -EIO; out_sem: mutex_unlock(&dqopt->dqio_mutex); return ret; -- cgit From 50f689af019b19f9b9a39be782c21b6f52b1615a Mon Sep 17 00:00:00 2001 From: Zhu Yanhai Date: Mon, 4 Apr 2011 12:58:12 -0400 Subject: jbd2: move bdget out of critical section bdget() should not be called when we hold spinlocks since it might sleep. Reviewed-by: Jan Kara Signed-off-by: Zhu Yanhai Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 90407b8fece7..33dd3ef0ccd0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device) new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); if (!new_dev) return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ + bd = bdget(device); spin_lock(&devname_cache_lock); if (devcache[i]) { if (devcache[i]->device == device) { kfree(new_dev); + bdput(bd); ret = devcache[i]->devname; spin_unlock(&devname_cache_lock); return ret; @@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device) } devcache[i] = new_dev; devcache[i]->device = device; - bd = bdget(device); if (bd) { bdevname(bd, devcache[i]->devname); bdput(bd); -- cgit From 21f976975cbecbdaf23ceeacc1cab2b1c05a028e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 4 Apr 2011 15:33:39 -0400 Subject: ext4: remove unnecessary [cm]time update of quota file It is not necessary to update [cm]time of quota file on each quota file write and it wastes journal space and IO throughput with inode writes. So just remove the updating from ext4_quota_write() and only update times when quotas are being turned off. Userspace cannot get anything reliable from quota files while they are used by the kernel anyway. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.h | 4 ++-- fs/ext4/super.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index e25e99bf7ee1..d0f53538a57f 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,8 +86,8 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) + * allocated so we need to update only data block */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 22546ad7f0ae..35ff9fef68bd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4614,11 +4614,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, static int ext4_quota_off(struct super_block *sb, int type) { + struct inode *inode = sb_dqopt(sb)->files[type]; + handle_t *handle; + /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); + /* Update modification times of quota files when userspace can + * start looking at them */ + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + goto out; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + +out: return dquot_quota_off(sb, type); } @@ -4714,9 +4727,8 @@ out: if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; + ext4_mark_inode_dirty(handle, inode); } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); return len; } -- cgit From 5b41395fcc0265fc9f193aef9df39ce49d64677c Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Mon, 4 Apr 2011 15:40:24 -0400 Subject: ext4: fix credits computing for indirect mapped files When writing a contiguous set of blocks, two indirect blocks could be needed depending on how the blocks are aligned, so we need to increase the number of credits needed by one. [ Also fixed a another bug which could further underestimate the number of journal credits needed by 1; the code was using integer division instead of DIV_ROUND_UP() -- tytso] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1a86282b9024..7d11e02ad01d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5398,13 +5398,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, /* if nrblocks are contiguous */ if (chunk) { /* - * With N contiguous data blocks, it need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks - * 2 dindirect blocks - * 1 tindirect block + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block */ - indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); - return indirects + 3; + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; } /* * if nrblocks are not contiguous, worse case, each block touch -- cgit From 46e4690bbd9a4f8d9e7c4f34e34b48f703ad47e0 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 4 Apr 2011 16:00:49 -0400 Subject: ext4: fix a double free in ext4_register_li_request In ext4_register_li_request, we malloc a ext4_li_request and inserts it into ext4_li_info->li_request_list. In case of any error later, we free it in the end. But if we have some error in ext4_run_lazyinit_thread, the whole li_request_list will be dropped and freed in it. So we will double free this ext4_li_request. This patch just sets elr to NULL after it is inserted to the list so that the latter kfree won't double free it. Signed-off-by: Tao Ma Reviewed-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 35ff9fef68bd..35bd0206bbb3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2975,6 +2975,12 @@ static int ext4_register_li_request(struct super_block *sb, mutex_unlock(&ext4_li_info->li_list_mtx); sbi->s_li_request = elr; + /* + * set elr to NULL here since it has been inserted to + * the request_list and the removal and free of it is + * handled by ext4_clear_request_list from now on. + */ + elr = NULL; if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { ret = ext4_run_lazyinit_thread(); -- cgit From c9149235a42ab93914434fff45c44b45023363f3 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 30 Mar 2011 00:57:23 +0000 Subject: Btrfs: fix compiler warning in file.c While compiling Btrfs, I got following messages: CC [M] fs/btrfs/file.o fs/btrfs/file.c: In function '__btrfs_buffered_write': fs/btrfs/file.c:909: warning: 'ret' may be used uninitialized in this function CC [M] fs/btrfs/tree-defrag.o This patch fixes compiler warning. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 656bc0a892b1..e621ea54a3fd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -906,7 +906,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, unsigned long last_index; size_t num_written = 0; int nrptrs; - int ret; + int ret = 0; nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / -- cgit From 200da64e0b039f873f0f20481e6a7d056e7cc6c9 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 31 Mar 2011 00:44:29 +0000 Subject: Btrfs: fix /proc/mounts info. Some mount options are not displayed by /proc/mounts. This patch displays the option such as compress_type by /proc/mounts. Ex. [before] $ mount | grep sdc2 /dev/sdc2 on /test12 type btrfs (rw,space_cache,compress=lzo) $ cat /proc/mounts | grep sdc2 /dev/sdc2 /test12 btrfs rw,relatime,compress 0 0 [after] $ mount | grep sdc2 /dev/sdc2 on /test12 type btrfs (rw,space_cache,compress=lzo) $ cat /proc/mounts | grep sdc2 /dev/sdc2 /test12 btrfs rw,relatime,compress=lzo,space_cache 0 0 Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/super.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2edfc039f098..58e7de9cc90c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -644,6 +644,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); struct btrfs_fs_info *info = root->fs_info; + char *compress_type; if (btrfs_test_opt(root, DEGRADED)) seq_puts(seq, ",degraded"); @@ -662,8 +663,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); - if (btrfs_test_opt(root, COMPRESS)) - seq_puts(seq, ",compress"); + if (btrfs_test_opt(root, COMPRESS)) { + if (info->compress_type == BTRFS_COMPRESS_ZLIB) + compress_type = "zlib"; + else + compress_type = "lzo"; + if (btrfs_test_opt(root, FORCE_COMPRESS)) + seq_printf(seq, ",compress-force=%s", compress_type); + else + seq_printf(seq, ",compress=%s", compress_type); + } if (btrfs_test_opt(root, NOSSD)) seq_puts(seq, ",nossd"); if (btrfs_test_opt(root, SSD_SPREAD)) @@ -678,6 +687,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",discard"); if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) seq_puts(seq, ",noacl"); + if (btrfs_test_opt(root, SPACE_CACHE)) + seq_puts(seq, ",space_cache"); + if (btrfs_test_opt(root, CLEAR_CACHE)) + seq_puts(seq, ",clear_cache"); + if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) + seq_puts(seq, ",user_subvol_rm_allowed"); return 0; } -- cgit From fe3f566cd19bb6d787c92b2e202c85f929abf3ac Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 28 Mar 2011 08:30:38 +0000 Subject: Btrfs: Fix oops for defrag with compression turned on When we defrag a file, whose size can be fit into an inline extent, with compression enabled, the compress type is set to be fs_info->compress_type, which is 0 if the btrfs filesystem is mounted without compress option. This leads to oops. Reported-by: Daniel Blueman Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06274186b290..62ae9d5da806 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -111,6 +111,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, size_t size, size_t compressed_size, + int compress_type, struct page **compressed_pages) { struct btrfs_key key; @@ -125,12 +126,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, size_t cur_size = size; size_t datasize; unsigned long offset; - int compress_type = BTRFS_COMPRESS_NONE; - if (compressed_size && compressed_pages) { - compress_type = root->fs_info->compress_type; + if (compressed_size && compressed_pages) cur_size = compressed_size; - } path = btrfs_alloc_path(); if (!path) @@ -220,7 +218,7 @@ fail: static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, - size_t compressed_size, + size_t compressed_size, int compress_type, struct page **compressed_pages) { u64 isize = i_size_read(inode); @@ -253,7 +251,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, inline_len = min_t(u64, isize, actual_end); ret = insert_inline_extent(trans, root, inode, start, inline_len, compressed_size, - compressed_pages); + compress_type, compressed_pages); BUG_ON(ret); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); @@ -432,12 +430,13 @@ again: * to make an uncompressed inline extent. */ ret = cow_file_range_inline(trans, root, inode, - start, end, 0, NULL); + start, end, 0, 0, NULL); } else { /* try making a compressed inline extent */ ret = cow_file_range_inline(trans, root, inode, start, end, - total_compressed, pages); + total_compressed, + compress_type, pages); } if (ret == 0) { /* @@ -791,7 +790,7 @@ static noinline int cow_file_range(struct inode *inode, if (start == 0) { /* lets try to make an inline extent */ ret = cow_file_range_inline(trans, root, inode, - start, end, 0, NULL); + start, end, 0, 0, NULL); if (ret == 0) { extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, -- cgit From b44c59a80ded004e1a82712e5f9e17b131c03221 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Thu, 31 Mar 2011 13:23:47 +0000 Subject: Btrfs: fix subvol_sem leak in btrfs_rename() btrfs_rename() does not release the subvol_sem if the transaction failed to start. Signed-off-by: Johann Lombardi Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 62ae9d5da806..1ca3e68586cf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6961,8 +6961,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * should cover the worst case number of items we'll modify. */ trans = btrfs_start_transaction(root, 20); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_notrans; + } btrfs_set_trans_block_group(trans, new_dir); @@ -7062,7 +7064,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } out_fail: btrfs_end_transaction_throttle(trans, root); - +out_notrans: if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); -- cgit From 8b2b2d3cbefb605501342adaf64d601b545ed154 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 4 Apr 2011 01:52:13 +0000 Subject: Btrfs: fix memory leak in btrfs_ioctl_start_sync() Call btrfs_end_transaction() if btrfs_commit_transaction_async() fails. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6b70e0e2bd1e..255c7c5279c4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2436,8 +2436,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp return PTR_ERR(trans); transid = trans->transid; ret = btrfs_commit_transaction_async(trans, root, 0); - if (ret) + if (ret) { + btrfs_end_transaction(trans, root); return ret; + } if (argp) if (copy_to_user(argp, &transid, sizeof(transid))) -- cgit From 6e8df2ae89ab37730c0062782f844c66ecfc97a7 Mon Sep 17 00:00:00 2001 From: Yoshinori Sano Date: Sun, 3 Apr 2011 12:31:28 +0000 Subject: Btrfs: fix memory leak in start_transaction() Free btrfs_trans_handle when join_transaction() fails in start_transaction() Signed-off-by: Yoshinori Sano Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ce48eb59d615..d01cc249a8d3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -197,6 +197,7 @@ again: ret = join_transaction(root); if (ret < 0) { + kmem_cache_free(btrfs_trans_handle_cachep, h); if (type != TRANS_JOIN_NOLOCK) mutex_unlock(&root->fs_info->trans_mutex); return ERR_PTR(ret); -- cgit From adae52b94e18afa1f84fab67df2a8a872c2f5533 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 31 Mar 2011 09:43:23 +0000 Subject: btrfs: clear __GFP_FS flag in the space cache inode the object id of the space cache inode's key is allocated from the relative root, just like the regular file. So we can't identify space cache inode by checking the object id of the inode's key, and we have to clear __GFP_FS flag at the time we look up the space cache inode. Signed-off-by: Miao Xie Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 2 ++ fs/btrfs/inode.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0037427d8a9d..13575de85543 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -81,6 +81,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return ERR_PTR(-ENOENT); } + inode->i_mapping->flags &= ~__GFP_FS; + spin_lock(&block_group->lock); if (!root->fs_info->closing) { block_group->inode = igrab(inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ca3e68586cf..57a03f6eb224 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2535,8 +2535,6 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - if (location.objectid == BTRFS_FREE_SPACE_OBJECTID) - inode->i_mapping->flags &= ~__GFP_FS; /* * try to precache a NULL acl entry for files that don't have -- cgit From 08fe4db170b4193603d9d31f40ebaf652d07ac9c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 28 Mar 2011 02:01:25 +0000 Subject: Btrfs: Fix uninitialized root flags for subvolumes root_item->flags and root_item->byte_limit are not initialized when a subvolume is created. This bug is not revealed until we added readonly snapshot support - now you mount a btrfs filesystem and you may find the subvolumes in it are readonly. To work around this problem, we steal a bit from root_item->inode_item->flags, and use it to indicate if those fields have been properly initialized. When we read a tree root from disk, we check if the bit is set, and if not we'll set the flag and initialize the two fields of the root item. Reported-by: Andreas Philipp Signed-off-by: Li Zefan Tested-by: Andreas Philipp cc: stable@kernel.org Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/disk-io.c | 4 +++- fs/btrfs/ioctl.c | 4 ++++ fs/btrfs/root-tree.c | 18 ++++++++++++++++++ fs/btrfs/transaction.c | 1 + 5 files changed, 30 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d47ce8307854..3458b5725540 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1284,6 +1284,8 @@ struct btrfs_root { #define BTRFS_INODE_DIRSYNC (1 << 10) #define BTRFS_INODE_COMPRESS (1 << 11) +#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) + /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: @@ -2359,6 +2361,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); int btrfs_find_orphan_roots(struct btrfs_root *tree_root); int btrfs_set_root_node(struct btrfs_root_item *item, struct extent_buffer *node); +void btrfs_check_and_init_root_item(struct btrfs_root_item *item); + /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5cf3aa7b125c..a272bfd74ea0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1276,8 +1276,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); out: - if (location->objectid != BTRFS_TREE_LOG_OBJECTID) + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { root->ref_cows = 1; + btrfs_check_and_init_root_item(&root->root_item); + } return root; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 255c7c5279c4..f9c93a9ed4a7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -373,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root, inode_item->nbytes = cpu_to_le64(root->leafsize); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + root_item.flags = 0; + root_item.byte_limit = 0; + inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); + btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_level(&root_item, 0); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 29b2d7c930eb..6928bff62daa 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -473,3 +473,21 @@ again: btrfs_free_path(path); return 0; } + +/* + * Old btrfs forgets to init root_item->flags and root_item->byte_limit + * for subvolumes. To work around this problem, we steal a bit from + * root_item->inode_item->flags, and use it to indicate if those fields + * have been properly initialized. + */ +void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) +{ + u64 inode_flags = le64_to_cpu(root_item->inode.flags); + + if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { + inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; + root_item->inode.flags = cpu_to_le64(inode_flags); + root_item->flags = 0; + root_item->byte_limit = 0; + } +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d01cc249a8d3..5b158da7e0bb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -976,6 +976,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + btrfs_check_and_init_root_item(new_root_item); root_flags = btrfs_root_flags(new_root_item); if (pending->readonly) -- cgit From 43be21462d8c263e2449b52b23326232fd710bee Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Apr 2011 14:55:00 +0000 Subject: Btrfs: fix free space cache when there are pinned extents and clusters V2 I noticed a huge problem with the free space cache that was presenting as an early ENOSPC. Turns out when writing the free space cache out I forgot to take into account pinned extents and more importantly clusters. This would result in us leaking free space everytime we unmounted the filesystem and remounted it. I fix this by making sure to check and see if the current block group has a cluster and writing out any entries that are in the cluster to the cache, as well as writing any pinned extents we currently have to the cache since those will be available for us to use the next time the fs mounts. This patch also adds a check to the end of load_free_space_cache to make sure we got the right amount of free space cache, and if not make sure to clear the cache and re-cache the old fashioned way. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 82 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 13575de85543..f561c953205b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -24,6 +24,7 @@ #include "free-space-cache.h" #include "transaction.h" #include "disk-io.h" +#include "extent_io.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) @@ -224,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, u64 num_entries; u64 num_bitmaps; u64 generation; + u64 used = btrfs_block_group_used(&block_group->item); u32 cur_crc = ~(u32)0; pgoff_t index = 0; unsigned long first_page_offset; @@ -469,6 +471,17 @@ next: index++; } + spin_lock(&block_group->tree_lock); + if (block_group->free_space != (block_group->key.offset - used - + block_group->bytes_super)) { + spin_unlock(&block_group->tree_lock); + printk(KERN_ERR "block group %llu has an wrong amount of free " + "space\n", block_group->key.objectid); + ret = 0; + goto free_cache; + } + spin_unlock(&block_group->tree_lock); + ret = 1; out: kfree(checksums); @@ -497,8 +510,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct list_head *pos, *n; struct page *page; struct extent_state *cached_state = NULL; + struct btrfs_free_cluster *cluster = NULL; + struct extent_io_tree *unpin = NULL; struct list_head bitmap_list; struct btrfs_key key; + u64 start, end, len; u64 bytes = 0; u32 *crc, *checksums; pgoff_t index = 0, last_index = 0; @@ -507,6 +523,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, int entries = 0; int bitmaps = 0; int ret = 0; + bool next_page = false; root = root->fs_info->tree_root; @@ -553,6 +570,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, */ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); + /* Get the cluster for this block_group if it exists */ + if (!list_empty(&block_group->cluster_list)) + cluster = list_entry(block_group->cluster_list.next, + struct btrfs_free_cluster, + block_group_list); + + /* + * We shouldn't have switched the pinned extents yet so this is the + * right one + */ + unpin = root->fs_info->pinned_extents; + /* * Lock all pages first so we can lock the extent safely. * @@ -582,6 +611,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state, GFP_NOFS); + /* + * When searching for pinned extents, we need to start at our start + * offset. + */ + start = block_group->key.objectid; + /* Write out the extent entries */ do { struct btrfs_free_space_entry *entry; @@ -589,6 +624,8 @@ int btrfs_write_out_cache(struct btrfs_root *root, unsigned long offset = 0; unsigned long start_offset = 0; + next_page = false; + if (index == 0) { start_offset = first_page_offset; offset = start_offset; @@ -600,7 +637,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, entry = addr + start_offset; memset(addr, 0, PAGE_CACHE_SIZE); - while (1) { + while (node && !next_page) { struct btrfs_free_space *e; e = rb_entry(node, struct btrfs_free_space, offset_index); @@ -616,12 +653,49 @@ int btrfs_write_out_cache(struct btrfs_root *root, entry->type = BTRFS_FREE_SPACE_EXTENT; } node = rb_next(node); - if (!node) - break; + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; + } offset += sizeof(struct btrfs_free_space_entry); if (offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) + next_page = true; + entry++; + } + + /* + * We want to add any pinned extents to our free space cache + * so we don't leak the space + */ + while (!next_page && (start < block_group->key.objectid + + block_group->key.offset)) { + ret = find_first_extent_bit(unpin, start, &start, &end, + EXTENT_DIRTY); + if (ret) { + ret = 0; + break; + } + + /* This pinned extent is out of our range */ + if (start >= block_group->key.objectid + + block_group->key.offset) break; + + len = block_group->key.objectid + + block_group->key.offset - start; + len = min(len, end + 1 - start); + + entries++; + entry->offset = cpu_to_le64(start); + entry->bytes = cpu_to_le64(len); + entry->type = BTRFS_FREE_SPACE_EXTENT; + + start = end + 1; + offset += sizeof(struct btrfs_free_space_entry); + if (offset + sizeof(struct btrfs_free_space_entry) >= + PAGE_CACHE_SIZE) + next_page = true; entry++; } *crc = ~(u32)0; @@ -652,7 +726,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, page_cache_release(page); index++; - } while (node); + } while (node || next_page); /* Write out the bitmaps */ list_for_each_safe(pos, n, &bitmap_list) { -- cgit From c9ddec74aa950a220cc4caa5215cfc5d886050b7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 28 Mar 2011 13:43:25 +0000 Subject: Btrfs: don't warn in btrfs_add_orphan When I moved the orphan adding to btrfs_truncate I missed the fact that during orphan cleanup we just add the orphan items to the orphan list without going through btrfs_orphan_add, which results in lots of warnings on mount if you have any orphan items that need to be truncated. Just remove this warning since it's ok, this will allow all of the normal space accounting take place. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 57a03f6eb224..cc6022842e0c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2220,8 +2220,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) insert = 1; #endif insert = 1; - } else { - WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); } if (!BTRFS_I(inode)->orphan_meta_reserved) { -- cgit From 8b229c76765816796eec7ccd428f03bd8de8b525 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 25 Mar 2011 18:33:57 +0200 Subject: UBIFS: do not read flash unnecessarily This fix makes the 'dbg_check_old_index()' function return immediately if debugging is disabled, instead of executing incorrect 'goto out' which causes UBIFS to: 1. Allocate memory 2. Read the flash On every commit. OK, we do not commit that often, but it is still silly to do unneeded I/O anyway. Credits to coverity for spotting this silly issue. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org --- fs/ubifs/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index b148fbc80f8d..1bd01ded7123 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -577,7 +577,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) size_t sz; if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) - goto out; + return 0; INIT_LIST_HEAD(&list); -- cgit From 54acbaaa523ca0bd284a18f67ad213c379679e86 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 25 Mar 2011 19:09:54 +0200 Subject: UBIFS: fix oops on error path in read_pnode Thanks to coverity which spotted that UBIFS will oops if 'kmalloc()' in 'read_pnode()' fails and we dereference a NULL 'pnode' pointer when we 'goto out'. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org --- fs/ubifs/lpt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 72775d35b99e..ef5155e109a2 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1270,10 +1270,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lnum = branch->lnum; offs = branch->offs; pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); - if (!pnode) { - err = -ENOMEM; - goto out; - } + if (!pnode) + return -ENOMEM; + if (lnum == 0) { /* * This pnode was not written which just means that the LEB -- cgit From c88ac00c5af70c2a0741da14b22cdcf8507ddd92 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 29 Mar 2011 09:45:21 +0300 Subject: UBIFS: fix assertion warnings This patch fixes UBIFS assertion warnings like: UBIFS assert failed in ubifs_leb_unmap at 135 (pid 29365) Pid: 29365, comm: integck Tainted: G I 2.6.37-ubi-2.6+ #34 Call Trace: [] ubifs_lpt_init+0x95e/0x9ee [ubifs] [] ubifs_remount_fs+0x2c7/0x762 [ubifs] [] do_remount_sb+0xb6/0x101 [] ? do_mount+0x191/0x78e [] do_mount+0x258/0x78e [] ? alloc_pages_current+0xa2/0xc5 [] sys_mount+0x83/0xbd [] system_call_fastpath+0x16/0x1b They happen when we re-mount from R/O mode to R/W mode. While re-mounting, we write to the media, but we still have the c->ro_mount flag set. The fix is very simple - just clear the flag before starting re-mounting R/W. These warnings are caused by the following commit: 2ef13294d29bcfb306e0d360f1b97f37b647b0c0 For -stable guys: this bug was introduced in 2.6.38, this is materieal for 2.6.38-stable. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org [2.6.38] --- fs/ubifs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6ddd9973e681..c75f6133206c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1568,6 +1568,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) mutex_lock(&c->umount_mutex); dbg_save_space_info(c); c->remounting_rw = 1; + c->ro_mount = 0; err = check_free_space(c); if (err) @@ -1676,13 +1677,13 @@ static int ubifs_remount_rw(struct ubifs_info *c) } dbg_gen("re-mounted read-write"); - c->ro_mount = 0; c->remounting_rw = 0; err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); return err; out: + c->ro_mount = 1; vfree(c->orph_buf); c->orph_buf = NULL; if (c->bgt) { -- cgit From 81354de3d8691c2dedcc686cd2c167819ff0df10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 30 Mar 2011 11:18:54 +0300 Subject: UBIFS: do not select KALLSYMS_ALL All UBIFS needs is to make sure we stacktraces when UBIFS debugging is enabled. It is enough to select KALLSYMS for this, KALLSYMS_ALL is not necessary. Moreover, Randy Dunlap reported that UBIFS causes the following Kconfig dependency warning: warning: (UBIFS_FS_DEBUG && LOCKDEP && LATENCYTOP) selects KALLSYMS_ALL which has unmet direct dependencies (DEBUG_KERNEL && KALLSYMS) The reason is that KALLSYMS_ALL requires DEBUG_KERNEL and KALLSYMS, so ideally, to select KALLSYMS_ALL we'd need to select DEBUG_KERNEL and KALLSYMS first. This seems to be too much to select. The easiest way to go is to forget about KALLSYMS_ALL and just select KALLSYMS when UBIFS debugging is enabled - that should be enough for stackdumps. Reported-by: Randy Dunlap Signed-off-by: Artem Bityutskiy Acked-by: Randy Dunlap --- fs/ubifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index d7440904be17..f8b0160da2da 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -47,7 +47,7 @@ config UBIFS_FS_DEBUG bool "Enable debugging support" depends on UBIFS_FS select DEBUG_FS - select KALLSYMS_ALL + select KALLSYMS help This option enables UBIFS debugging support. It makes sure various assertions, self-checks, debugging messages and test modes are compiled -- cgit From cc6a86b950d69cfe542ee0d0ff30790152936a00 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Apr 2011 10:10:52 +0300 Subject: UBIFS: unify error path dbg_debugfs_init_fs This is just a small clean-up patch which simlifies and unifies the error path in the dbg_debugfs_init_fs(). We have common error path for all failure cases in this function except of the very first case. And this patch makes the first failure case use the same error path as the other cases by using the 'fname' and 'dent' variables. Signed-off-by: Artem Bityutskiy Acked-by: Phil Carmody --- fs/ubifs/debug.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index f25a7339f800..304cc6e1c84b 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2806,13 +2806,11 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct ubifs_debug_info *d = c->dbg; sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); - if (IS_ERR(d->dfs_dir)) { - err = PTR_ERR(d->dfs_dir); - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - d->dfs_dir_name, err); + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR(dent)) goto out; - } + d->dfs_dir = dent; fname = "dump_lprops"; dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); @@ -2835,11 +2833,11 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) return 0; out_remove: + debugfs_remove_recursive(d->dfs_dir); +out: err = PTR_ERR(dent); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); - debugfs_remove_recursive(d->dfs_dir); -out: return err; } -- cgit From 95169535113073993a3ed97ecc21831657f42a80 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Apr 2011 10:16:17 +0300 Subject: UBIFS: fix error path in dbg_debugfs_init_fs The debug interface is substandard and on error returns either NULL or an error code packed in the pointer. So using "IS_ERR" for the pointers returned by debugfs function is incorrect. Instead, we should use IS_ERR_OR_NULL. This path is an improved vestion of the original patch from Phil Carmody. Reported-by: Phil Carmody Signed-off-by: Artem Bityutskiy Acked-by: Phil Carmody --- fs/ubifs/debug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 304cc6e1c84b..645737769c98 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2808,25 +2808,25 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); fname = d->dfs_dir_name; dent = debugfs_create_dir(fname, dfs_rootdir); - if (IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) goto out; d->dfs_dir = dent; fname = "dump_lprops"; dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_lprops = dent; fname = "dump_budg"; dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_budg = dent; fname = "dump_tnc"; dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_tnc = dent; @@ -2835,7 +2835,7 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) out_remove: debugfs_remove_recursive(d->dfs_dir); out: - err = PTR_ERR(dent); + err = dent ? PTR_ERR(dent) : -ENODEV; ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); return err; -- cgit From 7da6443aca9be29c6948dcbd636ad50154d0bc0c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 4 Apr 2011 17:16:39 +0300 Subject: UBIFS: fix debugging failure in dbg_check_space_info This patch fixes a debugging failure with which looks like this: UBIFS error (pid 32313): dbg_check_space_info: free space changed from 6019344 to 6022654 The reason for this failure is described in the comment this patch adds to the code. But in short - 'c->freeable_cnt' may be different before and after re-mounting, and this is normal. So the debugging code should make sure that free space calculations do not depend on 'c->freeable_cnt'. A similar issue has been reported here: http://lists.infradead.org/pipermail/linux-mtd/2011-April/034647.html This patch should fix it. For the -stable guys: this patch is only relevant for kernels 2.6.30 onwards. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org [2.6.30+] --- fs/ubifs/debug.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 645737769c98..004d3745dc45 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -972,11 +972,39 @@ void dbg_dump_index(struct ubifs_info *c) void dbg_save_space_info(struct ubifs_info *c) { struct ubifs_debug_info *d = c->dbg; - - ubifs_get_lp_stats(c, &d->saved_lst); + int freeable_cnt; spin_lock(&c->space_lock); + memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); + + /* + * We use a dirty hack here and zero out @c->freeable_cnt, because it + * affects the free space calculations, and UBIFS might not know about + * all freeable eraseblocks. Indeed, we know about freeable eraseblocks + * only when we read their lprops, and we do this only lazily, upon the + * need. So at any given point of time @c->freeable_cnt might be not + * exactly accurate. + * + * Just one example about the issue we hit when we did not zero + * @c->freeable_cnt. + * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the + * amount of free space in @d->saved_free + * 2. We re-mount R/W, which makes UBIFS to read the "lsave" + * information from flash, where we cache LEBs from various + * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' + * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' + * -> 'ubifs_get_pnode()' -> 'update_cats()' + * -> 'ubifs_add_to_cat()'). + * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt + * becomes %1. + * 4. We calculate the amount of free space when the re-mount is + * finished in 'dbg_check_space_info()' and it does not match + * @d->saved_free. + */ + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; d->saved_free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; spin_unlock(&c->space_lock); } @@ -993,12 +1021,15 @@ int dbg_check_space_info(struct ubifs_info *c) { struct ubifs_debug_info *d = c->dbg; struct ubifs_lp_stats lst; - long long avail, free; + long long free; + int freeable_cnt; spin_lock(&c->space_lock); - avail = ubifs_calc_available(c, c->min_idx_lebs); + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; + free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; spin_unlock(&c->space_lock); - free = ubifs_get_free_space(c); if (free != d->saved_free) { ubifs_err("free space changed from %lld to %lld", -- cgit From ee3dea3549444e6e76d27af48b4929983e6f023c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Mar 2011 12:17:43 +0200 Subject: ufs: remove unessecary blk_flush_plug We already flush the per-process plugging list when context switching, so a blk_flush_plug call just before a yield() is not needed. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/ufs/truncate.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 11014302c9ca..5f821dbc0579 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) break; if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) ufs_sync_inode (inode); - blk_flush_plug(current); yield(); } -- cgit From 7dcda1c96d7c643101d4a05579ef4512a4baa7ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Apr 2011 23:51:48 +0200 Subject: fs: export empty_aops With the ->sync_page() hook gone, we have a few users that add their own static address_space_operations without any functions defined. fs/inode.c already has an empty_aops that it uses for init purposes. Lets export that and use it in the places where an otherwise empty aops was defined. Signed-off-by: Jens Axboe --- fs/inode.c | 9 ++++++++- fs/nilfs2/page.c | 2 -- fs/ubifs/xattr.c | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 5f4e11aaeb5c..33c963d08ab4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -124,6 +124,14 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); */ static DECLARE_RWSEM(iprune_sem); +/* + * Empty aops. Can be used for the cases where the user does not + * define any of the address_space operations. + */ +const struct address_space_operations empty_aops = { +}; +EXPORT_SYMBOL(empty_aops); + /* * Statistics gathering.. */ @@ -176,7 +184,6 @@ int proc_nr_inodes(ctl_table *table, int write, */ int inode_init_always(struct super_block *sb, struct inode *inode) { - static const struct address_space_operations empty_aops; static const struct inode_operations empty_iops; static const struct file_operations empty_fops; struct address_space *const mapping = &inode->i_data; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 9d2dc6b4348e..1168059c7efd 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -495,8 +495,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, void nilfs_mapping_init(struct address_space *mapping, struct backing_dev_info *bdi) { - static const struct address_space_operations empty_aops; - mapping->host = NULL; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index c74400f88fe0..3299f469e712 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -56,6 +56,7 @@ */ #include "ubifs.h" +#include #include #include #include @@ -80,7 +81,6 @@ enum { }; static const struct inode_operations none_inode_operations; -static const struct address_space_operations none_address_operations; static const struct file_operations none_file_operations; /** @@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, } /* Re-define all operations to be "nothing" */ - inode->i_mapping->a_ops = &none_address_operations; + inode->i_mapping->a_ops = &empty_aops; inode->i_op = &none_inode_operations; inode->i_fop = &none_file_operations; -- cgit From d0de4dc584ec6aa3b26fffea320a8457827768fc Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 5 Apr 2011 17:20:50 -0400 Subject: inotify: fix double free/corruption of stuct user On an error path in inotify_init1 a normal user can trigger a double free of struct user. This is a regression introduced by a2ae4cc9a16e ("inotify: stop kernel memory leak on file creation failure"). We fix this by making sure that if a group exists the user reference is dropped when the group is cleaned up. We should not explictly drop the reference on error and also drop the reference when the group is cleaned up. The new lifetime rules are that an inotify group lives from inotify_new_group to the last fsnotify_put_group. Since the struct user and inotify_devs are directly tied to this lifetime they are only changed/updated in those two locations. We get rid of all special casing of struct user or user->inotify_devs. Signed-off-by: Eric Paris Cc: stable@kernel.org (2.6.37 and up) Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify_fsnotify.c | 1 + fs/notify/inotify/inotify_user.c | 39 ++++++++++++------------------------ 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index a91b69a6a291..0348d0c8f65e 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -198,6 +198,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); + atomic_dec(&group->inotify_data.user->inotify_devs); free_uid(group->inotify_data.user); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index bd46e7c8a0ef..8445fbc8985c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct user_struct *user = group->inotify_data.user; pr_debug("%s: group=%p\n", __func__, group); @@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file) /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); - atomic_dec(&user->inotify_devs); - return 0; } @@ -697,7 +694,7 @@ retry: return ret; } -static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) +static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; @@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); group->inotify_data.last_wd = 0; - group->inotify_data.user = user; group->inotify_data.fa = NULL; + group->inotify_data.user = get_current_user(); + + if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > + inotify_max_user_instances) { + fsnotify_put_group(group); + return ERR_PTR(-EMFILE); + } return group; } @@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign SYSCALL_DEFINE1(inotify_init1, int, flags) { struct fsnotify_group *group; - struct user_struct *user; int ret; /* Check the IN_* constants for consistency. */ @@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) return -EINVAL; - user = get_current_user(); - if (unlikely(atomic_read(&user->inotify_devs) >= - inotify_max_user_instances)) { - ret = -EMFILE; - goto out_free_uid; - } - /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ - group = inotify_new_group(user, inotify_max_queued_events); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_free_uid; - } - - atomic_inc(&user->inotify_devs); + group = inotify_new_group(inotify_max_queued_events); + if (IS_ERR(group)) + return PTR_ERR(group); ret = anon_inode_getfd("inotify", &inotify_fops, group, O_RDONLY | flags); - if (ret >= 0) - return ret; + if (ret < 0) + fsnotify_put_group(group); - fsnotify_put_group(group); - atomic_dec(&user->inotify_devs); -out_free_uid: - free_uid(user); return ret; } -- cgit From 6cba611e600ded15f642552ce6b5f7ee243bacf0 Mon Sep 17 00:00:00 2001 From: Zhang Huan Date: Tue, 5 Apr 2011 19:16:20 -0400 Subject: jbd2: fix potential memory leak on transaction commit There is potential memory leak of journal head in function jbd2_journal_commit_transaction. The problem is that JBD2 will not reclaim the journal head of commit record if error occurs or journal is abotred. I use the following script to reproduce this issue, on a RHEL6 system. I found it very easy to reproduce with async commit enabled. mount /dev/sdb /mnt -o journal_checksum,journal_async_commit touch /mnt/xxx echo offline > /sys/block/sdb/device/state sync umount /mnt rmmod ext4 rmmod jbd2 Removal of the jbd2 module will make slab complaining that "cache `jbd2_journal_head': can't free all objects". Signed-off-by: Zhang Huan Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index fa36d7662b21..b98e4c1eecff 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal, int ret; struct timespec now = current_kernel_time(); + *cbh = NULL; + if (is_journal_aborted(journal)) return 0; @@ -806,7 +808,7 @@ wait_for_iobuf: if (err) __jbd2_journal_abort_hard(journal); } - if (!err && !is_journal_aborted(journal)) + if (cbh) err = journal_wait_on_commit_record(journal, cbh); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && -- cgit From 0449641130f5652b344ef6fa39fa019d7e94660a Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 5 Apr 2011 19:55:28 -0400 Subject: ext4: init timer earlier to avoid a kernel panic in __save_error_info During mount, when we fail to open journal inode or root inode, the __save_error_info will mod_timer. But actually s_err_report isn't initialized yet and the kernel oops. The detailed information can be found https://bugzilla.kernel.org/show_bug.cgi?id=32082. The best way is to check whether the timer s_err_report is initialized or not. But it seems that in include/linux/timer.h, we can't find a good function to check the status of this timer, so this patch just move the initializtion of s_err_report earlier so that we can avoid the kernel panic. The corresponding del_timer is also added in the error path. Reported-by: Sami Liedes Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 35bd0206bbb3..551cb8e2110c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3391,6 +3391,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + init_timer(&sbi->s_err_report); + sbi->s_err_report.function = print_daily_error_info; + sbi->s_err_report.data = (unsigned long) sb; + err = percpu_counter_init(&sbi->s_freeblocks_counter, ext4_count_free_blocks(sb)); if (!err) { @@ -3652,9 +3656,6 @@ no_journal: "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); - init_timer(&sbi->s_err_report); - sbi->s_err_report.function = print_daily_error_info; - sbi->s_err_report.data = (unsigned long) sb; if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -3678,6 +3679,7 @@ failed_mount_wq: sbi->s_journal = NULL; } failed_mount3: + del_timer(&sbi->s_err_report); if (sbi->s_flex_groups) { if (is_vmalloc_addr(sbi->s_flex_groups)) vfree(sbi->s_flex_groups); -- cgit From 418875900e3de4831c84f86ae4756690dac5be77 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 6 Apr 2011 14:33:28 -0400 Subject: NFS: Fix a signed vs. unsigned secinfo bug rpc_authflavor_t is cast from an unsigned int, but the initial code tried to use it as a signed int. I fix this by passing an rpc_authflavor_t pointer around, and returning signed integers from functions. Signed-off-by: Bryan Schumaker Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 58 +++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 9166fcb66da2..89fc160fd5b0 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -148,67 +148,64 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, return pseudoflavor; } -static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) +static int nfs_negotiate_security(const struct dentry *parent, + const struct dentry *dentry, + rpc_authflavor_t *flavor) { - int status = 0; struct page *page; struct nfs4_secinfo_flavors *flavors; int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); - rpc_authflavor_t flavor = RPC_AUTH_UNIX; + int ret = -EPERM; secinfo = NFS_PROTO(parent->d_inode)->secinfo; if (secinfo != NULL) { page = alloc_page(GFP_KERNEL); if (!page) { - status = -ENOMEM; + ret = -ENOMEM; goto out; } flavors = page_address(page); - status = secinfo(parent->d_inode, &dentry->d_name, flavors); - flavor = nfs_find_best_sec(flavors, dentry->d_inode); + ret = secinfo(parent->d_inode, &dentry->d_name, flavors); + *flavor = nfs_find_best_sec(flavors, dentry->d_inode); put_page(page); } - return flavor; - out: - status = -ENOMEM; - return status; + return ret; } -static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, - struct dentry *dentry, struct path *path, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, + struct dentry *dentry, struct path *path, + struct nfs_fh *fh, struct nfs_fattr *fattr, + rpc_authflavor_t *flavor) { - rpc_authflavor_t flavor; struct rpc_clnt *clone; struct rpc_auth *auth; int err; - flavor = nfs_negotiate_security(parent, path->dentry); - if (flavor < 0) + err = nfs_negotiate_security(parent, path->dentry, flavor); + if (err < 0) goto out; clone = rpc_clone_client(server->client); - auth = rpcauth_create(flavor, clone); + auth = rpcauth_create(*flavor, clone); if (!auth) { - flavor = -EIO; + err = -EIO; goto out_shutdown; } err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, &path->dentry->d_name, fh, fattr); - if (err < 0) - flavor = err; out_shutdown: rpc_shutdown_client(clone); out: - return flavor; + return err; } #else /* CONFIG_NFS_V4 */ -static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, - struct dentry *parent, struct dentry *dentry, - struct path *path, struct nfs_fh *fh, - struct nfs_fattr *fattr) +static inline int nfs_lookup_with_sec(struct nfs_server *server, + struct dentry *parent, struct dentry *dentry, + struct path *path, struct nfs_fh *fh, + struct nfs_fattr *fattr, + rpc_authflavor_t *flavor) { return -EPERM; } @@ -234,7 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path) struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; int err; - rpc_authflavor_t flavor = 1; + rpc_authflavor_t flavor = RPC_AUTH_UNIX; dprintk("--> nfs_d_automount()\n"); @@ -255,13 +252,8 @@ struct vfsmount *nfs_d_automount(struct path *path) err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, &path->dentry->d_name, fh, fattr); - if (err == -EPERM) { - flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); - if (flavor < 0) - err = flavor; - else - err = 0; - } + if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL) + err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor); dput(parent); if (err != 0) { mnt = ERR_PTR(err); -- cgit From 37adb89fadd65ce47f7e5cfd564938a76b351948 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 7 Apr 2011 16:02:20 -0400 Subject: NFS: Change initial mount authflavor only when server returns NFS4ERR_WRONGSEC When attempting an initial mount, we should only attempt other authflavors if AUTH_UNIX receives a NFS4ERR_WRONGSEC error. This allows other errors to be passed back to userspace programs. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dfd1e6d7e6c3..9bf41eab3e46 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2204,8 +2204,6 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl goto out; } ret = nfs4_lookup_root(server, fhandle, info); - if (ret < 0) - ret = -EAGAIN; out: return ret; } @@ -2226,7 +2224,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, for (i = 0; i < len; i++) { status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); - if (status == 0) + if (status != -EPERM) break; } if (status == 0) -- cgit From e828776a8abe6b9bae7ed9638710bff7642c568a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: fix extent format buffer allocation size When formatting an inode item, we have to allocate a separate buffer to hold extents when there are delayed allocation extents on the inode and it is in extent format. The allocation size is derived from the in-core data fork representation, which accounts for delayed allocation extents, while the on-disk representation does not contain any delalloc extents. As a result of this mismatch, the allocated buffer can be far larger than needed to hold the real extent list which, due to the fact the inode is in extent format, is limited to the size of the literal area of the inode. However, we can have thousands of delalloc extents, resulting in an allocation size orders of magnitude larger than is needed to hold all the real extents. Fix this by limiting the size of the buffer being allocated to the size of the literal area of the inodes in the filesystem (i.e. the maximum size an inode fork can grow to). Signed-off-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/xfs_inode_item.c | 67 +++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 46cc40131d4a..576fdfe81d60 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -197,6 +197,41 @@ xfs_inode_item_size( return nvecs; } +/* + * xfs_inode_item_format_extents - convert in-core extents to on-disk form + * + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. In this case, we + * need to do this conversion before we write the extents into the log. Because + * we don't have the disk inode to write into here, we allocate a buffer and + * format the extents into it via xfs_iextents_copy(). We free the buffer in + * the unlock routine after the copy for the log has been made. + * + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only log on-disk extents + * here, so always use the physical fork size to determine the size of the + * buffer we need to allocate. + */ +STATIC void +xfs_inode_item_format_extents( + struct xfs_inode *ip, + struct xfs_log_iovec *vecp, + int whichfork, + int type) +{ + xfs_bmbt_rec_t *ext_buffer; + + ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); + if (whichfork == XFS_DATA_FORK) + ip->i_itemp->ili_extents_buf = ext_buffer; + else + ip->i_itemp->ili_aextents_buf = ext_buffer; + + vecp->i_addr = ext_buffer; + vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); + vecp->i_type = type; +} + /* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode @@ -213,7 +248,6 @@ xfs_inode_item_format( struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; - xfs_bmbt_rec_t *ext_buffer; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; @@ -320,22 +354,8 @@ xfs_inode_item_format( } else #endif { - /* - * There are delayed allocation extents - * in the inode, or we need to convert - * the extents to on disk format. - * Use xfs_iextents_copy() - * to copy only the real extents into - * a separate buffer. We'll free the - * buffer in the unlock routine. - */ - ext_buffer = kmem_alloc(ip->i_df.if_bytes, - KM_SLEEP); - iip->ili_extents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_DATA_FORK); - vecp->i_type = XLOG_REG_TYPE_IEXT; + xfs_inode_item_format_extents(ip, vecp, + XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -445,19 +465,12 @@ xfs_inode_item_format( */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); - /* - * Need to endian flip before logging - */ - ext_buffer = kmem_alloc(ip->i_afp->if_bytes, - KM_SLEEP); - iip->ili_aextents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_ATTR_FORK); + xfs_inode_item_format_extents(ip, vecp, + XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; -- cgit From c6d09b666de11eb272326a6eb6cd3246da571014 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: introduce a xfssyncd workqueue All of the work xfssyncd does is background functionality. There is no need for a thread per filesystem to do this work - it can al be managed by a global workqueue now they manage concurrency effectively. Introduce a new gglobal xfssyncd workqueue, and convert the periodic work to use this new functionality. To do this, use a delayed work construct to schedule the next running of the periodic sync work for the filesystem. When the sync work is complete, queue a new delayed work for the next running of the sync work. For laptop mode, we wait on completion for the sync works, so ensure that the sync work queuing interface can flush and wait for work to complete to enable the work queue infrastructure to replace the current sequence number and wakeup that is used. Because the sync work does non-trivial amounts of work, mark the new work queue as CPU intensive. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 30 ++++++++------- fs/xfs/linux-2.6/xfs_sync.c | 88 ++++++++++++++++++++++---------------------- fs/xfs/linux-2.6/xfs_sync.h | 2 + fs/xfs/xfs_mount.h | 4 +- 4 files changed, 63 insertions(+), 61 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1ba5c451da36..c71b6ed45e41 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1191,22 +1191,12 @@ xfs_fs_sync_fs( return -error; if (laptop_mode) { - int prev_sync_seq = mp->m_sync_seq; - /* * The disk must be active because we're syncing. * We schedule xfssyncd now (now that the disk is * active) instead of later (when it might not be). */ - wake_up_process(mp->m_sync_task); - /* - * We have to wait for the sync iteration to complete. - * If we don't, the disk activity caused by the sync - * will come after the sync is completed, and that - * triggers another sync from laptop mode. - */ - wait_event(mp->m_wait_single_sync_task, - mp->m_sync_seq != prev_sync_seq); + flush_delayed_work_sync(&mp->m_sync_work); } return 0; @@ -1492,7 +1482,6 @@ xfs_fs_fill_super( atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); spin_lock_init(&mp->m_sync_lock); - init_waitqueue_head(&mp->m_wait_single_sync_task); mp->m_super = sb; sb->s_fs_info = mp; @@ -1833,13 +1822,27 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; + /* + * max_active is set to 8 to give enough concurency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. + */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) { + error = -ENOMEM; + goto out_sysctl_unregister; + } + vfs_initquota(); error = register_filesystem(&xfs_fs_type); if (error) - goto out_sysctl_unregister; + goto out_destroy_xfs_syncd; return 0; + out_destroy_xfs_syncd: + destroy_workqueue(xfs_syncd_wq); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: @@ -1861,6 +1864,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + destroy_workqueue(xfs_syncd_wq); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 594cd822d84d..4a582d8100e4 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -39,6 +39,8 @@ #include #include +struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + /* * The inode lookup is done in batches to keep the amount of lock traffic and * radix tree lookups to a minimum. The batch size is a trade off between @@ -489,32 +491,6 @@ xfs_flush_inodes( xfs_log_force(ip->i_mount, XFS_LOG_SYNC); } -/* - * Every sync period we need to unpin all items, reclaim inodes and sync - * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle and not frozen. - */ -STATIC void -xfs_sync_worker( - struct xfs_mount *mp, - void *unused) -{ - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && - xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp); - else - xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); - error = xfs_qm_sync(mp, SYNC_TRYLOCK); - } - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); -} - STATIC int xfssyncd( void *arg) @@ -528,34 +504,19 @@ xfssyncd( timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); for (;;) { if (list_empty(&mp->m_sync_list)) - timeleft = schedule_timeout_interruptible(timeleft); + schedule_timeout_interruptible(timeleft); /* swsusp */ try_to_freeze(); if (kthread_should_stop() && list_empty(&mp->m_sync_list)) break; spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } list_splice_init(&mp->m_sync_list, &tmp); spin_unlock(&mp->m_sync_lock); list_for_each_entry_safe(work, n, &tmp, w_list) { (*work->w_syncer)(mp, work->w_data); list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; if (work->w_completion) complete(work->w_completion); kmem_free(work); @@ -565,13 +526,49 @@ xfssyncd( return 0; } +static void +xfs_syncd_queue_sync( + struct xfs_mount *mp) +{ + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); +} + +/* + * Every sync period we need to unpin all items, reclaim inodes and sync + * disk quotas. We might need to cover the log to indicate that the + * filesystem is idle and not frozen. + */ +STATIC void +xfs_sync_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_sync_work); + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + /* dgc: errors ignored here */ + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + xfs_reclaim_inodes(mp, 0); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); + } + + /* queue us up again */ + xfs_syncd_queue_sync(mp); +} + int xfs_syncd_init( struct xfs_mount *mp) { - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_work.w_completion = NULL; + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + xfs_syncd_queue_sync(mp); + mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); @@ -582,6 +579,7 @@ void xfs_syncd_stop( struct xfs_mount *mp) { + cancel_delayed_work_sync(&mp->m_sync_work); kthread_stop(mp->m_sync_task); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -32,6 +32,8 @@ typedef struct xfs_sync_work { #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ +extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a62e8971539d..2c11e62be888 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -203,12 +203,10 @@ typedef struct xfs_mount { struct mutex m_icsb_mutex; /* balancer sync lock */ #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ + struct delayed_work m_sync_work; /* background sync work */ struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ struct list_head m_sync_list; /* sync thread work item list */ spinlock_t m_sync_lock; /* work item list lock */ - int m_sync_seq; /* sync thread generation no. */ - wait_queue_head_t m_wait_single_sync_task; __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ -- cgit From 89e4cb550a492cfca038a555fcc1bdac58822ec3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: convert ENOSPC inode flushing to use new syncd workqueue On of the problems with the current inode flush at ENOSPC is that we queue a flush per ENOSPC event, regardless of how many are already queued. Thi can result in hundreds of queued flushes, most of which simply burn CPU scanned and do no real work. This simply slows down allocation at ENOSPC. We really only need one active flush at a time, and we can easily implement that via the new xfs_syncd_wq. All we need to do is queue a flush if one is not already active, then block waiting for the currently active flush to complete. The result is that we only ever have a single ENOSPC inode flush active at a time and this greatly reduces the overhead of ENOSPC processing. On my 2p test machine, this results in tests exercising ENOSPC conditions running significantly faster - 042 halves execution time, 083 drops from 60s to 5s, etc - while not introducing test regressions. This allows us to remove the old xfssyncd threads and infrastructure as they are no longer used. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 2 - fs/xfs/linux-2.6/xfs_sync.c | 132 ++++++++++++------------------------------- fs/xfs/xfs_mount.h | 4 +- 3 files changed, 36 insertions(+), 102 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c71b6ed45e41..ee0e981aa9d1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1480,8 +1480,6 @@ xfs_fs_fill_super( spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); - INIT_LIST_HEAD(&mp->m_sync_list); - spin_lock_init(&mp->m_sync_lock); mp->m_super = sb; sb->s_fs_info = mp; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 4a582d8100e4..af3275965c77 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -433,99 +433,6 @@ xfs_quiesce_attr( xfs_unmountfs_writesb(mp); } -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *), - struct completion *completion) -{ - struct xfs_sync_work *work; - - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - work->w_completion = completion; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... - */ -STATIC void -xfs_flush_inodes_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); - iput(inode); -} - -void -xfs_flush_inodes( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - DECLARE_COMPLETION_ONSTACK(completion); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); - wait_for_completion(&completion); - xfs_log_force(ip->i_mount, XFS_LOG_SYNC); -} - -STATIC int -xfssyncd( - void *arg) -{ - struct xfs_mount *mp = arg; - long timeleft; - xfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - if (list_empty(&mp->m_sync_list)) - schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - - spin_lock(&mp->m_sync_lock); - list_splice_init(&mp->m_sync_list, &tmp); - spin_unlock(&mp->m_sync_lock); - - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work->w_completion) - complete(work->w_completion); - kmem_free(work); - } - } - - return 0; -} - static void xfs_syncd_queue_sync( struct xfs_mount *mp) @@ -562,16 +469,47 @@ xfs_sync_worker( xfs_syncd_queue_sync(mp); } +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room. + * + * Queue a new data flush if there isn't one already in progress and + * wait for completion of the flush. This means that we only ever have one + * inode flush in progress no matter how many ENOSPC events are occurring and + * so will prevent the system from bogging down due to every concurrent + * ENOSPC event scanning all the active inodes in the system for writeback. + */ +void +xfs_flush_inodes( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + queue_work(xfs_syncd_wq, &mp->m_flush_work); + flush_work_sync(&mp->m_flush_work); +} + +STATIC void +xfs_flush_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, + struct xfs_mount, m_flush_work); + + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); +} + int xfs_syncd_init( struct xfs_mount *mp) { + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); xfs_syncd_queue_sync(mp); - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); - if (IS_ERR(mp->m_sync_task)) - return -PTR_ERR(mp->m_sync_task); return 0; } @@ -580,7 +518,7 @@ xfs_syncd_stop( struct xfs_mount *mp) { cancel_delayed_work_sync(&mp->m_sync_work); - kthread_stop(mp->m_sync_task); + cancel_work_sync(&mp->m_flush_work); } void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2c11e62be888..a0ad90e95299 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -204,9 +204,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct delayed_work m_sync_work; /* background sync work */ - struct task_struct *m_sync_task; /* generalised sync thread */ - struct list_head m_sync_list; /* sync thread work item list */ - spinlock_t m_sync_lock; /* work item list lock */ + struct work_struct m_flush_work; /* background inode flush */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ -- cgit From a7b339f1b8698667eada006e717cdb4523be2ed5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: introduce background inode reclaim work Background inode reclaim needs to run more frequently that the XFS syncd work is run as 30s is too long between optimal reclaim runs. Add a new periodic work item to the xfs syncd workqueue to run a fast, non-blocking inode reclaim scan. Background inode reclaim is kicked by the act of marking inodes for reclaim. When an AG is first marked as having reclaimable inodes, the background reclaim work is kicked. It will continue to run periodically untill it detects that there are no more reclaimable inodes. It will be kicked again when the first inode is queued for reclaim. To ensure shrinker based inode reclaim throttles to the inode cleaning and reclaim rate but still reclaim inodes efficiently, make it kick the background inode reclaim so that when we are low on memory we are trying to reclaim inodes as efficiently as possible. This kick shoul d not be necessary, but it will protect against failures to kick the background reclaim when inodes are first dirtied. To provide the rate throttling, make the shrinker pass do synchronous inode reclaim so that it blocks on inodes under IO. This means that the shrinker will reclaim inodes rather than just skipping over them, but it does not adversely affect the rate of reclaim because most dirty inodes are already under IO due to the background reclaim work the shrinker kicked. These two modifications solve one of the two OOM killer invocations Chris Mason reported recently when running a stress testing script. The particular workload trigger for the OOM killer invocation is where there are more threads than CPUs all unlinking files in an extremely memory constrained environment. Unlike other solutions, this one does not have a performance impact on performance when memory is not constrained or the number of concurrent threads operating is <= to the number of CPUs. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 69 +++++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_mount.h | 1 + 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index af3275965c77..debe2822c930 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -461,7 +461,6 @@ xfs_sync_worker( error = xfs_fs_log_dummy(mp); else xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); error = xfs_qm_sync(mp, SYNC_TRYLOCK); } @@ -469,6 +468,52 @@ xfs_sync_worker( xfs_syncd_queue_sync(mp); } +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) +{ + + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; + + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); + } + rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + /* * Flush delayed allocate data, attempting to free up reserved space * from existing allocations. At this point a new allocation attempt @@ -508,7 +553,10 @@ xfs_syncd_init( { INIT_WORK(&mp->m_flush_work, xfs_flush_worker); INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); return 0; } @@ -518,6 +566,7 @@ xfs_syncd_stop( struct xfs_mount *mp) { cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); cancel_work_sync(&mp->m_flush_work); } @@ -537,6 +586,10 @@ __xfs_inode_set_reclaim_tag( XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } @@ -953,7 +1006,13 @@ xfs_reclaim_inodes( } /* - * Shrinker infrastructure. + * Inode cache shrinker. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doiing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. */ static int xfs_reclaim_inode_shrink( @@ -968,10 +1027,14 @@ xfs_reclaim_inode_shrink( mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { + /* kick background reclaimer */ + xfs_syncd_queue_reclaim(mp); + if (!(gfp_mask & __GFP_FS)) return -1; - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, + &nr_to_scan); /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a0ad90e95299..19af0ab0d0c6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -204,6 +204,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct delayed_work m_sync_work; /* background sync work */ + struct delayed_work m_reclaim_work; /* background inode reclaim */ struct work_struct m_flush_work; /* background inode flush */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ -- cgit From 0bf6a5bd4b55b466964ead6fa566d8f346a828ee Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: convert the xfsaild threads to a workqueue Similar to the xfssyncd, the per-filesystem xfsaild threads can be converted to a global workqueue and run periodically by delayed works. This makes sense for the AIL pushing because it uses variable timeouts depending on the work that needs to be done. By removing the xfsaild, we simplify the AIL pushing code and remove the need to spread the code to implement the threading and pushing across multiple files. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 127 ++++++++++++++--------------------------- fs/xfs/xfs_trans_ail.c | 133 ++++++++++++++++++++++++------------------- fs/xfs/xfs_trans_priv.h | 15 ++--- 3 files changed, 124 insertions(+), 151 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index ee0e981aa9d1..67d5b2cddb98 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -816,75 +816,6 @@ xfs_setup_devices( return 0; } -/* - * XFS AIL push thread support - */ -void -xfsaild_wakeup( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) -{ - /* only ever move the target forwards */ - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { - ailp->xa_target = threshold_lsn; - wake_up_process(ailp->xa_task); - } -} - -STATIC int -xfsaild( - void *data) -{ - struct xfs_ail *ailp = data; - xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; /* milliseconds */ - - while (!kthread_should_stop()) { - /* - * for short sleeps indicating congestion, don't allow us to - * get woken early. Otherwise all we do is bang on the AIL lock - * without making progress. - */ - if (tout && tout <= 20) - __set_current_state(TASK_KILLABLE); - else - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(tout ? - msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); - - /* swsusp */ - try_to_freeze(); - - ASSERT(ailp->xa_mount->m_log); - if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - continue; - - tout = xfsaild_push(ailp, &last_pushed_lsn); - } - - return 0; -} /* xfsaild */ - -int -xfsaild_start( - struct xfs_ail *ailp) -{ - ailp->xa_target = 0; - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->xa_mount->m_fsname); - if (IS_ERR(ailp->xa_task)) - return -PTR_ERR(ailp->xa_task); - return 0; -} - -void -xfsaild_stop( - struct xfs_ail *ailp) -{ - kthread_stop(ailp->xa_task); -} - - /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -1785,6 +1716,38 @@ xfs_destroy_zones(void) } +STATIC int __init +xfs_init_workqueues(void) +{ + /* + * max_active is set to 8 to give enough concurency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. + */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) + goto out; + + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); + if (!xfs_ail_wq) + goto out_destroy_syncd; + + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); +out: + return -ENOMEM; +} + +STATIC void __exit +xfs_destroy_workqueues(void) +{ + destroy_workqueue(xfs_ail_wq); + destroy_workqueue(xfs_syncd_wq); +} + STATIC int __init init_xfs_fs(void) { @@ -1800,10 +1763,14 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_mru_cache_init(); + error = xfs_init_workqueues(); if (error) goto out_destroy_zones; + error = xfs_mru_cache_init(); + if (error) + goto out_destroy_wq; + error = xfs_filestream_init(); if (error) goto out_mru_cache_uninit; @@ -1820,27 +1787,17 @@ init_xfs_fs(void) if (error) goto out_cleanup_procfs; - /* - * max_active is set to 8 to give enough concurency to allow - * multiple work operations on each CPU to run. This allows multiple - * filesystems to be running sync work concurrently, and scales with - * the number of CPUs in the system. - */ - xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); - if (!xfs_syncd_wq) { - error = -ENOMEM; + error = xfs_init_workqueues(); + if (error) goto out_sysctl_unregister; - } vfs_initquota(); error = register_filesystem(&xfs_fs_type); if (error) - goto out_destroy_xfs_syncd; + goto out_sysctl_unregister; return 0; - out_destroy_xfs_syncd: - destroy_workqueue(xfs_syncd_wq); out_sysctl_unregister: xfs_sysctl_unregister(); out_cleanup_procfs: @@ -1851,6 +1808,8 @@ init_xfs_fs(void) xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); + out_destroy_wq: + xfs_destroy_workqueues(); out_destroy_zones: xfs_destroy_zones(); out: @@ -1862,12 +1821,12 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); - destroy_workqueue(xfs_syncd_wq); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); + xfs_destroy_workqueues(); xfs_destroy_zones(); } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 12aff9584e29..cb3aeac929bc 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,6 +28,8 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" +struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ + STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); @@ -68,36 +70,6 @@ xfs_trans_ail_tail( return lsn; } -/* - * xfs_trans_push_ail - * - * This routine is called to move the tail of the AIL forward. It does this by - * trying to flush items in the AIL whose lsns are below the given - * threshold_lsn. - * - * the push is run asynchronously in a separate thread, so we return the tail - * of the log right now instead of the tail after the push. This means we will - * either continue right away, or we will sleep waiting on the async thread to - * do its work. - * - * We do this unlocked - we only need to know whether there is anything in the - * AIL at the time we are called. We don't need to access the contents of - * any of the objects, so the lock is not needed. - */ -void -xfs_trans_ail_push( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) -{ - xfs_log_item_t *lip; - - lip = xfs_ail_min(ailp); - if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) - xfsaild_wakeup(ailp, threshold_lsn); - } -} - /* * AIL traversal cursor initialisation. * @@ -236,16 +208,16 @@ out: } /* - * xfsaild_push does the work of pushing on the AIL. Returning a timeout of - * zero indicates that the caller should sleep until woken. + * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself + * to run at a later time if there is more work to do to complete the push. */ -long -xfsaild_push( - struct xfs_ail *ailp, - xfs_lsn_t *last_lsn) +STATIC void +xfs_ail_worker( + struct work_struct *work) { - long tout = 0; - xfs_lsn_t last_pushed_lsn = *last_lsn; + struct xfs_ail *ailp = container_of(to_delayed_work(work), + struct xfs_ail, xa_work); + long tout; xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; @@ -256,15 +228,15 @@ xfsaild_push( spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - *last_lsn = 0; - return tout; + ailp->xa_last_pushed_lsn = 0; + return; } XFS_STATS_INC(xs_push_ail); @@ -301,13 +273,13 @@ xfsaild_push( case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); IOP_PUSH(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; break; case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; push_xfsbufd = 1; break; @@ -319,7 +291,7 @@ xfsaild_push( case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; stuck++; break; @@ -374,9 +346,23 @@ xfsaild_push( wake_up_process(mp->m_ddev_targp->bt_task); } + /* assume we have more work to do in a short while */ + tout = 10; if (!count) { /* We're past our target or empty, so idle */ - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; + + /* + * Check for an updated push target before clearing the + * XFS_AIL_PUSHING_BIT. If the target changed, we've got more + * work to do. Wait a bit longer before starting that work. + */ + smp_rmb(); + if (ailp->xa_target == target) { + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); + return; + } + tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* * We reached the target so wait a bit longer for I/O to @@ -384,7 +370,7 @@ xfsaild_push( * start the next scan from the start of the AIL. */ tout = 50; - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; } else if ((stuck * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we @@ -396,14 +382,48 @@ xfsaild_push( * continuing from where we were. */ tout = 20; - } else { - /* more to do, but wait a short while before continuing */ - tout = 10; } - *last_lsn = last_pushed_lsn; - return tout; + + /* There is more to do, requeue us. */ + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, + msecs_to_jiffies(tout)); } +/* + * This routine is called to move the tail of the AIL forward. It does this by + * trying to flush items in the AIL whose lsns are below the given + * threshold_lsn. + * + * The push is run asynchronously in a workqueue, which means the caller needs + * to handle waiting on the async flush for space to become available. + * We don't want to interrupt any push that is in progress, hence we only queue + * work if we set the pushing bit approriately. + * + * We do this unlocked - we only need to know whether there is anything in the + * AIL at the time we are called. We don't need to access the contents of + * any of the objects, so the lock is not needed. + */ +void +xfs_trans_ail_push( + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) +{ + xfs_log_item_t *lip; + + lip = xfs_ail_min(ailp); + if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || + XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) + return; + + /* + * Ensure that the new target is noticed in push code before it clears + * the XFS_AIL_PUSHING_BIT. + */ + smp_wmb(); + ailp->xa_target = threshold_lsn; + if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); +} /* * This is to be called when an item is unlocked that may have @@ -615,7 +635,6 @@ xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; - int error; ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) @@ -624,15 +643,9 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); spin_lock_init(&ailp->xa_lock); - error = xfsaild_start(ailp); - if (error) - goto out_free_ailp; + INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; return 0; - -out_free_ailp: - kmem_free(ailp); - return error; } void @@ -641,7 +654,7 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - xfsaild_stop(ailp); + cancel_delayed_work_sync(&ailp->xa_work); kmem_free(ailp); } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 35162c238fa3..6ebd322bd37c 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -65,16 +65,22 @@ struct xfs_ail_cursor { struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; - uint xa_gen; - struct task_struct *xa_task; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; + struct delayed_work xa_work; + xfs_lsn_t xa_last_pushed_lsn; + unsigned long xa_flags; }; +#define XFS_AIL_PUSHING_BIT 0 + /* * From xfs_trans_ail.c */ + +extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ + void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); @@ -112,11 +118,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); -void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); -int xfsaild_start(struct xfs_ail *); -void xfsaild_stop(struct xfs_ail *); - #if BITS_PER_LONG != 64 static inline void xfs_trans_ail_copy_lsn( -- cgit From cd4a3c503c185f5f0a20f04f90da0a6966dd03bd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: clean up code layout in xfs_trans_ail.c This patch rearranges the location of functions in xfs_trans_ail.c to remove the need for forward declarations of those functions in preparation for adding new functions without the need for forward declarations. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/xfs_trans_ail.c | 254 +++++++++++++++++++++++-------------------------- 1 file changed, 118 insertions(+), 136 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index cb3aeac929bc..8012bfbc6dc0 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -30,41 +30,100 @@ struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ -STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); -STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); -STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); - #ifdef DEBUG -STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); -#else +/* + * Check that the list is sorted as it should be. + */ +STATIC void +xfs_ail_check( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_log_item_t *prev_lip; + + if (list_empty(&ailp->xa_ail)) + return; + + /* + * Check the next and previous entries are valid. + */ + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + + prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); + + +#ifdef XFS_TRANS_DEBUG + /* + * Walk the list checking lsn ordering, and that every entry has the + * XFS_LI_IN_AIL flag set. This is really expensive, so only do it + * when specifically debugging the transaction subsystem. + */ + prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = lip; + } +#endif /* XFS_TRANS_DEBUG */ +} +#else /* !DEBUG */ #define xfs_ail_check(a,l) #endif /* DEBUG */ +/* + * Return a pointer to the first item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_min( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); +} + +/* + * Return a pointer to the item which follows the given item in the AIL. If + * the given item is the last item in the list, then return NULL. + */ +static xfs_log_item_t * +xfs_ail_next( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + if (lip->li_ail.next == &ailp->xa_ail) + return NULL; + + return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); +} /* - * This is called by the log manager code to determine the LSN - * of the tail of the log. This is exactly the LSN of the first - * item in the AIL. If the AIL is empty, then this function - * returns 0. + * This is called by the log manager code to determine the LSN of the tail of + * the log. This is exactly the LSN of the first item in the AIL. If the AIL + * is empty, then this function returns 0. * - * We need the AIL lock in order to get a coherent read of the - * lsn of the last item in the AIL. + * We need the AIL lock in order to get a coherent read of the lsn of the last + * item in the AIL. */ xfs_lsn_t xfs_trans_ail_tail( struct xfs_ail *ailp) { - xfs_lsn_t lsn; + xfs_lsn_t lsn = 0; xfs_log_item_t *lip; spin_lock(&ailp->xa_lock); lip = xfs_ail_min(ailp); - if (lip == NULL) { - lsn = (xfs_lsn_t)0; - } else { + if (lip) lsn = lip->li_lsn; - } spin_unlock(&ailp->xa_lock); return lsn; @@ -207,6 +266,47 @@ out: return lip; } +/* + * splice the log item list into the AIL at the given LSN. + */ +static void +xfs_ail_splice( + struct xfs_ail *ailp, + struct list_head *list, + xfs_lsn_t lsn) +{ + xfs_log_item_t *next_lip; + + /* If the list is empty, just insert the item. */ + if (list_empty(&ailp->xa_ail)) { + list_splice(list, &ailp->xa_ail); + return; + } + + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + break; + } + + ASSERT(&next_lip->li_ail == &ailp->xa_ail || + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); + + list_splice_init(list, &next_lip->li_ail); +} + +/* + * Delete the given item from the AIL. Return a pointer to the item. + */ +static void +xfs_ail_delete( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_ail_check(ailp, lip); + list_del(&lip->li_ail); + xfs_trans_ail_cursor_clear(ailp, lip); +} + /* * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself * to run at a later time if there is more work to do to complete the push. @@ -657,121 +757,3 @@ xfs_trans_ail_destroy( cancel_delayed_work_sync(&ailp->xa_work); kmem_free(ailp); } - -/* - * splice the log item list into the AIL at the given LSN. - */ -STATIC void -xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) -{ - xfs_log_item_t *next_lip; - - /* - * If the list is empty, just insert the item. - */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; - } - - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; - } - - ASSERT((&next_lip->li_ail == &ailp->xa_ail) || - (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); - - list_splice_init(list, &next_lip->li_ail); - return; -} - -/* - * Delete the given item from the AIL. Return a pointer to the item. - */ -STATIC void -xfs_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_ail_check(ailp, lip); - list_del(&lip->li_ail); - xfs_trans_ail_cursor_clear(ailp, lip); -} - -/* - * Return a pointer to the first item in the AIL. - * If the AIL is empty, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_min( - struct xfs_ail *ailp) -{ - if (list_empty(&ailp->xa_ail)) - return NULL; - - return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); -} - -/* - * Return a pointer to the item which follows - * the given item in the AIL. If the given item - * is the last item in the list, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_next( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - if (lip->li_ail.next == &ailp->xa_ail) - return NULL; - - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); -} - -#ifdef DEBUG -/* - * Check that the list is sorted as it should be. - */ -STATIC void -xfs_ail_check( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_log_item_t *prev_lip; - - if (list_empty(&ailp->xa_ail)) - return; - - /* - * Check the next and previous entries are valid. - */ - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - - prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); - - -#ifdef XFS_TRANS_DEBUG - /* - * Walk the list checking lsn ordering, and that every entry has the - * XFS_LI_IN_AIL flag set. This is really expensive, so only do it - * when specifically debugging the transaction subsystem. - */ - prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = lip; - } -#endif /* XFS_TRANS_DEBUG */ -} -#endif /* DEBUG */ -- cgit From fd074841cfe01b006465fb9388091012585e8dfb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: push the AIL from memory reclaim and periodic sync When we are short on memory, we want to expedite the cleaning of dirty objects. Hence when we run short on memory, we need to kick the AIL flushing into action to clean as many dirty objects as quickly as possible. To implement this, sample the lsn of the log item at the head of the AIL and use that as the push target for the AIL flush. Further, we keep items in the AIL that are dirty that are not tracked any other way, so we can get objects sitting in the AIL that don't get written back until the AIL is pushed. Hence to get the filesystem to the idle state, we might need to push the AIL to flush out any remaining dirty objects sitting in the AIL. This requires the same push mechanism as the reclaim push. This patch also renames xfs_trans_ail_tail() to xfs_ail_min_lsn() to match the new xfs_ail_max_lsn() function introduced in this patch. Similarly for xfs_trans_ail_push -> xfs_ail_push. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 7 ++++++- fs/xfs/xfs_log.c | 6 +++--- fs/xfs/xfs_trans_ail.c | 50 +++++++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_trans_priv.h | 7 ++++--- 4 files changed, 61 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index debe2822c930..9ad956052b69 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -22,6 +22,7 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -462,6 +463,9 @@ xfs_sync_worker( else xfs_log_force(mp, 0); error = xfs_qm_sync(mp, SYNC_TRYLOCK); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); } /* queue us up again */ @@ -1027,8 +1031,9 @@ xfs_reclaim_inode_shrink( mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { - /* kick background reclaimer */ + /* kick background reclaimer and push the AIL */ xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); if (!(gfp_mask & __GFP_FS)) return -1; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 25efa9b8a602..24643169e632 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_trans_ail_tail(log->l_ailp) && + if (!xfs_ail_min_lsn(log->l_ailp) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; @@ -801,7 +801,7 @@ xlog_assign_tail_lsn( xfs_lsn_t tail_lsn; struct log *log = mp->m_log; - tail_lsn = xfs_trans_ail_tail(mp->m_ail); + tail_lsn = xfs_ail_min_lsn(mp->m_ail); if (!tail_lsn) tail_lsn = atomic64_read(&log->l_last_sync_lsn); @@ -1239,7 +1239,7 @@ xlog_grant_push_ail( * the filesystem is shutting down. */ if (!XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_ail_push(log->l_ailp, threshold_lsn); + xfs_ail_push(log->l_ailp, threshold_lsn); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 8012bfbc6dc0..acdb92f14d51 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -90,6 +90,20 @@ xfs_ail_min( return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); } + /* + * Return a pointer to the last item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_max( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); +} + /* * Return a pointer to the item which follows the given item in the AIL. If * the given item is the last item in the list, then return NULL. @@ -114,7 +128,7 @@ xfs_ail_next( * item in the AIL. */ xfs_lsn_t -xfs_trans_ail_tail( +xfs_ail_min_lsn( struct xfs_ail *ailp) { xfs_lsn_t lsn = 0; @@ -129,6 +143,25 @@ xfs_trans_ail_tail( return lsn; } +/* + * Return the maximum lsn held in the AIL, or zero if the AIL is empty. + */ +static xfs_lsn_t +xfs_ail_max_lsn( + struct xfs_ail *ailp) +{ + xfs_lsn_t lsn = 0; + xfs_log_item_t *lip; + + spin_lock(&ailp->xa_lock); + lip = xfs_ail_max(ailp); + if (lip) + lsn = lip->li_lsn; + spin_unlock(&ailp->xa_lock); + + return lsn; +} + /* * AIL traversal cursor initialisation. * @@ -504,7 +537,7 @@ xfs_ail_worker( * any of the objects, so the lock is not needed. */ void -xfs_trans_ail_push( +xfs_ail_push( struct xfs_ail *ailp, xfs_lsn_t threshold_lsn) { @@ -525,6 +558,19 @@ xfs_trans_ail_push( queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); } +/* + * Push out all items in the AIL immediately + */ +void +xfs_ail_push_all( + struct xfs_ail *ailp) +{ + xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); + + if (threshold_lsn) + xfs_ail_push(ailp, threshold_lsn); +} + /* * This is to be called when an item is unlocked that may have * been in the AIL. It will wake up the first member of the AIL diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6ebd322bd37c..6b164e9e9a1f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -104,12 +104,13 @@ xfs_trans_ail_delete( xfs_trans_ail_delete_bulk(ailp, &lip, 1); } -void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push_all(struct xfs_ail *); +xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); + void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); - struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -- cgit From be65b18a10e62321c5ba09a1dc0f70babeb0eba1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: catch bad block numbers freeing extents. A fuzzed filesystem crashed a kernel when freeing an extent with a block number beyond the end of the filesystem. Convert all the debug asserts in xfs_free_extent() to active checks so that we catch bad extents and return that the filesytsem is corrupted rather than crashing. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/xfs_alloc.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 4bc3c649aee4..27d64d752eab 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2395,17 +2395,33 @@ xfs_free_extent( memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; + + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. + */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - ASSERT(args.agno < args.mp->m_sb.sb_agcount); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; + args.pag = xfs_perag_get(args.mp, args.agno); - if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) + ASSERT(args.pag); + + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) goto error0; -#ifdef DEBUG - ASSERT(args.agbp != NULL); - ASSERT((args.agbno + len) <= - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); -#endif + + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; + } + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); -- cgit From da8a1a4a4dfc1ead12c343b992fc8300a22d33d0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Apr 2011 12:45:07 +1000 Subject: xfs: convert log tail checking to a warning On the Power platform, the log tail debug checks fire excessively causing the system to panic early in testing. The debug checks are known to be racy, though on x86_64 there is no evidence that they trigger at all. We want to keep the checks active on debug systems to alert us to problems with log space accounting, but we need to reduce the impact of a racy check on testing on the Power platform. As a result, convert the ASSERT conditions to warnings, and allow them to fire only once per filesystem mount. This will prevent false positives from interfering with testing, whilst still providing us with the indication that they may be a problem with log space accounting should that occur. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/xfs_log.c | 32 ++++++++++++++++++++++++-------- fs/xfs/xfs_log_priv.h | 1 + 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 24643169e632..b612ce4520ae 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr( xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); } +/* + * Check to make sure the grant write head didn't just over lap the tail. If + * the cycles are the same, we can't be overlapping. Otherwise, make sure that + * the cycles differ by exactly one and check the byte count. + * + * This check is run unlocked, so can give false positives. Rather than assert + * on failures, use a warn-once flag and a panic tag to allow the admin to + * determine if they want to panic the machine when such an error occurs. For + * debug kernels this will have the same effect as using an assert but, unlinke + * an assert, it can be turned off at runtime. + */ STATIC void xlog_verify_grant_tail( struct log *log) @@ -3414,17 +3425,22 @@ xlog_verify_grant_tail( int tail_cycle, tail_blocks; int cycle, space; - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count. - */ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { - ASSERT(cycle - 1 == tail_cycle); - ASSERT(space <= BBTOB(tail_blocks)); + if (cycle - 1 != tail_cycle && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: cycle - 1 != tail_cycle", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } + + if (space > BBTOB(tail_blocks) && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: space > BBTOB(tail_blocks)", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } } } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 15dbf1f9c2be..bc988d4ef958 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being shutdown */ +#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ #ifdef __KERNEL__ /* -- cgit From ecb697c16c1718ae97bb73ce41a5d5ac2aed29ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Apr 2011 12:55:44 +0000 Subject: xfs: fix variable set but not used warnings GCC 4.6 now warnings about variables set but not used. Fix the trivially fixable warnings of this sort. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 2 -- fs/xfs/quota/xfs_qm.c | 7 ------- fs/xfs/quota/xfs_qm.h | 5 ----- fs/xfs/quota/xfs_qm_syscalls.c | 2 -- fs/xfs/xfs_itable.c | 2 -- 5 files changed, 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index d917146c043b..2eef165f4f39 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -293,7 +293,6 @@ xfs_buf_allocate_memory( size_t nbytes, offset; gfp_t gfp_mask = xb_to_gfp(flags); unsigned short page_count, i; - pgoff_t first; xfs_off_t end; int error; @@ -333,7 +332,6 @@ use_alloc_page: return error; offset = bp->b_offset; - first = bp->b_file_offset >> PAGE_SHIFT; bp->b_flags |= _XBF_PAGES; for (i = 0; i < bp->b_page_count; i++) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 254ee062bd7d..69228aa8605a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -461,12 +461,10 @@ xfs_qm_dqflush_all( struct xfs_quotainfo *q = mp->m_quotainfo; int recl; struct xfs_dquot *dqp; - int niters; int error; if (!q) return 0; - niters = 0; again: mutex_lock(&q->qi_dqlist_lock); list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { @@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs( { xfs_buf_t *bp; int error; - int notcommitted; - int incr; int type; ASSERT(blkcnt > 0); - notcommitted = 0; - incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? - XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; * block in the dquot/xqm code. */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 -/* - * When doing a quotacheck, we log dquot clusters of this many FSBs at most - * in a single transaction. We don't want to ask for too huge a log reservation. - */ -#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 typedef xfs_dqhash_t xfs_dqlist_t; diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c82f06778a27..c79859eaac7a 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -313,14 +313,12 @@ xfs_qm_scall_quotaon( { int error; uint qf; - uint accflags; __int64_t sbflags; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); /* * Switching on quota accounting must be done at mount time. */ - accflags = flags & XFS_ALL_QUOTA_ACCT; flags &= ~(XFS_ALL_QUOTA_ACCT); sbflags = 0; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index dc1882adaf54..751e94fe1f77 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -204,7 +204,6 @@ xfs_bulkstat( xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ - xfs_daddr_t bno; /* inode cluster start daddr */ int chunkidx; /* current index into inode chunk */ int clustidx; /* current index into inode cluster */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ @@ -463,7 +462,6 @@ xfs_bulkstat( mp->m_sb.sb_inopblog); } ino = XFS_AGINO_TO_INO(mp, agno, agino); - bno = XFS_AGB_TO_DADDR(mp, agno, agbno); /* * Skip if this inode is free. */ -- cgit From 957935dcd8e11d6f789b4ed769b376040e15565b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 2 Apr 2011 18:13:40 +0000 Subject: xfs: fix xfs_debug warnings For a CONFIG_XFS_DEBUG=n build gcc complains about statements with no effect in xfs_debug: fs/xfs/quota/xfs_qm_syscalls.c: In function 'xfs_qm_scall_trunc_qfiles': fs/xfs/quota/xfs_qm_syscalls.c:291:3: warning: statement with no effect The reason for that is that the various new xfs message functions have a return value which is never used, and in case of the non-debug build xfs_debug the macro evaluates to a plain 0 which produces the above warnings. This can be fixed by turning xfs_debug into an inline function instead of a macro, but in addition to that I've also changed all the message helpers to return void as we never use their return values. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_message.c | 27 +++++++++------------------ fs/xfs/linux-2.6/xfs_message.h | 24 +++++++++++++----------- 2 files changed, 22 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c index 508e06fd7d1e..3ca795609113 100644 --- a/fs/xfs/linux-2.6/xfs_message.c +++ b/fs/xfs/linux-2.6/xfs_message.c @@ -28,53 +28,47 @@ /* * XFS logging functions */ -static int +static void __xfs_printk( const char *level, const struct xfs_mount *mp, struct va_format *vaf) { if (mp && mp->m_fsname) - return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); - return printk("%sXFS: %pV\n", level, vaf); + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + printk("%sXFS: %pV\n", level, vaf); } -int xfs_printk( +void xfs_printk( const char *level, const struct xfs_mount *mp, const char *fmt, ...) { struct va_format vaf; va_list args; - int r; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - r = __xfs_printk(level, mp, &vaf); + __xfs_printk(level, mp, &vaf); va_end(args); - - return r; } #define define_xfs_printk_level(func, kern_level) \ -int func(const struct xfs_mount *mp, const char *fmt, ...) \ +void func(const struct xfs_mount *mp, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ - int r; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ - r = __xfs_printk(kern_level, mp, &vaf); \ + __xfs_printk(kern_level, mp, &vaf); \ va_end(args); \ - \ - return r; \ } \ define_xfs_printk_level(xfs_emerg, KERN_EMERG); @@ -88,7 +82,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO); define_xfs_printk_level(xfs_debug, KERN_DEBUG); #endif -int +void xfs_alert_tag( const struct xfs_mount *mp, int panic_tag, @@ -97,7 +91,6 @@ xfs_alert_tag( struct va_format vaf; va_list args; int do_panic = 0; - int r; if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { xfs_printk(KERN_ALERT, mp, @@ -110,12 +103,10 @@ xfs_alert_tag( vaf.fmt = fmt; vaf.va = &args; - r = __xfs_printk(KERN_ALERT, mp, &vaf); + __xfs_printk(KERN_ALERT, mp, &vaf); va_end(args); BUG_ON(do_panic); - - return r; } void diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h index e77ffa16745b..f1b3fc1b6c4e 100644 --- a/fs/xfs/linux-2.6/xfs_message.h +++ b/fs/xfs/linux-2.6/xfs_message.h @@ -3,32 +3,34 @@ struct xfs_mount; -extern int xfs_printk(const char *level, const struct xfs_mount *mp, +extern void xfs_printk(const char *level, const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_alert_tag(const struct xfs_mount *mp, int tag, +extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); -extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); #ifdef DEBUG -extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); #else -#define xfs_debug(mp, fmt, ...) (0) +static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +{ +} #endif extern void assfail(char *expr, char *f, int l); -- cgit From a1b7ea5d58c53c13f082110e535d98bc4e8e5cfe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Mar 2011 11:05:09 +0000 Subject: xfs: use proper interfaces for on-stack plugging Add proper blk_start_plug/blk_finish_plug pairs for the two places where we issue buffer I/O, and remove the blk_flush_plug in xfs_buf_lock and xfs_buf_iowait, given that context switches already flush the per-process plugging lists. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 2eef165f4f39..478ca1547ee5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -915,8 +915,6 @@ xfs_buf_lock( if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - if (atomic_read(&bp->b_io_remaining)) - blk_flush_plug(current); down(&bp->b_sema); XB_SET_OWNER(bp); @@ -1305,8 +1303,6 @@ xfs_buf_iowait( { trace_xfs_buf_iowait(bp, _RET_IP_); - if (atomic_read(&bp->b_io_remaining)) - blk_flush_plug(current); wait_for_completion(&bp->b_iowait); trace_xfs_buf_iowait_done(bp, _RET_IP_); @@ -1743,8 +1739,8 @@ xfsbufd( do { long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); - int count = 0; struct list_head tmp; + struct blk_plug plug; if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); @@ -1760,16 +1756,15 @@ xfsbufd( xfs_buf_delwri_split(target, &tmp, age); list_sort(NULL, &tmp, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp)) { struct xfs_buf *bp; bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_bdstrat_cb(bp); - count++; } - if (count) - blk_flush_plug(current); - + blk_finish_plug(&plug); } while (!kthread_should_stop()); return 0; @@ -1789,6 +1784,7 @@ xfs_flush_buftarg( int pincount = 0; LIST_HEAD(tmp_list); LIST_HEAD(wait_list); + struct blk_plug plug; xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); @@ -1803,6 +1799,8 @@ xfs_flush_buftarg( * we do that after issuing all the IO. */ list_sort(NULL, &tmp_list, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp_list)) { bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); @@ -1813,10 +1811,10 @@ xfs_flush_buftarg( } xfs_bdstrat_cb(bp); } + blk_finish_plug(&plug); if (wait) { - /* Expedite and wait for IO to complete. */ - blk_flush_plug(current); + /* Wait for IO to complete. */ while (!list_empty(&wait_list)) { bp = list_first_entry(&wait_list, struct xfs_buf, b_list); -- cgit From 23fcf2ec93fb8573a653408316af599939ff9a8e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 28 Mar 2011 15:15:09 +0800 Subject: nfsd4: fix oops on lock failure Lock stateid's can have access_bmap 0 if they were only partially initialized (due to a failed lock request); handle that case in free_generic_stateid. ------------[ cut here ]------------ kernel BUG at fs/nfsd/nfs4state.c:380! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/kernel/mm/ksm/run Modules linked in: nfs fscache md4 nls_utf8 cifs ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat bridge stp llc nfsd lockd nfs_acl auth_rpcgss sunrpc ipv6 ppdev parport_pc parport pcnet32 mii pcspkr microcode i2c_piix4 BusLogic floppy [last unloaded: mperf] Pid: 1468, comm: nfsd Not tainted 2.6.38+ #120 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform EIP: 0060:[] EFLAGS: 00010297 CPU: 0 EIP is at nfs4_access_to_omode+0x1c/0x29 [nfsd] EAX: ffffffff EBX: dd758120 ECX: 00000000 EDX: 00000004 ESI: dd758120 EDI: ddfe657c EBP: dd54dde0 ESP: dd54dde0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process nfsd (pid: 1468, ti=dd54c000 task=ddc92580 task.ti=dd54c000) Stack: dd54ddf0 e24f19ca 00000000 ddfe6560 dd54de08 e24f1a5d dd758130 deee3a20 ddfe6560 31270000 dd54df1c e24f52fd 0000000f dd758090 e2505dd0 0be304cf dbb51d68 0000000e ddfe657c ddcd8020 dd758130 dd758128 dd7580d8 dd54de68 Call Trace: [] free_generic_stateid+0x1c/0x3e [nfsd] [] release_lockowner+0x71/0x8a [nfsd] [] nfsd4_lock+0x617/0x66c [nfsd] [] ? nfsd_setuser+0x199/0x1bb [nfsd] [] ? nfsd_setuser_and_check_port+0x65/0x81 [nfsd] [] ? _cond_resched+0x8/0x1c [] ? slab_pre_alloc_hook.clone.33+0x23/0x27 [] ? kmem_cache_alloc+0x1a/0xd2 [] ? __call_rcu+0xd7/0xdd [] ? fh_verify+0x401/0x452 [nfsd] [] ? nfsd4_encode_operation+0x52/0x117 [nfsd] [] ? nfsd4_putfh+0x33/0x3b [nfsd] [] ? nfsd4_delegreturn+0xd4/0xd4 [nfsd] [] nfsd4_proc_compound+0x1ea/0x33e [nfsd] [] nfsd_dispatch+0xd1/0x1a5 [nfsd] [] svc_process_common+0x282/0x46f [sunrpc] [] svc_process+0xdc/0xfa [sunrpc] [] nfsd+0xd6/0x115 [nfsd] [] ? nfsd_shutdown+0x24/0x24 [nfsd] [] kthread+0x62/0x67 [] ? kthread_worker_fn+0x114/0x114 [] kernel_thread_helper+0x6/0x10 Code: eb 05 b8 00 00 27 4f 8d 65 f4 5b 5e 5f 5d c3 83 e0 03 55 83 f8 02 89 e5 74 17 83 f8 03 74 05 48 75 09 eb 09 b8 02 00 00 00 eb 0b <0f> 0b 31 c0 eb 05 b8 01 00 00 00 5d c3 55 89 e5 57 56 89 d6 8d EIP: [] nfs4_access_to_omode+0x1c/0x29 [nfsd] SS:ESP 0068:dd54dde0 ---[ end trace 2b0bf6c6557cb284 ]--- The trace route is: -> nfsd4_lock() -> if (lock->lk_is_new) { -> alloc_init_lock_stateid() 3739: stp->st_access_bmap = 0; ->if (status && lock->lk_is_new && lock_sop) -> release_lockowner() -> free_generic_stateid() -> nfs4_access_bmap_to_omode() -> nfs4_access_to_omode() 380: BUG(); ***** This problem was introduced by 0997b173609b9229ece28941c118a2a9b278796e. Reported-by: Mi Jinlong Tested-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fbde6f79922e..8e3c407df370 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -397,10 +397,13 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp) static void free_generic_stateid(struct nfs4_stateid *stp) { - int oflag = nfs4_access_bmap_to_omode(stp); + int oflag; - nfs4_file_put_access(stp->st_file, oflag); - put_nfs4_file(stp->st_file); + if (stp->st_access_bmap) { + oflag = nfs4_access_bmap_to_omode(stp); + nfs4_file_put_access(stp->st_file, oflag); + put_nfs4_file(stp->st_file); + } kmem_cache_free(stateid_slab, stp); } -- cgit From 0893ed458b4b1d7c7667ca7ffb8b11febe7e7e6c Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sun, 10 Apr 2011 22:05:31 -0400 Subject: ext4: sync the directory inode in ext4_sync_parent() ext4 has taken the stance that, in the absence of a journal, when an fsync/fdatasync of an inode is done, the parent directory should be sync'ed if this inode entry is new. ext4_sync_parent(), which implements this, does indeed sync the dirent pages for parent directories, but it does not sync the directory *inode*. This patch fixes this. Also now return error status from ext4_sync_parent(). I tested this using a power fail test, which panics a machine running a file server getting requests from a client. Without this patch, on about every other test run, the server is missing many, many files that had been synced. With this patch, on > 6 runs, I see zero files being lost. Google-Bug-Id: 4179519 Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 7f74019d6d77..b1f9b5fc93eb 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode) * the parent directory's parent as well, and so on recursively, if * they are also freshly created. */ -static void ext4_sync_parent(struct inode *inode) +static int ext4_sync_parent(struct inode *inode) { + struct writeback_control wbc; struct dentry *dentry = NULL; + int ret = 0; while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); @@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode) if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) break; inode = dentry->d_parent->d_inode; - sync_mapping_buffers(inode->i_mapping); + ret = sync_mapping_buffers(inode->i_mapping); + if (ret) + break; + memset(&wbc, 0, sizeof(wbc)); + wbc.sync_mode = WB_SYNC_ALL; + wbc.nr_to_write = 0; /* only write out the inode */ + ret = sync_inode(inode, &wbc); + if (ret) + break; } + return ret; } /* @@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync) if (!journal) { ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) - ext4_sync_parent(inode); + ret = ext4_sync_parent(inode); goto out; } -- cgit From be4f27d324e8ddd57cc0d4d604fe85ee0425cba9 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 10 Apr 2011 22:06:07 -0400 Subject: ext4: allow an active handle to be started when freezing ext4_journal_start_sb() should not prevent an active handle from being started due to s_frozen. Otherwise, deadlock is easy to happen, below is a situation. ================================================ freeze | truncate ================================================ | ext4_ext_truncate() freeze_super() | starts a handle sets s_frozen | | ext4_ext_truncate() | holds i_data_sem ext4_freeze() | waits for updates | | ext4_free_blocks() | calls dquot_free_block() | | dquot_free_blocks() | calls ext4_dirty_inode() | | ext4_dirty_inode() | trys to start an active | handle | | block due to s_frozen ================================================ Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Reported-by: Amir Goldstein Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger --- fs/ext4/super.c | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 551cb8e2110c..7b636ceb878a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle) * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if * appropriate. + * + * To avoid j_barrier hold in userspace when a user calls freeze(), + * ext4 prevents a new handle from being started by s_frozen, which + * is in an upper layer. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; + handle_t *handle; if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); - vfs_check_frozen(sb, SB_FREEZE_TRANS); - /* Special case here: if the journal has aborted behind our - * backs (eg. EIO in the commit thread), then we still need to - * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; - if (journal) { - if (is_journal_aborted(journal)) { - ext4_abort(sb, "Detected aborted journal"); - return ERR_PTR(-EROFS); - } - return jbd2_journal_start(journal, nblocks); + handle = ext4_journal_current_handle(); + + /* + * If a handle has been started, it should be allowed to + * finish, otherwise deadlock could happen between freeze + * and others(e.g. truncate) due to the restart of the + * journal handle if the filesystem is forzen and active + * handles are not stopped. + */ + if (!handle) + vfs_check_frozen(sb, SB_FREEZE_TRANS); + + if (!journal) + return ext4_get_nojournal(); + /* + * Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. + */ + if (is_journal_aborted(journal)) { + ext4_abort(sb, "Detected aborted journal"); + return ERR_PTR(-EROFS); } - return ext4_get_nojournal(); + return jbd2_journal_start(journal, nblocks); } /* @@ -4146,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) /* * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. + * + * Note that only this function cannot bring a filesystem to be in a clean + * state independently, because ext4 prevents a new handle from being started + * by @sb->s_frozen, which stays in an upper layer. It thus needs help from + * the upper layer. */ static int ext4_freeze(struct super_block *sb) { -- cgit From f80da1e70f1ffec3825aa0a1d0801f4896e002b6 Mon Sep 17 00:00:00 2001 From: Kazuya Mio Date: Sun, 10 Apr 2011 22:06:36 -0400 Subject: ext4: Allow indirect-block file to grow the file size to max file size We can create 4402345721856 byte file with indirect block mapping. However, if we grow an indirect-block file to the size with ftruncate(), we can see an ext4 warning. The following patch fixes this problem. How to reproduce: # dd if=/dev/zero of=/mnt/mp1/hoge bs=1 count=0 seek=4402345721856 0+0 records in 0+0 records out 0 bytes (0 B) copied, 0.000221428 s, 0.0 kB/s # tail -n 1 /var/log/messages Nov 25 15:10:27 test kernel: EXT4-fs warning (device sda8): ext4_block_to_path:345: block 1074791436 > max in inode 12 Signed-off-by: Kazuya Mio Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7d11e02ad01d..5560f78690ac 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4429,8 +4429,8 @@ void ext4_truncate(struct inode *inode) Indirect chain[4]; Indirect *partial; __le32 nr = 0; - int n; - ext4_lblk_t last_block; + int n = 0; + ext4_lblk_t last_block, max_block; unsigned blocksize = inode->i_sb->s_blocksize; trace_ext4_truncate_enter(inode); @@ -4455,14 +4455,18 @@ void ext4_truncate(struct inode *inode) last_block = (inode->i_size + blocksize-1) >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); if (inode->i_size & (blocksize - 1)) if (ext4_block_truncate_page(handle, mapping, inode->i_size)) goto out_stop; - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } /* * OK. This truncate is going to happen. We add the inode to the @@ -4493,7 +4497,13 @@ void ext4_truncate(struct inode *inode) */ ei->i_disksize = inode->i_size; - if (n == 1) { /* direct blocks */ + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. + */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ ext4_free_data(handle, inode, NULL, i_data+offsets[0], i_data + EXT4_NDIR_BLOCKS); goto do_indirects; @@ -4553,6 +4563,7 @@ do_indirects: ; } +out_unlock: up_write(&ei->i_data_sem); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); -- cgit From c8205636029fc869278c55b7336053b3e7ae3ef4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Apr 2011 22:30:07 -0400 Subject: ext4: fix data corruption regression by reverting commit 6de9843dab3f Revert commit 6de9843dab3f2a1d4d66d80aa9e5782f80977d20, since it caused a data corruption regression with BitTorrent downloads. Thanks to Damien for discovering and bisecting to find the problem commit. https://bugzilla.kernel.org/show_bug.cgi?id=32972 Reported-by: Damien Grassart Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5560f78690ac..9c8cf811d93a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * for partial write. */ set_buffer_new(bh); + set_buffer_mapped(bh); } return 0; } -- cgit From 39411f81eec7dc01677b14dda97684c0ce23ac1b Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Mon, 11 Apr 2011 12:06:12 -0700 Subject: xfs_destroy_workqueues() should not be tagged with__exit ia64 throws away .exit sections for the built-in CONFIG case, so routines that are used in other circumstances should not be tagged as __exit. Signed-off-by: Tony Luck Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder Signed-off-by: Linus Torvalds --- fs/xfs/linux-2.6/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 67d5b2cddb98..b38e58d02299 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1741,7 +1741,7 @@ out: return -ENOMEM; } -STATIC void __exit +STATIC void xfs_destroy_workqueues(void) { destroy_workqueue(xfs_ail_wq); -- cgit