diff options
Diffstat (limited to 'Documentation/filesystems/locking.rst')
-rw-r--r-- | Documentation/filesystems/locking.rst | 69 |
1 files changed, 12 insertions, 57 deletions
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index e664061ed55d..2e567e341c3b 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -17,7 +17,8 @@ dentry_operations prototypes:: - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -30,6 +31,8 @@ prototypes:: struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); locking rules: @@ -49,6 +52,8 @@ d_dname: no no no no d_automount: no no yes no d_manage: no no yes (ref-walk) maybe d_real no no yes no +d_unalias_trylock yes no no no +d_unalias_unlock yes no no no ================== =========== ======== ============== ======== inode_operations @@ -61,7 +66,7 @@ prototypes:: int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,const char *); - int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t); + struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *, @@ -244,17 +249,16 @@ address_space_operations ======================== prototypes:: - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); int (*writepages)(struct address_space *, struct writeback_control *); bool (*dirty_folio)(struct address_space *, struct folio *folio); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + struct folio *folio, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t start, size_t len); bool (*release_folio)(struct folio *, gfp_t); @@ -275,12 +279,11 @@ locking rules: ====================== ======================== ========= =============== ops folio locked i_rwsem invalidate_lock ====================== ======================== ========= =============== -writepage: yes, unlocks (see below) read_folio: yes, unlocks shared writepages: dirty_folio: maybe readahead: yes, unlocks shared -write_begin: locks the page exclusive +write_begin: locks the folio exclusive write_end: yes, unlocks exclusive bmap: invalidate_folio: yes exclusive @@ -304,54 +307,6 @@ completion. ->readahead() unlocks the folios that I/O is attempted on like ->read_folio(). -->writepage() is used for two purposes: for "memory cleansing" and for -"sync". These are quite different operations and the behaviour may differ -depending upon the mode. - -If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then -it *must* start I/O against the page, even if that would involve -blocking on in-progress I/O. - -If writepage is called for memory cleansing (sync_mode == -WBC_SYNC_NONE) then its role is to get as much writeout underway as -possible. So writepage should try to avoid blocking against -currently-in-progress I/O. - -If the filesystem is not called for "sync" and it determines that it -would need to block against in-progress I/O to be able to start new I/O -against the page the filesystem should redirty the page with -redirty_page_for_writepage(), then unlock the page and return zero. -This may also be done to avoid internal deadlocks, but rarely. - -If the filesystem is called for sync then it must wait on any -in-progress I/O and then start new I/O. - -The filesystem should unlock the page synchronously, before returning to the -caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE -value. WRITEPAGE_ACTIVATE means that page cannot really be written out -currently, and VM should stop calling ->writepage() on this page for some -time. VM does this by moving page to the head of the active list, hence the -name. - -Unless the filesystem is going to redirty_page_for_writepage(), unlock the page -and return zero, writepage *must* run set_page_writeback() against the page, -followed by unlocking it. Once set_page_writeback() has been run against the -page, write I/O can be submitted and the write I/O completion handler must run -end_page_writeback() once the I/O is complete. If no I/O is submitted, the -filesystem must run end_page_writeback() against the page before returning from -writepage. - -That is: after 2.5.12, pages which are under writeout are *not* locked. Note, -if the filesystem needs the page to be locked during writeout, that is ok, too, -the page is allowed to be unlocked at any point in time between the calls to -set_page_writeback() and end_page_writeback(). - -Note, failure to run either redirty_page_for_writepage() or the combination of -set_page_writeback()/end_page_writeback() on a page submitted to writepage -will leave the page itself marked clean but it will be tagged as dirty in the -radix tree. This incoherency can lead to all sorts of hard-to-debug problems -in the filesystem like having dirty inodes at umount and losing written data. - ->writepages() is used for periodic writeback and for syscall-initiated sync operations. The address_space should start I/O against at least ``*nr_to_write`` pages. ``*nr_to_write`` must be decremented for each page @@ -359,8 +314,8 @@ which is written. The address_space implementation may write more (or less) pages than ``*nr_to_write`` asks for, but it should try to be reasonably close. If nr_to_write is NULL, all dirty pages must be written. -writepages should _only_ write pages which are present on -mapping->io_pages. +writepages should _only_ write pages which are present in +mapping->i_pages. ->dirty_folio() is called from various places in the kernel when the target folio is marked as needing writeback. The folio cannot be |