diff options
Diffstat (limited to 'fs')
37 files changed, 640 insertions, 284 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 44b6cdd36dc1..c654a3642897 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -59,7 +59,7 @@ endif # BLOCK config FS_DAX bool "File system based Direct Access (DAX) support" depends on MMU - depends on ZONE_DEVICE || FS_DAX_LIMITED + depends on ZONE_DEVICE select FS_IOMAP select DAX help @@ -95,13 +95,6 @@ config FS_DAX_PMD depends on ZONE_DEVICE depends on TRANSPARENT_HUGEPAGE -# Selected by DAX drivers that do not expect filesystem DAX to support -# get_user_pages() of DAX mappings. I.e. "limited" indicates no support -# for fork() of processes with MAP_SHARED mappings or support for -# direct-I/O to a DAX mapping. -config FS_DAX_LIMITED - bool - # Posix ACL utility routines # # Note: Posix ACLs can be implemented without these helpers. Never use @@ -256,7 +249,7 @@ config ARCH_SUPPORTS_HUGETLBFS menuconfig HUGETLBFS bool "HugeTLB file system support" - depends on X86 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN + depends on ARCH_SUPPORTS_HUGETLBFS depends on (SYSFS || SYSCTL) select MEMFD_CREATE select PADATA if SMP diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bdde6ac5a60d..c02f100f8552 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2529,19 +2529,19 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) return generic_file_llseek(file, offset, whence); } -static inline void ceph_zero_partial_page( - struct inode *inode, loff_t offset, unsigned size) +static inline void ceph_zero_partial_page(struct inode *inode, + loff_t offset, size_t size) { - struct page *page; - pgoff_t index = offset >> PAGE_SHIFT; + struct folio *folio; - page = find_lock_page(inode->i_mapping, index); - if (page) { - wait_on_page_writeback(page); - zero_user(page, offset & (PAGE_SIZE - 1), size); - unlock_page(page); - put_page(page); - } + folio = filemap_lock_folio(inode->i_mapping, offset >> PAGE_SHIFT); + if (IS_ERR(folio)) + return; + + folio_wait_writeback(folio); + folio_zero_range(folio, offset_in_folio(folio, offset), size); + folio_unlock(folio); + folio_put(folio); } static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset, diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index b84d1747a020..b002e9b734f9 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -17,7 +17,6 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> -#include <linux/pfn_t.h> #include <linux/ramfs.h> #include <linux/init.h> #include <linux/string.h> @@ -412,8 +411,8 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) for (i = 0; i < pages && !ret; i++) { vm_fault_t vmf; unsigned long off = i * PAGE_SIZE; - pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV); - vmf = vmf_insert_mixed(vma, vma->vm_start + off, pfn); + vmf = vmf_insert_mixed(vma, vma->vm_start + off, + address + off); if (vmf & VM_FAULT_ERROR) ret = vm_fault_to_errno(vmf, 0); } @@ -20,7 +20,6 @@ #include <linux/sched/signal.h> #include <linux/uio.h> #include <linux/vmstat.h> -#include <linux/pfn_t.h> #include <linux/sizes.h> #include <linux/mmu_notifier.h> #include <linux/iomap.h> @@ -76,9 +75,9 @@ static struct folio *dax_to_folio(void *entry) return page_folio(pfn_to_page(dax_to_pfn(entry))); } -static void *dax_make_entry(pfn_t pfn, unsigned long flags) +static void *dax_make_entry(unsigned long pfn, unsigned long flags) { - return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT)); + return xa_mk_value(flags | (pfn << DAX_SHIFT)); } static bool dax_is_locked(void *entry) @@ -449,9 +448,6 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) return; - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) - return; - index = linear_page_index(vma, address & ~(size - 1)); if (shared && (folio->mapping || dax_folio_is_shared(folio))) { if (folio->mapping) @@ -474,9 +470,6 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping, { struct folio *folio = dax_to_folio(entry); - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) - return; - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) return; @@ -719,7 +712,7 @@ retry: if (order > 0) flags |= DAX_PMD; - entry = dax_make_entry(pfn_to_pfn_t(0), flags); + entry = dax_make_entry(0, flags); dax_lock_entry(xas, entry); if (xas_error(xas)) goto out_unlock; @@ -768,12 +761,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t end_idx; XA_STATE(xas, &mapping->i_pages, start_idx); - /* - * In the 'limited' case get_user_pages() for dax is disabled. - */ - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) - return NULL; - if (!dax_mapping(mapping)) return NULL; @@ -1053,7 +1040,7 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter, * appropriate. */ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap_iter *iter, void *entry, pfn_t pfn, + const struct iomap_iter *iter, void *entry, unsigned long pfn, unsigned long flags) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; @@ -1251,7 +1238,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, - size_t size, void **kaddr, pfn_t *pfnp) + size_t size, void **kaddr, unsigned long *pfnp) { pgoff_t pgoff = dax_iomap_pgoff(iomap, pos); int id, rc = 0; @@ -1269,7 +1256,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, rc = -EINVAL; if (PFN_PHYS(length) < size) goto out; - if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1)) + if (*pfnp & (PHYS_PFN(size)-1)) goto out; rc = 0; @@ -1373,12 +1360,12 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf, { struct inode *inode = iter->inode; unsigned long vaddr = vmf->address; - pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); + unsigned long pfn = my_zero_pfn(vaddr); vm_fault_t ret; *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE); - ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false); + ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), false); trace_dax_load_hole(inode, vmf, ret); return ret; } @@ -1395,14 +1382,14 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, struct folio *zero_folio; spinlock_t *ptl; pmd_t pmd_entry; - pfn_t pfn; + unsigned long pfn; zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm); if (unlikely(!zero_folio)) goto fallback; - pfn = page_to_pfn_t(&zero_folio->page); + pfn = page_to_pfn(&zero_folio->page); *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_PMD | DAX_ZERO_PAGE); @@ -1791,7 +1778,8 @@ static vm_fault_t dax_fault_return(int error) * insertion for now and return the pfn so that caller can insert it after the * fsync is done. */ -static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn) +static vm_fault_t dax_fault_synchronous_pfnp(unsigned long *pfnp, + unsigned long pfn) { if (WARN_ON_ONCE(!pfnp)) return VM_FAULT_SIGBUS; @@ -1839,7 +1827,7 @@ static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, * @pmd: distinguish whether it is a pmd fault */ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, - const struct iomap_iter *iter, pfn_t *pfnp, + const struct iomap_iter *iter, unsigned long *pfnp, struct xa_state *xas, void **entry, bool pmd) { const struct iomap *iomap = &iter->iomap; @@ -1850,7 +1838,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, unsigned long entry_flags = pmd ? DAX_PMD : 0; struct folio *folio; int ret, err = 0; - pfn_t pfn; + unsigned long pfn; void *kaddr; if (!pmd && vmf->cow_page) @@ -1887,16 +1875,15 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, folio_ref_inc(folio); if (pmd) - ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)), - write); + ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn), write); else - ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write); + ret = vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), write); folio_put(folio); return ret; } -static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, +static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, unsigned long *pfnp, int *iomap_errp, const struct iomap_ops *ops) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; @@ -1937,7 +1924,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, * the PTE we need to set up. If so just return and the fault will be * retried. */ - if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { + if (pmd_trans_huge(*vmf->pmd)) { ret = VM_FAULT_NOPAGE; goto unlock_entry; } @@ -2008,7 +1995,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, return false; } -static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, +static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp, const struct iomap_ops *ops) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; @@ -2060,8 +2047,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * the PMD we need to set up. If so just return and the fault will be * retried. */ - if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) && - !pmd_devmap(*vmf->pmd)) { + if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd)) { ret = 0; goto unlock_entry; } @@ -2090,7 +2076,7 @@ out: return ret; } #else -static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, +static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp, const struct iomap_ops *ops) { return VM_FAULT_FALLBACK; @@ -2111,7 +2097,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * successfully. */ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, - pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) + unsigned long *pfnp, int *iomap_errp, + const struct iomap_ops *ops) { if (order == 0) return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops); @@ -2131,8 +2118,8 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault); * This function inserts a writeable PTE or PMD entry into the page tables * for an mmaped DAX file. It also marks the page cache entry as dirty. */ -static vm_fault_t -dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) +static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, + unsigned long pfn, unsigned int order) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order); @@ -2154,7 +2141,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) xas_set_mark(&xas, PAGECACHE_TAG_DIRTY); dax_lock_entry(&xas, entry); xas_unlock_irq(&xas); - folio = pfn_folio(pfn_t_to_pfn(pfn)); + folio = pfn_folio(pfn); folio_ref_inc(folio); if (order == 0) ret = vmf_insert_page_mkwrite(vmf, &folio->page, true); @@ -2181,7 +2168,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) * table entry. */ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order, - pfn_t pfn) + unsigned long pfn) { int err; loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; diff --git a/fs/direct-io.c b/fs/direct-io.c index 1694ee9a9382..2267f5ae7f77 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -996,7 +996,7 @@ do_holes: dio_unpin_page(dio, page); goto out; } - zero_user(page, from, 1 << blkbits); + memzero_page(page, from, 1 << blkbits); sdio->block_in_file++; from += 1 << blkbits; dio->result += 1 << blkbits; diff --git a/fs/exec.c b/fs/exec.c index fe895e47f1dd..2a1e5e4042a1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -604,7 +604,7 @@ int setup_arg_pages(struct linux_binprm *bprm, struct mm_struct *mm = current->mm; struct vm_area_struct *vma = bprm->vma; struct vm_area_struct *prev = NULL; - unsigned long vm_flags; + vm_flags_t vm_flags; unsigned long stack_base; unsigned long stack_size; unsigned long stack_expand; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 48908ce0c3ea..93240e35ee36 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -747,7 +747,7 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order) bool write = (vmf->flags & FAULT_FLAG_WRITE) && (vmf->vma->vm_flags & VM_SHARED); struct address_space *mapping = vmf->vma->vm_file->f_mapping; - pfn_t pfn; + unsigned long pfn; if (write) { sb_start_pagefault(sb); diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 0502bf3cdf6a..ac6d4c1064cc 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -10,7 +10,6 @@ #include <linux/dax.h> #include <linux/uio.h> #include <linux/pagemap.h> -#include <linux/pfn_t.h> #include <linux/iomap.h> #include <linux/interval_tree.h> @@ -757,7 +756,7 @@ static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order, vm_fault_t ret; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; - pfn_t pfn; + unsigned long pfn; int error = 0; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 95275a1e2f54..5525a4520b0f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1966,17 +1966,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) struct fuse_file *ff; int err; - /* - * Inode is always written before the last reference is dropped and - * hence this should not be reached from reclaim. - * - * Writing back the inode from reclaim can deadlock if the request - * processing itself needs an allocation. Allocations triggering - * reclaim while serving a request can't be prevented, because it can - * involve any number of unrelated userspace processes. - */ - WARN_ON(wbc->for_reclaim); - ff = __fuse_write_file_get(fi); err = fuse_flush_times(inode, ff); if (ff) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 3fbfb1a2942b..c826e7ca49f5 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -9,7 +9,6 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/group_cpus.h> -#include <linux/pfn_t.h> #include <linux/memremap.h> #include <linux/module.h> #include <linux/virtio.h> @@ -1008,7 +1007,7 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) */ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, - void **kaddr, pfn_t *pfn) + void **kaddr, unsigned long *pfn) { struct virtio_fs *fs = dax_get_private(dax_dev); phys_addr_t offset = PFN_PHYS(pgoff); @@ -1017,7 +1016,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (kaddr) *kaddr = fs->window_kaddr + offset; if (pfn) - *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 0); + *pfn = fs->window_phys_addr + offset; return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9ddd67da0eeb..09d4baef29cf 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -150,10 +150,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (inode->i_flags & S_PRIVATE) vm_flags |= VM_NORESERVE; - if (!hugetlb_reserve_pages(inode, + if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vm_flags)) + vm_flags) < 0) goto out; ret = 0; @@ -179,12 +179,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (len & ~huge_page_mask(h)) return -EINVAL; - if (flags & MAP_FIXED) { - if (addr & ~huge_page_mask(h)) - return -EINVAL; - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - } + if ((flags & MAP_FIXED) && (addr & ~huge_page_mask(h))) + return -EINVAL; if (addr) addr0 = ALIGN(addr, huge_page_size(h)); @@ -1563,9 +1559,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size, inode->i_size = size; clear_nlink(inode); - if (!hugetlb_reserve_pages(inode, 0, + if (hugetlb_reserve_pages(inode, 0, size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) + acctflag) < 0) file = ERR_PTR(-ENOMEM); else file = alloc_file_pseudo(inode, mnt, name, O_RDWR, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 374fc6b34c79..cf1d720b8251 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -720,7 +720,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate || - wbc->for_background || wbc->for_sync || wbc->for_reclaim) { + wbc->for_background || wbc->for_sync) { ioc = nfs_io_completion_alloc(GFP_KERNEL); if (ioc) nfs_io_completion_init(ioc, nfs_io_completion_commit, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5635453cd476..76e800e38c8f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -571,6 +571,8 @@ static void pde_set_flags(struct proc_dir_entry *pde) if (pde->proc_ops->proc_compat_ioctl) pde->flags |= PROC_ENTRY_proc_compat_ioctl; #endif + if (pde->proc_ops->proc_lseek) + pde->flags |= PROC_ENTRY_proc_lseek; } struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3604b616311c..129490151be1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -473,7 +473,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) typeof_member(struct proc_ops, proc_open) open; struct pde_opener *pdeo; - if (!pde->proc_ops->proc_lseek) + if (!pde_has_proc_lseek(pde)) file->f_mode &= ~FMODE_LSEEK; if (pde_is_permanent(pde)) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 520c4742101d..e737401d7383 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -98,6 +98,11 @@ static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde) #endif } +static inline bool pde_has_proc_lseek(const struct proc_dir_entry *pde) +{ + return pde->flags & PROC_ENTRY_proc_lseek; +} + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); @@ -378,6 +383,11 @@ struct proc_maps_private { struct task_struct *task; struct mm_struct *mm; struct vma_iterator iter; + loff_t last_pos; +#ifdef CONFIG_PER_VMA_LOCK + bool mmap_locked; + struct vm_area_struct *locked_vma; +#endif #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index bc2bc60c36cc..a458f1e112fd 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -121,8 +121,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "NFS_Unstable: ", 0); show_val_kb(m, "Bounce: ", 0); - show_val_kb(m, "WritebackTmp: ", - global_node_page_state(NR_WRITEBACK_TEMP)); + show_val_kb(m, "WritebackTmp: ", 0); show_val_kb(m, "CommitLimit: ", vm_commit_limit()); show_val_kb(m, "Committed_AS: ", committed); seq_printf(m, "VmallocTotal: %8lu kB\n", diff --git a/fs/proc/page.c b/fs/proc/page.c index 999af26c7298..ba3568e97fd1 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -43,6 +43,22 @@ static inline unsigned long get_max_dump_pfn(void) #endif } +static u64 get_kpage_count(const struct page *page) +{ + struct page_snapshot ps; + u64 ret; + + snapshot_page(&ps, page); + + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + ret = folio_precise_page_mapcount(&ps.folio_snapshot, + &ps.page_snapshot); + else + ret = folio_average_page_mapcount(&ps.folio_snapshot); + + return ret; +} + static ssize_t kpage_read(struct file *file, char __user *buf, size_t count, loff_t *ppos, enum kpage_operation op) @@ -75,10 +91,7 @@ static ssize_t kpage_read(struct file *file, char __user *buf, info = stable_page_flags(page); break; case KPAGE_COUNT: - if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) - info = folio_precise_page_mapcount(page_folio(page), page); - else - info = folio_average_page_mapcount(page_folio(page)); + info = get_kpage_count(page); break; case KPAGE_CGROUP: info = page_cgroup_ino(page); @@ -134,6 +147,7 @@ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) u64 stable_page_flags(const struct page *page) { const struct folio *folio; + struct page_snapshot ps; unsigned long k; unsigned long mapping; bool is_anon; @@ -145,20 +159,22 @@ u64 stable_page_flags(const struct page *page) */ if (!page) return 1 << KPF_NOPAGE; - folio = page_folio(page); + + snapshot_page(&ps, page); + folio = &ps.folio_snapshot; k = folio->flags; mapping = (unsigned long)folio->mapping; - is_anon = mapping & PAGE_MAPPING_ANON; + is_anon = mapping & FOLIO_MAPPING_ANON; /* * pseudo flags for the well known (anonymous) memory mapped pages */ - if (page_mapped(page)) + if (folio_mapped(folio)) u |= 1 << KPF_MMAP; if (is_anon) { u |= 1 << KPF_ANON; - if (mapping & PAGE_MAPPING_KSM) + if (mapping & FOLIO_MAPPING_KSM) u |= 1 << KPF_KSM; } @@ -166,7 +182,7 @@ u64 stable_page_flags(const struct page *page) * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ - if (page == &folio->page) + if (ps.idx == 0) u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head); else u |= 1 << KPF_COMPOUND_TAIL; @@ -176,25 +192,19 @@ u64 stable_page_flags(const struct page *page) folio_test_large_rmappable(folio)) { /* Note: we indicate any THPs here, not just PMD-sized ones */ u |= 1 << KPF_THP; - } else if (is_huge_zero_folio(folio)) { + } else if (is_huge_zero_pfn(ps.pfn)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; - } else if (is_zero_folio(folio)) { + } else if (is_zero_pfn(ps.pfn)) { u |= 1 << KPF_ZERO_PAGE; } - /* - * Caveats on high order pages: PG_buddy and PG_slab will only be set - * on the head page. - */ - if (PageBuddy(page)) - u |= 1 << KPF_BUDDY; - else if (page_count(page) == 0 && is_free_buddy_page(page)) + if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY) u |= 1 << KPF_BUDDY; - if (PageOffline(page)) + if (folio_test_offline(folio)) u |= 1 << KPF_OFFLINE; - if (PageTable(page)) + if (folio_test_pgtable(folio)) u |= 1 << KPF_PGTABLE; if (folio_test_slab(folio)) u |= 1 << KPF_SLAB; @@ -202,7 +212,7 @@ u64 stable_page_flags(const struct page *page) #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) u |= kpf_copy_bit(k, KPF_IDLE, PG_idle); #else - if (folio_test_idle(folio)) + if (ps.flags & PAGE_SNAPSHOT_PG_IDLE) u |= 1 << KPF_IDLE; #endif @@ -228,7 +238,7 @@ u64 stable_page_flags(const struct page *page) if (u & (1 << KPF_HUGE)) u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); else - u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison); + u |= kpf_copy_bit(ps.page_snapshot.flags, KPF_HWPOISON, PG_hwpoison); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 751479eb128f..3d6d8a9f13fc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -29,6 +29,9 @@ #include <asm/tlbflush.h> #include "internal.h" +#define SENTINEL_VMA_END -1 +#define SENTINEL_VMA_GATE -2 + #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) void task_mem(struct seq_file *m, struct mm_struct *mm) @@ -127,15 +130,134 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif -static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, - loff_t *ppos) +#ifdef CONFIG_PER_VMA_LOCK + +static void unlock_vma(struct proc_maps_private *priv) +{ + if (priv->locked_vma) { + vma_end_read(priv->locked_vma); + priv->locked_vma = NULL; + } +} + +static const struct seq_operations proc_pid_maps_op; + +static inline bool lock_vma_range(struct seq_file *m, + struct proc_maps_private *priv) +{ + /* + * smaps and numa_maps perform page table walk, therefore require + * mmap_lock but maps can be read with locking just the vma and + * walking the vma tree under rcu read protection. + */ + if (m->op != &proc_pid_maps_op) { + if (mmap_read_lock_killable(priv->mm)) + return false; + + priv->mmap_locked = true; + } else { + rcu_read_lock(); + priv->locked_vma = NULL; + priv->mmap_locked = false; + } + + return true; +} + +static inline void unlock_vma_range(struct proc_maps_private *priv) +{ + if (priv->mmap_locked) { + mmap_read_unlock(priv->mm); + } else { + unlock_vma(priv); + rcu_read_unlock(); + } +} + +static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv, + loff_t last_pos) +{ + struct vm_area_struct *vma; + + if (priv->mmap_locked) + return vma_next(&priv->iter); + + unlock_vma(priv); + vma = lock_next_vma(priv->mm, &priv->iter, last_pos); + if (!IS_ERR_OR_NULL(vma)) + priv->locked_vma = vma; + + return vma; +} + +static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv, + loff_t pos) +{ + if (priv->mmap_locked) + return false; + + rcu_read_unlock(); + mmap_read_lock(priv->mm); + /* Reinitialize the iterator after taking mmap_lock */ + vma_iter_set(&priv->iter, pos); + priv->mmap_locked = true; + + return true; +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline bool lock_vma_range(struct seq_file *m, + struct proc_maps_private *priv) +{ + return mmap_read_lock_killable(priv->mm) == 0; +} + +static inline void unlock_vma_range(struct proc_maps_private *priv) +{ + mmap_read_unlock(priv->mm); +} + +static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv, + loff_t last_pos) +{ + return vma_next(&priv->iter); +} + +static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv, + loff_t pos) { - struct vm_area_struct *vma = vma_next(&priv->iter); + return false; +} + +#endif /* CONFIG_PER_VMA_LOCK */ + +static struct vm_area_struct *proc_get_vma(struct seq_file *m, loff_t *ppos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma; + +retry: + vma = get_next_vma(priv, *ppos); + /* EINTR of EAGAIN is possible */ + if (IS_ERR(vma)) { + if (PTR_ERR(vma) == -EAGAIN && fallback_to_mmap_lock(priv, *ppos)) + goto retry; + return vma; + } + + /* Store previous position to be able to restart if needed */ + priv->last_pos = *ppos; if (vma) { - *ppos = vma->vm_start; + /* + * Track the end of the reported vma to ensure position changes + * even if previous vma was merged with the next vma and we + * found the extended vma with the same vm_start. + */ + *ppos = vma->vm_end; } else { - *ppos = -2UL; + *ppos = SENTINEL_VMA_GATE; vma = get_gate_vma(priv->mm); } @@ -145,11 +267,11 @@ static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; - unsigned long last_addr = *ppos; + loff_t last_addr = *ppos; struct mm_struct *mm; /* See m_next(). Zero at the start or after lseek. */ - if (last_addr == -1UL) + if (last_addr == SENTINEL_VMA_END) return NULL; priv->task = get_proc_task(priv->inode); @@ -163,28 +285,34 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return NULL; } - if (mmap_read_lock_killable(mm)) { + if (!lock_vma_range(m, priv)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; return ERR_PTR(-EINTR); } - vma_iter_init(&priv->iter, mm, last_addr); + /* + * Reset current position if last_addr was set before + * and it's not a sentinel. + */ + if (last_addr > 0) + *ppos = last_addr = priv->last_pos; + vma_iter_init(&priv->iter, mm, (unsigned long)last_addr); hold_task_mempolicy(priv); - if (last_addr == -2UL) + if (last_addr == SENTINEL_VMA_GATE) return get_gate_vma(mm); - return proc_get_vma(priv, ppos); + return proc_get_vma(m, ppos); } static void *m_next(struct seq_file *m, void *v, loff_t *ppos) { - if (*ppos == -2UL) { - *ppos = -1UL; + if (*ppos == SENTINEL_VMA_GATE) { + *ppos = SENTINEL_VMA_END; return NULL; } - return proc_get_vma(m->private, ppos); + return proc_get_vma(m, ppos); } static void m_stop(struct seq_file *m, void *v) @@ -196,7 +324,7 @@ static void m_stop(struct seq_file *m, void *v) return; release_task_mempolicy(priv); - mmap_read_unlock(mm); + unlock_vma_range(priv); mmput(mm); put_task_struct(priv->task); priv->task = NULL; diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 368e870624da..b69daeb1301b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -195,6 +195,7 @@ replay_again: * from @cfids->entries. Caller will put last reference if the latter. */ if (cfid->has_lease && cfid->time) { + cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; kfree(utf16_path); @@ -363,6 +364,7 @@ replay_again: cfid->file_all_info_is_valid = true; cfid->time = jiffies; + cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); /* At this point the directory handle is fully cached */ rc = 0; @@ -617,7 +619,7 @@ static void cached_dir_put_work(struct work_struct *work) queue_work(serverclose_wq, &cfid->close_work); } -int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) +bool cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) { struct cached_fids *cfids = tcon->cfids; struct cached_fid *cfid; @@ -730,8 +732,8 @@ static void cfids_laundromat_worker(struct work_struct *work) spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - if (cfid->time && - time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) { + if (cfid->last_access_time && + time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) { cfid->on_list = false; list_move(&cfid->entry, &entry); cfids->num_entries--; diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index a28f7cae3caa..46b5a2fdf15b 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -14,7 +14,6 @@ struct cached_dirent { char *name; int namelen; loff_t pos; - struct cifs_fattr fattr; }; @@ -39,6 +38,7 @@ struct cached_fid { bool on_list:1; bool file_all_info_is_valid:1; unsigned long time; /* jiffies of when lease was taken */ + unsigned long last_access_time; /* jiffies of when last accessed */ struct kref refcount; struct cifs_fid fid; spinlock_t fid_lock; @@ -80,6 +80,6 @@ extern void drop_cached_dir_by_name(const unsigned int xid, struct cifs_sb_info *cifs_sb); extern void close_all_cached_dirs(struct cifs_sb_info *cifs_sb); extern void invalidate_all_cached_dirs(struct cifs_tcon *tcon); -extern int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]); +extern bool cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]); #endif /* _CACHED_DIR_H */ diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 3fdf75737d43..f1cea365b6f1 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -26,6 +26,7 @@ #include "smbdirect.h" #endif #include "cifs_swn.h" +#include "cached_dir.h" void cifs_dump_mem(char *label, void *data, int length) @@ -280,6 +281,54 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) return 0; } +static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) +{ + struct list_head *stmp, *tmp, *tmp1; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cached_fids *cfids; + struct cached_fid *cfid; + LIST_HEAD(entry); + + seq_puts(m, "# Version:1\n"); + seq_puts(m, "# Format:\n"); + seq_puts(m, "# <tree id> <sess id> <persistent fid> <path>\n"); + + spin_lock(&cifs_tcp_ses_lock); + list_for_each(stmp, &cifs_tcp_ses_list) { + server = list_entry(stmp, struct TCP_Server_Info, + tcp_ses_list); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + cfids = tcon->cfids; + spin_lock(&cfids->cfid_list_lock); /* check lock ordering */ + seq_printf(m, "Num entries: %d\n", cfids->num_entries); + list_for_each_entry(cfid, &cfids->entries, entry) { + seq_printf(m, "0x%x 0x%llx 0x%llx %s", + tcon->tid, + ses->Suid, + cfid->fid.persistent_fid, + cfid->path); + if (cfid->file_all_info_is_valid) + seq_printf(m, "\tvalid file info"); + if (cfid->dirents.is_valid) + seq_printf(m, ", valid dirents"); + seq_printf(m, "\n"); + } + spin_unlock(&cfids->cfid_list_lock); + + + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + seq_putc(m, '\n'); + return 0; +} + static __always_inline const char *compression_alg_str(__le16 alg) { switch (alg) { @@ -863,6 +912,9 @@ cifs_proc_init(void) proc_create_single("open_files", 0400, proc_fs_cifs, cifs_debug_files_proc_show); + proc_create_single("open_dirs", 0400, proc_fs_cifs, + cifs_debug_dirs_proc_show); + proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_ops); proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_ops); proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_ops); @@ -907,6 +959,7 @@ cifs_proc_clean(void) remove_proc_entry("DebugData", proc_fs_cifs); remove_proc_entry("open_files", proc_fs_cifs); + remove_proc_entry("open_dirs", proc_fs_cifs); remove_proc_entry("cifsFYI", proc_fs_cifs); remove_proc_entry("traceSMB", proc_fs_cifs); remove_proc_entry("Stats", proc_fs_cifs); diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 35892df7335c..3cc686246908 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -343,7 +343,7 @@ static struct ntlmssp2_name *find_next_av(struct cifs_ses *ses, len = AV_LEN(av); if (AV_TYPE(av) == NTLMSSP_AV_EOL) return NULL; - if (!len || (u8 *)av + sizeof(*av) + len > end) + if ((u8 *)av + sizeof(*av) + len > end) return NULL; return av; } @@ -363,7 +363,7 @@ static int find_av_name(struct cifs_ses *ses, u16 type, char **name, u16 maxlen) av_for_each_entry(ses, av) { len = AV_LEN(av); - if (AV_TYPE(av) != type) + if (AV_TYPE(av) != type || !len) continue; if (!IS_ALIGNED(len, sizeof(__le16))) { cifs_dbg(VFS | ONCE, "%s: bad length(%u) for type %u\n", @@ -532,17 +532,67 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ return rc; } +/* + * Set up NTLMv2 response blob with SPN (cifs/<hostname>) appended to the + * existing list of AV pairs. + */ +static int set_auth_key_response(struct cifs_ses *ses) +{ + size_t baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); + size_t len, spnlen, tilen = 0, num_avs = 2 /* SPN + EOL */; + struct TCP_Server_Info *server = ses->server; + char *spn __free(kfree) = NULL; + struct ntlmssp2_name *av; + char *rsp = NULL; + int rc; + + spnlen = strlen(server->hostname); + len = sizeof("cifs/") + spnlen; + spn = kmalloc(len, GFP_KERNEL); + if (!spn) { + rc = -ENOMEM; + goto out; + } + + spnlen = scnprintf(spn, len, "cifs/%.*s", + (int)spnlen, server->hostname); + + av_for_each_entry(ses, av) + tilen += sizeof(*av) + AV_LEN(av); + + len = baselen + tilen + spnlen * sizeof(__le16) + num_avs * sizeof(*av); + rsp = kmalloc(len, GFP_KERNEL); + if (!rsp) { + rc = -ENOMEM; + goto out; + } + + memcpy(rsp + baselen, ses->auth_key.response, tilen); + av = (void *)(rsp + baselen + tilen); + av->type = cpu_to_le16(NTLMSSP_AV_TARGET_NAME); + av->length = cpu_to_le16(spnlen * sizeof(__le16)); + cifs_strtoUTF16((__le16 *)av->data, spn, spnlen, ses->local_nls); + av = (void *)((__u8 *)av + sizeof(*av) + AV_LEN(av)); + av->type = cpu_to_le16(NTLMSSP_AV_EOL); + av->length = 0; + + rc = 0; + ses->auth_key.len = len; +out: + ses->auth_key.response = rsp; + return rc; +} + int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { struct shash_desc *hmacmd5 = NULL; - int rc; - int baselen; - unsigned int tilen; + unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; - unsigned char *tiblob = NULL; /* target info blob */ __le64 rsp_timestamp; + __u64 cc; + int rc; if (nls_cp == NULL) { cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__); @@ -588,32 +638,25 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) * (as Windows 7 does) */ rsp_timestamp = find_timestamp(ses); + get_random_bytes(&cc, sizeof(cc)); - baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); - tilen = ses->auth_key.len; - tiblob = ses->auth_key.response; + cifs_server_lock(ses->server); - ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); - if (!ses->auth_key.response) { - rc = -ENOMEM; + tiblob = ses->auth_key.response; + rc = set_auth_key_response(ses); + if (rc) { ses->auth_key.len = 0; - goto setup_ntlmv2_rsp_ret; + goto unlock; } - ses->auth_key.len += baselen; ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); ntlmv2->blob_signature = cpu_to_le32(0x00000101); ntlmv2->reserved = 0; ntlmv2->time = rsp_timestamp; - - get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); + ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - memcpy(ses->auth_key.response + baselen, tiblob, tilen); - - cifs_server_lock(ses->server); - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); if (rc) { cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 89160bc34d35..19dd901fe8ab 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -627,12 +627,14 @@ struct smb_version_operations { bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov, u32 *plen); - int (*create_reparse_symlink)(const unsigned int xid, - struct inode *inode, - struct dentry *dentry, - struct cifs_tcon *tcon, - const char *full_path, - const char *symname); + struct inode * (*create_reparse_inode)(struct cifs_open_info_data *data, + struct super_block *sb, + const unsigned int xid, + struct cifs_tcon *tcon, + const char *full_path, + bool directory, + struct kvec *reparse_iov, + struct kvec *xattr_iov); }; struct smb_version_values { diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 045227ed4efc..40ec0634377f 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -483,6 +483,14 @@ extern int cifs_query_reparse_point(const unsigned int xid, const char *full_path, u32 *tag, struct kvec *rsp, int *rsp_buftype); +extern struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, + struct super_block *sb, + const unsigned int xid, + struct cifs_tcon *tcon, + const char *full_path, + bool directory, + struct kvec *reparse_iov, + struct kvec *xattr_iov); extern int CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid); extern int CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 75142f49d65d..6c890db06593 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2851,6 +2851,134 @@ error: return rc; } +struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, + struct super_block *sb, + const unsigned int xid, + struct cifs_tcon *tcon, + const char *full_path, + bool directory, + struct kvec *reparse_iov, + struct kvec *xattr_iov) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_open_parms oparms; + TRANSACT_IOCTL_REQ *io_req; + struct inode *new = NULL; + struct kvec in_iov[2]; + struct kvec out_iov; + struct cifs_fid fid; + int io_req_len; + int oplock = 0; + int buf_type = 0; + int rc; + + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); + + /* + * If server filesystem does not support reparse points then do not + * attempt to create reparse point. This will prevent creating unusable + * empty object on the server. + */ + if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) + return ERR_PTR(-EOPNOTSUPP); + +#ifndef CONFIG_CIFS_XATTR + if (xattr_iov) + return ERR_PTR(-EOPNOTSUPP); +#endif + + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_WRITE_DATA | FILE_WRITE_EA, + FILE_CREATE, + (directory ? CREATE_NOT_FILE : CREATE_NOT_DIR) | OPEN_REPARSE_POINT, + ACL_NO_MODE); + oparms.fid = &fid; + + rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (rc) + return ERR_PTR(rc); + +#ifdef CONFIG_CIFS_XATTR + if (xattr_iov) { + struct smb2_file_full_ea_info *ea; + + ea = &((struct smb2_create_ea_ctx *)xattr_iov->iov_base)->ea; + while (1) { + rc = CIFSSMBSetEA(xid, + tcon, + full_path, + &ea->ea_data[0], + &ea->ea_data[ea->ea_name_length+1], + le16_to_cpu(ea->ea_value_length), + cifs_sb->local_nls, + cifs_sb); + if (rc) + goto out_close; + if (le32_to_cpu(ea->next_entry_offset) == 0) + break; + ea = (struct smb2_file_full_ea_info *)((u8 *)ea + + le32_to_cpu(ea->next_entry_offset)); + } + } +#endif + + rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **)&io_req, NULL); + if (rc) + goto out_close; + + inc_rfc1001_len(io_req, sizeof(io_req->Pad)); + + io_req_len = be32_to_cpu(io_req->hdr.smb_buf_length) + sizeof(io_req->hdr.smb_buf_length); + + /* NT IOCTL response contains one-word long output setup buffer with size of output data. */ + io_req->MaxSetupCount = 1; + /* NT IOCTL response does not contain output parameters. */ + io_req->MaxParameterCount = cpu_to_le32(0); + /* FSCTL_SET_REPARSE_POINT response contains empty output data. */ + io_req->MaxDataCount = cpu_to_le32(0); + + io_req->TotalParameterCount = cpu_to_le32(0); + io_req->TotalDataCount = cpu_to_le32(reparse_iov->iov_len); + io_req->ParameterCount = io_req->TotalParameterCount; + io_req->ParameterOffset = cpu_to_le32(0); + io_req->DataCount = io_req->TotalDataCount; + io_req->DataOffset = cpu_to_le32(offsetof(typeof(*io_req), Data) - + sizeof(io_req->hdr.smb_buf_length)); + io_req->SetupCount = 4; + io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); + io_req->FunctionCode = cpu_to_le32(FSCTL_SET_REPARSE_POINT); + io_req->Fid = fid.netfid; + io_req->IsFsctl = 1; + io_req->IsRootFlag = 0; + io_req->ByteCount = cpu_to_le16(le32_to_cpu(io_req->DataCount) + sizeof(io_req->Pad)); + + inc_rfc1001_len(io_req, reparse_iov->iov_len); + + in_iov[0].iov_base = (char *)io_req; + in_iov[0].iov_len = io_req_len; + in_iov[1] = *reparse_iov; + rc = SendReceive2(xid, tcon->ses, in_iov, ARRAY_SIZE(in_iov), &buf_type, + CIFS_NO_RSP_BUF, &out_iov); + + cifs_buf_release(io_req); + + if (!rc) + rc = cifs_get_inode_info(&new, full_path, data, sb, xid, NULL); + +out_close: + CIFSSMBClose(xid, tcon, fid.netfid); + + /* + * If CREATE was successful but FSCTL_SET_REPARSE_POINT failed then + * remove the intermediate object created by CREATE. Otherwise + * empty object stay on the server when reparse call failed. + */ + if (rc) + CIFSSMBDelFile(xid, tcon, full_path, cifs_sb, NULL); + + return rc ? ERR_PTR(rc) : new; +} + int CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid) @@ -4020,6 +4148,12 @@ findFirstRetry: pSMB->FileName[name_len] = 0; pSMB->FileName[name_len+1] = 0; name_len += 2; + } else if (!searchName[0]) { + pSMB->FileName[0] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[1] = 0; + pSMB->FileName[2] = 0; + pSMB->FileName[3] = 0; + name_len = 4; } } else { name_len = copy_path_name(pSMB->FileName, searchName); @@ -4031,6 +4165,10 @@ findFirstRetry: pSMB->FileName[name_len] = '*'; pSMB->FileName[name_len+1] = 0; name_len += 2; + } else if (!searchName[0]) { + pSMB->FileName[0] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[1] = 0; + name_len = 2; } } @@ -4057,7 +4195,7 @@ findFirstRetry: pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); - pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO)); + pSMB->SearchCount = cpu_to_le16(msearch ? CIFSMaxBufSize/sizeof(FILE_UNIX_INFO) : 1); pSMB->SearchFlags = cpu_to_le16(search_flags); pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 205f547ca49e..5eec8957f2a9 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3362,18 +3362,15 @@ generic_ip_connect(struct TCP_Server_Info *server) struct net *net = cifs_net_ns(server); struct sock *sk; - rc = __sock_create(net, sfamily, SOCK_STREAM, - IPPROTO_TCP, &server->ssocket, 1); + rc = sock_create_kern(net, sfamily, SOCK_STREAM, + IPPROTO_TCP, &server->ssocket); if (rc < 0) { cifs_server_dbg(VFS, "Error %d creating socket\n", rc); return rc; } sk = server->ssocket->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 59ccc2229ab3..3f34bb07997b 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1475,35 +1475,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, pr_warn("username too long\n"); goto cifs_parse_mount_err; } - ctx->username = kstrdup(param->string, GFP_KERNEL); - if (ctx->username == NULL) { - cifs_errorf(fc, "OOM when copying username string\n"); - goto cifs_parse_mount_err; - } + ctx->username = no_free_ptr(param->string); break; case Opt_pass: kfree_sensitive(ctx->password); ctx->password = NULL; if (strlen(param->string) == 0) break; - - ctx->password = kstrdup(param->string, GFP_KERNEL); - if (ctx->password == NULL) { - cifs_errorf(fc, "OOM when copying password string\n"); - goto cifs_parse_mount_err; - } + ctx->password = no_free_ptr(param->string); break; case Opt_pass2: kfree_sensitive(ctx->password2); ctx->password2 = NULL; if (strlen(param->string) == 0) break; - - ctx->password2 = kstrdup(param->string, GFP_KERNEL); - if (ctx->password2 == NULL) { - cifs_errorf(fc, "OOM when copying password2 string\n"); - goto cifs_parse_mount_err; - } + ctx->password2 = no_free_ptr(param->string); break; case Opt_ip: if (strlen(param->string) == 0) { @@ -1526,11 +1512,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } kfree(ctx->domainname); - ctx->domainname = kstrdup(param->string, GFP_KERNEL); - if (ctx->domainname == NULL) { - cifs_errorf(fc, "OOM when copying domainname string\n"); - goto cifs_parse_mount_err; - } + ctx->domainname = no_free_ptr(param->string); cifs_dbg(FYI, "Domain name set\n"); break; case Opt_srcaddr: @@ -1550,11 +1532,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, if (strncasecmp(param->string, "default", 7) != 0) { kfree(ctx->iocharset); - ctx->iocharset = kstrdup(param->string, GFP_KERNEL); - if (ctx->iocharset == NULL) { - cifs_errorf(fc, "OOM when copying iocharset string\n"); - goto cifs_parse_mount_err; - } + ctx->iocharset = no_free_ptr(param->string); } /* if iocharset not set then load_nls_default * is used by caller diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index 769752ad2c5c..2ecd705e9e8c 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -19,6 +19,7 @@ #include "smb2proto.h" #include "cifs_ioctl.h" #include "fs_context.h" +#include "reparse.h" /* * M-F Symlink Functions - Begin @@ -570,7 +571,6 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, int rc = -EOPNOTSUPP; unsigned int xid; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct TCP_Server_Info *server; struct tcon_link *tlink; struct cifs_tcon *pTcon; const char *full_path; @@ -593,7 +593,6 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, goto symlink_exit; } pTcon = tlink_tcon(tlink); - server = cifs_pick_channel(pTcon->ses); full_path = build_path_from_dentry(direntry, page); if (IS_ERR(full_path)) { @@ -643,13 +642,9 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, case CIFS_SYMLINK_TYPE_NATIVE: case CIFS_SYMLINK_TYPE_NFS: case CIFS_SYMLINK_TYPE_WSL: - if (server->ops->create_reparse_symlink && - (le32_to_cpu(pTcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) { - rc = server->ops->create_reparse_symlink(xid, inode, - direntry, - pTcon, - full_path, - symname); + if (le32_to_cpu(pTcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + rc = create_reparse_symlink(xid, inode, direntry, pTcon, + full_path, symname); goto symlink_exit; } break; diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 5fa29a97ac15..33c1d970747c 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -34,7 +34,7 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb, const char *symname, bool *directory); -int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, +int create_reparse_symlink(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname) { @@ -227,7 +227,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, iov.iov_base = buf; iov.iov_len = len; - new = smb2_get_reparse_inode(&data, inode->i_sb, xid, + new = tcon->ses->server->ops->create_reparse_inode( + &data, inode->i_sb, xid, tcon, full_path, directory, &iov, NULL); if (!IS_ERR(new)) @@ -399,7 +400,8 @@ static int create_native_socket(const unsigned int xid, struct inode *inode, struct inode *new; int rc = 0; - new = smb2_get_reparse_inode(&data, inode->i_sb, xid, + new = tcon->ses->server->ops->create_reparse_inode( + &data, inode->i_sb, xid, tcon, full_path, false, &iov, NULL); if (!IS_ERR(new)) d_instantiate(dentry, new); @@ -492,7 +494,8 @@ static int mknod_nfs(unsigned int xid, struct inode *inode, .symlink_target = kstrdup(symname, GFP_KERNEL), }; - new = smb2_get_reparse_inode(&data, inode->i_sb, xid, + new = tcon->ses->server->ops->create_reparse_inode( + &data, inode->i_sb, xid, tcon, full_path, false, &iov, NULL); if (!IS_ERR(new)) d_instantiate(dentry, new); @@ -685,7 +688,8 @@ static int mknod_wsl(unsigned int xid, struct inode *inode, memcpy(data.wsl.eas, &cc->ea, len); data.wsl.eas_len = len; - new = smb2_get_reparse_inode(&data, inode->i_sb, + new = tcon->ses->server->ops->create_reparse_inode( + &data, inode->i_sb, xid, tcon, full_path, false, &reparse_iov, &xattr_iov); if (!IS_ERR(new)) @@ -698,7 +702,7 @@ static int mknod_wsl(unsigned int xid, struct inode *inode, return rc; } -int smb2_mknod_reparse(unsigned int xid, struct inode *inode, +int mknod_reparse(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev) { diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 08de853b36a8..66269c10beba 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -129,10 +129,10 @@ static inline bool cifs_open_data_reparse(struct cifs_open_info_data *data) bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct cifs_open_info_data *data); -int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, +int create_reparse_symlink(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname); -int smb2_mknod_reparse(unsigned int xid, struct inode *inode, +int mknod_reparse(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, u32 *len); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 330bc3d25bad..0a8c2fcc9ded 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -332,6 +332,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) struct cifs_server_iface *old_iface = NULL; struct cifs_server_iface *last_iface = NULL; struct sockaddr_storage ss; + int retry = 0; spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); @@ -360,6 +361,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) return; } +try_again: last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, iface_head); iface_min_speed = last_iface->speed; @@ -397,6 +399,13 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_for_each_entry(iface, &ses->iface_list, iface_head) + iface->weight_fulfilled = 0; + + /* see if it can be satisfied in second attempt */ + if (!retry++) + goto try_again; + iface = NULL; cifs_dbg(FYI, "unable to find a suitable iface\n"); } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index b27a182629ec..e364b6515af3 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -16,6 +16,7 @@ #include "fs_context.h" #include "nterr.h" #include "smberr.h" +#include "reparse.h" /* * An NT cancel request header looks just like the original request except: @@ -1263,17 +1264,26 @@ cifs_make_node(unsigned int xid, struct inode *inode, if (rc == 0) d_instantiate(dentry, newinode); return rc; + } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { + /* + * Check if mounted with mount parm 'sfu' mount parm. + * SFU emulation should work with all servers + * and was used by default in earlier versions of Windows. + */ + return cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); + } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + /* + * mknod via reparse points requires server support for + * storing reparse points, which is available since + * Windows 2000, but was not widely used until release + * of Windows Server 2012 by the Windows NFS server. + */ + return mknod_reparse(xid, inode, dentry, tcon, + full_path, mode, dev); + } else { + return -EOPNOTSUPP; } - /* - * Check if mounted with mount parm 'sfu' mount parm. - * SFU emulation should work with all servers, but only - * supports block and char device, socket & fifo, - * and was used by default in earlier versions of Windows - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - return -EPERM; - return cifs_sfu_make_node(xid, inode, dentry, tcon, - full_path, mode, dev); } static bool @@ -1370,6 +1380,7 @@ struct smb_version_operations smb1_operations = { .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, .get_reparse_point_buffer = cifs_get_reparse_point_buffer, + .create_reparse_inode = cifs_create_reparse_inode, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index a11a2a693c51..69d251726c02 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1058,10 +1058,11 @@ int smb2_query_path_info(const unsigned int xid, * Skip SMB2_OP_GET_REPARSE if symlink already parsed in create * response. */ - if (data->reparse.tag != IO_REPARSE_TAG_SYMLINK) + if (data->reparse.tag != IO_REPARSE_TAG_SYMLINK) { cmds[num_cmds++] = SMB2_OP_GET_REPARSE; - if (!tcon->posix_extensions) - cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; + if (!tcon->posix_extensions) + cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; + } oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES | @@ -1320,7 +1321,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, return rc; } -struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, +struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, struct super_block *sb, const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 938a8a7c5d21..1b4a31894f43 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5262,7 +5262,7 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, full_path, mode, dev); } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) || (tcon->posix_extensions)) { - rc = smb2_mknod_reparse(xid, inode, dentry, tcon, + rc = mknod_reparse(xid, inode, dentry, tcon, full_path, mode, dev); } return rc; @@ -5321,7 +5321,7 @@ struct smb_version_operations smb20_operations = { .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, - .create_reparse_symlink = smb2_create_reparse_symlink, + .create_reparse_inode = smb2_create_reparse_inode, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -5424,7 +5424,7 @@ struct smb_version_operations smb21_operations = { .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, - .create_reparse_symlink = smb2_create_reparse_symlink, + .create_reparse_inode = smb2_create_reparse_inode, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -5531,7 +5531,7 @@ struct smb_version_operations smb30_operations = { .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, - .create_reparse_symlink = smb2_create_reparse_symlink, + .create_reparse_inode = smb2_create_reparse_inode, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -5647,7 +5647,7 @@ struct smb_version_operations smb311_operations = { .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, - .create_reparse_symlink = smb2_create_reparse_symlink, + .create_reparse_inode = smb2_create_reparse_inode, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 035aa1624053..6e805ece6a7b 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -54,7 +54,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, extern int smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *path, __u32 *reparse_tag); -struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, +struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, struct super_block *sb, const unsigned int xid, struct cifs_tcon *tcon, @@ -314,9 +314,6 @@ int smb311_posix_query_path_info(const unsigned int xid, int posix_info_parse(const void *beg, const void *end, struct smb2_posix_info_parsed *out); int posix_info_sid_size(const void *beg, const void *end); -int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, - struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, const char *symname); int smb2_make_nfs_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 22f4bf956ba1..54c6cc7fe9c6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -165,14 +165,14 @@ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) { if (refcount_dec_and_test(&ctx->refcount)) { - VM_BUG_ON(spin_is_locked(&ctx->fault_pending_wqh.lock)); - VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh)); - VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock)); - VM_BUG_ON(waitqueue_active(&ctx->fault_wqh)); - VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock)); - VM_BUG_ON(waitqueue_active(&ctx->event_wqh)); - VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); - VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_pending_wqh.lock)); + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_pending_wqh)); + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fault_wqh.lock)); + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fault_wqh)); + VM_WARN_ON_ONCE(spin_is_locked(&ctx->event_wqh.lock)); + VM_WARN_ON_ONCE(waitqueue_active(&ctx->event_wqh)); + VM_WARN_ON_ONCE(spin_is_locked(&ctx->fd_wqh.lock)); + VM_WARN_ON_ONCE(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } @@ -304,7 +304,7 @@ again: goto out; ret = false; - if (!pmd_present(_pmd) || pmd_devmap(_pmd)) + if (!pmd_present(_pmd)) goto out; if (pmd_trans_huge(_pmd)) { @@ -383,12 +383,12 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (!ctx) goto out; - BUG_ON(ctx->mm != mm); + VM_WARN_ON_ONCE(ctx->mm != mm); /* Any unrecognized flag is a bug. */ - VM_BUG_ON(reason & ~__VM_UFFD_FLAGS); + VM_WARN_ON_ONCE(reason & ~__VM_UFFD_FLAGS); /* 0 or > 1 flags set is a bug; we expect exactly 1. */ - VM_BUG_ON(!reason || (reason & (reason - 1))); + VM_WARN_ON_ONCE(!reason || (reason & (reason - 1))); if (ctx->features & UFFD_FEATURE_SIGBUS) goto out; @@ -411,12 +411,11 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * to be sure not to return SIGBUS erroneously on * nowait invocations. */ - BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT); + VM_WARN_ON_ONCE(vmf->flags & FAULT_FLAG_RETRY_NOWAIT); #ifdef CONFIG_DEBUG_VM if (printk_ratelimit()) { - printk(KERN_WARNING - "FAULT_FLAG_ALLOW_RETRY missing %x\n", - vmf->flags); + pr_warn("FAULT_FLAG_ALLOW_RETRY missing %x\n", + vmf->flags); dump_stack(); } #endif @@ -602,7 +601,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, */ out: atomic_dec(&ctx->mmap_changing); - VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0); + VM_WARN_ON_ONCE(atomic_read(&ctx->mmap_changing) < 0); userfaultfd_ctx_put(ctx); } @@ -710,7 +709,7 @@ void dup_userfaultfd_fail(struct list_head *fcs) struct userfaultfd_ctx *ctx = fctx->new; atomic_dec(&octx->mmap_changing); - VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0); + VM_WARN_ON_ONCE(atomic_read(&octx->mmap_changing) < 0); userfaultfd_ctx_put(octx); userfaultfd_ctx_put(ctx); @@ -751,11 +750,6 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, if (!ctx) return; - if (to & ~PAGE_MASK) { - userfaultfd_ctx_put(ctx); - return; - } - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMAP; @@ -766,6 +760,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } +void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *vm_ctx) +{ + struct userfaultfd_ctx *ctx = vm_ctx->ctx; + + if (!ctx) + return; + + userfaultfd_ctx_put(ctx); +} + bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -1243,7 +1247,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, int ret; struct uffdio_register uffdio_register; struct uffdio_register __user *user_uffdio_register; - unsigned long vm_flags; + vm_flags_t vm_flags; bool found; bool basic_ioctls; unsigned long start, end; @@ -1317,8 +1321,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, do { cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ - !!(cur->vm_flags & __VM_UFFD_FLAGS)); + VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^ + !!(cur->vm_flags & __VM_UFFD_FLAGS)); /* check not compatible vmas */ ret = -EINVAL; @@ -1372,7 +1376,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, found = true; } for_each_vma_range(vmi, cur, end); - BUG_ON(!found); + VM_WARN_ON_ONCE(!found); ret = userfaultfd_register_range(ctx, vma, vm_flags, start, end, wp_async); @@ -1464,8 +1468,16 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, do { cond_resched(); - BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^ - !!(cur->vm_flags & __VM_UFFD_FLAGS)); + VM_WARN_ON_ONCE(!!cur->vm_userfaultfd_ctx.ctx ^ + !!(cur->vm_flags & __VM_UFFD_FLAGS)); + + /* + * Prevent unregistering through a different userfaultfd than + * the one used for registration. + */ + if (cur->vm_userfaultfd_ctx.ctx && + cur->vm_userfaultfd_ctx.ctx != ctx) + goto out_unlock; /* * Check not compatible vmas, not strictly required @@ -1479,7 +1491,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, found = true; } for_each_vma_range(vmi, cur, end); - BUG_ON(!found); + VM_WARN_ON_ONCE(!found); vma_iter_set(&vmi, start); prev = vma_prev(&vmi); @@ -1490,16 +1502,13 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, for_each_vma_range(vmi, vma, end) { cond_resched(); - BUG_ON(!vma_can_userfault(vma, vma->vm_flags, wp_async)); - - /* - * Nothing to do: this vma is already registered into this - * userfaultfd and with the right tracking mode too. - */ + /* VMA not registered with userfaultfd. */ if (!vma->vm_userfaultfd_ctx.ctx) goto skip; - WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); + VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx != ctx); + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vma->vm_flags, wp_async)); + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE)); if (vma->vm_start > start) start = vma->vm_start; @@ -1564,7 +1573,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, * len == 0 means wake all and we don't want to wake all here, * so check it again to be sure. */ - VM_BUG_ON(!range.len); + VM_WARN_ON_ONCE(!range.len); wake_userfault(ctx, &range); ret = 0; @@ -1621,7 +1630,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, return -EFAULT; if (ret < 0) goto out; - BUG_ON(!ret); + VM_WARN_ON_ONCE(!ret); /* len == 0 would wake all */ range.len = ret; if (!(uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE)) { @@ -1676,7 +1685,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, if (ret < 0) goto out; /* len == 0 would wake all */ - BUG_ON(!ret); + VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_DONTWAKE)) { range.start = uffdio_zeropage.range.start; @@ -1788,7 +1797,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) goto out; /* len == 0 would wake all */ - BUG_ON(!ret); + VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) { range.start = uffdio_continue.range.start; @@ -1845,7 +1854,7 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long goto out; /* len == 0 would wake all */ - BUG_ON(!ret); + VM_WARN_ON_ONCE(!ret); range.len = ret; if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) { range.start = uffdio_poison.range.start; @@ -2106,12 +2115,10 @@ static int new_userfaultfd(int flags) struct file *file; int fd; - BUG_ON(!current->mm); + VM_WARN_ON_ONCE(!current->mm); /* Check the UFFD_* constants for consistency. */ BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS); - BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC); - BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK); if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY)) return -EINVAL; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index b04c59d87378..55a304cb3aef 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1732,7 +1732,7 @@ xfs_dax_fault_locked( bool write_fault) { vm_fault_t ret; - pfn_t pfn; + unsigned long pfn; if (!IS_ENABLED(CONFIG_FS_DAX)) { ASSERT(0); |