Diffstat (limited to 'fs/ubifs')
-rw-r--r--  fs/ubifs/compress.c    |   2
-rw-r--r--  fs/ubifs/debug.c       |  38
-rw-r--r--  fs/ubifs/debug.h       |   7
-rw-r--r--  fs/ubifs/dir.c         | 154
-rw-r--r--  fs/ubifs/file.c        | 450
-rw-r--r--  fs/ubifs/find.c        |  40
-rw-r--r--  fs/ubifs/ioctl.c       |   6
-rw-r--r--  fs/ubifs/journal.c     | 198
-rw-r--r--  fs/ubifs/lprops.c      |   8
-rw-r--r--  fs/ubifs/lpt.c         |   1
-rw-r--r--  fs/ubifs/lpt_commit.c  |   6
-rw-r--r--  fs/ubifs/master.c      |   5
-rw-r--r--  fs/ubifs/orphan.c      | 157
-rw-r--r--  fs/ubifs/replay.c      |   1
-rw-r--r--  fs/ubifs/super.c       | 416
-rw-r--r--  fs/ubifs/sysfs.c       |   6
-rw-r--r--  fs/ubifs/tnc.c         |  11
-rw-r--r--  fs/ubifs/tnc_commit.c  |   2
-rw-r--r--  fs/ubifs/tnc_misc.c    |  22
-rw-r--r--  fs/ubifs/ubifs.h       |  22
-rw-r--r--  fs/ubifs/xattr.c       |  47
21 files changed, 829 insertions(+), 770 deletions(-)
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 75461777c466..0b48cbab8a3d 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -82,6 +82,7 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /** * ubifs_compress - compress data. + * @c: UBIFS file-system description object * @in_buf: data to compress * @in_len: length of the data to compress * @out_buf: output buffer where compressed data should be stored @@ -140,6 +141,7 @@ no_compr: /** * ubifs_decompress - decompress data. + * @c: UBIFS file-system description object * @in_buf: data to decompress * @in_len: length of the data to decompress * @out_buf: output buffer where decompressed data should diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index d013c5b3f1ed..b01f382ce8db 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -863,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) out: vfree(buf); - return; } void ubifs_dump_znode(const struct ubifs_info *c, @@ -946,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } @@ -1742,17 +1745,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) err = dbg_walk_index(c, NULL, add_size, &calc); if (err) { ubifs_err(c, "error %d while walking the index", err); - return err; + goto out_err; } if (calc != idx_size) { ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld", calc, idx_size); dump_stack(); - return -EINVAL; + err = -EINVAL; + goto out_err; } return 0; + +out_err: + ubifs_destroy_tnc_tree(c); + return err; } /** @@ -2802,7 +2810,6 @@ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** @@ -2822,9 +2829,9 @@ void dbg_debugfs_init_fs(struct ubifs_info *c) const char *fname; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ return; } @@ -2947,7 +2954,6 @@ static const struct file_operations dfs_global_fops = { .read = dfs_global_file_read, .write = dfs_global_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index ed966108da80..d425861e6b82 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -19,10 +19,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, /* * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing 
zero byte. + * + 1 for "_" and 2 for UBI device numbers and 3 for volume number and 1 for + * the trailing zero byte. */ #define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2 + 3 + 1) /** * ubifs_debug_info - per-FS debugging information. @@ -103,7 +104,7 @@ struct ubifs_debug_info { unsigned int chk_fs:1; unsigned int tst_rcvry:1; - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index e413a9cf8ee3..fda82f3e16e8 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -71,8 +71,13 @@ static int inherit_flags(const struct inode *dir, umode_t mode) * @is_xattr: whether the inode is xattr inode * * This function finds an unused inode number, allocates new inode and - * initializes it. Returns new inode in case of success and an error code in - * case of failure. + * initializes it. Non-xattr new inode may be written with xattrs(selinux/ + * encryption) before writing dentry, which could cause inconsistent problem + * when powercut happens between two operations. To deal with it, non-xattr + * new inode is initialized with zero-nlink and added into orphan list, caller + * should make sure that inode is relinked later, and make sure that orphan + * removing and journal writing into an committing atomic operation. Returns + * new inode in case of success and an error code in case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode, bool is_xattr) @@ -163,9 +168,25 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, ui->creat_sqnum = ++c->max_sqnum; spin_unlock(&c->cnt_lock); + if (!is_xattr) { + set_nlink(inode, 0); + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + ubifs_err(c, "ubifs_add_orphan failed: %i", err); + goto out_iput; + } + down_read(&c->commit_sem); + ui->del_cmtno = c->cmt_no; + up_read(&c->commit_sem); + } + if (encrypted) { err = fscrypt_set_context(inode, NULL); if (err) { + if (!is_xattr) { + set_nlink(inode, 1); + ubifs_delete_orphan(c, inode->i_ino); + } ubifs_err(c, "fscrypt_set_context failed: %i", err); goto out_iput; } @@ -205,7 +226,6 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); - generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) @@ -321,12 +341,13 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -341,8 +362,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -387,7 +408,6 @@ static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) return inode; out_inode: - make_bad_inode(inode); iput(inode); out_free: ubifs_err(c, "cannot create whiteout file, error %d", err); @@ -471,6 +491,7 @@ static int 
ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); d_tmpfile(file, inode); @@ -480,7 +501,7 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, mutex_unlock(&ui->ui_mutex); lock_2_inodes(dir, inode); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -493,7 +514,6 @@ static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, out_cancel: unlock_2_inodes(dir, inode); out_inode: - make_bad_inode(inode); if (!instantiated) iput(inode); out_budg: @@ -535,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type) return 0; } +struct ubifs_dir_data { + struct ubifs_dent_node *dent; + u64 cookie; +}; + /* * The classical Unix view for directory is that it is a linear array of * (name, inode number) entries. Linux/VFS assumes this model as well. @@ -562,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; bool encrypted = IS_ENCRYPTED(dir); + struct ubifs_dir_data *data = file->private_data; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); @@ -584,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) fstr_real_len = fstr.len; } - if (file->f_version == 0) { + if (data->cookie == 0) { /* - * The file was seek'ed, which means that @file->private_data + * The file was seek'ed, which means that @data->dent * is now invalid. This may also be just the first * 'ubifs_readdir()' invocation, in which case - * @file->private_data is NULL, and the below code is + * @data->dent is NULL, and the below code is * basically a no-op. */ - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; } /* - * 'generic_file_llseek()' unconditionally sets @file->f_version to - * zero, and we use this for detecting whether the file was seek'ed. + * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this + * for detecting whether the file was seek'ed. */ - file->f_version = 1; + data->cookie = 1; /* File positions 0 and 1 correspond to "." and ".." */ if (ctx->pos < 2) { - ubifs_assert(c, !file->private_data); + ubifs_assert(c, !data->dent); if (!dir_emit_dots(file, ctx)) { if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -621,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } - dent = file->private_data; + dent = data->dent; if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. 
@@ -638,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } while (1) { @@ -681,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - kfree(file->private_data); + kfree(data->dent); ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; cond_resched(); } out: - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -713,7 +739,10 @@ out: /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { - kfree(file->private_data); + struct ubifs_dir_data *data = file->private_data; + + kfree(data->dent); + kfree(data); file->private_data = NULL; return 0; } @@ -761,10 +790,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); - /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ - if (inode->i_nlink == 0) - ubifs_delete_orphan(c, inode->i_ino); - inc_nlink(inode); ihold(inode); inode_set_ctime_current(inode); @@ -772,7 +797,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, inode->i_nlink == 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -786,8 +811,6 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); - if (inode->i_nlink == 0) - ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -847,7 +870,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -951,7 +974,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -1018,6 +1041,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); insert_inode_hash(inode); inc_nlink(inode); @@ -1026,7 +1050,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) { ubifs_err(c, "cannot create directory, error %d", err); goto out_cancel; @@ -1043,8 +1067,8 @@ out_cancel: dir_ui->ui_size = dir->i_size; drop_nlink(dir); mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1103,22 +1127,25 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = 
ubifs_init_security(dir, inode, &dentry->d_name); + if (err) { + kfree(dev); + goto out_inode; + } + init_special_inode(inode, inode->i_mode, rdev); inode->i_size = ubifs_inode(inode)->ui_size = devlen; ui = ubifs_inode(inode); ui->data = dev; ui->data_len = devlen; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1133,8 +1160,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1185,6 +1212,10 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + ui = ubifs_inode(inode); ui->data = kmalloc(disk_link.len, GFP_NOFS); if (!ui->data) { @@ -1209,17 +1240,14 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1233,10 +1261,10 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: /* Free inode->i_link before inode is marked as bad. */ fscrypt_free_inode(inode); - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1404,14 +1432,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, */ err = ubifs_budget_space(c, &wht_req); if (err) { - /* - * Whiteout inode can not be written on flash by - * ubifs_jnl_write_inode(), because it's neither - * dirty nor zero-nlink. - */ iput(whiteout); goto out_release; } + set_nlink(whiteout, 1); /* Add the old_dentry size to the old_dir size. 
*/ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); @@ -1490,7 +1514,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, - new_inode, &new_nm, whiteout, sync); + new_inode, &new_nm, whiteout, sync, !!whiteout); if (err) goto out_cancel; @@ -1543,6 +1567,7 @@ out_cancel: unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { ubifs_release_budget(c, &wht_req); + set_nlink(whiteout, 0); iput(whiteout); } out_release: @@ -1696,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, return 0; } +static int ubifs_dir_open(struct inode *inode, struct file *file) +{ + struct ubifs_dir_data *data; + + data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ubifs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, @@ -1716,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = { }; const struct file_operations ubifs_dir_operations = { - .llseek = generic_file_llseek, + .open = ubifs_dir_open, + .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, .iterate_shared = ubifs_readdir, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5029eb3390a5..5130123005e4 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -96,36 +96,36 @@ dump: return -EINVAL; } -static int do_readpage(struct page *page) +static int do_readpage(struct folio *folio) { void *addr; int err = 0, i; unsigned int block, beyond; - struct ubifs_data_node *dn; - struct inode *inode = page->mapping->host; + struct ubifs_data_node *dn = NULL; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; loff_t i_size = i_size_read(inode); dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); - ubifs_assert(c, !PageChecked(page)); - ubifs_assert(c, !PagePrivate(page)); + inode->i_ino, folio->index, i_size, folio->flags); + ubifs_assert(c, !folio_test_checked(folio)); + ubifs_assert(c, !folio->private); - addr = kmap(page); + addr = kmap_local_folio(folio, 0); - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; if (block >= beyond) { /* Reading beyond inode */ - SetPageChecked(page); - memset(addr, 0, PAGE_SIZE); + folio_set_checked(folio); + addr = folio_zero_tail(folio, 0, addr); goto out; } dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); if (!dn) { err = -ENOMEM; - goto error; + goto out; } i = 0; @@ -150,39 +150,35 @@ static int do_readpage(struct page *page) memset(addr + ilen, 0, dlen - ilen); } } - if (++i >= UBIFS_BLOCKS_PER_PAGE) + if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio))) break; block += 1; addr += UBIFS_BLOCK_SIZE; + if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) { + kunmap_local(addr - UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE); + } } + if (err) { struct ubifs_info *c = inode->i_sb->s_fs_info; if (err == -ENOENT) { /* Not found, so it must be a hole */ - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); - goto out_free; + err = 0; + } else { + 
ubifs_err(c, "cannot read page %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); } - ubifs_err(c, "cannot read page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); - goto error; } -out_free: - kfree(dn); out: - SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); - return 0; - -error: kfree(dn); - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); + if (!err) + folio_mark_uptodate(folio); + flush_dcache_folio(folio); + kunmap_local(addr); return err; } @@ -215,23 +211,23 @@ static void release_existing_page_budget(struct ubifs_info *c) } static int write_begin_slow(struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep) + loff_t pos, unsigned len, struct folio **foliop) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; pgoff_t index = pos >> PAGE_SHIFT; struct ubifs_budget_req req = { .new_page = 1 }; int err, appending = !!(pos + len > inode->i_size); - struct page *page; + struct folio *folio; dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", inode->i_ino, pos, len, inode->i_size); /* - * At the slow path we have to budget before locking the page, because - * budgeting may force write-back, which would wait on locked pages and - * deadlock if we had the page locked. At this point we do not know - * anything about the page, so assume that this is a new page which is + * At the slow path we have to budget before locking the folio, because + * budgeting may force write-back, which would wait on locked folios and + * deadlock if we had the folio locked. At this point we do not know + * anything about the folio, so assume that this is a new folio which is * written to a hole. This corresponds to largest budget. Later the * budget will be amended if this is not true. */ @@ -243,45 +239,43 @@ static int write_begin_slow(struct address_space *mapping, if (unlikely(err)) return err; - page = grab_cache_page_write_begin(mapping, index); - if (unlikely(!page)) { + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { ubifs_release_budget(c, &req); - return -ENOMEM; + return PTR_ERR(folio); } - if (!PageUptodate(page)) { - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) - SetPageChecked(page); + if (!folio_test_uptodate(folio)) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) + folio_set_checked(folio); else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); ubifs_release_budget(c, &req); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - if (PagePrivate(page)) + if (folio->private) /* - * The page is dirty, which means it was budgeted twice: + * The folio is dirty, which means it was budgeted twice: * o first time the budget was allocated by the task which - * made the page dirty and set the PG_private flag; + * made the folio dirty and set the private field; * o and then we budgeted for it for the second time at the * very beginning of this function. * - * So what we have to do is to release the page budget we + * So what we have to do is to release the folio budget we * allocated. */ release_new_page_budget(c); - else if (!PageChecked(page)) + else if (!folio_test_checked(folio)) /* - * We are changing a page which already exists on the media. 
- * This means that changing the page does not make the amount + * We are changing a folio which already exists on the media. + * This means that changing the folio does not make the amount * of indexing information larger, and this part of the budget * which we have already acquired may be released. */ @@ -304,14 +298,14 @@ static int write_begin_slow(struct address_space *mapping, ubifs_release_dirty_inode_budget(c, ui); } - *pagep = page; + *foliop = folio; return 0; } /** * allocate_budget - allocate budget for 'ubifs_write_begin()'. * @c: UBIFS file-system description object - * @page: page to allocate budget for + * @folio: folio to allocate budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * @@ -322,15 +316,15 @@ static int write_begin_slow(struct address_space *mapping, * * Returns: %0 in case of success and %-ENOSPC in case of failure. */ -static int allocate_budget(struct ubifs_info *c, struct page *page, +static int allocate_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { struct ubifs_budget_req req = { .fast = 1 }; - if (PagePrivate(page)) { + if (folio->private) { if (!appending) /* - * The page is dirty and we are not appending, which + * The folio is dirty and we are not appending, which * means no budget is needed at all. */ return 0; @@ -354,11 +348,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page, */ req.dirtied_ino = 1; } else { - if (PageChecked(page)) + if (folio_test_checked(folio)) /* * The page corresponds to a hole and does not * exist on the media. So changing it makes - * make the amount of indexing information + * the amount of indexing information * larger, and we have to budget for a new * page. */ @@ -420,7 +414,7 @@ static int allocate_budget(struct ubifs_info *c, struct page *page, */ static int ubifs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -428,7 +422,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; int err, appending = !!(pos + len > inode->i_size); int skipped_read = 0; - struct page *page; + struct folio *folio; ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -437,13 +431,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return -EROFS; /* Try out the fast-path part first */ - page = grab_cache_page_write_begin(mapping, index); - if (unlikely(!page)) - return -ENOMEM; + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) { /* * We change whole page so no need to load it. But we * do not know whether this page exists on the media or @@ -453,32 +448,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * media. Thus, we are setting the @PG_checked flag * here. 
*/ - SetPageChecked(page); + folio_set_checked(folio); skipped_read = 1; } else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - err = allocate_budget(c, page, ui, appending); + err = allocate_budget(c, folio, ui, appending); if (unlikely(err)) { ubifs_assert(c, err == -ENOSPC); /* * If we skipped reading the page because we were going to * write all of it, then it is not up to date. */ - if (skipped_read) { - ClearPageChecked(page); - ClearPageUptodate(page); - } + if (skipped_read) + folio_clear_checked(folio); /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -490,10 +480,10 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); mutex_unlock(&ui->ui_mutex); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); - return write_begin_slow(mapping, pos, len, pagep); + return write_begin_slow(mapping, pos, len, foliop); } /* @@ -502,22 +492,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ - *pagep = page; + *foliop = folio; return 0; - } /** * cancel_budget - cancel budget. * @c: UBIFS file-system description object - * @page: page to cancel budget for + * @folio: folio to cancel budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for a page write operation. It unlocks the * @ui->ui_mutex in case of appending. */ -static void cancel_budget(struct ubifs_info *c, struct page *page, +static void cancel_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { if (appending) { @@ -525,8 +514,8 @@ static void cancel_budget(struct ubifs_info *c, struct page *page, ubifs_release_dirty_inode_budget(c, ui); mutex_unlock(&ui->ui_mutex); } - if (!PagePrivate(page)) { - if (PageChecked(page)) + if (!folio->private) { + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); @@ -535,7 +524,7 @@ static void cancel_budget(struct ubifs_info *c, struct page *page, static int ubifs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); @@ -544,44 +533,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, int appending = !!(end_pos > inode->i_size); dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", - inode->i_ino, pos, page->index, len, copied, inode->i_size); + inode->i_ino, pos, folio->index, len, copied, inode->i_size); - if (unlikely(copied < len && len == PAGE_SIZE)) { + if (unlikely(copied < len && !folio_test_uptodate(folio))) { /* - * VFS copied less data to the page that it intended and + * VFS copied less data to the folio than it intended and * declared in its '->write_begin()' call via the @len - * argument. If the page was not up-to-date, and @len was - * @PAGE_SIZE, the 'ubifs_write_begin()' function did + * argument. 
If the folio was not up-to-date, + * the 'ubifs_write_begin()' function did * not load it from the media (for optimization reasons). This - * means that part of the page contains garbage. So read the - * page now. + * means that part of the folio contains garbage. So read the + * folio now. */ dbg_gen("copied %d instead of %d, read page and repeat", copied, len); - cancel_budget(c, page, ui, appending); - ClearPageChecked(page); + cancel_budget(c, folio, ui, appending); + folio_clear_checked(folio); /* * Return 0 to force VFS to repeat the whole operation, or the * error code if 'do_readpage()' fails. */ - copied = do_readpage(page); + copied = do_readpage(folio); goto out; } - if (!PagePrivate(page)) { - attach_page_private(page, (void *)1); + if (len == folio_size(folio)) + folio_mark_uptodate(folio); + + if (!folio->private) { + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(mapping, folio); } if (appending) { i_size_write(inode, end_pos); ui->ui_size = end_pos; /* - * Note, we do not set @I_DIRTY_PAGES (which means that the - * inode has dirty pages), this has been done in - * '__set_page_dirty_nobuffers()'. + * We do not set @I_DIRTY_PAGES (which means that + * the inode has dirty pages), this was done in + * filemap_dirty_folio(). */ __mark_inode_dirty(inode, I_DIRTY_DATASYNC); ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); @@ -589,43 +581,43 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } out: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return copied; } /** * populate_page - copy data nodes into a page for bulk-read. * @c: UBIFS file-system description object - * @page: page + * @folio: folio * @bu: bulk-read information * @n: next zbranch slot * * Returns: %0 on success and a negative error code on failure. 
*/ -static int populate_page(struct ubifs_info *c, struct page *page, +static int populate_page(struct ubifs_info *c, struct folio *folio, struct bu_info *bu, int *n) { int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; loff_t i_size = i_size_read(inode); unsigned int page_block; void *addr, *zaddr; pgoff_t end_index; dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); + inode->i_ino, folio->index, i_size, folio->flags); - addr = zaddr = kmap(page); + addr = zaddr = kmap_local_folio(folio, 0); end_index = (i_size - 1) >> PAGE_SHIFT; - if (!i_size || page->index > end_index) { + if (!i_size || folio->index > end_index) { hole = 1; - memset(addr, 0, PAGE_SIZE); + addr = folio_zero_tail(folio, 0, addr); goto out_hole; } - page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; while (1) { int err, len, out_len, dlen; @@ -674,9 +666,13 @@ static int populate_page(struct ubifs_info *c, struct page *page, break; addr += UBIFS_BLOCK_SIZE; page_block += 1; + if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) { + kunmap_local(addr - UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE); + } } - if (end_index == page->index) { + if (end_index == folio->index) { int len = i_size & (PAGE_SIZE - 1); if (len && len < read) @@ -685,22 +681,19 @@ static int populate_page(struct ubifs_info *c, struct page *page, out_hole: if (hole) { - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); } - SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); + folio_mark_uptodate(folio); + flush_dcache_folio(folio); + kunmap_local(addr); *n = nn; return 0; out_err: - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); + flush_dcache_folio(folio); + kunmap_local(addr); ubifs_err(c, "bad data node (block %u, inode %lu)", page_block, inode->i_ino); return -EINVAL; @@ -710,15 +703,15 @@ out_err: * ubifs_do_bulk_read - do bulk-read. * @c: UBIFS file-system description object * @bu: bulk-read information - * @page1: first page to read + * @folio1: first folio to read * * Returns: %1 if the bulk-read is done, otherwise %0 is returned. 
*/ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, - struct page *page1) + struct folio *folio1) { - pgoff_t offset = page1->index, end_index; - struct address_space *mapping = page1->mapping; + pgoff_t offset = folio1->index, end_index; + struct address_space *mapping = folio1->mapping; struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); int err, page_idx, page_cnt, ret = 0, n = 0; @@ -768,11 +761,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, goto out_warn; } - err = populate_page(c, page1, bu, &n); + err = populate_page(c, folio1, bu, &n); if (err) goto out_warn; - unlock_page(page1); + folio_unlock(folio1); ret = 1; isize = i_size_read(inode); @@ -782,19 +775,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, for (page_idx = 1; page_idx < page_cnt; page_idx++) { pgoff_t page_offset = offset + page_idx; - struct page *page; + struct folio *folio; if (page_offset > end_index) break; - page = pagecache_get_page(mapping, page_offset, + folio = __filemap_get_folio(mapping, page_offset, FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, ra_gfp_mask); - if (!page) + if (IS_ERR(folio)) break; - if (!PageUptodate(page)) - err = populate_page(c, page, bu, &n); - unlock_page(page); - put_page(page); + if (!folio_test_uptodate(folio)) + err = populate_page(c, folio, bu, &n); + folio_unlock(folio); + folio_put(folio); if (err) break; } @@ -817,7 +810,7 @@ out_bu_off: /** * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. - * @page: page from which to start bulk-read. + * @folio: folio from which to start bulk-read. * * Some flash media are capable of reading sequentially at faster rates. UBIFS * bulk-read facility is designed to take advantage of that, by reading in one @@ -826,12 +819,12 @@ out_bu_off: * * Returns: %1 if a bulk-read is done and %0 otherwise. 
*/ -static int ubifs_bulk_read(struct page *page) +static int ubifs_bulk_read(struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); - pgoff_t index = page->index, last_page_read = ui->last_page_read; + pgoff_t index = folio->index, last_page_read = ui->last_page_read; struct bu_info *bu; int err = 0, allocated = 0; @@ -879,8 +872,8 @@ static int ubifs_bulk_read(struct page *page) bu->buf_len = c->max_bu_buf_len; data_key_init(c, &bu->key, inode->i_ino, - page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); - err = ubifs_do_bulk_read(c, bu, page); + folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); + err = ubifs_do_bulk_read(c, bu, folio); if (!allocated) mutex_unlock(&c->bu_mutex); @@ -894,69 +887,71 @@ out_unlock: static int ubifs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - - if (ubifs_bulk_read(page)) + if (ubifs_bulk_read(folio)) return 0; - do_readpage(page); + do_readpage(folio); folio_unlock(folio); return 0; } -static int do_writepage(struct page *page, int len) +static int do_writepage(struct folio *folio, size_t len) { - int err = 0, i, blen; + int err = 0, blen; unsigned int block; void *addr; + size_t offset = 0; union ubifs_key key; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT); + ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT); spin_unlock(&ui->ui_lock); #endif - /* Update radix tree tags */ - set_page_writeback(page); + folio_start_writeback(folio); - addr = kmap(page); - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; - i = 0; - while (len) { - blen = min_t(int, len, UBIFS_BLOCK_SIZE); + addr = kmap_local_folio(folio, offset); + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + for (;;) { + blen = min_t(size_t, len, UBIFS_BLOCK_SIZE); data_key_init(c, &key, inode->i_ino, block); err = ubifs_jnl_write_data(c, inode, &key, addr, blen); if (err) break; - if (++i >= UBIFS_BLOCKS_PER_PAGE) + len -= blen; + if (!len) break; block += 1; addr += blen; - len -= blen; + if (folio_test_highmem(folio) && !offset_in_page(addr)) { + kunmap_local(addr - blen); + offset += PAGE_SIZE; + addr = kmap_local_folio(folio, offset); + } } + kunmap_local(addr); if (err) { - SetPageError(page); - ubifs_err(c, "cannot write page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); + mapping_set_error(folio->mapping, err); + ubifs_err(c, "cannot write folio %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); ubifs_ro_mode(c, err); } - ubifs_assert(c, PagePrivate(page)); - if (PageChecked(page)) + ubifs_assert(c, folio->private != NULL); + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - detach_page_private(page); - ClearPageChecked(page); + folio_detach_private(folio); + folio_clear_checked(folio); - kunmap(page); - unlock_page(page); - end_page_writeback(page); + folio_unlock(folio); + folio_end_writeback(folio); return err; } @@ -1006,22 +1001,21 @@ static int do_writepage(struct page *page, int len) * on the page lock and it would not write the truncated inode node to the * journal before we have finished. 
*/ -static int ubifs_writepage(struct page *page, struct writeback_control *wbc) +static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc, + void *data) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); loff_t i_size = i_size_read(inode), synced_i_size; - pgoff_t end_index = i_size >> PAGE_SHIFT; - int err, len = i_size & (PAGE_SIZE - 1); - void *kaddr; + int err, len = folio_size(folio); dbg_gen("ino %lu, pg %lu, pg flags %#lx", - inode->i_ino, page->index, page->flags); - ubifs_assert(c, PagePrivate(page)); + inode->i_ino, folio->index, folio->flags); + ubifs_assert(c, folio->private != NULL); - /* Is the page fully outside @i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !len)) { + /* Is the folio fully outside @i_size? (truncate in progress) */ + if (folio_pos(folio) >= i_size) { err = 0; goto out_unlock; } @@ -1030,9 +1024,9 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) synced_i_size = ui->synced_i_size; spin_unlock(&ui->ui_lock); - /* Is the page fully inside @i_size? */ - if (page->index < end_index) { - if (page->index >= synced_i_size >> PAGE_SHIFT) { + /* Is the folio fully inside i_size? */ + if (folio_pos(folio) + len <= i_size) { + if (folio_pos(folio) + len > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_redirty; @@ -1045,20 +1039,18 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * with this. */ } - return do_writepage(page, PAGE_SIZE); + return do_writepage(folio, len); } /* - * The page straddles @i_size. It must be zeroed out on each and every + * The folio straddles @i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page); - memset(kaddr + len, 0, PAGE_SIZE - len); - flush_dcache_page(page); - kunmap_atomic(kaddr); + len = i_size - folio_pos(folio); + folio_zero_segment(folio, len, folio_size(folio)); if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); @@ -1066,19 +1058,25 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) goto out_redirty; } - return do_writepage(page, len); + return do_writepage(folio, len); out_redirty: /* - * redirty_page_for_writepage() won't call ubifs_dirty_inode() because + * folio_redirty_for_writepage() won't call ubifs_dirty_inode() because * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so * there is no need to do space budget for dirty inode. */ - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); out_unlock: - unlock_page(page); + folio_unlock(folio); return err; } +static int ubifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return write_cache_pages(mapping, wbc, ubifs_writepage, NULL); +} + /** * do_attr_changes - change inode attributes. 
* @inode: inode to change attributes for @@ -1155,11 +1153,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, if (offset) { pgoff_t index = new_size >> PAGE_SHIFT; - struct page *page; + struct folio *folio; - page = find_lock_page(inode->i_mapping, index); - if (page) { - if (PageDirty(page)) { + folio = filemap_lock_folio(inode->i_mapping, index); + if (!IS_ERR(folio)) { + if (folio_test_dirty(folio)) { /* * 'ubifs_jnl_truncate()' will try to truncate * the last data node, but it contains @@ -1168,14 +1166,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * 'ubifs_jnl_truncate()' will see an already * truncated (and up to date) data node. */ - ubifs_assert(c, PagePrivate(page)); + ubifs_assert(c, folio->private != NULL); - clear_page_dirty_for_io(page); + folio_clear_dirty_for_io(folio); if (UBIFS_BLOCKS_PER_PAGE_SHIFT) - offset = new_size & - (PAGE_SIZE - 1); - err = do_writepage(page, offset); - put_page(page); + offset = offset_in_folio(folio, + new_size); + err = do_writepage(folio, offset); + folio_put(folio); if (err) goto out_budg; /* @@ -1188,8 +1186,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * to 'ubifs_jnl_truncate()' to save it from * having to read it. */ - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } } } @@ -1512,14 +1510,14 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) */ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec64 now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; - dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index, i_size_read(inode)); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -1527,17 +1525,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* -EROFS */ /* - * We have not locked @page so far so we may budget for changing the - * page. Note, we cannot do this after we locked the page, because + * We have not locked @folio so far so we may budget for changing the + * folio. Note, we cannot do this after we locked the folio, because * budgeting may cause write-back which would cause deadlock. * - * At the moment we do not know whether the page is dirty or not, so we - * assume that it is not and budget for a new page. We could look at + * At the moment we do not know whether the folio is dirty or not, so we + * assume that it is not and budget for a new folio. We could look at * the @PG_private flag and figure this out, but we may race with write - * back and the page state may change by the time we lock it, so this + * back and the folio state may change by the time we lock it, so this * would need additional care. We do not bother with this at the * moment, although it might be good idea to do. Instead, we allocate - * budget for a new page and amend it later on if the page was in fact + * budget for a new folio and amend it later on if the folio was in fact * dirty. 
* * The budgeting-related logic of this function is similar to what we @@ -1560,21 +1558,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode))) { - /* Page got truncated out from underneath us */ + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode))) { + /* Folio got truncated out from underneath us */ goto sigbus; } - if (PagePrivate(page)) + if (folio->private) release_new_page_budget(c); else { - if (!PageChecked(page)) + if (!folio_test_checked(folio)) ubifs_convert_page_budget(c); - attach_page_private(page, (void *)1); + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(folio->mapping, folio); } if (update_time) { @@ -1590,11 +1588,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) ubifs_release_dirty_inode_budget(c, ui); } - wait_for_stable_page(page); + folio_wait_stable(folio); return VM_FAULT_LOCKED; sigbus: - unlock_page(page); + folio_unlock(folio); ubifs_release_budget(c, &req); return VM_FAULT_SIGBUS; } @@ -1648,7 +1646,7 @@ static int ubifs_symlink_getattr(struct mnt_idmap *idmap, const struct address_space_operations ubifs_file_address_operations = { .read_folio = ubifs_read_folio, - .writepage = ubifs_writepage, + .writepages = ubifs_writepages, .write_begin = ubifs_write_begin, .write_end = ubifs_write_end, .invalidate_folio = ubifs_invalidate_folio, diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 873e6e1c92b5..643718906b9f 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -73,7 +73,7 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) */ static int scan_for_dirty_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; data.exclude_index = exclude_index; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_dirty_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb, &data); if (err) return ERR_PTR(err); @@ -340,7 +340,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -349,8 +349,9 @@ out: */ static int scan_for_free_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int 
in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_free_cb, + scan_for_free_cb, &data); if (err) return ERR_PTR(err); @@ -580,7 +581,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -589,8 +590,9 @@ out: */ static int scan_for_idx_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) int err; data.lnum = -1; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_idx_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_idx_cb, &data); if (err) return ERR_PTR(err); @@ -726,11 +727,10 @@ out: return err; } -static int cmp_dirty_idx(const struct ubifs_lprops **a, - const struct ubifs_lprops **b) +static int cmp_dirty_idx(const void *a, const void *b) { - const struct ubifs_lprops *lpa = *a; - const struct ubifs_lprops *lpb = *b; + const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a; + const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b; return lpa->dirty + lpa->free - lpb->dirty - lpb->free; } @@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) sizeof(void *) * c->dirty_idx.cnt); /* Sort it so that the dirtiest is now at the end */ sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), - (int (*)(const void *, const void *))cmp_dirty_idx, NULL); + cmp_dirty_idx, NULL); dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); if (c->dirty_idx.cnt) dbg_find("dirtiest index LEB is %d with dirty %d and free %d", @@ -773,7 +773,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) */ static int scan_dirty_idx_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c) if (c->pnodes_have >= c->pnode_cnt) /* All pnodes are in memory, so skip scan */ return -ENOSPC; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_dirty_idx_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, 
scan_dirty_idx_cb, &data); if (err) return err; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index d79cabe193c3..2c99349cf537 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -213,12 +213,6 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; case FS_IOC_SET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index f0a5538c84b0..36ba79fbd2ff 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -293,6 +293,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, } /** + * __queue_and_wait - queue a task and wait until the task is waked up. + * @c: UBIFS file-system description object + * + * This function adds current task in queue and waits until the task is waked + * up. This function should be called with @c->reserve_space_wq locked. + */ +static void __queue_and_wait(struct ubifs_info *c) +{ + DEFINE_WAIT(wait); + + __add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&c->reserve_space_wq.lock); + + schedule(); + finish_wait(&c->reserve_space_wq, &wait); +} + +/** + * wait_for_reservation - try queuing current task to wait until waked up. + * @c: UBIFS file-system description object + * + * This function queues current task to wait until waked up, if queuing is + * started(@c->need_wait_space is not %0). Returns %true if current task is + * added in queue, otherwise %false is returned. + */ +static bool wait_for_reservation(struct ubifs_info *c) +{ + if (likely(atomic_read(&c->need_wait_space) == 0)) + /* Quick path to check whether queuing is started. */ + return false; + + spin_lock(&c->reserve_space_wq.lock); + if (atomic_read(&c->need_wait_space) == 0) { + /* Queuing is not started, don't queue current task. */ + spin_unlock(&c->reserve_space_wq.lock); + return false; + } + + __queue_and_wait(c); + return true; +} + +/** + * wake_up_reservation - wake up first task in queue or stop queuing. + * @c: UBIFS file-system description object + * + * This function wakes up the first task in queue if it exists, or stops + * queuing if no tasks in queue. + */ +static void wake_up_reservation(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (waitqueue_active(&c->reserve_space_wq)) + wake_up_locked(&c->reserve_space_wq); + else + /* + * Compared with wait_for_reservation(), set @c->need_wait_space + * under the protection of wait queue lock, which can avoid that + * @c->need_wait_space is set to 0 after new task queued. + */ + atomic_set(&c->need_wait_space, 0); + spin_unlock(&c->reserve_space_wq.lock); +} + +/** + * add_or_start_queue - add current task in queue or start queuing. + * @c: UBIFS file-system description object + * + * This function starts queuing if queuing is not started, otherwise adds + * current task in queue. + */ +static void add_or_start_queue(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) { + /* Starts queuing, task can go on directly. 
*/ + spin_unlock(&c->reserve_space_wq.lock); + return; + } + + /* + * There are at least two tasks have retried more than 32 times + * at certain point, first task has started queuing, just queue + * the left tasks. + */ + __queue_and_wait(c); +} + +/** * make_reservation - reserve journal space. * @c: UBIFS file-system description object * @jhead: journal head @@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, static int make_reservation(struct ubifs_info *c, int jhead, int len) { int err, cmt_retries = 0, nospc_retries = 0; + bool blocked = wait_for_reservation(c); again: down_read(&c->commit_sem); err = reserve_space(c, jhead, len); - if (!err) + if (!err) { /* c->commit_sem will get released via finish_reservation(). */ - return 0; + goto out_wake_up; + } up_read(&c->commit_sem); if (err == -ENOSPC) { /* * GC could not make any progress. We should try to commit - * once because it could make some dirty space and GC would - * make progress, so make the error -EAGAIN so that the below + * because it could make some dirty space and GC would make + * progress, so make the error -EAGAIN so that the below * will commit and re-try. */ - if (nospc_retries++ < 2) { - dbg_jnl("no space, retry"); - err = -EAGAIN; - } - - /* - * This means that the budgeting is incorrect. We always have - * to be able to write to the media, because all operations are - * budgeted. Deletions are not budgeted, though, but we reserve - * an extra LEB for them. - */ + nospc_retries++; + dbg_jnl("no space, retry"); + err = -EAGAIN; } if (err != -EAGAIN) @@ -349,15 +433,37 @@ again: */ if (cmt_retries > 128) { /* - * This should not happen unless the journal size limitations - * are too tough. + * This should not happen unless: + * 1. The journal size limitations are too tough. + * 2. The budgeting is incorrect. We always have to be able to + * write to the media, because all operations are budgeted. + * Deletions are not budgeted, though, but we reserve an + * extra LEB for them. */ - ubifs_err(c, "stuck in space allocation"); + ubifs_err(c, "stuck in space allocation, nospc_retries %d", + nospc_retries); err = -ENOSPC; goto out; - } else if (cmt_retries > 32) - ubifs_warn(c, "too many space allocation re-tries (%d)", - cmt_retries); + } else if (cmt_retries > 32) { + /* + * It's almost impossible to happen, unless there are many tasks + * making reservation concurrently and someone task has retried + * gc + commit for many times, generated available space during + * this period are grabbed by other tasks. + * But if it happens, start queuing up all tasks that will make + * space reservation, then there is only one task making space + * reservation at any time, and it can always make success under + * the premise of correct budgeting. + */ + ubifs_warn(c, "too many space allocation cmt_retries (%d) " + "nospc_retries (%d), start queuing tasks", + cmt_retries, nospc_retries); + + if (!blocked) { + blocked = true; + add_or_start_queue(c); + } + } dbg_jnl("-EAGAIN, commit and retry (retried %d times)", cmt_retries); @@ -365,7 +471,7 @@ again: err = ubifs_run_commit(c); if (err) - return err; + goto out_wake_up; goto again; out: @@ -380,6 +486,27 @@ out: cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); } +out_wake_up: + if (blocked) { + /* + * Only tasks that have ever started queuing or ever been queued + * can wake up other queued tasks, which can make sure that + * there is only one task waked up to make space reservation. 
+ * For example: + * task A task B task C + * make_reservation make_reservation + * reserve_space // 0 + * wake_up_reservation + * atomic_cmpxchg // 0, start queuing + * reserve_space + * wait_for_reservation + * __queue_and_wait + * add_wait_queue + * if (blocked) // false + * // So that task C won't be waked up to race with task B + */ + wake_up_reservation(c); + } return err; } @@ -516,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) * @inode: inode to update * @deletion: indicates a directory entry deletion i.e unlink or rmdir * @xent: non-zero if the directory entry is an extended attribute entry + * @in_orphan: indicates whether the @inode is in orphan list * * This function updates an inode by writing a directory entry (or extended * attribute entry), the inode itself, and the parent directory inode (or the @@ -537,7 +665,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent) + int deletion, int xent, int in_orphan) { int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0; int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); @@ -623,7 +751,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_release; - if (last_reference) { + if (last_reference && !in_orphan) { err = ubifs_add_orphan(c, inode->i_ino); if (err) { release_head(c, BASEHD); @@ -679,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_ro; + if (in_orphan && inode->i_nlink) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; @@ -850,6 +981,13 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); + if (kill_xattrs && ui->xattr_cnt > ubifs_xattr_max_cnt(c)) { + ubifs_err(c, "Cannot delete inode, it has too much xattrs!"); + err = -EPERM; + ubifs_ro_mode(c, err); + return err; + } + /* * If the inode is being deleted, do not write the attached data. No * need to synchronize the write-buffer either. 
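The three helpers added above (wait_for_reservation(), add_or_start_queue() and wake_up_reservation()) form a simple gate: the first task that hits the retry limit raises @c->need_wait_space and keeps going, later reservers park on an exclusive wait queue, and every task that held the gate either hands it to exactly one sleeper or drops it when nobody is waiting. The stand-alone user-space sketch below mirrors that flow for illustration only; it is not part of the patch, the names (wait_if_queuing, start_queuing_or_join, pass_gate_or_stop, gate_lock and so on) are invented for the sketch, and a grants counter stands in for the kernel's exclusive wake-up, which has no spurious-wakeup problem.

	/*
	 * User-space sketch of the reservation gate, not UBIFS code.
	 * Build with: gcc -pthread gate_sketch.c
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <unistd.h>

	static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t gate_cond = PTHREAD_COND_INITIALIZER;
	static bool queuing;	/* analogue of c->need_wait_space */
	static int waiters;	/* analogue of waitqueue_active() */
	static int grants;	/* pending hand-offs, absorbs spurious wake-ups */

	/* Analogue of wait_for_reservation(): sleep only if the gate is up. */
	static bool wait_if_queuing(void)
	{
		bool blocked = false;

		pthread_mutex_lock(&gate_lock);
		if (queuing) {
			blocked = true;
			waiters++;
			while (grants == 0)
				pthread_cond_wait(&gate_cond, &gate_lock);
			grants--;
			waiters--;
		}
		pthread_mutex_unlock(&gate_lock);
		return blocked;
	}

	/*
	 * Analogue of add_or_start_queue(): the first congested task raises
	 * the gate and keeps going, later tasks join the queue and sleep.
	 */
	static void start_queuing_or_join(void)
	{
		pthread_mutex_lock(&gate_lock);
		if (!queuing) {
			queuing = true;
		} else {
			waiters++;
			while (grants == 0)
				pthread_cond_wait(&gate_cond, &gate_lock);
			grants--;
			waiters--;
		}
		pthread_mutex_unlock(&gate_lock);
	}

	/*
	 * Analogue of wake_up_reservation(): hand the gate to exactly one
	 * queued task, or drop the gate when nobody is waiting.
	 */
	static void pass_gate_or_stop(void)
	{
		pthread_mutex_lock(&gate_lock);
		if (waiters > 0) {
			grants++;
			pthread_cond_signal(&gate_cond);
		} else {
			queuing = false;
		}
		pthread_mutex_unlock(&gate_lock);
	}

	/* Toy reserver: every thread pretends it hit the retry limit. */
	static void *reserver(void *arg)
	{
		bool blocked = wait_if_queuing();

		(void)arg;
		if (!blocked) {
			start_queuing_or_join();
			blocked = true;
		}

		usleep(1000);	/* stand-in for reserve_space()/commit work */

		if (blocked)
			pass_gate_or_stop();
		return NULL;
	}

	int main(void)
	{
		pthread_t t[8];
		int i;

		for (i = 0; i < 8; i++)
			pthread_create(&t[i], NULL, reserver, NULL);
		for (i = 0; i < 8; i++)
			pthread_join(t[i], NULL);
		return 0;
	}

Running the sketch should show the property the patch relies on: once congestion is detected, at most one thread is past the gate at any time, and each hand-off wakes exactly one waiter, so a single reserver can always succeed as long as budgeting is correct.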
@@ -881,12 +1019,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) struct inode *xino; struct ubifs_dent_node *xent, *pxent = NULL; - if (ui->xattr_cnt > ubifs_xattr_max_cnt(c)) { - err = -EPERM; - ubifs_err(c, "Cannot delete inode, it has too much xattrs!"); - goto out_release; - } - lowest_xent_key(c, &key, inode->i_ino); while (1) { xent = ubifs_tnc_next_ent(c, &key, &nm); @@ -1209,6 +1341,7 @@ out_free: * @new_nm: new name of the new directory entry * @whiteout: whiteout inode * @sync: non-zero if the write-buffer has to be synchronized + * @delete_orphan: indicates an orphan entry deletion for @whiteout * * This function implements the re-name operation which may involve writing up * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) @@ -1221,7 +1354,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync) + const struct inode *whiteout, int sync, int delete_orphan) { void *p; union ubifs_key key; @@ -1438,6 +1571,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_ro; } + if (delete_orphan) + ubifs_delete_orphan(c, whiteout->i_ino); + finish_reservation(c); if (new_inode) { mark_inode_clean(c, new_ui); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 6d6cd85c2b4c..8788740ec57f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1005,7 +1005,7 @@ out: * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @lst: lprops statistics to update + * @arg: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -1014,8 +1014,9 @@ out: */ static int scan_check_cb(struct ubifs_info *c, const struct ubifs_lprops *lp, int in_tree, - struct ubifs_lp_stats *lst) + void *arg) { + struct ubifs_lp_stats *lst = arg; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; @@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c) memset(&lst, 0, sizeof(struct ubifs_lp_stats)); err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, - (ubifs_lpt_scan_callback)scan_check_cb, - &lst); + scan_check_cb, &lst); if (err && err != -ENOSPC) goto out; diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 778a22bf9a92..441d0beca4cf 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1918,6 +1918,7 @@ out_err: * @pnode: where to keep a pnode * @cnode: where to keep a cnode * @in_tree: is the node in the tree in memory + * @ptr: union of node pointers * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in * the tree * @ptr.pnode: ditto for pnode diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index c4d079328b92..f2cb214581fd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -577,7 +577,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return (void *)nnode; + return ERR_CAST(nnode); /* Go down to level 1 */ while (nnode->level > 1) { @@ -594,7 +594,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, } nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return (void *)nnode; + return 
ERR_CAST(nnode); } for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) @@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) len -= node_len; } - err = 0; out: vfree(buf); return err; @@ -1933,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); - return; } /** diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 7adc37c10b6a..a148760fa49e 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -67,10 +67,13 @@ static int mst_node_check_hash(const struct ubifs_info *c, { u8 calc[UBIFS_MAX_HASH_LEN]; const void *node = mst; + int ret; - crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), + ret = crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc); + if (ret) + return ret; if (ubifs_check_hash(c, expected, calc)) return -EPERM; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 4909321d84cf..5555dd740889 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -42,24 +42,30 @@ static int dbg_check_orphans(struct ubifs_info *c); -static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, - struct ubifs_orphan *parent_orphan) +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) { struct ubifs_orphan *orphan, *o; struct rb_node **p, *parent = NULL; orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); if (!orphan) - return ERR_PTR(-ENOMEM); + return -ENOMEM; orphan->inum = inum; orphan->new = 1; - INIT_LIST_HEAD(&orphan->child_list); spin_lock(&c->orphan_lock); if (c->tot_orphans >= c->max_orphans) { spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-ENFILE); + return -ENFILE; } p = &c->orph_tree.rb_node; while (*p) { @@ -70,10 +76,10 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, else if (inum > o->inum) p = &(*p)->rb_right; else { - ubifs_err(c, "orphaned twice"); + ubifs_err(c, "ino %lu orphaned twice", (unsigned long)inum); spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-EINVAL); + return -EINVAL; } } c->tot_orphans += 1; @@ -83,14 +89,9 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, list_add_tail(&orphan->list, &c->orph_list); list_add_tail(&orphan->new_list, &c->orph_new); - if (parent_orphan) { - list_add_tail(&orphan->child_list, - &parent_orphan->child_list); - } - spin_unlock(&c->orphan_lock); dbg_gen("ino %lu", (unsigned long)inum); - return orphan; + return 0; } static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum) @@ -135,6 +136,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) if (orph->cmt) { orph->del = 1; + rb_erase(&orph->rb, &c->orph_tree); orph->dnext = c->orph_dnext; c->orph_dnext = orph; dbg_gen("delete later ino %lu", (unsigned long)orph->inum); @@ -145,59 +147,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) } /** - * ubifs_add_orphan - add an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * Add an orphan. This function is called when an inodes link count drops to - * zero. 
- */ -int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) -{ - int err = 0; - ino_t xattr_inum; - union ubifs_key key; - struct ubifs_dent_node *xent, *pxent = NULL; - struct fscrypt_name nm = {0}; - struct ubifs_orphan *xattr_orphan; - struct ubifs_orphan *orphan; - - orphan = orphan_add(c, inum, NULL); - if (IS_ERR(orphan)) - return PTR_ERR(orphan); - - lowest_xent_key(c, &key, inum); - while (1) { - xent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(xent)) { - err = PTR_ERR(xent); - if (err == -ENOENT) - break; - kfree(pxent); - return err; - } - - fname_name(&nm) = xent->name; - fname_len(&nm) = le16_to_cpu(xent->nlen); - xattr_inum = le64_to_cpu(xent->inum); - - xattr_orphan = orphan_add(c, xattr_inum, orphan); - if (IS_ERR(xattr_orphan)) { - kfree(pxent); - kfree(xent); - return PTR_ERR(xattr_orphan); - } - - kfree(pxent); - pxent = xent; - key_read(c, &xent->key, &key); - } - kfree(pxent); - - return 0; -} - -/** * ubifs_delete_orphan - delete an orphan. * @c: UBIFS file-system description object * @inum: orphan inode number @@ -206,7 +155,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) */ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) { - struct ubifs_orphan *orph, *child_orph, *tmp_o; + struct ubifs_orphan *orph; spin_lock(&c->orphan_lock); @@ -219,11 +168,6 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) return; } - list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { - list_del(&child_orph->child_list); - orphan_delete(c, child_orph); - } - orphan_delete(c, orph); spin_unlock(&c->orphan_lock); @@ -518,7 +462,6 @@ static void erase_deleted(struct ubifs_info *c) dnext = orphan->dnext; ubifs_assert(c, !orphan->new); ubifs_assert(c, orphan->del); - rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); @@ -571,51 +514,6 @@ int ubifs_clear_orphans(struct ubifs_info *c) } /** - * insert_dead_orphan - insert an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * This function is a helper to the 'do_kill_orphans()' function. The orphan - * must be kept until the next commit, so it is added to the rb-tree and the - * deletion list. - */ -static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) -{ - struct ubifs_orphan *orphan, *o; - struct rb_node **p, *parent = NULL; - - orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); - if (!orphan) - return -ENOMEM; - orphan->inum = inum; - - p = &c->orph_tree.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_orphan, rb); - if (inum < o->inum) - p = &(*p)->rb_left; - else if (inum > o->inum) - p = &(*p)->rb_right; - else { - /* Already added - no problem */ - kfree(orphan); - return 0; - } - } - c->tot_orphans += 1; - rb_link_node(&orphan->rb, parent, p); - rb_insert_color(&orphan->rb, &c->orph_tree); - list_add_tail(&orphan->list, &c->orph_list); - orphan->del = 1; - orphan->dnext = c->orph_dnext; - c->orph_dnext = orphan; - dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, - c->new_orphans, c->tot_orphans); - return 0; -} - -/** * do_kill_orphans - remove orphan inodes from the index. 
* @c: UBIFS file-system description object * @sleb: scanned LEB @@ -691,12 +589,12 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { - union ubifs_key key1, key2; + union ubifs_key key; inum = le64_to_cpu(orph->inos[i]); - ino_key_init(c, &key1, inum); - err = ubifs_tnc_lookup(c, &key1, ino); + ino_key_init(c, &key, inum); + err = ubifs_tnc_lookup(c, &key, ino); if (err && err != -ENOENT) goto out_free; @@ -708,17 +606,10 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, dbg_rcvry("deleting orphaned inode %lu", (unsigned long)inum); - lowest_ino_key(c, &key1, inum); - highest_ino_key(c, &key2, inum); - - err = ubifs_tnc_remove_range(c, &key1, &key2); + err = ubifs_tnc_remove_ino(c, inum); if (err) goto out_ro; } - - err = insert_dead_orphan(c, inum); - if (err) - goto out_free; } *last_cmt_no = cmt_no; @@ -925,8 +816,12 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, inum = key_inum(c, &zbr->key); if (inum != ci->last_ino) { - /* Lowest node type is the inode node, so it comes first */ - if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + /* + * Lowest node type is the inode node or xattr entry(when + * selinux/encryption is enabled), so it comes first + */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY && + key_type(c, &zbr->key) != UBIFS_XENT_KEY) ubifs_err(c, "found orphan node ino %lu, type %d", (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 17da28d6247a..a950c5f2560e 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -29,6 +29,7 @@ * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @hash: node hash * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number * @list: links the replay list diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 09e270d6ed02..f3e3b2068608 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -19,9 +19,9 @@ #include <linux/module.h> #include <linux/ctype.h> #include <linux/kthread.h> -#include <linux/parser.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/seq_file.h> -#include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> #include "ubifs.h" @@ -773,10 +773,10 @@ static void init_constants_master(struct ubifs_info *c) * necessary to report something for the 'statfs()' call. * * Subtract the LEB reserved for GC, the LEB which is reserved for - * deletions, minimum LEBs for the index, and assume only one journal - * head is available. + * deletions, minimum LEBs for the index, the LEBs which are reserved + * for each journal head. 
*/ - tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; + tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt; tmp64 *= (long long)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; @@ -981,177 +981,120 @@ enum { Opt_auth_key, Opt_auth_hash_name, Opt_ignore, - Opt_err, }; -static const match_table_t tokens = { - {Opt_fast_unmount, "fast_unmount"}, - {Opt_norm_unmount, "norm_unmount"}, - {Opt_bulk_read, "bulk_read"}, - {Opt_no_bulk_read, "no_bulk_read"}, - {Opt_chk_data_crc, "chk_data_crc"}, - {Opt_no_chk_data_crc, "no_chk_data_crc"}, - {Opt_override_compr, "compr=%s"}, - {Opt_auth_key, "auth_key=%s"}, - {Opt_auth_hash_name, "auth_hash_name=%s"}, - {Opt_ignore, "ubi=%s"}, - {Opt_ignore, "vol=%s"}, - {Opt_assert, "assert=%s"}, - {Opt_err, NULL}, +static const struct constant_table ubifs_param_compr[] = { + { "none", UBIFS_COMPR_NONE }, + { "lzo", UBIFS_COMPR_LZO }, + { "zlib", UBIFS_COMPR_ZLIB }, + { "zstd", UBIFS_COMPR_ZSTD }, + {} }; -/** - * parse_standard_option - parse a standard mount option. - * @option: the option to parse - * - * Normally, standard mount options like "sync" are passed to file-systems as - * flags. However, when a "rootflags=" kernel boot parameter is used, they may - * be present in the options string. This function tries to deal with this - * situation and parse standard options. Returns 0 if the option was not - * recognized, and the corresponding integer flag if it was. - * - * UBIFS is only interested in the "sync" option, so do not check for anything - * else. - */ -static int parse_standard_option(const char *option) -{ +static const struct constant_table ubifs_param_assert[] = { + { "report", ASSACT_REPORT }, + { "read-only", ASSACT_RO }, + { "panic", ASSACT_PANIC }, + {} +}; - pr_notice("UBIFS: parse %s\n", option); - if (!strcmp(option, "sync")) - return SB_SYNCHRONOUS; - return 0; -} +static const struct fs_parameter_spec ubifs_fs_param_spec[] = { + fsparam_flag ("fast_unmount", Opt_fast_unmount), + fsparam_flag ("norm_unmount", Opt_norm_unmount), + fsparam_flag ("bulk_read", Opt_bulk_read), + fsparam_flag ("no_bulk_read", Opt_no_bulk_read), + fsparam_flag ("chk_data_crc", Opt_chk_data_crc), + fsparam_flag ("no_chk_data_crc", Opt_no_chk_data_crc), + fsparam_enum ("compr", Opt_override_compr, ubifs_param_compr), + fsparam_enum ("assert", Opt_assert, ubifs_param_assert), + fsparam_string ("auth_key", Opt_auth_key), + fsparam_string ("auth_hash_name", Opt_auth_hash_name), + fsparam_string ("ubi", Opt_ignore), + fsparam_string ("vol", Opt_ignore), + {} +}; + +struct ubifs_fs_context { + struct ubifs_mount_opts mount_opts; + char *auth_key_name; + char *auth_hash_name; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; + unsigned int assert_action:2; +}; /** - * ubifs_parse_options - parse mount parameters. - * @c: UBIFS file-system description object - * @options: parameters to parse - * @is_remount: non-zero if this is FS re-mount + * ubifs_parse_param - parse a parameter. + * @fc: the filesystem context + * @param: the parameter to parse * * This function parses UBIFS mount options and returns zero in case success * and a negative error code in case of failure. 
*/ -static int ubifs_parse_options(struct ubifs_info *c, char *options, - int is_remount) +static int ubifs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 0; + struct ubifs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE); + int opt; - while ((p = strsep(&options, ","))) { - int token; + opt = fs_parse(fc, ubifs_fs_param_spec, param, &result); + if (opt < 0) + return opt; - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { + switch (opt) { /* * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. * We accept them in order to be backward-compatible. But this * should be removed at some point. */ - case Opt_fast_unmount: - c->mount_opts.unmount_mode = 2; - break; - case Opt_norm_unmount: - c->mount_opts.unmount_mode = 1; - break; - case Opt_bulk_read: - c->mount_opts.bulk_read = 2; - c->bulk_read = 1; - break; - case Opt_no_bulk_read: - c->mount_opts.bulk_read = 1; - c->bulk_read = 0; - break; - case Opt_chk_data_crc: - c->mount_opts.chk_data_crc = 2; - c->no_chk_data_crc = 0; - break; - case Opt_no_chk_data_crc: - c->mount_opts.chk_data_crc = 1; - c->no_chk_data_crc = 1; - break; - case Opt_override_compr: - { - char *name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "none")) - c->mount_opts.compr_type = UBIFS_COMPR_NONE; - else if (!strcmp(name, "lzo")) - c->mount_opts.compr_type = UBIFS_COMPR_LZO; - else if (!strcmp(name, "zlib")) - c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; - else if (!strcmp(name, "zstd")) - c->mount_opts.compr_type = UBIFS_COMPR_ZSTD; - else { - ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready? 
- kfree(name); - return -EINVAL; - } - kfree(name); - c->mount_opts.override_compr = 1; - c->default_compr = c->mount_opts.compr_type; - break; - } - case Opt_assert: - { - char *act = match_strdup(&args[0]); - - if (!act) - return -ENOMEM; - if (!strcmp(act, "report")) - c->assert_action = ASSACT_REPORT; - else if (!strcmp(act, "read-only")) - c->assert_action = ASSACT_RO; - else if (!strcmp(act, "panic")) - c->assert_action = ASSACT_PANIC; - else { - ubifs_err(c, "unknown assert action \"%s\"", act); - kfree(act); - return -EINVAL; - } - kfree(act); - break; - } - case Opt_auth_key: - if (!is_remount) { - c->auth_key_name = kstrdup(args[0].from, - GFP_KERNEL); - if (!c->auth_key_name) - return -ENOMEM; - } - break; - case Opt_auth_hash_name: - if (!is_remount) { - c->auth_hash_name = kstrdup(args[0].from, - GFP_KERNEL); - if (!c->auth_hash_name) - return -ENOMEM; - } - break; - case Opt_ignore: - break; - default: - { - unsigned long flag; - struct super_block *sb = c->vfs_sb; - - flag = parse_standard_option(p); - if (!flag) { - ubifs_err(c, "unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - sb->s_flags |= flag; - break; + case Opt_fast_unmount: + ctx->mount_opts.unmount_mode = 2; + break; + case Opt_norm_unmount: + ctx->mount_opts.unmount_mode = 1; + break; + case Opt_bulk_read: + ctx->mount_opts.bulk_read = 2; + ctx->bulk_read = 1; + break; + case Opt_no_bulk_read: + ctx->mount_opts.bulk_read = 1; + ctx->bulk_read = 0; + break; + case Opt_chk_data_crc: + ctx->mount_opts.chk_data_crc = 2; + ctx->no_chk_data_crc = 0; + break; + case Opt_no_chk_data_crc: + ctx->mount_opts.chk_data_crc = 1; + ctx->no_chk_data_crc = 1; + break; + case Opt_override_compr: + ctx->mount_opts.compr_type = result.uint_32; + ctx->mount_opts.override_compr = 1; + ctx->default_compr = ctx->mount_opts.compr_type; + break; + case Opt_assert: + ctx->assert_action = result.uint_32; + break; + case Opt_auth_key: + if (!is_remount) { + kfree(ctx->auth_key_name); + ctx->auth_key_name = param->string; + param->string = NULL; } + break; + case Opt_auth_hash_name: + if (!is_remount) { + kfree(ctx->auth_hash_name); + ctx->auth_hash_name = param->string; + param->string = NULL; } + break; + case Opt_ignore: + break; } return 0; @@ -2003,21 +1946,27 @@ static void ubifs_put_super(struct super_block *sb) mutex_unlock(&c->umount_mutex); } -static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +static int ubifs_reconfigure(struct fs_context *fc) { + struct ubifs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; int err; struct ubifs_info *c = sb->s_fs_info; sync_filesystem(sb); - dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); + dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, fc->sb_flags); - err = ubifs_parse_options(c, data, 1); - if (err) { - ubifs_err(c, "invalid or unknown remount parameter"); - return err; - } + /* + * Apply the mount option changes. + * auth_key_name and auth_hash_name are ignored on remount. 
+ */ + c->mount_opts = ctx->mount_opts; + c->bulk_read = ctx->bulk_read; + c->no_chk_data_crc = ctx->no_chk_data_crc; + c->default_compr = ctx->default_compr; + c->assert_action = ctx->assert_action; - if (c->ro_mount && !(*flags & SB_RDONLY)) { + if (c->ro_mount && !(fc->sb_flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/W due to prior errors"); return -EROFS; @@ -2029,7 +1978,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) err = ubifs_remount_rw(c); if (err) return err; - } else if (!c->ro_mount && (*flags & SB_RDONLY)) { + } else if (!c->ro_mount && (fc->sb_flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/O due to prior errors"); return -EROFS; @@ -2062,14 +2011,13 @@ const struct super_operations ubifs_super_operations = { .evict_inode = ubifs_evict_inode, .statfs = ubifs_statfs, .dirty_inode = ubifs_dirty_inode, - .remount_fs = ubifs_remount_fs, .show_options = ubifs_show_options, .sync_fs = ubifs_sync_fs, }; /** * open_ubi - parse UBI device name string and open the UBI device. - * @name: UBI volume name + * @fc: The filesystem context * @mode: UBI volume open mode * * The primary method of mounting UBIFS is by specifying the UBI volume @@ -2086,15 +2034,13 @@ const struct super_operations ubifs_super_operations = { * returns UBI volume description object in case of success and a negative * error code in case of failure. */ -static struct ubi_volume_desc *open_ubi(const char *name, int mode) +static struct ubi_volume_desc *open_ubi(struct fs_context *fc, int mode) { struct ubi_volume_desc *ubi; + const char *name = fc->source; int dev, vol; char *endptr; - if (!name || !*name) - return ERR_PTR(-EINVAL); - /* First, try to open using the device node path method */ ubi = ubi_open_volume_path(name, mode); if (!IS_ERR(ubi)) @@ -2102,14 +2048,14 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) /* Try the "nodev" method */ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') - return ERR_PTR(-EINVAL); + goto invalid_source; /* ubi:NAME method */ if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') return ubi_open_volume_nm(0, name + 4, mode); if (!isdigit(name[3])) - return ERR_PTR(-EINVAL); + goto invalid_source; dev = simple_strtoul(name + 3, &endptr, 0); @@ -2121,7 +2067,7 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) if (*endptr == '_' && isdigit(endptr[1])) { vol = simple_strtoul(endptr + 1, &endptr, 0); if (*endptr != '\0') - return ERR_PTR(-EINVAL); + goto invalid_source; return ubi_open_volume(dev, vol, mode); } @@ -2129,7 +2075,8 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') return ubi_open_volume_nm(dev, ++endptr, mode); - return ERR_PTR(-EINVAL); +invalid_source: + return ERR_PTR(invalf(fc, "Invalid source name")); } static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) @@ -2151,6 +2098,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) mutex_init(&c->bu_mutex); mutex_init(&c->write_reserve_mutex); init_waitqueue_head(&c->cmt_wq); + init_waitqueue_head(&c->reserve_space_wq); + atomic_set(&c->need_wait_space, 0); c->buds = RB_ROOT; c->old_idx = RB_ROOT; c->size_tree = RB_ROOT; @@ -2179,9 +2128,10 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) return c; } -static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +static int ubifs_fill_super(struct super_block *sb, struct 
fs_context *fc) { struct ubifs_info *c = sb->s_fs_info; + struct ubifs_fs_context *ctx = fc->fs_private; struct inode *root; int err; @@ -2193,9 +2143,18 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out; } - err = ubifs_parse_options(c, data, 0); - if (err) - goto out_close; + /* Copy in parsed mount options */ + c->mount_opts = ctx->mount_opts; + c->auth_key_name = ctx->auth_key_name; + c->auth_hash_name = ctx->auth_hash_name; + c->no_chk_data_crc = ctx->no_chk_data_crc; + c->bulk_read = ctx->bulk_read; + c->default_compr = ctx->default_compr; + c->assert_action = ctx->assert_action; + + /* ubifs_info owns auth strings now */ + ctx->auth_key_name = NULL; + ctx->auth_hash_name = NULL; /* * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For @@ -2239,13 +2198,16 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_umount; } + generic_set_sb_d_ops(sb); sb->s_root = d_make_root(root); if (!sb->s_root) { err = -ENOMEM; goto out_umount; } - import_uuid(&sb->s_uuid, c->uuid); + super_set_uuid(sb, c->uuid, sizeof(c->uuid)); + super_set_sysfs_name_generic(sb, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); mutex_unlock(&c->umount_mutex); return 0; @@ -2261,41 +2223,38 @@ out: return err; } -static int sb_test(struct super_block *sb, void *data) +static int sb_test(struct super_block *sb, struct fs_context *fc) { - struct ubifs_info *c1 = data; + struct ubifs_info *c1 = fc->s_fs_info; struct ubifs_info *c = sb->s_fs_info; return c->vi.cdev == c1->vi.cdev; } -static int sb_set(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); -} - -static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, - const char *name, void *data) +static int ubifs_get_tree(struct fs_context *fc) { struct ubi_volume_desc *ubi; struct ubifs_info *c; struct super_block *sb; int err; - dbg_gen("name %s, flags %#x", name, flags); + if (!fc->source || !*fc->source) + return invalf(fc, "No source specified"); + + dbg_gen("name %s, flags %#x", fc->source, fc->sb_flags); /* * Get UBI device number and volume ID. Mount it read-only so far * because this might be a new mount point, and UBI allows only one * read-write user at a time. */ - ubi = open_ubi(name, UBI_READONLY); + ubi = open_ubi(fc, UBI_READONLY); if (IS_ERR(ubi)) { - if (!(flags & SB_SILENT)) + err = PTR_ERR(ubi); + if (!(fc->sb_flags & SB_SILENT)) pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d", - current->pid, name, (int)PTR_ERR(ubi)); - return ERR_CAST(ubi); + current->pid, fc->source, err); + return err; } c = alloc_ubifs_info(ubi); @@ -2303,10 +2262,11 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, err = -ENOMEM; goto out_close; } + fc->s_fs_info = c; dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, sb_test, sb_set, flags, c); + sb = sget_fc(fc, sb_test, set_anon_super_fc); if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); @@ -2318,12 +2278,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if (!!(flags & SB_RDONLY) != c1->ro_mount) { + if (!!(fc->sb_flags & SB_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } } else { - err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 
1 : 0); + err = ubifs_fill_super(sb, fc); if (err) goto out_deact; /* We do not support atime */ @@ -2337,13 +2297,14 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, /* 'fill_super()' opens ubi again so we must close it here */ ubi_close_volume(ubi); - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; out_deact: deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); - return ERR_PTR(err); + return err; } static void kill_ubifs_super(struct super_block *s) @@ -2353,10 +2314,61 @@ static void kill_ubifs_super(struct super_block *s) kfree(c); } +static void ubifs_free_fc(struct fs_context *fc) +{ + struct ubifs_fs_context *ctx = fc->fs_private; + + if (ctx) { + kfree(ctx->auth_key_name); + kfree(ctx->auth_hash_name); + kfree(ctx); + } +} + +static const struct fs_context_operations ubifs_context_ops = { + .free = ubifs_free_fc, + .parse_param = ubifs_parse_param, + .get_tree = ubifs_get_tree, + .reconfigure = ubifs_reconfigure, +}; + +static int ubifs_init_fs_context(struct fs_context *fc) +{ + struct ubifs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct ubifs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) { + /* Iniitialize for first mount */ + ctx->no_chk_data_crc = 1; + ctx->assert_action = ASSACT_RO; + } else { + struct ubifs_info *c = fc->root->d_sb->s_fs_info; + + /* + * Preserve existing options across remounts. + * auth_key_name and auth_hash_name are not remountable. + */ + ctx->mount_opts = c->mount_opts; + ctx->bulk_read = c->bulk_read; + ctx->no_chk_data_crc = c->no_chk_data_crc; + ctx->default_compr = c->default_compr; + ctx->assert_action = c->assert_action; + } + + fc->ops = &ubifs_context_ops; + fc->fs_private = ctx; + + return 0; +} + static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, - .mount = ubifs_mount, + .init_fs_context = ubifs_init_fs_context, + .parameters = ubifs_fs_param_spec, .kill_sb = kill_ubifs_super, }; MODULE_ALIAS_FS("ubifs"); @@ -2433,8 +2445,8 @@ static int __init ubifs_init(void) ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", sizeof(struct ubifs_inode), 0, - SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT | - SLAB_ACCOUNT, &inode_slab_ctor); + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, + &inode_slab_ctor); if (!ubifs_inode_slab) return -ENOMEM; diff --git a/fs/ubifs/sysfs.c b/fs/ubifs/sysfs.c index 1c958148bb87..aae32222f11b 100644 --- a/fs/ubifs/sysfs.c +++ b/fs/ubifs/sysfs.c @@ -91,17 +91,17 @@ static struct kset ubifs_kset = { int ubifs_sysfs_register(struct ubifs_info *c) { int ret, n; - char dfs_dir_name[UBIFS_DFS_DIR_LEN+1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; c->stats = kzalloc(sizeof(struct ubifs_stats_info), GFP_KERNEL); if (!c->stats) { ret = -ENOMEM; goto out_last; } - n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n > UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ ret = -EINVAL; goto out_free; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f4728e65d1bd..33946b518148 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2930,8 +2930,6 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) dbg_tnc("xent '%s', ino %lu", xent->name, (unsigned long)xattr_inum); - ubifs_evict_xattr_inode(c, xattr_inum); - fname_name(&nm) = xent->name; fname_len(&nm) = le16_to_cpu(xent->nlen); err = ubifs_tnc_remove_nm(c, &key1, &nm); @@ 
-3116,14 +3114,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) void ubifs_tnc_close(struct ubifs_info *c) { tnc_destroy_cnext(c); - if (c->zroot.znode) { - long n, freed; - - n = atomic_long_read(&c->clean_zn_cnt); - freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode); - ubifs_assert(c, freed == n); - atomic_long_sub(n, &ubifs_clean_zn_cnt); - } + ubifs_destroy_tnc_tree(c); kfree(c->gap_lebs); kfree(c->ilebs); destroy_old_idx(c); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index a55e04822d16..7c43e0ccf6d4 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -657,6 +657,8 @@ static int get_znodes_to_commit(struct ubifs_info *c) znode->alt = 0; cnext = find_next_dirty(znode); if (!cnext) { + ubifs_assert(c, !znode->parent); + znode->cparent = NULL; znode->cnext = c->cnext; break; } diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index 4d686e34e64d..d3f8a6aa1f49 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -251,6 +251,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c, } /** + * ubifs_destroy_tnc_tree - destroy all znodes connected to the TNC tree. + * @c: UBIFS file-system description object + * + * This function destroys the whole TNC tree and updates clean global znode + * count. + */ +void ubifs_destroy_tnc_tree(struct ubifs_info *c) +{ + long n, freed; + + if (!c->zroot.znode) + return; + + n = atomic_long_read(&c->clean_zn_cnt); + freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode); + ubifs_assert(c, freed == n); + atomic_long_sub(n, &ubifs_clean_zn_cnt); + + c->zroot.znode = NULL; +} + +/** * read_znode - read an indexing node from flash and fill znode. * @c: UBIFS file-system description object * @zzbr: the zbranch describing the node to read diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 3916dc4f30ca..3375bbe0508c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -158,13 +158,6 @@ #endif /* - * The UBIFS sysfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. - */ -#define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) - -/* * Lockdep classes for UBIFS inode @ui_mutex. */ enum { @@ -923,8 +916,6 @@ struct ubifs_budget_req { * @rb: rb-tree node of rb-tree of orphans sorted by inode number * @list: list head of list of orphans in order added * @new_list: list head of list of orphans added since the last commit - * @child_list: list of xattr children if this orphan hosts xattrs, list head - * if this orphan is a xattr, not used otherwise. 
* @cnext: next orphan to commit * @dnext: next orphan to delete * @inum: inode number @@ -936,7 +927,6 @@ struct ubifs_orphan { struct rb_node rb; struct list_head list; struct list_head new_list; - struct list_head child_list; struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; @@ -1047,6 +1037,8 @@ struct ubifs_debug_info; * @bg_bud_bytes: number of bud bytes when background commit is initiated * @old_buds: buds to be released after commit ends * @max_bud_cnt: maximum number of buds + * @need_wait_space: Non %0 means space reservation tasks need to wait in queue + * @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0 * * @commit_sem: synchronizes committer with other processes * @cmt_state: commit state @@ -1305,6 +1297,8 @@ struct ubifs_info { long long bg_bud_bytes; struct list_head old_buds; int max_bud_cnt; + atomic_t need_wait_space; + wait_queue_head_t reserve_space_wq; struct rw_semaphore commit_sem; int cmt_state; @@ -1799,7 +1793,7 @@ int ubifs_consolidate_log(struct ubifs_info *c); /* journal.c */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent); + int deletion, int xent, int in_orphan); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); @@ -1816,7 +1810,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync); + const struct inode *whiteout, int sync, int delete_orphan); int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, loff_t old_size, loff_t new_size); int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, @@ -1903,6 +1897,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c, struct ubifs_znode *znode); long ubifs_destroy_tnc_subtree(const struct ubifs_info *c, struct ubifs_znode *zr); +void ubifs_destroy_tnc_tree(struct ubifs_info *c); struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr, struct ubifs_znode *parent, int iip); @@ -2045,13 +2040,10 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, #ifdef CONFIG_UBIFS_FS_XATTR extern const struct xattr_handler * const ubifs_xattr_handlers[]; ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); -void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum); int ubifs_purge_xattrs(struct inode *host); #else #define ubifs_listxattr NULL #define ubifs_xattr_handlers NULL -static inline void ubifs_evict_xattr_inode(struct ubifs_info *c, - ino_t xattr_inum) { } static inline int ubifs_purge_xattrs(struct inode *host) { return 0; diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 0847db521984..c21a0c2b3e90 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -48,19 +48,6 @@ #include <linux/slab.h> #include <linux/xattr.h> -/* - * Extended attribute type constants. 
- * - * USER_XATTR: user extended attribute ("user.*") - * TRUSTED_XATTR: trusted extended attribute ("trusted.*) - * SECURITY_XATTR: security extended attribute ("security.*") - */ -enum { - USER_XATTR, - TRUSTED_XATTR, - SECURITY_XATTR, -}; - static const struct inode_operations empty_iops; static const struct file_operations empty_fops; @@ -149,7 +136,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (strcmp(fname_name(nm), UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) == 0) host_ui->flags |= UBIFS_CRYPT_FL; - err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + err = ubifs_jnl_update(c, host, nm, inode, 0, 1, 0); if (err) goto out_cancel; ubifs_set_inode_flags(host); @@ -532,8 +519,6 @@ int ubifs_purge_xattrs(struct inode *host) ubifs_err(c, "dead directory entry '%s', error %d", xent->name, err); ubifs_ro_mode(c, err); - kfree(pxent); - kfree(xent); goto out_err; } @@ -541,16 +526,12 @@ int ubifs_purge_xattrs(struct inode *host) clear_nlink(xino); err = remove_xattr(c, host, xino, &nm); + iput(xino); if (err) { - kfree(pxent); - kfree(xent); - iput(xino); ubifs_err(c, "cannot remove xattr, error %d", err); goto out_err; } - iput(xino); - kfree(pxent); pxent = xent; key_read(c, &xent->key, &key); @@ -566,32 +547,12 @@ int ubifs_purge_xattrs(struct inode *host) return 0; out_err: + kfree(pxent); + kfree(xent); up_write(&ubifs_inode(host)->xattr_sem); return err; } -/** - * ubifs_evict_xattr_inode - Evict an xattr inode. - * @c: UBIFS file-system description object - * @xattr_inum: xattr inode number - * - * When an inode that hosts xattrs is being removed we have to make sure - * that cached inodes of the xattrs also get removed from the inode cache - * otherwise we'd waste memory. This function looks up an inode from the - * inode cache and clears the link counter such that iput() will evict - * the inode. - */ -void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum) -{ - struct inode *inode; - - inode = ilookup(c->vfs_sb, xattr_inum); - if (inode) { - clear_nlink(inode); - iput(inode); - } -} - static int ubifs_xattr_remove(struct inode *host, const char *name) { struct inode *inode; |