diff options
Diffstat (limited to 'fs/ubifs')
| -rw-r--r-- | fs/ubifs/Makefile | 2 | ||||
| -rw-r--r-- | fs/ubifs/auth.c | 33 | ||||
| -rw-r--r-- | fs/ubifs/budget.c | 16 | ||||
| -rw-r--r-- | fs/ubifs/commit.c | 23 | ||||
| -rw-r--r-- | fs/ubifs/compress.c | 246 | ||||
| -rw-r--r-- | fs/ubifs/crypto.c | 19 | ||||
| -rw-r--r-- | fs/ubifs/debug.c | 177 | ||||
| -rw-r--r-- | fs/ubifs/debug.h | 12 | ||||
| -rw-r--r-- | fs/ubifs/dir.c | 559 | ||||
| -rw-r--r-- | fs/ubifs/file.c | 713 | ||||
| -rw-r--r-- | fs/ubifs/find.c | 40 | ||||
| -rw-r--r-- | fs/ubifs/gc.c | 30 | ||||
| -rw-r--r-- | fs/ubifs/io.c | 129 | ||||
| -rw-r--r-- | fs/ubifs/ioctl.c | 89 | ||||
| -rw-r--r-- | fs/ubifs/journal.c | 329 | ||||
| -rw-r--r-- | fs/ubifs/lprops.c | 12 | ||||
| -rw-r--r-- | fs/ubifs/lpt.c | 19 | ||||
| -rw-r--r-- | fs/ubifs/lpt_commit.c | 20 | ||||
| -rw-r--r-- | fs/ubifs/master.c | 11 | ||||
| -rw-r--r-- | fs/ubifs/misc.h | 2 | ||||
| -rw-r--r-- | fs/ubifs/orphan.c | 161 | ||||
| -rw-r--r-- | fs/ubifs/recovery.c | 10 | ||||
| -rw-r--r-- | fs/ubifs/replay.c | 27 | ||||
| -rw-r--r-- | fs/ubifs/sb.c | 10 | ||||
| -rw-r--r-- | fs/ubifs/scan.c | 4 | ||||
| -rw-r--r-- | fs/ubifs/super.c | 561 | ||||
| -rw-r--r-- | fs/ubifs/sysfs.c | 156 | ||||
| -rw-r--r-- | fs/ubifs/tnc.c | 186 | ||||
| -rw-r--r-- | fs/ubifs/tnc_commit.c | 6 | ||||
| -rw-r--r-- | fs/ubifs/tnc_misc.c | 42 | ||||
| -rw-r--r-- | fs/ubifs/ubifs.h | 113 | ||||
| -rw-r--r-- | fs/ubifs/xattr.c | 106 |
32 files changed, 2281 insertions, 1582 deletions
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile index 5c4b845754a7..314c80b24a76 100644 --- a/fs/ubifs/Makefile +++ b/fs/ubifs/Makefile @@ -5,7 +5,7 @@ ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o -ubifs-y += misc.o +ubifs-y += misc.o sysfs.o ubifs-$(CONFIG_FS_ENCRYPTION) += crypto.o ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index cc5c0abfd536..a4a0158f712d 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -9,18 +9,16 @@ * This file implements various helper functions for UBIFS authentication support */ -#include <linux/crypto.h> #include <linux/verification.h> #include <crypto/hash.h> -#include <crypto/sha.h> -#include <crypto/algapi.h> +#include <crypto/utils.h> #include <keys/user-type.h> #include <keys/asymmetric-type.h> #include "ubifs.h" /** - * ubifs_node_calc_hash - calculate the hash of a UBIFS node + * __ubifs_node_calc_hash - calculate the hash of a UBIFS node * @c: UBIFS file-system description object * @node: the node to calculate a hash for * @hash: the returned hash @@ -54,7 +52,7 @@ static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash, * ubifs_prepare_auth_node - Prepare an authentication node * @c: UBIFS file-system description object * @node: the node to calculate a hash for - * @hash: input hash of previous nodes + * @inhash: input hash of previous nodes * * This function prepares an authentication node for writing onto flash. * It creates a HMAC from the given input hash and writes it to the node. @@ -328,7 +326,7 @@ int ubifs_init_authentication(struct ubifs_info *c) ubifs_err(c, "hmac %s is bigger than maximum allowed hmac size (%d > %d)", hmac_name, c->hmac_desc_len, UBIFS_HMAC_ARR_SZ); err = -EINVAL; - goto out_free_hash; + goto out_free_hmac; } err = crypto_shash_setkey(c->hmac_tfm, ukp->data, ukp->datalen); @@ -338,8 +336,10 @@ int ubifs_init_authentication(struct ubifs_info *c) c->authenticated = true; c->log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->log_hash)) + if (IS_ERR(c->log_hash)) { + err = PTR_ERR(c->log_hash); goto out_free_hmac; + } err = 0; @@ -507,28 +507,13 @@ out: */ int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac) { - SHASH_DESC_ON_STACK(shash, c->hmac_tfm); - int err; const char well_known_message[] = "UBIFS"; if (!ubifs_authenticated(c)) return 0; - shash->tfm = c->hmac_tfm; - - err = crypto_shash_init(shash); - if (err) - return err; - - err = crypto_shash_update(shash, well_known_message, - sizeof(well_known_message) - 1); - if (err < 0) - return err; - - err = crypto_shash_final(shash, hmac); - if (err) - return err; - return 0; + return crypto_shash_tfm_digest(c->hmac_tfm, well_known_message, + sizeof(well_known_message) - 1, hmac); } /* diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index c0b84e960b20..d76eb7b39f56 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -65,7 +65,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write) */ static int run_gc(struct ubifs_info *c) { - int err, lnum; + int lnum; /* Make some free space by garbage-collecting dirty space */ down_read(&c->commit_sem); @@ -76,10 +76,7 @@ static int run_gc(struct ubifs_info *c) /* GC freed one LEB, return it to lprops */ dbg_budg("GC freed LEB %d", lnum); - err = ubifs_return_leb(c, lnum); - if (err) - return err; - return 0; + return ubifs_return_leb(c, lnum); } /** @@ -212,11 +209,10 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) subtract_lebs += 1; /* - * The GC journal head LEB is not really accessible. And since - * different write types go to different heads, we may count only on - * one head's space. + * Since different write types go to different heads, we should + * reserve one leb for each head. */ - subtract_lebs += c->jhead_cnt - 1; + subtract_lebs += c->jhead_cnt; /* We also reserve one LEB for deletions, which bypass budgeting */ subtract_lebs += 1; @@ -403,7 +399,7 @@ static int calc_dd_growth(const struct ubifs_info *c, dd_growth = req->dirtied_page ? c->bi.page_budget : 0; if (req->dirtied_ino) - dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + dd_growth += c->bi.inode_budget * req->dirtied_ino; if (req->mod_dent) dd_growth += c->bi.dent_budget; dd_growth += req->dirtied_ino_d; diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index ad292c5a43a9..5b3a840098b0 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -70,18 +70,29 @@ static int nothing_to_commit(struct ubifs_info *c) return 0; /* + * Increasing @c->dirty_pn_cnt/@c->dirty_nn_cnt and marking + * nnodes/pnodes as dirty in run_gc() could race with following + * checking, which leads inconsistent states between @c->nroot + * and @c->dirty_pn_cnt/@c->dirty_nn_cnt, holding @c->lp_mutex + * to avoid that. + */ + mutex_lock(&c->lp_mutex); + /* * Even though the TNC is clean, the LPT tree may have dirty nodes. For * example, this may happen if the budgeting subsystem invoked GC to * make some free space, and the GC found an LEB with only dirty and * free space. In this case GC would just change the lprops of this * LEB (by turning all space into free space) and unmap it. */ - if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) + if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) { + mutex_unlock(&c->lp_mutex); return 0; + } ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0); ubifs_assert(c, c->dirty_pn_cnt == 0); ubifs_assert(c, c->dirty_nn_cnt == 0); + mutex_unlock(&c->lp_mutex); return 1; } @@ -552,11 +563,11 @@ out: */ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) { - int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; + int lnum, offs, len, err = 0, last_level, child_cnt; int first = 1, iip; struct ubifs_debug_info *d = c->dbg; - union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; - unsigned long long uninitialized_var(last_sqnum); + union ubifs_key lower_key, upper_key, l_key, u_key; + unsigned long long last_sqnum; struct ubifs_idx_node *idx; struct list_head list; struct idx_node *i; @@ -701,13 +712,13 @@ out: out_dump: ubifs_err(c, "dumping index node (iip=%d)", i->iip); - ubifs_dump_node(c, idx); + ubifs_dump_node(c, idx, ubifs_idx_node_sz(c, c->fanout)); list_del(&i->list); kfree(i); if (!list_empty(&list)) { i = list_entry(list.prev, struct idx_node, list); ubifs_err(c, "dumping parent index node"); - ubifs_dump_node(c, &i->idx); + ubifs_dump_node(c, &i->idx, ubifs_idx_node_sz(c, c->fanout)); } out_free: while (!list_empty(&list)) { diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 3a92e6af69b2..059a02691edd 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -15,9 +15,15 @@ * decompression. */ -#include <linux/crypto.h> +#include <crypto/acompress.h> +#include <linux/highmem.h> #include "ubifs.h" +union ubifs_in_ptr { + const void *buf; + struct folio *folio; +}; + /* Fake description object for the "none" compressor */ static struct ubifs_compressor none_compr = { .compr_type = UBIFS_COMPR_NONE, @@ -26,11 +32,8 @@ static struct ubifs_compressor none_compr = { }; #ifdef CONFIG_UBIFS_FS_LZO -static DEFINE_MUTEX(lzo_mutex); - static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, - .comp_mutex = &lzo_mutex, .name = "lzo", .capi_name = "lzo", }; @@ -42,13 +45,8 @@ static struct ubifs_compressor lzo_compr = { #endif #ifdef CONFIG_UBIFS_FS_ZLIB -static DEFINE_MUTEX(deflate_mutex); -static DEFINE_MUTEX(inflate_mutex); - static struct ubifs_compressor zlib_compr = { .compr_type = UBIFS_COMPR_ZLIB, - .comp_mutex = &deflate_mutex, - .decomp_mutex = &inflate_mutex, .name = "zlib", .capi_name = "deflate", }; @@ -60,13 +58,8 @@ static struct ubifs_compressor zlib_compr = { #endif #ifdef CONFIG_UBIFS_FS_ZSTD -static DEFINE_MUTEX(zstd_enc_mutex); -static DEFINE_MUTEX(zstd_dec_mutex); - static struct ubifs_compressor zstd_compr = { .compr_type = UBIFS_COMPR_ZSTD, - .comp_mutex = &zstd_enc_mutex, - .decomp_mutex = &zstd_dec_mutex, .name = "zstd", .capi_name = "zstd", }; @@ -80,8 +73,66 @@ static struct ubifs_compressor zstd_compr = { /* All UBIFS compressors */ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; +static void ubifs_compress_common(int *compr_type, union ubifs_in_ptr in_ptr, + size_t in_offset, int in_len, bool in_folio, + void *out_buf, int *out_len) +{ + struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; + int dlen = *out_len; + int err; + + if (*compr_type == UBIFS_COMPR_NONE) + goto no_compr; + + /* If the input data is small, do not even try to compress it */ + if (in_len < UBIFS_MIN_COMPR_LEN) + goto no_compr; + + dlen = min(dlen, in_len - UBIFS_MIN_COMPRESS_DIFF); + + do { + ACOMP_REQUEST_ON_STACK(req, compr->cc); + DECLARE_CRYPTO_WAIT(wait); + + acomp_request_set_callback(req, 0, NULL, NULL); + if (in_folio) + acomp_request_set_src_folio(req, in_ptr.folio, + in_offset, in_len); + else + acomp_request_set_src_dma(req, in_ptr.buf, in_len); + acomp_request_set_dst_dma(req, out_buf, dlen); + err = crypto_acomp_compress(req); + dlen = req->dlen; + if (err != -EAGAIN) + break; + + req = ACOMP_REQUEST_CLONE(req, GFP_NOFS | __GFP_NOWARN); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + err = crypto_acomp_compress(req); + err = crypto_wait_req(err, &wait); + dlen = req->dlen; + acomp_request_free(req); + } while (0); + + *out_len = dlen; + if (err) + goto no_compr; + + return; + +no_compr: + if (in_folio) + memcpy_from_folio(out_buf, in_ptr.folio, in_offset, in_len); + else + memcpy(out_buf, in_ptr.buf, in_len); + *out_len = in_len; + *compr_type = UBIFS_COMPR_NONE; +} + /** * ubifs_compress - compress data. + * @c: UBIFS file-system description object * @in_buf: data to compress * @in_len: length of the data to compress * @out_buf: output buffer where compressed data should be stored @@ -101,60 +152,51 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type) { - int err; - struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; - - if (*compr_type == UBIFS_COMPR_NONE) - goto no_compr; - - /* If the input data is small, do not even try to compress it */ - if (in_len < UBIFS_MIN_COMPR_LEN) - goto no_compr; - - if (compr->comp_mutex) - mutex_lock(compr->comp_mutex); - err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, - (unsigned int *)out_len); - if (compr->comp_mutex) - mutex_unlock(compr->comp_mutex); - if (unlikely(err)) { - ubifs_warn(c, "cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", - in_len, compr->name, err); - goto no_compr; - } - - /* - * If the data compressed only slightly, it is better to leave it - * uncompressed to improve read speed. - */ - if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) - goto no_compr; + union ubifs_in_ptr in_ptr = { .buf = in_buf }; - return; - -no_compr: - memcpy(out_buf, in_buf, in_len); - *out_len = in_len; - *compr_type = UBIFS_COMPR_NONE; + ubifs_compress_common(compr_type, in_ptr, 0, in_len, false, + out_buf, out_len); } /** - * ubifs_decompress - decompress data. - * @in_buf: data to decompress - * @in_len: length of the data to decompress - * @out_buf: output buffer where decompressed data should - * @out_len: output length is returned here - * @compr_type: type of compression + * ubifs_compress_folio - compress folio. + * @c: UBIFS file-system description object + * @in_folio: data to compress + * @in_offset: offset into @in_folio + * @in_len: length of the data to compress + * @out_buf: output buffer where compressed data should be stored + * @out_len: output buffer length is returned here + * @compr_type: type of compression to use on enter, actually used compression + * type on exit * - * This function decompresses data from buffer @in_buf into buffer @out_buf. - * The length of the uncompressed data is returned in @out_len. This functions - * returns %0 on success or a negative error code on failure. + * This function compresses input folio @in_folio of length @in_len and + * stores the result in the output buffer @out_buf and the resulting length + * in @out_len. If the input buffer does not compress, it is just copied + * to the @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE + * or if compression error occurred. + * + * Note, if the input buffer was not compressed, it is copied to the output + * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. */ -int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, - int in_len, void *out_buf, int *out_len, int compr_type) +void ubifs_compress_folio(const struct ubifs_info *c, struct folio *in_folio, + size_t in_offset, int in_len, void *out_buf, + int *out_len, int *compr_type) +{ + union ubifs_in_ptr in_ptr = { .folio = in_folio }; + + ubifs_compress_common(compr_type, in_ptr, in_offset, in_len, true, + out_buf, out_len); +} + +static int ubifs_decompress_common(const struct ubifs_info *c, + const void *in_buf, int in_len, + void *out_ptr, size_t out_offset, + int *out_len, bool out_folio, + int compr_type) { - int err; struct ubifs_compressor *compr; + int dlen = *out_len; + int err; if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { ubifs_err(c, "invalid compression type %d", compr_type); @@ -169,17 +211,39 @@ int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, } if (compr_type == UBIFS_COMPR_NONE) { - memcpy(out_buf, in_buf, in_len); + if (out_folio) + memcpy_to_folio(out_ptr, out_offset, in_buf, in_len); + else + memcpy(out_ptr, in_buf, in_len); *out_len = in_len; return 0; } - if (compr->decomp_mutex) - mutex_lock(compr->decomp_mutex); - err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, - (unsigned int *)out_len); - if (compr->decomp_mutex) - mutex_unlock(compr->decomp_mutex); + do { + ACOMP_REQUEST_ON_STACK(req, compr->cc); + DECLARE_CRYPTO_WAIT(wait); + + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + acomp_request_set_src_dma(req, in_buf, in_len); + if (out_folio) + acomp_request_set_dst_folio(req, out_ptr, out_offset, + dlen); + else + acomp_request_set_dst_dma(req, out_ptr, dlen); + err = crypto_acomp_decompress(req); + dlen = req->dlen; + if (err != -EAGAIN) + break; + + req = ACOMP_REQUEST_CLONE(req, GFP_NOFS | __GFP_NOWARN); + err = crypto_acomp_decompress(req); + err = crypto_wait_req(err, &wait); + dlen = req->dlen; + acomp_request_free(req); + } while (0); + + *out_len = dlen; if (err) ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d", in_len, compr->name, err); @@ -188,6 +252,49 @@ int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, } /** + * ubifs_decompress - decompress data. + * @c: UBIFS file-system description object + * @in_buf: data to decompress + * @in_len: length of the data to decompress + * @out_buf: output buffer where decompressed data should + * @out_len: output length is returned here + * @compr_type: type of compression + * + * This function decompresses data from buffer @in_buf into buffer @out_buf. + * The length of the uncompressed data is returned in @out_len. This functions + * returns %0 on success or a negative error code on failure. + */ +int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, + int in_len, void *out_buf, int *out_len, int compr_type) +{ + return ubifs_decompress_common(c, in_buf, in_len, out_buf, 0, out_len, + false, compr_type); +} + +/** + * ubifs_decompress_folio - decompress folio. + * @c: UBIFS file-system description object + * @in_buf: data to decompress + * @in_len: length of the data to decompress + * @out_folio: output folio where decompressed data should + * @out_offset: offset into @out_folio + * @out_len: output length is returned here + * @compr_type: type of compression + * + * This function decompresses data from buffer @in_buf into folio + * @out_folio. The length of the uncompressed data is returned in + * @out_len. This functions returns %0 on success or a negative error + * code on failure. + */ +int ubifs_decompress_folio(const struct ubifs_info *c, const void *in_buf, + int in_len, struct folio *out_folio, + size_t out_offset, int *out_len, int compr_type) +{ + return ubifs_decompress_common(c, in_buf, in_len, out_folio, + out_offset, out_len, true, compr_type); +} + +/** * compr_init - initialize a compressor. * @compr: compressor description object * @@ -197,7 +304,7 @@ int ubifs_decompress(const struct ubifs_info *c, const void *in_buf, static int __init compr_init(struct ubifs_compressor *compr) { if (compr->capi_name) { - compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); + compr->cc = crypto_alloc_acomp(compr->capi_name, 0, 0); if (IS_ERR(compr->cc)) { pr_err("UBIFS error (pid %d): cannot initialize compressor %s, error %ld", current->pid, compr->name, PTR_ERR(compr->cc)); @@ -216,8 +323,7 @@ static int __init compr_init(struct ubifs_compressor *compr) static void compr_exit(struct ubifs_compressor *compr) { if (compr->capi_name) - crypto_free_comp(compr->cc); - return; + crypto_free_acomp(compr->cc); } /** diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 22be7aeb96c4..0b14d004a095 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -24,6 +24,17 @@ static bool ubifs_crypt_empty_dir(struct inode *inode) return ubifs_check_dir_empty(inode) == 0; } +/** + * ubifs_encrypt - Encrypt data. + * @inode: inode which refers to the data node + * @dn: data node to encrypt + * @in_len: length of data to be compressed + * @out_len: allocated memory size for the data area of @dn + * @block: logical block number of the block + * + * This function encrypt a possibly-compressed data in the data node. + * The encrypted data length will store in @out_len. + */ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, int block) { @@ -40,7 +51,7 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, memset(p + in_len, 0, pad_len - in_len); err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len, - offset_in_page(p), block, GFP_NOFS); + offset_in_page(p), block); if (err) { ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err); return err; @@ -77,10 +88,10 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, } const struct fscrypt_operations ubifs_crypt_operations = { - .flags = FS_CFLG_OWN_PAGES, - .key_prefix = "ubifs:", + .inode_info_offs = (int)offsetof(struct ubifs_inode, i_crypt_info) - + (int)offsetof(struct ubifs_inode, vfs_inode), + .legacy_key_prefix = "ubifs:", .get_context = ubifs_crypt_get_context, .set_context = ubifs_crypt_set_context, .empty_dir = ubifs_crypt_empty_dir, - .max_namelen = UBIFS_MAX_NLEN, }; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 31288d8fa2ce..b01f382ce8db 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -237,14 +237,14 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode)); pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode)); pr_err("\tatime %u.%u\n", - (unsigned int)inode->i_atime.tv_sec, - (unsigned int)inode->i_atime.tv_nsec); + (unsigned int) inode_get_atime_sec(inode), + (unsigned int) inode_get_atime_nsec(inode)); pr_err("\tmtime %u.%u\n", - (unsigned int)inode->i_mtime.tv_sec, - (unsigned int)inode->i_mtime.tv_nsec); + (unsigned int) inode_get_mtime_sec(inode), + (unsigned int) inode_get_mtime_nsec(inode)); pr_err("\tctime %u.%u\n", - (unsigned int)inode->i_ctime.tv_sec, - (unsigned int)inode->i_ctime.tv_nsec); + (unsigned int) inode_get_ctime_sec(inode), + (unsigned int) inode_get_ctime_nsec(inode)); pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); pr_err("\txattr_size %u\n", ui->xattr_size); pr_err("\txattr_cnt %u\n", ui->xattr_cnt); @@ -291,9 +291,9 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) kfree(pdent); } -void ubifs_dump_node(const struct ubifs_info *c, const void *node) +void ubifs_dump_node(const struct ubifs_info *c, const void *node, int node_len) { - int i, n; + int i, n, type, safe_len, max_node_len, min_node_len; union ubifs_key key; const struct ubifs_ch *ch = node; char key_buf[DBG_KEY_BUF_LEN]; @@ -306,10 +306,40 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) return; } + /* Skip dumping unknown type node */ + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + pr_err("node type %d was not recognized\n", type); + return; + } + spin_lock(&dbg_lock); dump_ch(node); - switch (ch->node_type) { + if (c->ranges[type].max_len == 0) { + max_node_len = min_node_len = c->ranges[type].len; + } else { + max_node_len = c->ranges[type].max_len; + min_node_len = c->ranges[type].min_len; + } + safe_len = le32_to_cpu(ch->len); + safe_len = safe_len > 0 ? safe_len : 0; + safe_len = min3(safe_len, max_node_len, node_len); + if (safe_len < min_node_len) { + pr_err("node len(%d) is too short for %s, left %d bytes:\n", + safe_len, dbg_ntype(type), + safe_len > UBIFS_CH_SZ ? + safe_len - (int)UBIFS_CH_SZ : 0); + if (safe_len > UBIFS_CH_SZ) + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node + UBIFS_CH_SZ, + safe_len - UBIFS_CH_SZ, 0); + goto out_unlock; + } + if (safe_len != le32_to_cpu(ch->len)) + pr_err("\ttruncated node length %d\n", safe_len); + + switch (type) { case UBIFS_PAD_NODE: { const struct ubifs_pad_node *pad = node; @@ -453,7 +483,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tnlen %d\n", nlen); pr_err("\tname "); - if (nlen > UBIFS_MAX_NLEN) + if (nlen > UBIFS_MAX_NLEN || + nlen > safe_len - UBIFS_DENT_NODE_SZ) pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) @@ -467,7 +498,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_DATA_NODE: { const struct ubifs_data_node *dn = node; - int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); pr_err("\tkey %s\n", @@ -475,10 +505,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tsize %u\n", le32_to_cpu(dn->size)); pr_err("\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - pr_err("\tdata size %d\n", dlen); - pr_err("\tdata:\n"); + pr_err("\tdata size %u\n", + le32_to_cpu(ch->len) - (unsigned int)UBIFS_DATA_NODE_SZ); + pr_err("\tdata (length = %d):\n", + safe_len - (int)UBIFS_DATA_NODE_SZ); print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - (void *)&dn->data, dlen, 0); + (void *)&dn->data, + safe_len - (int)UBIFS_DATA_NODE_SZ, 0); break; } case UBIFS_TRUN_NODE: @@ -495,13 +528,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_IDX_NODE: { const struct ubifs_idx_node *idx = node; + int max_child_cnt = (safe_len - UBIFS_IDX_NODE_SZ) / + (ubifs_idx_node_sz(c, 1) - + UBIFS_IDX_NODE_SZ); - n = le16_to_cpu(idx->child_cnt); - pr_err("\tchild_cnt %d\n", n); + n = min_t(int, le16_to_cpu(idx->child_cnt), max_child_cnt); + pr_err("\tchild_cnt %d\n", (int)le16_to_cpu(idx->child_cnt)); pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); pr_err("\tBranches:\n"); - for (i = 0; i < n && i < c->fanout - 1; i++) { + for (i = 0; i < n && i < c->fanout; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); @@ -525,7 +561,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) le64_to_cpu(orph->cmt_no) & LLONG_MAX); pr_err("\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); - n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + n = (safe_len - UBIFS_ORPH_NODE_SZ) >> 3; pr_err("\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) pr_err("\t ino %llu\n", @@ -537,9 +573,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) break; } default: - pr_err("node type %d was not recognized\n", - (int)ch->node_type); + pr_err("node type %d was not recognized\n", type); } + +out_unlock: spin_unlock(&dbg_lock); } @@ -764,7 +801,7 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) pr_err("\tnnode_sz: %d\n", c->nnode_sz); pr_err("\tltab_sz: %d\n", c->ltab_sz); pr_err("\tlsave_sz: %d\n", c->lsave_sz); - pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tbig_lpt: %u\n", c->big_lpt); pr_err("\tlpt_hght: %d\n", c->lpt_hght); pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); @@ -791,22 +828,6 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) spin_unlock(&dbg_lock); } -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs) -{ - struct ubifs_scan_node *snod; - - pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", - current->pid, sleb->lnum, offs); - - list_for_each_entry(snod, &sleb->nodes, list) { - cond_resched(); - pr_err("Dumping node at LEB %d:%d len %d\n", - sleb->lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); - } -} - void ubifs_dump_leb(const struct ubifs_info *c, int lnum) { struct ubifs_scan_leb *sleb; @@ -834,7 +855,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) cond_resched(); pr_err("Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); } pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); @@ -842,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) out: vfree(buf); - return; } void ubifs_dump_znode(const struct ubifs_info *c, @@ -925,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } @@ -1012,7 +1036,7 @@ void dbg_save_space_info(struct ubifs_info *c) * * This function compares current flash space information with the information * which was saved when the 'dbg_save_space_info()' function was called. - * Returns zero if the information has not changed, and %-EINVAL it it has + * Returns zero if the information has not changed, and %-EINVAL if it has * changed. */ int dbg_check_space_info(struct ubifs_info *c) @@ -1123,6 +1147,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) err = PTR_ERR(dent); if (err == -ENOENT) break; + kfree(pdent); return err; } @@ -1211,7 +1236,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr1->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1223,7 +1248,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr2->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1242,9 +1267,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); out_free: kfree(dent2); @@ -1720,17 +1745,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) err = dbg_walk_index(c, NULL, add_size, &calc); if (err) { ubifs_err(c, "error %d while walking the index", err); - return err; + goto out_err; } if (calc != idx_size) { ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld", calc, idx_size); dump_stack(); - return -EINVAL; + err = -EINVAL; + goto out_err; } return 0; + +out_err: + ubifs_destroy_tnc_tree(c); + return err; } /** @@ -2109,7 +2139,7 @@ out: out_dump: ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); out_free: kfree(node); return err; @@ -2242,7 +2272,7 @@ out_dump: ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d", (unsigned long)fscki->inum, zbr->lnum, zbr->offs); - ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, zbr->len); kfree(ino); return -EINVAL; } @@ -2313,12 +2343,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2349,8 +2379,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; error_dump: - ubifs_dump_node(c, sa->node); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2381,13 +2411,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE && sb->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2437,16 +2467,15 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) error_dump: ubifs_msg(c, "dumping first node"); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); ubifs_msg(c, "dumping second node"); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; - return 0; } static inline int chance(unsigned int n, unsigned int out_of) { - return !!((prandom_u32() % out_of) + 1 <= n); + return !!(get_random_u32_below(out_of) + 1 <= n); } @@ -2464,13 +2493,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) if (chance(1, 2)) { d->pc_delay = 1; /* Fail within 1 minute */ - delay = prandom_u32() % 60000; + delay = get_random_u32_below(60000); d->pc_timeout = jiffies; d->pc_timeout += msecs_to_jiffies(delay); ubifs_warn(c, "failing after %lums", delay); } else { d->pc_delay = 2; - delay = prandom_u32() % 10000; + delay = get_random_u32_below(10000); /* Fail within 10000 operations */ d->pc_cnt_max = delay; ubifs_warn(c, "failing after %lu calls", delay); @@ -2550,7 +2579,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, unsigned int from, to, ffs = chance(1, 2); unsigned char *p = (void *)buf; - from = prandom_u32() % len; + from = get_random_u32_below(len); /* Corruption span max to end of write unit */ to = min(len, ALIGN(from + 1, c->max_write_size)); @@ -2560,7 +2589,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, if (ffs) memset(p + from, 0xFF, to - from); else - prandom_bytes(p + from, to - from); + get_random_bytes(p + from, to - from); return to; } @@ -2781,7 +2810,6 @@ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** @@ -2801,9 +2829,9 @@ void dbg_debugfs_init_fs(struct ubifs_info *c) const char *fname; struct ubifs_debug_info *d = c->dbg; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); - if (n == UBIFS_DFS_DIR_LEN) { + if (n >= UBIFS_DFS_DIR_LEN) { /* The array size is too small */ return; } @@ -2926,7 +2954,6 @@ static const struct file_operations dfs_global_fops = { .read = dfs_global_file_read, .write = dfs_global_file_write, .owner = THIS_MODULE, - .llseek = no_llseek, }; /** diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 7763639a426b..d425861e6b82 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -19,10 +19,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, /* * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" - * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + * + 1 for "_" and 2 for UBI device numbers and 3 for volume number and 1 for + * the trailing zero byte. */ #define UBIFS_DFS_DIR_NAME "ubi%d_%d" -#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2 + 3 + 1) /** * ubifs_debug_info - per-FS debugging information. @@ -103,7 +104,7 @@ struct ubifs_debug_info { unsigned int chk_fs:1; unsigned int tst_rcvry:1; - char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; @@ -242,7 +243,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c, const char *dbg_snprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer, int len); void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode); -void ubifs_dump_node(const struct ubifs_info *c, const void *node); +void ubifs_dump_node(const struct ubifs_info *c, const void *node, + int node_len); void ubifs_dump_budget_req(const struct ubifs_budget_req *req); void ubifs_dump_lstats(const struct ubifs_lp_stats *lst); void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); @@ -251,8 +253,6 @@ void ubifs_dump_lprop(const struct ubifs_info *c, void ubifs_dump_lprops(struct ubifs_info *c); void ubifs_dump_lpt_info(struct ubifs_info *c); void ubifs_dump_leb(const struct ubifs_info *c, int lnum); -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs); void ubifs_dump_znode(const struct ubifs_info *c, const struct ubifs_znode *znode); void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef85ec167a84..3c3d3ad4fa6c 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -68,32 +68,25 @@ static int inherit_flags(const struct inode *dir, umode_t mode) * @c: UBIFS file-system description object * @dir: parent directory inode * @mode: inode mode flags + * @is_xattr: whether the inode is xattr inode * * This function finds an unused inode number, allocates new inode and - * initializes it. Returns new inode in case of success and an error code in - * case of failure. + * initializes it. Non-xattr new inode may be written with xattrs(selinux/ + * encryption) before writing dentry, which could cause inconsistent problem + * when powercut happens between two operations. To deal with it, non-xattr + * new inode is initialized with zero-nlink and added into orphan list, caller + * should make sure that inode is relinked later, and make sure that orphan + * removing and journal writing into an committing atomic operation. Returns + * new inode in case of success and an error code in case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, - umode_t mode) + umode_t mode, bool is_xattr) { int err; struct inode *inode; struct ubifs_inode *ui; bool encrypted = false; - if (IS_ENCRYPTED(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) { - ubifs_err(c, "fscrypt_get_encryption_info failed: %i", err); - return ERR_PTR(err); - } - - if (!fscrypt_has_encryption_key(dir)) - return ERR_PTR(-EPERM); - - encrypted = true; - } - inode = new_inode(c->vfs_sb); ui = ubifs_inode(inode); if (!inode) @@ -107,11 +100,18 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, */ inode->i_flags |= S_NOCMTIME; - inode_init_owner(inode, dir, mode); - inode->i_mtime = inode->i_atime = inode->i_ctime = - current_time(inode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); + simple_inode_init_ts(inode); inode->i_mapping->nrpages = 0; + if (!is_xattr) { + err = fscrypt_prepare_new_inode(dir, inode, &encrypted); + if (err) { + ubifs_err(c, "fscrypt_prepare_new_inode failed: %i", err); + goto out_iput; + } + } + switch (mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &ubifs_file_address_operations; @@ -131,7 +131,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, case S_IFBLK: case S_IFCHR: inode->i_op = &ubifs_file_inode_operations; - encrypted = false; break; default: BUG(); @@ -151,9 +150,8 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, if (c->highest_inum >= INUM_WATERMARK) { spin_unlock(&c->cnt_lock); ubifs_err(c, "out of inode numbers"); - make_bad_inode(inode); - iput(inode); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto out_iput; } ubifs_warn(c, "running out of inode numbers (current %lu, max %u)", (unsigned long)c->highest_inum, INUM_WATERMARK); @@ -170,17 +168,36 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, ui->creat_sqnum = ++c->max_sqnum; spin_unlock(&c->cnt_lock); + if (!is_xattr) { + set_nlink(inode, 0); + err = ubifs_add_orphan(c, inode->i_ino); + if (err) { + ubifs_err(c, "ubifs_add_orphan failed: %i", err); + goto out_iput; + } + down_read(&c->commit_sem); + ui->del_cmtno = c->cmt_no; + up_read(&c->commit_sem); + } + if (encrypted) { - err = fscrypt_inherit_context(dir, inode, &encrypted, true); + err = fscrypt_set_context(inode, NULL); if (err) { - ubifs_err(c, "fscrypt_inherit_context failed: %i", err); - make_bad_inode(inode); - iput(inode); - return ERR_PTR(err); + if (!is_xattr) { + set_nlink(inode, 1); + ubifs_delete_orphan(c, inode->i_ino); + } + ubifs_err(c, "fscrypt_set_context failed: %i", err); + goto out_iput; } } return inode; + +out_iput: + make_bad_inode(inode); + iput(inode); + return ERR_PTR(err); } static int dbg_check_name(const struct ubifs_info *c, @@ -276,8 +293,17 @@ done: return d_splice_alias(inode, dentry); } -static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *nm) +{ + if (fscrypt_is_nokey_name(dentry)) + return -ENOKEY; + + return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm); +} + +static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; @@ -299,13 +325,13 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (err) return err; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) goto out_budg; sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; @@ -315,11 +341,13 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -334,8 +362,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -345,20 +373,91 @@ out_budg: return err; } -static int do_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) +{ + int err; + umode_t mode = S_IFCHR | WHITEOUT_MODE; + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + + /* + * Create an inode('nlink = 1') for whiteout without updating journal, + * let ubifs_jnl_rename() store it on flash to complete rename whiteout + * atomically. + */ + + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); + + inode = ubifs_new_inode(c, dir, mode, false); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); + ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); + + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + + /* The dir size is updated by do_rename. */ + insert_inode_hash(inode); + + return inode; + +out_inode: + iput(inode); +out_free: + ubifs_err(c, "cannot create whiteout file, error %d", err); + return ERR_PTR(err); +} + +/** + * lock_2_inodes - a wrapper for locking two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. + */ +static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); +} + +/** + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) { + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); +} + +static int ubifs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, + struct file *file, umode_t mode) +{ + struct dentry *dentry = file->f_path.dentry; struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; - struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); + struct ubifs_inode *ui; int err, instantiated = 0; struct fscrypt_name nm; /* - * Budget request settings: new dirty inode, new direntry, - * budget for dirtied inode will be released via writeback. + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + * Allocate budget separately for new dirtied inode, the budget will + * be released via writeback. */ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", @@ -381,51 +480,40 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, return err; } - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_budg; } ui = ubifs_inode(inode); - if (whiteout) { - init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); - ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); - } - err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); - - if (whiteout) { - mark_inode_dirty(inode); - drop_nlink(inode); - *whiteout = inode; - } else { - d_tmpfile(dentry, inode); - } + d_tmpfile(file, inode); ubifs_assert(c, ui->dirty); instantiated = 1; mutex_unlock(&ui->ui_mutex); - mutex_lock(&dir_ui->ui_mutex); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + lock_2_inodes(dir, inode); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 1); if (err) goto out_cancel; - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); + fscrypt_free_filename(&nm); - return 0; + return finish_open_simple(file, 0); out_cancel: - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); out_inode: - make_bad_inode(inode); if (!instantiated) iput(inode); out_budg: @@ -437,12 +525,6 @@ out_budg: return err; } -static int ubifs_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode) -{ - return do_tmpfile(dir, dentry, mode, NULL); -} - /** * vfs_dent_type - get VFS directory entry type. * @type: UBIFS directory entry type @@ -473,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type) return 0; } +struct ubifs_dir_data { + struct ubifs_dent_node *dent; + u64 cookie; +}; + /* * The classical Unix view for directory is that it is a linear array of * (name, inode number) entries. Linux/VFS assumes this model as well. @@ -500,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; bool encrypted = IS_ENCRYPTED(dir); + struct ubifs_dir_data *data = file->private_data; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); @@ -511,38 +599,38 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) return 0; if (encrypted) { - err = fscrypt_get_encryption_info(dir); + err = fscrypt_prepare_readdir(dir); if (err) return err; - err = fscrypt_fname_alloc_buffer(dir, UBIFS_MAX_NLEN, &fstr); + err = fscrypt_fname_alloc_buffer(UBIFS_MAX_NLEN, &fstr); if (err) return err; fstr_real_len = fstr.len; } - if (file->f_version == 0) { + if (data->cookie == 0) { /* - * The file was seek'ed, which means that @file->private_data + * The file was seek'ed, which means that @data->dent * is now invalid. This may also be just the first * 'ubifs_readdir()' invocation, in which case - * @file->private_data is NULL, and the below code is + * @data->dent is NULL, and the below code is * basically a no-op. */ - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; } /* - * 'generic_file_llseek()' unconditionally sets @file->f_version to - * zero, and we use this for detecting whether the file was seek'ed. + * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this + * for detecting whether the file was seek'ed. */ - file->f_version = 1; + data->cookie = 1; /* File positions 0 and 1 correspond to "." and ".." */ if (ctx->pos < 2) { - ubifs_assert(c, !file->private_data); + ubifs_assert(c, !data->dent); if (!dir_emit_dots(file, ctx)) { if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -559,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } - dent = file->private_data; + dent = data->dent; if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. @@ -576,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } while (1) { @@ -619,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - kfree(file->private_data); + kfree(data->dent); ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; cond_resched(); } out: - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -651,37 +739,14 @@ out: /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { - kfree(file->private_data); + struct ubifs_dir_data *data = file->private_data; + + kfree(data->dent); + kfree(data); file->private_data = NULL; return 0; } -/** - * lock_2_inodes - a wrapper for locking two UBIFS inodes. - * @inode1: first inode - * @inode2: second inode - * - * We do not implement any tricks to guarantee strict lock ordering, because - * VFS has already done it for us on the @i_mutex. So this is just a simple - * wrapper function. - */ -static void lock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); -} - -/** - * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. - * @inode1: first inode - * @inode2: second inode - */ -static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); -} - static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -689,7 +754,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = d_inode(old_dentry); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_inode *dir_ui = ubifs_inode(dir); - int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, sz_change; struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, .dirtied_ino_d = ALIGN(ui->data_len, 8) }; struct fscrypt_name nm; @@ -713,6 +778,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; + sz_change = CALC_DENT_SIZE(fname_len(&nm)); + err = dbg_check_synced_i_size(c, inode); if (err) goto out_fname; @@ -723,17 +790,14 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); - /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ - if (inode->i_nlink == 0) - ubifs_delete_orphan(c, inode->i_ino); - inc_nlink(inode); ihold(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, inode->i_nlink == 1); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -747,8 +811,6 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); - if (inode->i_nlink == 0) - ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -802,12 +864,13 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = current_time(dir); + inode_set_ctime_current(inode); drop_nlink(inode); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -835,12 +898,12 @@ out_fname: } /** - * check_dir_empty - check if a directory is empty or not. + * ubifs_check_dir_empty - check if a directory is empty or not. * @dir: VFS inode object of the directory to check * * This function checks if directory @dir is empty. Returns zero if the * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes - * in case of of errors. + * in case of errors. */ int ubifs_check_dir_empty(struct inode *dir) { @@ -904,13 +967,14 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = current_time(dir); + inode_set_ctime_current(inode); clear_nlink(inode); drop_nlink(dir); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0, 0); if (err) goto out_cancel; unlock_2_inodes(dir, inode); @@ -938,13 +1002,15 @@ out_fname: return err; } -static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static struct dentry *ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct fscrypt_name nm; /* @@ -957,15 +1023,15 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = ubifs_budget_space(c, &req); if (err) - return err; + return ERR_PTR(err); - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) goto out_budg; sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, S_IFDIR | mode); + inode = ubifs_new_inode(c, dir, S_IFDIR | mode, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; @@ -975,14 +1041,16 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); insert_inode_hash(inode); inc_nlink(inode); inc_nlink(dir); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) { ubifs_err(c, "cannot create directory, error %d", err); goto out_cancel; @@ -992,25 +1060,25 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ubifs_release_budget(c, &req); d_instantiate(dentry, inode); fscrypt_free_filename(&nm); - return 0; + return NULL; out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(dir); mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); out_budg: ubifs_release_budget(c, &req); - return err; + return ERR_PTR(err); } -static int ubifs_mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t rdev) +static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; struct ubifs_inode *ui; @@ -1044,7 +1112,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, return err; } - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) { kfree(dev); goto out_budg; @@ -1052,28 +1120,32 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, mode); + inode = ubifs_new_inode(c, dir, mode, false); if (IS_ERR(inode)) { kfree(dev); err = PTR_ERR(inode); goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) { + kfree(dev); + goto out_inode; + } + init_special_inode(inode, inode->i_mode, rdev); inode->i_size = ubifs_inode(inode)->ui_size = devlen; ui = ubifs_inode(inode); ui->data = dev; ui->data_len = devlen; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1088,8 +1160,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1098,8 +1170,8 @@ out_budg: return err; } -static int ubifs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) +static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, const char *symname) { struct inode *inode; struct ubifs_inode *ui; @@ -1108,7 +1180,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, int err, sz_change, len = strlen(symname); struct fscrypt_str disk_link; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = ALIGN(len, 8), .dirtied_ino = 1 }; struct fscrypt_name nm; @@ -1124,22 +1195,27 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * Budget request settings: new inode, new direntry and changing parent * directory inode. */ + req.new_ino_d = ALIGN(disk_link.len - 1, 8); err = ubifs_budget_space(c, &req); if (err) return err; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + err = ubifs_prepare_create(dir, dentry, &nm); if (err) goto out_budg; sz_change = CALC_DENT_SIZE(fname_len(&nm)); - inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO, false); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fname; } + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + ui = ubifs_inode(inode); ui->data = kmalloc(disk_link.len, GFP_NOFS); if (!ui->data) { @@ -1164,16 +1240,14 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; - - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; + set_nlink(inode, 1); mutex_lock(&dir_ui->ui_mutex); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - dir->i_mtime = dir->i_ctime = inode->i_ctime; - err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_get_ctime(inode))); + err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0, 1); if (err) goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); @@ -1187,8 +1261,10 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); + set_nlink(inode, 0); out_inode: - make_bad_inode(inode); + /* Free inode->i_link before inode is marked as bad. */ + fscrypt_free_inode(inode); iput(inode); out_fname: fscrypt_free_filename(&nm); @@ -1202,7 +1278,7 @@ out_budg: * @inode1: first inode * @inode2: second inode * @inode3: third inode - * @inode4: fouth inode + * @inode4: fourth inode * * This function is used for 'ubifs_rename()' and @inode1 may be the same as * @inode2 whereas @inode3 and @inode4 may be %NULL. @@ -1228,7 +1304,7 @@ static void lock_4_inodes(struct inode *inode1, struct inode *inode2, * @inode1: first inode * @inode2: second inode * @inode3: third inode - * @inode4: fouth inode + * @inode4: fourth inode */ static void unlock_4_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3, struct inode *inode4) @@ -1259,17 +1335,18 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; - struct timespec64 time; - unsigned int uninitialized_var(saved_nlink); + struct ubifs_budget_req wht_req; + unsigned int saved_nlink; struct fscrypt_name old_nm, new_nm; /* - * Budget request settings: deletion direntry, new direntry, removing - * the old inode, and changing old and new parent directory inodes. + * Budget request settings: + * req: deletion direntry, new direntry, removing the old inode, + * and changing old and new parent directory inodes. * - * However, this operation also marks the target inode as dirty and - * does not write it, so we allocate budget for the target inode - * separately. + * wht_req: new whiteout inode for RENAME_WHITEOUT. + * + * ino_req: marks the target inode as dirty and does not write it. */ dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", @@ -1279,6 +1356,8 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlink) { ubifs_assert(c, inode_is_locked(new_inode)); + /* Budget for old inode's data when its nlink > 1. */ + req.dirtied_ino_d = ALIGN(ubifs_inode(new_inode)->data_len, 8); err = ubifs_purge_xattrs(new_inode); if (err) return err; @@ -1326,17 +1405,40 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_release; } - err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); - if (err) { + /* + * The whiteout inode without dentry is pinned in memory, + * umount won't happen during rename process because we + * got parent dentry. + */ + whiteout = create_whiteout(old_dir, old_dentry); + if (IS_ERR(whiteout)) { + err = PTR_ERR(whiteout); kfree(dev); goto out_release; } - whiteout->i_state |= I_LINKABLE; whiteout_ui = ubifs_inode(whiteout); whiteout_ui->data = dev; whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); + wht_req.new_ino = 1; + wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid deadlock between space budget (holds ui_mutex and + * waits wb work) and writeback work(waits ui_mutex), do space + * budget before ubifs inodes locked. + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { + iput(whiteout); + goto out_release; + } + set_nlink(whiteout, 1); + + /* Add the old_dentry size to the old_dir size. */ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); } lock_4_inodes(old_dir, new_dir, new_inode, whiteout); @@ -1345,8 +1447,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the @i_ctime for inodes on a * rename. */ - time = current_time(old_dir); - old_inode->i_ctime = time; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); /* We must adjust parent link count when renaming directories */ if (is_dir) { @@ -1375,13 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir->i_size -= old_sz; ubifs_inode(old_dir)->ui_size = old_dir->i_size; - old_dir->i_mtime = old_dir->i_ctime = time; - new_dir->i_mtime = new_dir->i_ctime = time; /* * And finally, if we unlinked a direntry which happened to have the * same name as the moved direntry, we have to decrement @i_nlink of - * the unlinked inode and change its ctime. + * the unlinked inode. */ if (unlink) { /* @@ -1393,7 +1492,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - new_inode->i_ctime = time; } else { new_dir->i_size += new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; @@ -1408,35 +1506,26 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); if (unlink && IS_SYNC(new_inode)) sync = 1; - } - - if (whiteout) { - struct ubifs_budget_req wht_req = { .dirtied_ino = 1, - .dirtied_ino_d = \ - ALIGN(ubifs_inode(whiteout)->data_len, 8) }; - - err = ubifs_budget_space(c, &wht_req); - if (err) { - kfree(whiteout_ui->data); - whiteout_ui->data_len = 0; - iput(whiteout); - goto out_release; - } - - inc_nlink(whiteout); - mark_inode_dirty(whiteout); - whiteout->i_state &= ~I_LINKABLE; - iput(whiteout); + /* + * S_SYNC flag of whiteout inherits from the old_dir, and we + * have already checked the old dir inode. So there is no need + * to check whiteout. + */ } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, - new_inode, &new_nm, whiteout, sync); + new_inode, &new_nm, whiteout, sync, !!whiteout); if (err) goto out_cancel; unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); ubifs_release_budget(c, &req); + if (whiteout) { + ubifs_release_budget(c, &wht_req); + iput(whiteout); + } + mutex_lock(&old_inode_ui->ui_mutex); release = old_inode_ui->dirty; mark_inode_dirty_sync(old_inode); @@ -1445,11 +1534,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); + /* + * Rename finished here. Although old inode cannot be updated + * on flash, old ctime is not a big problem, don't return err + * code to userspace. + */ + old_inode->i_sb->s_op->write_inode(old_inode, NULL); fscrypt_free_filename(&old_nm); fscrypt_free_filename(&new_nm); - return err; + return 0; out_cancel: if (unlink) { @@ -1470,11 +1564,12 @@ out_cancel: inc_nlink(old_dir); } } + unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { - drop_nlink(whiteout); + ubifs_release_budget(c, &wht_req); + set_nlink(whiteout, 0); iput(whiteout); } - unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); out_release: ubifs_release_budget(c, &ino_req); ubifs_release_budget(c, &req); @@ -1492,12 +1587,20 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); struct inode *fst_inode = d_inode(old_dentry); struct inode *snd_inode = d_inode(new_dentry); - struct timespec64 time; int err; struct fscrypt_name fst_nm, snd_nm; ubifs_assert(c, fst_inode && snd_inode); + /* + * Budget request settings: changing two direntries, changing the two + * parent directory inodes. + */ + + dbg_gen("dent '%pd' ino %lu in dir ino %lu exchange dent '%pd' ino %lu in dir ino %lu", + old_dentry, fst_inode->i_ino, old_dir->i_ino, + new_dentry, snd_inode->i_ino, new_dir->i_ino); + err = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &fst_nm); if (err) return err; @@ -1508,13 +1611,13 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, return err; } + err = ubifs_budget_space(c, &req); + if (err) + goto out; + lock_4_inodes(old_dir, new_dir, NULL, NULL); - time = current_time(old_dir); - fst_inode->i_ctime = time; - snd_inode->i_ctime = time; - old_dir->i_mtime = old_dir->i_ctime = time; - new_dir->i_mtime = new_dir->i_ctime = time; + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); if (old_dir != new_dir) { if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) { @@ -1533,12 +1636,14 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, unlock_4_inodes(old_dir, new_dir, NULL, NULL); ubifs_release_budget(c, &req); +out: fscrypt_free_filename(&fst_nm); fscrypt_free_filename(&snd_nm); return err; } -static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, +static int ubifs_rename(struct mnt_idmap *idmap, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -1562,8 +1667,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -int ubifs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags) { loff_t size; struct inode *inode = d_inode(path->dentry); @@ -1585,7 +1690,7 @@ int ubifs_getattr(const struct path *path, struct kstat *stat, STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE); - generic_fillattr(inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; @@ -1616,14 +1721,24 @@ int ubifs_getattr(const struct path *path, struct kstat *stat, return 0; } -static int ubifs_dir_open(struct inode *dir, struct file *file) +static int ubifs_dir_open(struct inode *inode, struct file *file) { - if (IS_ENCRYPTED(dir)) - return fscrypt_get_encryption_info(dir) ? -EACCES : 0; + struct ubifs_dir_data *data; + data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; return 0; } +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ubifs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, @@ -1636,21 +1751,21 @@ const struct inode_operations ubifs_dir_inode_operations = { .rename = ubifs_rename, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, .tmpfile = ubifs_tmpfile, + .fileattr_get = ubifs_fileattr_get, + .fileattr_set = ubifs_fileattr_set, }; const struct file_operations ubifs_dir_operations = { - .llseek = generic_file_llseek, + .open = ubifs_dir_open, + .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, .iterate_shared = ubifs_readdir, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, - .open = ubifs_dir_open, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 49fe062ce45e..c3265b8804f5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -31,9 +31,9 @@ * in the "sys_write -> alloc_pages -> direct reclaim path". So, in * 'ubifs_writepage()' we are only guaranteed that the page is locked. * - * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the + * Similarly, @i_mutex is not always locked in 'ubifs_read_folio()', e.g., the * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> - * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not + * ondemand_readahead -> read_folio"). In case of readahead, @I_SYNC flag is not * set as well. However, UBIFS disables readahead. */ @@ -42,8 +42,8 @@ #include <linux/slab.h> #include <linux/migrate.h> -static int read_block(struct inode *inode, void *addr, unsigned int block, - struct ubifs_data_node *dn) +static int read_block(struct inode *inode, struct folio *folio, size_t offset, + unsigned int block, struct ubifs_data_node *dn) { struct ubifs_info *c = inode->i_sb->s_fs_info; int err, len, out_len; @@ -55,7 +55,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, if (err) { if (err == -ENOENT) /* Not found, so it must be a hole */ - memset(addr, 0, UBIFS_BLOCK_SIZE); + folio_zero_range(folio, offset, UBIFS_BLOCK_SIZE); return err; } @@ -74,8 +74,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, } out_len = UBIFS_BLOCK_SIZE; - err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len, - le16_to_cpu(dn->compr_type)); + err = ubifs_decompress_folio(c, &dn->data, dlen, folio, offset, + &out_len, le16_to_cpu(dn->compr_type)); if (err || len != out_len) goto dump; @@ -85,47 +85,45 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, * appending data). Ensure that the remainder is zeroed out. */ if (len < UBIFS_BLOCK_SIZE) - memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + folio_zero_range(folio, offset + len, UBIFS_BLOCK_SIZE - len); return 0; dump: ubifs_err(c, "bad data node (block %u, inode %lu)", block, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ); return -EINVAL; } -static int do_readpage(struct page *page) +static int do_readpage(struct folio *folio) { - void *addr; int err = 0, i; unsigned int block, beyond; - struct ubifs_data_node *dn; - struct inode *inode = page->mapping->host; + struct ubifs_data_node *dn = NULL; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; loff_t i_size = i_size_read(inode); + size_t offset = 0; dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); - ubifs_assert(c, !PageChecked(page)); - ubifs_assert(c, !PagePrivate(page)); + inode->i_ino, folio->index, i_size, folio->flags.f); + ubifs_assert(c, !folio_test_checked(folio)); + ubifs_assert(c, !folio->private); - addr = kmap(page); - - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; if (block >= beyond) { /* Reading beyond inode */ - SetPageChecked(page); - memset(addr, 0, PAGE_SIZE); + folio_set_checked(folio); + folio_zero_range(folio, 0, folio_size(folio)); goto out; } dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); if (!dn) { err = -ENOMEM; - goto error; + goto out; } i = 0; @@ -135,9 +133,9 @@ static int do_readpage(struct page *page) if (block >= beyond) { /* Reading beyond inode */ err = -ENOENT; - memset(addr, 0, UBIFS_BLOCK_SIZE); + folio_zero_range(folio, offset, UBIFS_BLOCK_SIZE); } else { - ret = read_block(inode, addr, block, dn); + ret = read_block(inode, folio, offset, block, dn); if (ret) { err = ret; if (err != -ENOENT) @@ -147,42 +145,32 @@ static int do_readpage(struct page *page) int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); if (ilen && ilen < dlen) - memset(addr + ilen, 0, dlen - ilen); + folio_zero_range(folio, offset + ilen, dlen - ilen); } } - if (++i >= UBIFS_BLOCKS_PER_PAGE) + if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio))) break; block += 1; - addr += UBIFS_BLOCK_SIZE; + offset += UBIFS_BLOCK_SIZE; } + if (err) { struct ubifs_info *c = inode->i_sb->s_fs_info; if (err == -ENOENT) { /* Not found, so it must be a hole */ - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); - goto out_free; + err = 0; + } else { + ubifs_err(c, "cannot read page %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); } - ubifs_err(c, "cannot read page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); - goto error; } -out_free: - kfree(dn); out: - SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); - return 0; - -error: kfree(dn); - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); + if (!err) + folio_mark_uptodate(folio); return err; } @@ -205,7 +193,7 @@ static void release_new_page_budget(struct ubifs_info *c) * @c: UBIFS file-system description object * * This is a helper function which releases budget corresponding to the budget - * of changing one one page of data which already exists on the flash media. + * of changing one page of data which already exists on the flash media. */ static void release_existing_page_budget(struct ubifs_info *c) { @@ -215,24 +203,23 @@ static void release_existing_page_budget(struct ubifs_info *c) } static int write_begin_slow(struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, - unsigned flags) + loff_t pos, unsigned len, struct folio **foliop) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; pgoff_t index = pos >> PAGE_SHIFT; struct ubifs_budget_req req = { .new_page = 1 }; - int uninitialized_var(err), appending = !!(pos + len > inode->i_size); - struct page *page; + int err, appending = !!(pos + len > inode->i_size); + struct folio *folio; dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", inode->i_ino, pos, len, inode->i_size); /* - * At the slow path we have to budget before locking the page, because - * budgeting may force write-back, which would wait on locked pages and - * deadlock if we had the page locked. At this point we do not know - * anything about the page, so assume that this is a new page which is + * At the slow path we have to budget before locking the folio, because + * budgeting may force write-back, which would wait on locked folios and + * deadlock if we had the folio locked. At this point we do not know + * anything about the folio, so assume that this is a new folio which is * written to a hole. This corresponds to largest budget. Later the * budget will be amended if this is not true. */ @@ -244,45 +231,43 @@ static int write_begin_slow(struct address_space *mapping, if (unlikely(err)) return err; - page = grab_cache_page_write_begin(mapping, index, flags); - if (unlikely(!page)) { + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { ubifs_release_budget(c, &req); - return -ENOMEM; + return PTR_ERR(folio); } - if (!PageUptodate(page)) { - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) - SetPageChecked(page); + if (!folio_test_uptodate(folio)) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) + folio_set_checked(folio); else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); ubifs_release_budget(c, &req); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - if (PagePrivate(page)) + if (folio->private) /* - * The page is dirty, which means it was budgeted twice: + * The folio is dirty, which means it was budgeted twice: * o first time the budget was allocated by the task which - * made the page dirty and set the PG_private flag; + * made the folio dirty and set the private field; * o and then we budgeted for it for the second time at the * very beginning of this function. * - * So what we have to do is to release the page budget we + * So what we have to do is to release the folio budget we * allocated. */ release_new_page_budget(c); - else if (!PageChecked(page)) + else if (!folio_test_checked(folio)) /* - * We are changing a page which already exists on the media. - * This means that changing the page does not make the amount + * We are changing a folio which already exists on the media. + * This means that changing the folio does not make the amount * of indexing information larger, and this part of the budget * which we have already acquired may be released. */ @@ -305,32 +290,33 @@ static int write_begin_slow(struct address_space *mapping, ubifs_release_dirty_inode_budget(c, ui); } - *pagep = page; + *foliop = folio; return 0; } /** * allocate_budget - allocate budget for 'ubifs_write_begin()'. * @c: UBIFS file-system description object - * @page: page to allocate budget for + * @folio: folio to allocate budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for 'ubifs_write_begin()' which allocates budget * for the operation. The budget is allocated differently depending on whether * this is appending, whether the page is dirty or not, and so on. This - * function leaves the @ui->ui_mutex locked in case of appending. Returns zero - * in case of success and %-ENOSPC in case of failure. + * function leaves the @ui->ui_mutex locked in case of appending. + * + * Returns: %0 in case of success and %-ENOSPC in case of failure. */ -static int allocate_budget(struct ubifs_info *c, struct page *page, +static int allocate_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { struct ubifs_budget_req req = { .fast = 1 }; - if (PagePrivate(page)) { + if (folio->private) { if (!appending) /* - * The page is dirty and we are not appending, which + * The folio is dirty and we are not appending, which * means no budget is needed at all. */ return 0; @@ -354,11 +340,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page, */ req.dirtied_ino = 1; } else { - if (PageChecked(page)) + if (folio_test_checked(folio)) /* * The page corresponds to a hole and does not * exist on the media. So changing it makes - * make the amount of indexing information + * the amount of indexing information * larger, and we have to budget for a new * page. */ @@ -418,17 +404,18 @@ static int allocate_budget(struct ubifs_info *c, struct page *page, * there is a plenty of flash space and the budget will be acquired quickly, * without forcing write-back. The slow path does not make this assumption. */ -static int ubifs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int ubifs_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = pos >> PAGE_SHIFT; - int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + int err, appending = !!(pos + len > inode->i_size); int skipped_read = 0; - struct page *page; + struct folio *folio; ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -437,13 +424,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return -EROFS; /* Try out the fast-path part first */ - page = grab_cache_page_write_begin(mapping, index, flags); - if (unlikely(!page)) - return -ENOMEM; + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) { + if (pos == folio_pos(folio) && len >= folio_size(folio)) { /* * We change whole page so no need to load it. But we * do not know whether this page exists on the media or @@ -453,32 +441,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * media. Thus, we are setting the @PG_checked flag * here. */ - SetPageChecked(page); + folio_set_checked(folio); skipped_read = 1; } else { - err = do_readpage(page); + err = do_readpage(folio); if (err) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } } - - SetPageUptodate(page); - ClearPageError(page); } - err = allocate_budget(c, page, ui, appending); + err = allocate_budget(c, folio, ui, appending); if (unlikely(err)) { ubifs_assert(c, err == -ENOSPC); /* * If we skipped reading the page because we were going to * write all of it, then it is not up to date. */ - if (skipped_read) { - ClearPageChecked(page); - ClearPageUptodate(page); - } + if (skipped_read) + folio_clear_checked(folio); /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the @@ -490,10 +473,10 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); mutex_unlock(&ui->ui_mutex); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); - return write_begin_slow(mapping, pos, len, pagep, flags); + return write_begin_slow(mapping, pos, len, foliop); } /* @@ -502,22 +485,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ - *pagep = page; + *foliop = folio; return 0; - } /** * cancel_budget - cancel budget. * @c: UBIFS file-system description object - * @page: page to cancel budget for + * @folio: folio to cancel budget for * @ui: UBIFS inode object the page belongs to * @appending: non-zero if the page is appended * * This is a helper function for a page write operation. It unlocks the * @ui->ui_mutex in case of appending. */ -static void cancel_budget(struct ubifs_info *c, struct page *page, +static void cancel_budget(struct ubifs_info *c, struct folio *folio, struct ubifs_inode *ui, int appending) { if (appending) { @@ -525,17 +507,18 @@ static void cancel_budget(struct ubifs_info *c, struct page *page, ubifs_release_dirty_inode_budget(c, ui); mutex_unlock(&ui->ui_mutex); } - if (!PagePrivate(page)) { - if (PageChecked(page)) + if (!folio->private) { + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); } } -static int ubifs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static int ubifs_write_end(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned copied, + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); @@ -544,44 +527,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, int appending = !!(end_pos > inode->i_size); dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", - inode->i_ino, pos, page->index, len, copied, inode->i_size); + inode->i_ino, pos, folio->index, len, copied, inode->i_size); - if (unlikely(copied < len && len == PAGE_SIZE)) { + if (unlikely(copied < len && !folio_test_uptodate(folio))) { /* - * VFS copied less data to the page that it intended and + * VFS copied less data to the folio than it intended and * declared in its '->write_begin()' call via the @len - * argument. If the page was not up-to-date, and @len was - * @PAGE_SIZE, the 'ubifs_write_begin()' function did + * argument. If the folio was not up-to-date, + * the 'ubifs_write_begin()' function did * not load it from the media (for optimization reasons). This - * means that part of the page contains garbage. So read the - * page now. + * means that part of the folio contains garbage. So read the + * folio now. */ dbg_gen("copied %d instead of %d, read page and repeat", copied, len); - cancel_budget(c, page, ui, appending); - ClearPageChecked(page); + cancel_budget(c, folio, ui, appending); + folio_clear_checked(folio); /* * Return 0 to force VFS to repeat the whole operation, or the * error code if 'do_readpage()' fails. */ - copied = do_readpage(page); + copied = do_readpage(folio); goto out; } - if (!PagePrivate(page)) { - SetPagePrivate(page); + if (len == folio_size(folio)) + folio_mark_uptodate(folio); + + if (!folio->private) { + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(mapping, folio); } if (appending) { i_size_write(inode, end_pos); ui->ui_size = end_pos; /* - * Note, we do not set @I_DIRTY_PAGES (which means that the - * inode has dirty pages), this has been done in - * '__set_page_dirty_nobuffers()'. + * We do not set @I_DIRTY_PAGES (which means that + * the inode has dirty pages), this was done in + * filemap_dirty_folio(). */ __mark_inode_dirty(inode, I_DIRTY_DATASYNC); ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); @@ -589,49 +575,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } out: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return copied; } /** * populate_page - copy data nodes into a page for bulk-read. * @c: UBIFS file-system description object - * @page: page + * @folio: folio * @bu: bulk-read information * @n: next zbranch slot * - * This function returns %0 on success and a negative error code on failure. + * Returns: %0 on success and a negative error code on failure. */ -static int populate_page(struct ubifs_info *c, struct page *page, +static int populate_page(struct ubifs_info *c, struct folio *folio, struct bu_info *bu, int *n) { int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; loff_t i_size = i_size_read(inode); unsigned int page_block; - void *addr, *zaddr; + size_t offset = 0; pgoff_t end_index; dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", - inode->i_ino, page->index, i_size, page->flags); - - addr = zaddr = kmap(page); + inode->i_ino, folio->index, i_size, folio->flags.f); end_index = (i_size - 1) >> PAGE_SHIFT; - if (!i_size || page->index > end_index) { + if (!i_size || folio->index > end_index) { hole = 1; - memset(addr, 0, PAGE_SIZE); + folio_zero_range(folio, 0, folio_size(folio)); goto out_hole; } - page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; while (1) { int err, len, out_len, dlen; if (nn >= bu->cnt) { hole = 1; - memset(addr, 0, UBIFS_BLOCK_SIZE); + folio_zero_range(folio, offset, UBIFS_BLOCK_SIZE); } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { struct ubifs_data_node *dn; @@ -653,13 +637,15 @@ static int populate_page(struct ubifs_info *c, struct page *page, goto out_err; } - err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len, - le16_to_cpu(dn->compr_type)); + err = ubifs_decompress_folio( + c, &dn->data, dlen, folio, offset, &out_len, + le16_to_cpu(dn->compr_type)); if (err || len != out_len) goto out_err; if (len < UBIFS_BLOCK_SIZE) - memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + folio_zero_range(folio, offset + len, + UBIFS_BLOCK_SIZE - len); nn += 1; read = (i << UBIFS_BLOCK_SHIFT) + len; @@ -668,39 +654,32 @@ static int populate_page(struct ubifs_info *c, struct page *page, continue; } else { hole = 1; - memset(addr, 0, UBIFS_BLOCK_SIZE); + folio_zero_range(folio, offset, UBIFS_BLOCK_SIZE); } if (++i >= UBIFS_BLOCKS_PER_PAGE) break; - addr += UBIFS_BLOCK_SIZE; + offset += UBIFS_BLOCK_SIZE; page_block += 1; } - if (end_index == page->index) { + if (end_index == folio->index) { int len = i_size & (PAGE_SIZE - 1); if (len && len < read) - memset(zaddr + len, 0, read - len); + folio_zero_range(folio, len, read - len); } out_hole: if (hole) { - SetPageChecked(page); + folio_set_checked(folio); dbg_gen("hole"); } - SetPageUptodate(page); - ClearPageError(page); - flush_dcache_page(page); - kunmap(page); + folio_mark_uptodate(folio); *n = nn; return 0; out_err: - ClearPageUptodate(page); - SetPageError(page); - flush_dcache_page(page); - kunmap(page); ubifs_err(c, "bad data node (block %u, inode %lu)", page_block, inode->i_ino); return -EINVAL; @@ -710,15 +689,15 @@ out_err: * ubifs_do_bulk_read - do bulk-read. * @c: UBIFS file-system description object * @bu: bulk-read information - * @page1: first page to read + * @folio1: first folio to read * - * This function returns %1 if the bulk-read is done, otherwise %0 is returned. + * Returns: %1 if the bulk-read is done, otherwise %0 is returned. */ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, - struct page *page1) + struct folio *folio1) { - pgoff_t offset = page1->index, end_index; - struct address_space *mapping = page1->mapping; + pgoff_t offset = folio1->index, end_index; + struct address_space *mapping = folio1->mapping; struct inode *inode = mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); int err, page_idx, page_cnt, ret = 0, n = 0; @@ -768,11 +747,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, goto out_warn; } - err = populate_page(c, page1, bu, &n); + err = populate_page(c, folio1, bu, &n); if (err) goto out_warn; - unlock_page(page1); + folio_unlock(folio1); ret = 1; isize = i_size_read(inode); @@ -782,19 +761,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, for (page_idx = 1; page_idx < page_cnt; page_idx++) { pgoff_t page_offset = offset + page_idx; - struct page *page; + struct folio *folio; if (page_offset > end_index) break; - page = pagecache_get_page(mapping, page_offset, + folio = __filemap_get_folio(mapping, page_offset, FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, ra_gfp_mask); - if (!page) + if (IS_ERR(folio)) break; - if (!PageUptodate(page)) - err = populate_page(c, page, bu, &n); - unlock_page(page); - put_page(page); + if (!folio_test_uptodate(folio)) + err = populate_page(c, folio, bu, &n); + folio_unlock(folio); + folio_put(folio); if (err) break; } @@ -817,19 +796,21 @@ out_bu_off: /** * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. - * @page: page from which to start bulk-read. + * @folio: folio from which to start bulk-read. * * Some flash media are capable of reading sequentially at faster rates. UBIFS * bulk-read facility is designed to take advantage of that, by reading in one * go consecutive data nodes that are also located consecutively in the same - * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. + * LEB. + * + * Returns: %1 if a bulk-read is done and %0 otherwise. */ -static int ubifs_bulk_read(struct page *page) +static int ubifs_bulk_read(struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); - pgoff_t index = page->index, last_page_read = ui->last_page_read; + pgoff_t index = folio->index, last_page_read = ui->last_page_read; struct bu_info *bu; int err = 0, allocated = 0; @@ -877,8 +858,8 @@ static int ubifs_bulk_read(struct page *page) bu->buf_len = c->max_bu_buf_len; data_key_init(c, &bu->key, inode->i_ino, - page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); - err = ubifs_do_bulk_read(c, bu, page); + folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); + err = ubifs_do_bulk_read(c, bu, folio); if (!allocated) mutex_unlock(&c->bu_mutex); @@ -890,69 +871,65 @@ out_unlock: return err; } -static int ubifs_readpage(struct file *file, struct page *page) +static int ubifs_read_folio(struct file *file, struct folio *folio) { - if (ubifs_bulk_read(page)) + if (ubifs_bulk_read(folio)) return 0; - do_readpage(page); - unlock_page(page); + do_readpage(folio); + folio_unlock(folio); return 0; } -static int do_writepage(struct page *page, int len) +static int do_writepage(struct folio *folio, size_t len) { - int err = 0, i, blen; + int err = 0, blen; unsigned int block; - void *addr; + size_t offset = 0; union ubifs_key key; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT); + ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT); spin_unlock(&ui->ui_lock); #endif - /* Update radix tree tags */ - set_page_writeback(page); + folio_start_writeback(folio); - addr = kmap(page); - block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; - i = 0; - while (len) { - blen = min_t(int, len, UBIFS_BLOCK_SIZE); + block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + for (;;) { + blen = min_t(size_t, len, UBIFS_BLOCK_SIZE); data_key_init(c, &key, inode->i_ino, block); - err = ubifs_jnl_write_data(c, inode, &key, addr, blen); + err = ubifs_jnl_write_data(c, inode, &key, folio, offset, blen); if (err) break; - if (++i >= UBIFS_BLOCKS_PER_PAGE) + len -= blen; + if (!len) break; block += 1; - addr += blen; - len -= blen; + offset += blen; } if (err) { - SetPageError(page); - ubifs_err(c, "cannot write page %lu of inode %lu, error %d", - page->index, inode->i_ino, err); + mapping_set_error(folio->mapping, err); + ubifs_err(c, "cannot write folio %lu of inode %lu, error %d", + folio->index, inode->i_ino, err); ubifs_ro_mode(c, err); } - ubifs_assert(c, PagePrivate(page)); - if (PageChecked(page)) + ubifs_assert(c, folio->private != NULL); + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); - ClearPageChecked(page); + folio_detach_private(folio); + folio_clear_checked(folio); - kunmap(page); - unlock_page(page); - end_page_writeback(page); + folio_unlock(folio); + folio_end_writeback(folio); return err; } @@ -1002,22 +979,20 @@ static int do_writepage(struct page *page, int len) * on the page lock and it would not write the truncated inode node to the * journal before we have finished. */ -static int ubifs_writepage(struct page *page, struct writeback_control *wbc) +static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); loff_t i_size = i_size_read(inode), synced_i_size; - pgoff_t end_index = i_size >> PAGE_SHIFT; - int err, len = i_size & (PAGE_SIZE - 1); - void *kaddr; + int err, len = folio_size(folio); dbg_gen("ino %lu, pg %lu, pg flags %#lx", - inode->i_ino, page->index, page->flags); - ubifs_assert(c, PagePrivate(page)); + inode->i_ino, folio->index, folio->flags.f); + ubifs_assert(c, folio->private != NULL); - /* Is the page fully outside @i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !len)) { + /* Is the folio fully outside @i_size? (truncate in progress) */ + if (folio_pos(folio) >= i_size) { err = 0; goto out_unlock; } @@ -1026,12 +1001,12 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) synced_i_size = ui->synced_i_size; spin_unlock(&ui->ui_lock); - /* Is the page fully inside @i_size? */ - if (page->index < end_index) { - if (page->index >= synced_i_size >> PAGE_SHIFT) { + /* Is the folio fully inside i_size? */ + if (folio_pos(folio) + len <= i_size) { + if (folio_pos(folio) + len > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - goto out_unlock; + goto out_redirty; /* * The inode has been written, but the write-buffer has * not been synchronized, so in case of an unclean @@ -1041,34 +1016,49 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * with this. */ } - return do_writepage(page, PAGE_SIZE); + return do_writepage(folio, len); } /* - * The page straddles @i_size. It must be zeroed out on each and every + * The folio straddles @i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page); - memset(kaddr + len, 0, PAGE_SIZE - len); - flush_dcache_page(page); - kunmap_atomic(kaddr); + len = i_size - folio_pos(folio); + folio_zero_segment(folio, len, folio_size(folio)); if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - goto out_unlock; + goto out_redirty; } - return do_writepage(page, len); - + return do_writepage(folio, len); +out_redirty: + /* + * folio_redirty_for_writepage() won't call ubifs_dirty_inode() because + * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so + * there is no need to do space budget for dirty inode. + */ + folio_redirty_for_writepage(wbc, folio); out_unlock: - unlock_page(page); + folio_unlock(folio); return err; } +static int ubifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct folio *folio = NULL; + int error; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = ubifs_writepage(folio, wbc); + return error; +} + /** * do_attr_changes - change inode attributes. * @inode: inode to change attributes for @@ -1081,11 +1071,11 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (attr->ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; + inode_set_atime_to_ts(inode, attr->ia_atime); if (attr->ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); if (attr->ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; + inode_set_ctime_to_ts(inode, attr->ia_ctime); if (attr->ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -1102,7 +1092,9 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) * @attr: inode attribute changes description * * This function implements VFS '->setattr()' call when the inode is truncated - * to a smaller size. Returns zero in case of success and a negative error code + * to a smaller size. + * + * Returns: %0 in case of success and a negative error code * in case of failure. */ static int do_truncation(struct ubifs_info *c, struct inode *inode, @@ -1143,11 +1135,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, if (offset) { pgoff_t index = new_size >> PAGE_SHIFT; - struct page *page; + struct folio *folio; - page = find_lock_page(inode->i_mapping, index); - if (page) { - if (PageDirty(page)) { + folio = filemap_lock_folio(inode->i_mapping, index); + if (!IS_ERR(folio)) { + if (folio_test_dirty(folio)) { /* * 'ubifs_jnl_truncate()' will try to truncate * the last data node, but it contains @@ -1156,14 +1148,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * 'ubifs_jnl_truncate()' will see an already * truncated (and up to date) data node. */ - ubifs_assert(c, PagePrivate(page)); + ubifs_assert(c, folio->private != NULL); - clear_page_dirty_for_io(page); + folio_clear_dirty_for_io(folio); if (UBIFS_BLOCKS_PER_PAGE_SHIFT) - offset = new_size & - (PAGE_SIZE - 1); - err = do_writepage(page, offset); - put_page(page); + offset = offset_in_folio(folio, + new_size); + err = do_writepage(folio, offset); + folio_put(folio); if (err) goto out_budg; /* @@ -1176,8 +1168,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * to 'ubifs_jnl_truncate()' to save it from * having to read it. */ - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } } } @@ -1185,7 +1177,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); err = ubifs_jnl_truncate(c, inode, old_size, new_size); @@ -1208,7 +1200,9 @@ out_budg: * @attr: inode attribute changes description * * This function implements VFS '->setattr()' call for all cases except - * truncations to smaller size. Returns zero in case of success and a negative + * truncations to smaller size. + * + * Returns: %0 in case of success and a negative * error code in case of failure. */ static int do_setattr(struct ubifs_info *c, struct inode *inode, @@ -1232,7 +1226,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1257,7 +1251,8 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, return err; } -int ubifs_setattr(struct dentry *dentry, struct iattr *attr) +int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) { int err; struct inode *inode = d_inode(dentry); @@ -1265,7 +1260,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) dbg_gen("ino %lu, mode %#x, ia_valid %#x", inode->i_ino, inode->i_mode, attr->ia_valid); - err = setattr_prepare(dentry, attr); + err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; @@ -1286,25 +1281,25 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static void ubifs_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void ubifs_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; - ubifs_assert(c, PagePrivate(page)); - if (offset || length < PAGE_SIZE) - /* Partial page remains dirty */ + ubifs_assert(c, folio_test_private(folio)); + if (offset || length < folio_size(folio)) + /* Partial folio remains dirty */ return; - if (PageChecked(page)) + if (folio_test_checked(folio)) release_new_page_budget(c); else release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); - ClearPageChecked(page); + folio_detach_private(folio); + folio_clear_checked(folio); } int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -1328,7 +1323,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) inode_lock(inode); /* Synchronize the inode unless this is a 'datasync()' call. */ - if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { + if (!datasync || (inode_state_read_once(inode) & I_DIRTY_DATASYNC)) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out; @@ -1352,12 +1347,16 @@ out: * This helper function checks if the inode mtime/ctime should be updated or * not. If current values of the time-stamps are within the UBIFS inode time * granularity, they are not updated. This is an optimization. + * + * Returns: %1 if time update is needed, %0 if not */ static inline int mctime_update_needed(const struct inode *inode, const struct timespec64 *now) { - if (!timespec64_equal(&inode->i_mtime, now) || - !timespec64_equal(&inode->i_ctime, now)) + struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + + if (!timespec64_equal(&mtime, now) || !timespec64_equal(&ctime, now)) return 1; return 0; } @@ -1365,11 +1364,14 @@ static inline int mctime_update_needed(const struct inode *inode, /** * ubifs_update_time - update time of inode. * @inode: inode to update + * @flags: time updating control flag determines updating + * which time fields of @inode * * This function updates time of the inode. + * + * Returns: %0 for success or a negative error code otherwise. */ -int ubifs_update_time(struct inode *inode, struct timespec64 *time, - int flags) +int ubifs_update_time(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1377,21 +1379,17 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, .dirtied_ino_d = ALIGN(ui->data_len, 8) }; int err, release; - if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) - return generic_update_time(inode, time, flags); + if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) { + generic_update_time(inode, flags); + return 0; + } err = ubifs_budget_space(c, &req); if (err) return err; mutex_lock(&ui->ui_mutex); - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - + inode_update_timestamps(inode, flags); release = ui->dirty; __mark_inode_dirty(inode, I_DIRTY_SYNC); mutex_unlock(&ui->ui_mutex); @@ -1405,7 +1403,9 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time, * @inode: inode to update * * This function updates mtime and ctime of the inode if it is not equivalent to - * current time. Returns zero in case of success and a negative error code in + * current time. + * + * Returns: %0 in case of success and a negative error code in * case of failure. */ static int update_mctime(struct inode *inode) @@ -1424,7 +1424,7 @@ static int update_mctime(struct inode *inode) return err; mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1444,60 +1444,46 @@ static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) return generic_file_write_iter(iocb, from); } -static int ubifs_set_page_dirty(struct page *page) +static bool ubifs_dirty_folio(struct address_space *mapping, + struct folio *folio) { - int ret; - struct inode *inode = page->mapping->host; - struct ubifs_info *c = inode->i_sb->s_fs_info; + bool ret; + struct ubifs_info *c = mapping->host->i_sb->s_fs_info; - ret = __set_page_dirty_nobuffers(page); + ret = filemap_dirty_folio(mapping, folio); /* * An attempt to dirty a page without budgeting for it - should not * happen. */ - ubifs_assert(c, ret == 0); + ubifs_assert(c, ret == false); return ret; } -#ifdef CONFIG_MIGRATION -static int ubifs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) -{ - int rc; - - rc = migrate_page_move_mapping(mapping, newpage, page, 0); - if (rc != MIGRATEPAGE_SUCCESS) - return rc; - - if (PagePrivate(page)) { - ClearPagePrivate(page); - SetPagePrivate(newpage); - } - - if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); - else - migrate_page_states(newpage, page); - return MIGRATEPAGE_SUCCESS; -} -#endif - -static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) +static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; + if (folio_test_writeback(folio)) + return false; + /* - * An attempt to release a dirty page without budgeting for it - should - * not happen. + * Page is private but not dirty, weird? There is one condition + * making it happened. ubifs_writepage skipped the page because + * page index beyonds isize (for example. truncated by other + * process named A), then the page is invalidated by fadvise64 + * syscall before being truncated by process A. */ - if (PageWriteback(page)) - return 0; - ubifs_assert(c, PagePrivate(page)); - ubifs_assert(c, 0); - ClearPagePrivate(page); - ClearPageChecked(page); - return 1; + ubifs_assert(c, folio_test_private(folio)); + if (folio_test_checked(folio)) + release_new_page_budget(c); + else + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); + folio_detach_private(folio); + folio_clear_checked(folio); + return true; } /* @@ -1506,14 +1492,14 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) */ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec64 now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; - dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index, i_size_read(inode)); ubifs_assert(c, !c->ro_media && !c->ro_mount); @@ -1521,17 +1507,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* -EROFS */ /* - * We have not locked @page so far so we may budget for changing the - * page. Note, we cannot do this after we locked the page, because + * We have not locked @folio so far so we may budget for changing the + * folio. Note, we cannot do this after we locked the folio, because * budgeting may cause write-back which would cause deadlock. * - * At the moment we do not know whether the page is dirty or not, so we - * assume that it is not and budget for a new page. We could look at + * At the moment we do not know whether the folio is dirty or not, so we + * assume that it is not and budget for a new folio. We could look at * the @PG_private flag and figure this out, but we may race with write - * back and the page state may change by the time we lock it, so this + * back and the folio state may change by the time we lock it, so this * would need additional care. We do not bother with this at the * moment, although it might be good idea to do. Instead, we allocate - * budget for a new page and amend it later on if the page was in fact + * budget for a new folio and amend it later on if the folio was in fact * dirty. * * The budgeting-related logic of this function is similar to what we @@ -1554,21 +1540,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode))) { - /* Page got truncated out from underneath us */ + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode))) { + /* Folio got truncated out from underneath us */ goto sigbus; } - if (PagePrivate(page)) + if (folio->private) release_new_page_budget(c); else { - if (!PageChecked(page)) + if (!folio_test_checked(folio)) ubifs_convert_page_budget(c); - SetPagePrivate(page); + folio_attach_private(folio, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(folio->mapping, folio); } if (update_time) { @@ -1576,7 +1562,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1584,11 +1570,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) ubifs_release_dirty_inode_budget(c, ui); } - wait_for_stable_page(page); + folio_wait_stable(folio); return VM_FAULT_LOCKED; sigbus: - unlock_page(page); + folio_unlock(folio); ubifs_release_budget(c, &req); return VM_FAULT_SIGBUS; } @@ -1599,17 +1585,17 @@ static const struct vm_operations_struct ubifs_file_vm_ops = { .page_mkwrite = ubifs_vm_page_mkwrite, }; -static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int ubifs_file_mmap_prepare(struct vm_area_desc *desc) { int err; - err = generic_file_mmap(file, vma); + err = generic_file_mmap_prepare(desc); if (err) return err; - vma->vm_ops = &ubifs_file_vm_ops; + desc->vm_ops = &ubifs_file_vm_ops; if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) - file_accessed(file); + file_accessed(desc->file); return 0; } @@ -1629,35 +1615,42 @@ static const char *ubifs_get_link(struct dentry *dentry, return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } +static int ubifs_symlink_getattr(struct mnt_idmap *idmap, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + ubifs_getattr(idmap, path, stat, request_mask, query_flags); + + if (IS_ENCRYPTED(d_inode(path->dentry))) + return fscrypt_symlink_getattr(path, stat); + return 0; +} + const struct address_space_operations ubifs_file_address_operations = { - .readpage = ubifs_readpage, - .writepage = ubifs_writepage, + .read_folio = ubifs_read_folio, + .writepages = ubifs_writepages, .write_begin = ubifs_write_begin, .write_end = ubifs_write_end, - .invalidatepage = ubifs_invalidatepage, - .set_page_dirty = ubifs_set_page_dirty, -#ifdef CONFIG_MIGRATION - .migratepage = ubifs_migrate_page, -#endif - .releasepage = ubifs_releasepage, + .invalidate_folio = ubifs_invalidate_folio, + .dirty_folio = ubifs_dirty_folio, + .migrate_folio = filemap_migrate_folio, + .release_folio = ubifs_release_folio, }; const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, + .fileattr_get = ubifs_fileattr_get, + .fileattr_set = ubifs_fileattr_set, }; const struct inode_operations ubifs_symlink_inode_operations = { .get_link = ubifs_get_link, .setattr = ubifs_setattr, - .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR + .getattr = ubifs_symlink_getattr, .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, }; @@ -1665,10 +1658,10 @@ const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = ubifs_write_iter, - .mmap = ubifs_file_mmap, + .mmap_prepare = ubifs_file_mmap_prepare, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, - .splice_read = generic_file_splice_read, + .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .open = fscrypt_file_open, #ifdef CONFIG_COMPAT diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 873e6e1c92b5..643718906b9f 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -73,7 +73,7 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) */ static int scan_for_dirty_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; data.exclude_index = exclude_index; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_dirty_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb, &data); if (err) return ERR_PTR(err); @@ -340,7 +340,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -349,8 +349,9 @@ out: */ static int scan_for_free_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, data.pick_free = pick_free; data.lnum = -1; err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_free_cb, + scan_for_free_cb, &data); if (err) return ERR_PTR(err); @@ -580,7 +581,7 @@ out: * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -589,8 +590,9 @@ out: */ static int scan_for_idx_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) int err; data.lnum = -1; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_for_idx_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_idx_cb, &data); if (err) return ERR_PTR(err); @@ -726,11 +727,10 @@ out: return err; } -static int cmp_dirty_idx(const struct ubifs_lprops **a, - const struct ubifs_lprops **b) +static int cmp_dirty_idx(const void *a, const void *b) { - const struct ubifs_lprops *lpa = *a; - const struct ubifs_lprops *lpb = *b; + const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a; + const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b; return lpa->dirty + lpa->free - lpb->dirty - lpb->free; } @@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) sizeof(void *) * c->dirty_idx.cnt); /* Sort it so that the dirtiest is now at the end */ sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), - (int (*)(const void *, const void *))cmp_dirty_idx, NULL); + cmp_dirty_idx, NULL); dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); if (c->dirty_idx.cnt) dbg_find("dirtiest index LEB is %d with dirty %d and free %d", @@ -773,7 +773,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) * @c: the UBIFS file-system description object * @lprops: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @arg: information passed to and from the caller of the scan * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) */ static int scan_dirty_idx_cb(struct ubifs_info *c, const struct ubifs_lprops *lprops, int in_tree, - struct scan_data *data) + void *arg) { + struct scan_data *data = arg; int ret = LPT_SCAN_CONTINUE; /* Exclude LEBs that are currently in use */ @@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c) if (c->pnodes_have >= c->pnode_cnt) /* All pnodes are in memory, so skip scan */ return -ENOSPC; - err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, - (ubifs_lpt_scan_callback)scan_dirty_idx_cb, + err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_dirty_idx_cb, &data); if (err) return err; diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 62cb3db44e6e..3134d070fcc0 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -57,10 +57,6 @@ /** * switch_gc_head - switch the garbage collection journal head. * @c: UBIFS file-system description object - * @buf: buffer to write - * @len: length of the buffer to write - * @lnum: LEB number written is returned here - * @offs: offset written is returned here * * This function switch the GC head to the next LEB which is reserved in * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required, @@ -106,7 +102,8 @@ static int switch_gc_head(struct ubifs_info *c) * This function compares data nodes @a and @b. Returns %1 if @a has greater * inode or block number, and %-1 otherwise. */ -static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +static int data_nodes_cmp(void *priv, const struct list_head *a, + const struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; @@ -149,8 +146,8 @@ static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) * first and sorted by length in descending order. Directory entry nodes go * after inode nodes and are sorted in ascending hash valuer order. */ -static int nondata_nodes_cmp(void *priv, struct list_head *a, - struct list_head *b) +static int nondata_nodes_cmp(void *priv, const struct list_head *a, + const struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; @@ -695,6 +692,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) for (i = 0; ; i++) { int space_before, space_after; + /* Maybe continue after find and break before find */ + lp.lnum = -1; + cond_resched(); /* Give the commit an opportunity to run */ @@ -756,8 +756,19 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) * caller instead of the original '-EAGAIN'. */ err = ubifs_return_leb(c, lp.lnum); - if (err) + if (err) { ret = err; + /* + * An LEB may always be "taken", + * so setting ubifs to read-only, + * and then executing sync wbuf will + * return -EROFS and enter the "out" + * error branch. + */ + ubifs_ro_mode(c, ret); + } + /* Maybe double return LEB if goto out */ + lp.lnum = -1; break; } goto out; @@ -846,7 +857,8 @@ out: ubifs_wbuf_sync_nolock(wbuf); ubifs_ro_mode(c, ret); mutex_unlock(&wbuf->io_mutex); - ubifs_return_leb(c, lp.lnum); + if (lp.lnum != -1) + ubifs_return_leb(c, lp.lnum); return ret; } diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 7e4bfaf2871f..6c6d68242779 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -194,10 +194,29 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) return err; } +static void record_magic_error(struct ubifs_stats_info *stats) +{ + if (stats) + stats->magic_errors++; +} + +static void record_node_error(struct ubifs_stats_info *stats) +{ + if (stats) + stats->node_errors++; +} + +static void record_crc_error(struct ubifs_stats_info *stats) +{ + if (stats) + stats->crc_errors++; +} + /** * ubifs_check_node - check node. * @c: UBIFS file-system description object * @buf: node to check + * @len: node length * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages @@ -222,10 +241,10 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. */ -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc) +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc) { - int err = -EINVAL, type, node_len, dump_node = 1; + int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; const struct ubifs_ch *ch = buf; @@ -237,6 +256,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, if (!quiet) ubifs_err(c, "bad magic %#08x, expected %#08x", magic, UBIFS_NODE_MAGIC); + record_magic_error(c->stats); err = -EUCLEAN; goto out; } @@ -245,6 +265,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { if (!quiet) ubifs_err(c, "bad node type %d", type); + record_node_error(c->stats); goto out; } @@ -269,6 +290,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, if (!quiet) ubifs_err(c, "bad CRC: calculated %#08x, read %#08x", crc, node_crc); + record_crc_error(c->stats); err = -EUCLEAN; goto out; } @@ -278,22 +300,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, out_len: if (!quiet) ubifs_err(c, "bad node length %d", node_len); - if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ) - dump_node = 0; out: if (!quiet) { ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - if (dump_node) { - ubifs_dump_node(c, buf); - } else { - int safe_len = min3(node_len, c->leb_size - offs, - (int)UBIFS_MAX_DATA_NODE_SZ); - pr_err("\tprevent out-of-bounds memory access\n"); - pr_err("\ttruncated data node length %d\n", safe_len); - pr_err("\tcorrupted data node:\n"); - print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - buf, safe_len, 0); - } + ubifs_dump_node(c, buf, len); dump_stack(); } return err; @@ -317,9 +327,7 @@ out: */ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) { - uint32_t crc; - - ubifs_assert(c, pad >= 0 && !(pad & 7)); + ubifs_assert(c, pad >= 0); if (pad >= UBIFS_PAD_NODE_SZ) { struct ubifs_ch *ch = buf; @@ -333,8 +341,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); pad -= UBIFS_PAD_NODE_SZ; pad_node->pad_len = cpu_to_le32(pad); - crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); - ch->crc = cpu_to_le32(crc); + ubifs_crc_node(buf, UBIFS_PAD_NODE_SZ); memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); } else if (pad > 0) /* Too little space, padding node won't fit */ @@ -385,7 +392,7 @@ void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad) } } -void ubifs_crc_node(struct ubifs_info *c, void *node, int len) +void ubifs_crc_node(void *node, int len) { struct ubifs_ch *ch = node; uint32_t crc; @@ -422,7 +429,7 @@ int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len, return err; } - ubifs_crc_node(c, node, len); + ubifs_crc_node(node, len); return 0; } @@ -459,7 +466,6 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) */ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) { - uint32_t crc; struct ubifs_ch *ch = node; unsigned long long sqnum = next_sqnum(c); @@ -473,12 +479,11 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) ch->group_type = UBIFS_IN_NODE_GROUP; ch->sqnum = cpu_to_le64(sqnum); ch->padding[0] = ch->padding[1] = 0; - crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); - ch->crc = cpu_to_le32(crc); + ubifs_crc_node(node, len); } /** - * wbuf_timer_callback - write-buffer timer callback function. + * wbuf_timer_callback_nolock - write-buffer timer callback function. * @timer: timer data (write-buffer descriptor) * * This function is called when the write-buffer timer expires. @@ -495,7 +500,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) } /** - * new_wbuf_timer - start new write-buffer timer. + * new_wbuf_timer_nolock - start new write-buffer timer. * @c: UBIFS file-system description object * @wbuf: write-buffer descriptor */ @@ -521,7 +526,7 @@ static void new_wbuf_timer_nolock(struct ubifs_info *c, struct ubifs_wbuf *wbuf) } /** - * cancel_wbuf_timer - cancel write-buffer timer. + * cancel_wbuf_timer_nolock - cancel write-buffer timer. * @wbuf: write-buffer descriptor */ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) @@ -730,7 +735,7 @@ out_timers: int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) { struct ubifs_info *c = wbuf->c; - int err, written, n, aligned_len = ALIGN(len, 8); + int err, n, written = 0, aligned_len = ALIGN(len, 8); dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, dbg_ntype(((struct ubifs_ch *)buf)->node_type), @@ -764,6 +769,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * write-buffer. */ memcpy(wbuf->buf + wbuf->used, buf, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); + } if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", @@ -793,8 +802,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) goto exit; } - written = 0; - if (wbuf->used) { /* * The node is large enough and does not fit entirely within @@ -842,27 +849,58 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<<c->max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. + */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; } spin_lock(&wbuf->lock); - if (aligned_len) + if (aligned_len) { /* * And now we have what's left and what does not take whole * max. write unit, so write it to the write-buffer and we are * done. */ memcpy(wbuf->buf, buf + written, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + len, aligned_len - len); + } + } if (c->leb_size - wbuf->offs >= c->max_write_size) wbuf->size = c->max_write_size; @@ -890,7 +928,7 @@ exit: out: ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", len, wbuf->lnum, wbuf->offs, err); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, written + len); dump_stack(); ubifs_dump_leb(c, wbuf->lnum); return err; @@ -933,7 +971,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, err = ubifs_leb_write(c, lnum, buf, offs, buf_len); if (err) - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); return err; } @@ -1016,7 +1054,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", type); return err; @@ -1032,7 +1070,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, out: ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); return -EINVAL; } @@ -1046,7 +1084,7 @@ out: * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * - * This function reads a node of known type and and length, checks it and + * This function reads a node of known type and length, checks it and * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched * and a negative error code in case of failure. */ @@ -1072,7 +1110,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_errc(c, "expected node type %d", type); return err; @@ -1090,7 +1128,7 @@ out: ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, ubi_is_mapped(c->ubi, lnum)); if (!c->probing) { - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); } return -EINVAL; @@ -1136,8 +1174,7 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) wbuf->c = c; wbuf->next_ino = 0; - hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - wbuf->timer.function = wbuf_timer_callback_nolock; + hrtimer_setup(&wbuf->timer, wbuf_timer_callback_nolock, CLOCK_MONOTONIC, HRTIMER_MODE_REL); return 0; } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 3df9be2c684c..79536b2e3d7a 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -14,6 +14,7 @@ #include <linux/compat.h> #include <linux/mount.h> +#include <linux/fileattr.h> #include "ubifs.h" /* Need to be kept consistent with checked flags in ioctl2ubifs() */ @@ -103,26 +104,21 @@ static int ubifs2ioctl(int ubifs_flags) static int setflags(struct inode *inode, int flags) { - int oldflags, err, release; + int err, release; struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) return err; mutex_lock(&ui->ui_mutex); - oldflags = ubifs2ioctl(ui->flags); - err = vfs_ioc_setflags_prepare(inode, oldflags, flags); - if (err) - goto out_unlock; - ui->flags &= ~ioctl2ubifs(UBIFS_SETTABLE_IOCTL_FLAGS); ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -132,55 +128,52 @@ static int setflags(struct inode *inode, int flags) if (IS_SYNC(inode)) err = write_inode_now(inode, 1); return err; +} -out_unlock: - ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino); - mutex_unlock(&ui->ui_mutex); - ubifs_release_budget(c, &req); - return err; +int ubifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct inode *inode = d_inode(dentry); + int flags = ubifs2ioctl(ubifs_inode(inode)->flags); + + if (d_is_special(dentry)) + return -ENOTTY; + + dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); + fileattr_fill_flags(fa, flags); + + return 0; } -long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +int ubifs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa) { - int flags, err; - struct inode *inode = file_inode(file); + struct inode *inode = d_inode(dentry); + int flags = fa->flags; - switch (cmd) { - case FS_IOC_GETFLAGS: - flags = ubifs2ioctl(ubifs_inode(inode)->flags); + if (d_is_special(dentry)) + return -ENOTTY; - dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); - return put_user(flags, (int __user *) arg); + if (fileattr_has_fsx(fa)) + return -EOPNOTSUPP; - case FS_IOC_SETFLAGS: { - if (IS_RDONLY(inode)) - return -EROFS; + if (flags & ~UBIFS_GETTABLE_IOCTL_FLAGS) + return -EOPNOTSUPP; - if (!inode_owner_or_capable(inode)) - return -EACCES; + flags &= UBIFS_SETTABLE_IOCTL_FLAGS; - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (!S_ISDIR(inode->i_mode)) + flags &= ~FS_DIRSYNC_FL; - if (flags & ~UBIFS_GETTABLE_IOCTL_FLAGS) - return -EOPNOTSUPP; - flags &= UBIFS_SETTABLE_IOCTL_FLAGS; + dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); + return setflags(inode, flags); +} - if (!S_ISDIR(inode->i_mode)) - flags &= ~FS_DIRSYNC_FL; +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int err; + struct inode *inode = file_inode(file); - /* - * Make sure the file-system is read-write and make sure it - * will not become read-only while we are changing the flags. - */ - err = mnt_want_write_file(file); - if (err) - return err; - dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); - err = setflags(inode, flags); - mnt_drop_write_file(file); - return err; - } + switch (cmd) { case FS_IOC_SET_ENCRYPTION_POLICY: { struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -220,12 +213,6 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; case FS_IOC_SET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index e5ec1afe1c66..e28ab4395e5c 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -293,6 +293,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, } /** + * __queue_and_wait - queue a task and wait until the task is waked up. + * @c: UBIFS file-system description object + * + * This function adds current task in queue and waits until the task is waked + * up. This function should be called with @c->reserve_space_wq locked. + */ +static void __queue_and_wait(struct ubifs_info *c) +{ + DEFINE_WAIT(wait); + + __add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&c->reserve_space_wq.lock); + + schedule(); + finish_wait(&c->reserve_space_wq, &wait); +} + +/** + * wait_for_reservation - try queuing current task to wait until waked up. + * @c: UBIFS file-system description object + * + * This function queues current task to wait until waked up, if queuing is + * started(@c->need_wait_space is not %0). Returns %true if current task is + * added in queue, otherwise %false is returned. + */ +static bool wait_for_reservation(struct ubifs_info *c) +{ + if (likely(atomic_read(&c->need_wait_space) == 0)) + /* Quick path to check whether queuing is started. */ + return false; + + spin_lock(&c->reserve_space_wq.lock); + if (atomic_read(&c->need_wait_space) == 0) { + /* Queuing is not started, don't queue current task. */ + spin_unlock(&c->reserve_space_wq.lock); + return false; + } + + __queue_and_wait(c); + return true; +} + +/** + * wake_up_reservation - wake up first task in queue or stop queuing. + * @c: UBIFS file-system description object + * + * This function wakes up the first task in queue if it exists, or stops + * queuing if no tasks in queue. + */ +static void wake_up_reservation(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (waitqueue_active(&c->reserve_space_wq)) + wake_up_locked(&c->reserve_space_wq); + else + /* + * Compared with wait_for_reservation(), set @c->need_wait_space + * under the protection of wait queue lock, which can avoid that + * @c->need_wait_space is set to 0 after new task queued. + */ + atomic_set(&c->need_wait_space, 0); + spin_unlock(&c->reserve_space_wq.lock); +} + +/** + * add_or_start_queue - add current task in queue or start queuing. + * @c: UBIFS file-system description object + * + * This function starts queuing if queuing is not started, otherwise adds + * current task in queue. + */ +static void add_or_start_queue(struct ubifs_info *c) +{ + spin_lock(&c->reserve_space_wq.lock); + if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) { + /* Starts queuing, task can go on directly. */ + spin_unlock(&c->reserve_space_wq.lock); + return; + } + + /* + * There are at least two tasks have retried more than 32 times + * at certain point, first task has started queuing, just queue + * the left tasks. + */ + __queue_and_wait(c); +} + +/** * make_reservation - reserve journal space. * @c: UBIFS file-system description object * @jhead: journal head @@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, static int make_reservation(struct ubifs_info *c, int jhead, int len) { int err, cmt_retries = 0, nospc_retries = 0; + bool blocked = wait_for_reservation(c); again: down_read(&c->commit_sem); err = reserve_space(c, jhead, len); - if (!err) + if (!err) { /* c->commit_sem will get released via finish_reservation(). */ - return 0; + goto out_wake_up; + } up_read(&c->commit_sem); if (err == -ENOSPC) { /* * GC could not make any progress. We should try to commit - * once because it could make some dirty space and GC would - * make progress, so make the error -EAGAIN so that the below + * because it could make some dirty space and GC would make + * progress, so make the error -EAGAIN so that the below * will commit and re-try. */ - if (nospc_retries++ < 2) { - dbg_jnl("no space, retry"); - err = -EAGAIN; - } - - /* - * This means that the budgeting is incorrect. We always have - * to be able to write to the media, because all operations are - * budgeted. Deletions are not budgeted, though, but we reserve - * an extra LEB for them. - */ + nospc_retries++; + dbg_jnl("no space, retry"); + err = -EAGAIN; } if (err != -EAGAIN) @@ -349,15 +433,37 @@ again: */ if (cmt_retries > 128) { /* - * This should not happen unless the journal size limitations - * are too tough. + * This should not happen unless: + * 1. The journal size limitations are too tough. + * 2. The budgeting is incorrect. We always have to be able to + * write to the media, because all operations are budgeted. + * Deletions are not budgeted, though, but we reserve an + * extra LEB for them. */ - ubifs_err(c, "stuck in space allocation"); + ubifs_err(c, "stuck in space allocation, nospc_retries %d", + nospc_retries); err = -ENOSPC; goto out; - } else if (cmt_retries > 32) - ubifs_warn(c, "too many space allocation re-tries (%d)", - cmt_retries); + } else if (cmt_retries > 32) { + /* + * It's almost impossible to happen, unless there are many tasks + * making reservation concurrently and someone task has retried + * gc + commit for many times, generated available space during + * this period are grabbed by other tasks. + * But if it happens, start queuing up all tasks that will make + * space reservation, then there is only one task making space + * reservation at any time, and it can always make success under + * the premise of correct budgeting. + */ + ubifs_warn(c, "too many space allocation cmt_retries (%d) " + "nospc_retries (%d), start queuing tasks", + cmt_retries, nospc_retries); + + if (!blocked) { + blocked = true; + add_or_start_queue(c); + } + } dbg_jnl("-EAGAIN, commit and retry (retried %d times)", cmt_retries); @@ -365,7 +471,7 @@ again: err = ubifs_run_commit(c); if (err) - return err; + goto out_wake_up; goto again; out: @@ -380,6 +486,27 @@ out: cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); } +out_wake_up: + if (blocked) { + /* + * Only tasks that have ever started queuing or ever been queued + * can wake up other queued tasks, which can make sure that + * there is only one task waked up to make space reservation. + * For example: + * task A task B task C + * make_reservation make_reservation + * reserve_space // 0 + * wake_up_reservation + * atomic_cmpxchg // 0, start queuing + * reserve_space + * wait_for_reservation + * __queue_and_wait + * add_wait_queue + * if (blocked) // false + * // So that task C won't be waked up to race with task B + */ + wake_up_reservation(c); + } return err; } @@ -452,12 +579,12 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, ino->ch.node_type = UBIFS_INO_NODE; ino_key_init_flash(c, &ino->key, inode->i_ino); ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum); - ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec); - ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); - ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec); - ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); - ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); - ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ino->atime_sec = cpu_to_le64(inode_get_atime_sec(inode)); + ino->atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode)); + ino->ctime_sec = cpu_to_le64(inode_get_ctime_sec(inode)); + ino->ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + ino->mtime_sec = cpu_to_le64(inode_get_mtime_sec(inode)); + ino->mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); ino->uid = cpu_to_le32(i_uid_read(inode)); ino->gid = cpu_to_le32(i_gid_read(inode)); ino->mode = cpu_to_le32(inode->i_mode); @@ -503,7 +630,7 @@ static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui) static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) { if (c->double_hash) - dent->cookie = (__force __le32) prandom_u32(); + dent->cookie = (__force __le32) get_random_u32(); else dent->cookie = 0; } @@ -516,6 +643,7 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) * @inode: inode to update * @deletion: indicates a directory entry deletion i.e unlink or rmdir * @xent: non-zero if the directory entry is an extended attribute entry + * @in_orphan: indicates whether the @inode is in orphan list * * This function updates an inode by writing a directory entry (or extended * attribute entry), the inode itself, and the parent directory inode (or the @@ -537,9 +665,9 @@ static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent) */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent) + int deletion, int xent, int in_orphan) { - int err, dlen, ilen, len, lnum, ino_offs, dent_offs; + int err, dlen, ilen, len, lnum, ino_offs, dent_offs, orphan_added = 0; int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); int last_reference = !!(deletion && inode->i_nlink == 0); struct ubifs_inode *ui = ubifs_inode(inode); @@ -623,13 +751,14 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_release; - if (last_reference) { + if (last_reference && !in_orphan) { err = ubifs_add_orphan(c, inode->i_ino); if (err) { release_head(c, BASEHD); goto out_finish; } ui->del_cmtno = c->cmt_no; + orphan_added = 1; } err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); @@ -678,6 +807,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (err) goto out_ro; + if (in_orphan && inode->i_nlink) + ubifs_delete_orphan(c, inode->i_ino); + finish_reservation(c); spin_lock(&ui->ui_lock); ui->synced_i_size = ui->ui_size; @@ -702,7 +834,7 @@ out_release: kfree(dent); out_ro: ubifs_ro_mode(c, err); - if (last_reference) + if (orphan_added) ubifs_delete_orphan(c, inode->i_ino); finish_reservation(c); return err; @@ -713,14 +845,16 @@ out_ro: * @c: UBIFS file-system description object * @inode: inode the data node belongs to * @key: node key - * @buf: buffer to write + * @folio: buffer to write + * @offset: offset to write at * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) * * This function writes a data node to the journal. Returns %0 if the data node * was successfully written, and a negative error code in case of failure. */ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, - const union ubifs_key *key, const void *buf, int len) + const union ubifs_key *key, struct folio *folio, + size_t offset, int len) { struct ubifs_data_node *data; int err, lnum, offs, compr_type, out_len, compr_len, auth_len; @@ -764,7 +898,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, compr_type = ui->compr_type; out_len = compr_len = dlen - UBIFS_DATA_NODE_SZ; - ubifs_compress(c, buf, len, &data->data, &compr_len, &compr_type); + ubifs_compress_folio(c, folio, offset, len, &data->data, &compr_len, + &compr_type); ubifs_assert(c, compr_len <= UBIFS_BLOCK_SIZE); if (encrypted) { @@ -849,6 +984,13 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); + if (kill_xattrs && ui->xattr_cnt > ubifs_xattr_max_cnt(c)) { + ubifs_err(c, "Cannot delete inode, it has too many xattrs!"); + err = -EPERM; + ubifs_ro_mode(c, err); + return err; + } + /* * If the inode is being deleted, do not write the attached data. No * need to synchronize the write-buffer either. @@ -880,11 +1022,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) struct inode *xino; struct ubifs_dent_node *xent, *pxent = NULL; - if (ui->xattr_cnt >= ubifs_xattr_max_cnt(c)) { - ubifs_err(c, "Cannot delete inode, it has too much xattrs!"); - goto out_release; - } - lowest_xent_key(c, &key, inode->i_ino); while (1) { xent = ubifs_tnc_next_ent(c, &key, &nm); @@ -893,6 +1030,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) if (err == -ENOENT) break; + kfree(pxent); goto out_release; } @@ -905,6 +1043,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) ubifs_err(c, "dead directory entry '%s', error %d", xent->name, err); ubifs_ro_mode(c, err); + kfree(pxent); kfree(xent); goto out_release; } @@ -935,8 +1074,6 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) inode->i_ino); release_head(c, BASEHD); - ubifs_add_auth_dirt(c, lnum); - if (last_reference) { err = ubifs_tnc_remove_ino(c, inode->i_ino); if (err) @@ -946,6 +1083,8 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) } else { union ubifs_key key; + ubifs_add_auth_dirt(c, lnum); + ino_key_init(c, &key, inode->i_ino); err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash); } @@ -1197,15 +1336,20 @@ out_free: * ubifs_jnl_rename - rename a directory entry. * @c: UBIFS file-system description object * @old_dir: parent inode of directory entry to rename - * @old_dentry: directory entry to rename + * @old_inode: directory entry's inode to rename + * @old_nm: name of the old directory entry to rename * @new_dir: parent inode of directory entry to rename - * @new_dentry: new directory entry (or directory entry to replace) + * @new_inode: new directory entry's inode (or directory entry's inode to + * replace) + * @new_nm: new name of the new directory entry + * @whiteout: whiteout inode * @sync: non-zero if the write-buffer has to be synchronized + * @delete_orphan: indicates an orphan entry deletion for @whiteout * * This function implements the re-name operation which may involve writing up - * to 4 inodes and 2 directory entries. It marks the written inodes as clean - * and returns zero on success. In case of failure, a negative error code is - * returned. + * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) + * and 2 directory entries. It marks the written inodes as clean and returns + * zero on success. In case of failure, a negative error code is returned. */ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *old_inode, @@ -1213,19 +1357,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync) + const struct inode *whiteout, int sync, int delete_orphan) { void *p; union ubifs_key key; struct ubifs_dent_node *dent, *dent2; - int err, dlen1, dlen2, ilen, lnum, offs, len; + int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; int last_reference = !!(new_inode && new_inode->i_nlink == 0); int move = (old_dir != new_dir); - struct ubifs_inode *uninitialized_var(new_ui); + struct ubifs_inode *new_ui, *whiteout_ui; u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; + u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; u8 hash_dent1[UBIFS_HASH_ARR_SZ]; u8 hash_dent2[UBIFS_HASH_ARR_SZ]; @@ -1245,9 +1390,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, } else ilen = 0; + if (whiteout) { + whiteout_ui = ubifs_inode(whiteout); + ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); + ubifs_assert(c, whiteout->i_nlink == 1); + ubifs_assert(c, !whiteout_ui->dirty); + wlen = UBIFS_INO_NODE_SZ; + wlen += whiteout_ui->data_len; + } else + wlen = 0; + aligned_dlen1 = ALIGN(dlen1, 8); aligned_dlen2 = ALIGN(dlen2, 8); - len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); + len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + + ALIGN(wlen, 8) + ALIGN(plen, 8); if (move) len += plen; @@ -1309,6 +1465,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, p += ALIGN(ilen, 8); } + if (whiteout) { + pack_inode(c, p, whiteout, 0); + err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); + if (err) + goto out_release; + + p += ALIGN(wlen, 8); + } + if (!move) { pack_inode(c, p, old_dir, 1); err = ubifs_node_calc_hash(c, p, hash_old_dir); @@ -1334,6 +1499,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_finish; } new_ui->del_cmtno = c->cmt_no; + orphan_added = 1; } err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); @@ -1347,6 +1513,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, if (new_inode) ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, new_inode->i_ino); + if (whiteout) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + whiteout->i_ino); } release_head(c, BASEHD); @@ -1363,8 +1532,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); if (err) goto out_ro; - - ubifs_delete_orphan(c, whiteout->i_ino); } else { err = ubifs_add_dirt(c, lnum, dlen2); if (err) @@ -1385,6 +1552,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, offs += ALIGN(ilen, 8); } + if (whiteout) { + ino_key_init(c, &key, whiteout->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, wlen, + hash_whiteout_inode); + if (err) + goto out_ro; + offs += ALIGN(wlen, 8); + } + ino_key_init(c, &key, old_dir->i_ino); err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); if (err) @@ -1398,6 +1574,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, goto out_ro; } + if (delete_orphan) + ubifs_delete_orphan(c, whiteout->i_ino); + finish_reservation(c); if (new_inode) { mark_inode_clean(c, new_ui); @@ -1405,6 +1584,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, new_ui->synced_i_size = new_ui->ui_size; spin_unlock(&new_ui->ui_lock); } + /* + * No need to mark whiteout inode clean. + * Whiteout doesn't have non-zero size, no need to update + * synced_i_size for whiteout_ui. + */ mark_inode_clean(c, ubifs_inode(old_dir)); if (move) mark_inode_clean(c, ubifs_inode(new_dir)); @@ -1415,7 +1599,7 @@ out_release: release_head(c, BASEHD); out_ro: ubifs_ro_mode(c, err); - if (last_reference) + if (orphan_added) ubifs_delete_orphan(c, new_inode->i_ino); out_finish: finish_reservation(c); @@ -1427,27 +1611,29 @@ out_free: /** * truncate_data_node - re-compress/encrypt a truncated data node. * @c: UBIFS file-system description object - * @inode: inode which referes to the data node + * @inode: inode which refers to the data node * @block: data block number * @dn: data node to re-compress * @new_len: new length + * @dn_size: size of the data node @dn in memory * * This function is used when an inode is truncated and the last data node of * the inode has to be re-compressed/encrypted and re-written. */ static int truncate_data_node(const struct ubifs_info *c, const struct inode *inode, unsigned int block, struct ubifs_data_node *dn, - int *new_len) + int *new_len, int dn_size) { void *buf; - int err, dlen, compr_type, out_len, old_dlen; + int err, dlen, compr_type, out_len, data_size; out_len = le32_to_cpu(dn->size); - buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS); + buf = kmalloc(out_len, GFP_NOFS); if (!buf) return -ENOMEM; - dlen = old_dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + data_size = dn_size - UBIFS_DATA_NODE_SZ; compr_type = le16_to_cpu(dn->compr_type); if (IS_ENCRYPTED(inode)) { @@ -1467,11 +1653,11 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in } if (IS_ENCRYPTED(inode)) { - err = ubifs_encrypt(inode, dn, out_len, &old_dlen, block); + err = ubifs_encrypt(inode, dn, out_len, &data_size, block); if (err) goto out; - out_len = old_dlen; + out_len = data_size; } else { dn->compr_size = 0; } @@ -1507,8 +1693,9 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, union ubifs_key key, to_key; struct ubifs_ino_node *ino; struct ubifs_trun_node *trun; - struct ubifs_data_node *uninitialized_var(dn); + struct ubifs_data_node *dn; int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); + int dn_size; struct ubifs_inode *ui = ubifs_inode(inode); ino_t inum = inode->i_ino; unsigned int blk; @@ -1521,10 +1708,13 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ubifs_assert(c, S_ISREG(inode->i_mode)); ubifs_assert(c, mutex_is_locked(&ui->ui_mutex)); - sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + - UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; + dn_size = COMPRESSED_DATA_NODE_BUF_SZ; + + if (IS_ENCRYPTED(inode)) + dn_size += UBIFS_CIPHER_BLOCK_SIZE; - sz += ubifs_auth_node_sz(c); + sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + + dn_size + ubifs_auth_node_sz(c); ino = kmalloc(sz, GFP_NOFS); if (!ino) @@ -1555,14 +1745,16 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { ubifs_err(c, "bad data node (block %u, inode %lu)", blk, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, dn_size); + err = -EUCLEAN; goto out_free; } if (dn_len <= dlen) dlen = 0; /* Nothing to do */ else { - err = truncate_data_node(c, inode, blk, dn, &dlen); + err = truncate_data_node(c, inode, blk, dn, + &dlen, dn_size); if (err) goto out_free; } @@ -1796,7 +1988,6 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, u8 hash[UBIFS_HASH_ARR_SZ]; dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); - ubifs_assert(c, host->i_nlink > 0); ubifs_assert(c, inode->i_nlink > 0); ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex)); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 22bfda158f7f..8788740ec57f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -269,7 +269,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, break; /* No more room on heap so make it un-categorized */ cat = LPROPS_UNCAT; - /* Fall through */ + fallthrough; case LPROPS_UNCAT: list_add(&lprops->list, &c->uncat_list); break; @@ -313,7 +313,7 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, case LPROPS_FREEABLE: c->freeable_cnt -= 1; ubifs_assert(c, c->freeable_cnt >= 0); - /* Fall through */ + fallthrough; case LPROPS_UNCAT: case LPROPS_EMPTY: case LPROPS_FRDI_IDX: @@ -1005,7 +1005,7 @@ out: * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @lst: lprops statistics to update + * @arg: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -1014,8 +1014,9 @@ out: */ static int scan_check_cb(struct ubifs_info *c, const struct ubifs_lprops *lp, int in_tree, - struct ubifs_lp_stats *lst) + void *arg) { + struct ubifs_lp_stats *lst = arg; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; @@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c) memset(&lst, 0, sizeof(struct ubifs_lp_stats)); err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, - (ubifs_lpt_scan_callback)scan_check_cb, - &lst); + scan_check_cb, &lst); if (err && err != -ENOSPC) goto out; diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index e21abf250951..dde0aa3287f4 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -275,7 +275,7 @@ uint32_t ubifs_unpack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos, const int k = 32 - nrbits; uint8_t *p = *addr; int b = *pos; - uint32_t uninitialized_var(val); + uint32_t val; const int bytes = (nrbits + b + 7) >> 3; ubifs_assert(c, nrbits > 0); @@ -628,8 +628,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); buf = vmalloc(c->leb_size); - ltab = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + ltab = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!pnode || !nnode || !buf || !ltab || !lsave) { err = -ENOMEM; goto out; @@ -851,7 +851,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); @@ -1777,8 +1777,8 @@ static int lpt_init_rd(struct ubifs_info *c) { int err, i; - c->ltab = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + c->ltab = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!c->ltab) return -ENOMEM; @@ -1824,7 +1824,7 @@ static int lpt_init_rd(struct ubifs_info *c) dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); @@ -1846,8 +1846,8 @@ static int lpt_init_wr(struct ubifs_info *c) { int err, i; - c->ltab_cmt = vmalloc(array_size(sizeof(struct ubifs_lpt_lprops), - c->lpt_lebs)); + c->ltab_cmt = vmalloc_array(c->lpt_lebs, + sizeof(struct ubifs_lpt_lprops)); if (!c->ltab_cmt) return -ENOMEM; @@ -1918,6 +1918,7 @@ out_err: * @pnode: where to keep a pnode * @cnode: where to keep a cnode * @in_tree: is the node in the tree in memory + * @ptr: union of node pointers * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in * the tree * @ptr.pnode: ditto for pnode diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index d76a19e460cd..f2cb214581fd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -577,7 +577,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return (void *)nnode; + return ERR_CAST(nnode); /* Go down to level 1 */ while (nnode->level > 1) { @@ -594,7 +594,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, } nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return (void *)nnode; + return ERR_CAST(nnode); } for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) @@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) len -= node_len; } - err = 0; out: vfree(buf); return err; @@ -1933,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); - return; } /** @@ -1970,28 +1968,28 @@ static int dbg_populate_lsave(struct ubifs_info *c) if (!dbg_is_chk_gen(c)) return 0; - if (prandom_u32() & 3) + if (get_random_u32_below(4)) return 0; for (i = 0; i < c->lsave_cnt; i++) c->lsave[i] = c->main_first; list_for_each_entry(lprops, &c->empty_list, list) - c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; list_for_each_entry(lprops, &c->freeable_list, list) - c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; list_for_each_entry(lprops, &c->frdi_idx_list, list) - c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum; heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; heap = &c->lpt_heap[LPROPS_DIRTY - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; heap = &c->lpt_heap[LPROPS_FREE - 1]; for (i = 0; i < heap->cnt; i++) - c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum; return 1; } diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 911d0555b9f2..a148760fa49e 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -37,7 +37,7 @@ int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2) return ret; /* - * Do not compare the embedded HMAC aswell which also must be different + * Do not compare the embedded HMAC as well which also must be different * due to the different common node header. */ behind = hmac_offs + UBIFS_MAX_HMAC_LEN; @@ -67,10 +67,13 @@ static int mst_node_check_hash(const struct ubifs_info *c, { u8 calc[UBIFS_MAX_HASH_LEN]; const void *node = mst; + int ret; - crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), + ret = crypto_shash_tfm_digest(c->hash_tfm, node + sizeof(struct ubifs_ch), UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc); + if (ret) + return ret; if (ubifs_check_hash(c, expected, calc)) return -EPERM; @@ -314,7 +317,7 @@ static int validate_master(const struct ubifs_info *c) out: ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } @@ -392,7 +395,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { ubifs_err(c, "bad leb_cnt on master node"); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index c97a4d537d83..615878e884ba 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -121,7 +121,7 @@ static inline const char *ubifs_compr_name(struct ubifs_info *c, int compr_type) * ubifs_wbuf_sync - synchronize write-buffer. * @wbuf: write-buffer to synchronize * - * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume + * This is the same as 'ubifs_wbuf_sync_nolock()' but it does not assume * that the write-buffer is already locked. */ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 2c294085ffed..5555dd740889 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -42,24 +42,30 @@ static int dbg_check_orphans(struct ubifs_info *c); -static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, - struct ubifs_orphan *parent_orphan) +/** + * ubifs_add_orphan - add an orphan. + * @c: UBIFS file-system description object + * @inum: orphan inode number + * + * Add an orphan. This function is called when an inodes link count drops to + * zero. + */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) { struct ubifs_orphan *orphan, *o; struct rb_node **p, *parent = NULL; orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); if (!orphan) - return ERR_PTR(-ENOMEM); + return -ENOMEM; orphan->inum = inum; orphan->new = 1; - INIT_LIST_HEAD(&orphan->child_list); spin_lock(&c->orphan_lock); if (c->tot_orphans >= c->max_orphans) { spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-ENFILE); + return -ENFILE; } p = &c->orph_tree.rb_node; while (*p) { @@ -70,10 +76,10 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, else if (inum > o->inum) p = &(*p)->rb_right; else { - ubifs_err(c, "orphaned twice"); + ubifs_err(c, "ino %lu orphaned twice", (unsigned long)inum); spin_unlock(&c->orphan_lock); kfree(orphan); - return ERR_PTR(-EINVAL); + return -EINVAL; } } c->tot_orphans += 1; @@ -83,14 +89,9 @@ static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum, list_add_tail(&orphan->list, &c->orph_list); list_add_tail(&orphan->new_list, &c->orph_new); - if (parent_orphan) { - list_add_tail(&orphan->child_list, - &parent_orphan->child_list); - } - spin_unlock(&c->orphan_lock); dbg_gen("ino %lu", (unsigned long)inum); - return orphan; + return 0; } static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum) @@ -135,6 +136,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) if (orph->cmt) { orph->del = 1; + rb_erase(&orph->rb, &c->orph_tree); orph->dnext = c->orph_dnext; c->orph_dnext = orph; dbg_gen("delete later ino %lu", (unsigned long)orph->inum); @@ -145,57 +147,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) } /** - * ubifs_add_orphan - add an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * Add an orphan. This function is called when an inodes link count drops to - * zero. - */ -int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) -{ - int err = 0; - ino_t xattr_inum; - union ubifs_key key; - struct ubifs_dent_node *xent, *pxent = NULL; - struct fscrypt_name nm = {0}; - struct ubifs_orphan *xattr_orphan; - struct ubifs_orphan *orphan; - - orphan = orphan_add(c, inum, NULL); - if (IS_ERR(orphan)) - return PTR_ERR(orphan); - - lowest_xent_key(c, &key, inum); - while (1) { - xent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(xent)) { - err = PTR_ERR(xent); - if (err == -ENOENT) - break; - return err; - } - - fname_name(&nm) = xent->name; - fname_len(&nm) = le16_to_cpu(xent->nlen); - xattr_inum = le64_to_cpu(xent->inum); - - xattr_orphan = orphan_add(c, xattr_inum, orphan); - if (IS_ERR(xattr_orphan)) { - kfree(xent); - return PTR_ERR(xattr_orphan); - } - - kfree(pxent); - pxent = xent; - key_read(c, &xent->key, &key); - } - kfree(pxent); - - return 0; -} - -/** * ubifs_delete_orphan - delete an orphan. * @c: UBIFS file-system description object * @inum: orphan inode number @@ -204,7 +155,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) */ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) { - struct ubifs_orphan *orph, *child_orph, *tmp_o; + struct ubifs_orphan *orph; spin_lock(&c->orphan_lock); @@ -217,11 +168,6 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) return; } - list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { - list_del(&child_orph->child_list); - orphan_delete(c, child_orph); - } - orphan_delete(c, orph); spin_unlock(&c->orphan_lock); @@ -516,7 +462,6 @@ static void erase_deleted(struct ubifs_info *c) dnext = orphan->dnext; ubifs_assert(c, !orphan->new); ubifs_assert(c, orphan->del); - rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); @@ -569,51 +514,6 @@ int ubifs_clear_orphans(struct ubifs_info *c) } /** - * insert_dead_orphan - insert an orphan. - * @c: UBIFS file-system description object - * @inum: orphan inode number - * - * This function is a helper to the 'do_kill_orphans()' function. The orphan - * must be kept until the next commit, so it is added to the rb-tree and the - * deletion list. - */ -static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) -{ - struct ubifs_orphan *orphan, *o; - struct rb_node **p, *parent = NULL; - - orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); - if (!orphan) - return -ENOMEM; - orphan->inum = inum; - - p = &c->orph_tree.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_orphan, rb); - if (inum < o->inum) - p = &(*p)->rb_left; - else if (inum > o->inum) - p = &(*p)->rb_right; - else { - /* Already added - no problem */ - kfree(orphan); - return 0; - } - } - c->tot_orphans += 1; - rb_link_node(&orphan->rb, parent, p); - rb_insert_color(&orphan->rb, &c->orph_tree); - list_add_tail(&orphan->list, &c->orph_list); - orphan->del = 1; - orphan->dnext = c->orph_dnext; - c->orph_dnext = orphan; - dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, - c->new_orphans, c->tot_orphans); - return 0; -} - -/** * do_kill_orphans - remove orphan inodes from the index. * @c: UBIFS file-system description object * @sleb: scanned LEB @@ -644,7 +544,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (snod->type != UBIFS_ORPH_NODE) { ubifs_err(c, "invalid node type %d in orphan area at %d:%d", snod->type, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } @@ -672,7 +573,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (!first) { ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } @@ -687,12 +589,12 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { - union ubifs_key key1, key2; + union ubifs_key key; inum = le64_to_cpu(orph->inos[i]); - ino_key_init(c, &key1, inum); - err = ubifs_tnc_lookup(c, &key1, ino); + ino_key_init(c, &key, inum); + err = ubifs_tnc_lookup(c, &key, ino); if (err && err != -ENOENT) goto out_free; @@ -704,17 +606,10 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, dbg_rcvry("deleting orphaned inode %lu", (unsigned long)inum); - lowest_ino_key(c, &key1, inum); - highest_ino_key(c, &key2, inum); - - err = ubifs_tnc_remove_range(c, &key1, &key2); + err = ubifs_tnc_remove_ino(c, inum); if (err) goto out_ro; } - - err = insert_dead_orphan(c, inum); - if (err) - goto out_free; } *last_cmt_no = cmt_no; @@ -921,8 +816,12 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, inum = key_inum(c, &zbr->key); if (inum != ci->last_ino) { - /* Lowest node type is the inode node, so it comes first */ - if (key_type(c, &zbr->key) != UBIFS_INO_KEY) + /* + * Lowest node type is the inode node or xattr entry(when + * selinux/encryption is enabled), so it comes first + */ + if (key_type(c, &zbr->key) != UBIFS_INO_KEY && + key_type(c, &zbr->key) != UBIFS_XENT_KEY) ubifs_err(c, "found orphan node ino %lu, type %d", (unsigned long)inum, key_type(c, &zbr->key)); ci->last_ino = inum; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f116f7b3f9e5..b36dc9b032f4 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -352,11 +352,11 @@ out_free: ubifs_err(c, "failed to recover master node"); if (mst1) { ubifs_err(c, "dumping first master node"); - ubifs_dump_node(c, mst1); + ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1)); } if (mst2) { ubifs_err(c, "dumping second master node"); - ubifs_dump_node(c, mst2); + ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2)); } vfree(buf2); vfree(buf1); @@ -469,7 +469,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, * The area after the common header size is not empty, so the common * header must be intact. Check it. */ - if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { + if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) { dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); return 0; } @@ -1406,7 +1406,6 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) union ubifs_key key; int err, lnum, offs, len; loff_t i_size; - uint32_t crc; /* Locate the inode node LEB number and offset */ ino_key_init(c, &key, e->inum); @@ -1428,8 +1427,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) ino = c->sbuf + offs; ino->size = cpu_to_le64(e->d_size); len = le32_to_cpu(ino->ch.len); - crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); - ino->ch.crc = cpu_to_le32(crc); + ubifs_crc_node((void *)ino, len); /* Work out where data in the LEB ends and free space begins */ p = c->sbuf; len = c->leb_size - 1; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index b69ffac7e415..a950c5f2560e 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -23,13 +23,13 @@ #include "ubifs.h" #include <linux/list_sort.h> #include <crypto/hash.h> -#include <crypto/algapi.h> /** * struct replay_entry - replay list entry. * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @hash: node hash * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number * @list: links the replay list @@ -106,7 +106,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) * property values should be @lp->free == @c->leb_size and * @lp->dirty == 0, but that is not the case. The reason is that * the LEB had been garbage collected before it became the bud, - * and there was not commit inbetween. The garbage collector + * and there was no commit in between. The garbage collector * resets the free and dirty space without recording it * anywhere except lprops, so if there was no commit then * lprops does not have that information. @@ -223,7 +223,8 @@ static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) */ list_for_each_entry_reverse(r, &c->replay_list, list) { ubifs_assert(c, r->sqnum >= rino->sqnum); - if (key_inum(c, &r->key) == key_inum(c, &rino->key)) + if (key_inum(c, &r->key) == key_inum(c, &rino->key) && + key_type(c, &r->key) == UBIFS_INO_KEY) return r->deletion == 0; } @@ -295,11 +296,11 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) * @b: second replay entry * * This is a comparios function for 'list_sort()' which compares 2 replay - * entries @a and @b by comparing their sequence numer. Returns %1 if @a has + * entries @a and @b by comparing their sequence number. Returns %1 if @a has * greater sequence number and %-1 otherwise. */ -static int replay_entries_cmp(void *priv, struct list_head *a, - struct list_head *b) +static int replay_entries_cmp(void *priv, const struct list_head *a, + const struct list_head *b) { struct ubifs_info *c = priv; struct replay_entry *ra, *rb; @@ -365,6 +366,7 @@ static void destroy_replay_list(struct ubifs_info *c) * @lnum: node logical eraseblock number * @offs: node offset * @len: node length + * @hash: node hash * @key: node key * @sqnum: sequence number * @deletion: non-zero if this is a deletion @@ -417,6 +419,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, * @lnum: node logical eraseblock number * @offs: node offset * @len: node length + * @hash: node hash * @key: node key * @name: directory entry name * @nlen: directory entry name length @@ -559,7 +562,9 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) } /* authenticate_sleb_hash is split out for stack usage */ -static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash) +static int noinline_for_stack +authenticate_sleb_hash(struct ubifs_info *c, + struct shash_desc *log_hash, u8 *hash) { SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); @@ -574,7 +579,7 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h * @c: UBIFS file-system description object * @sleb: the scan LEB to authenticate * @log_hash: - * @is_last: if true, this is is the last LEB + * @is_last: if true, this is the last LEB * * This function iterates over the buds of a single LEB authenticating all buds * with the authentication nodes on this LEB. Authentication nodes are written @@ -827,7 +832,7 @@ out: out_dump: ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } @@ -931,8 +936,6 @@ out: * validate_ref - validate a reference node. * @c: UBIFS file-system description object * @ref: the reference node to validate - * @ref_lnum: LEB number of the reference node - * @ref_offs: reference node offset * * This function returns %1 if a bud reference already exists for the LEB. %0 is * returned if the reference node is new, otherwise %-EINVAL is returned if @@ -1125,7 +1128,7 @@ out: out_dump: ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 4b4b65b48c57..e7693b94e5b5 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -53,6 +53,9 @@ static int get_default_compressor(struct ubifs_info *c) { + if (ubifs_compr_present(c, UBIFS_COMPR_ZSTD)) + return UBIFS_COMPR_ZSTD; + if (ubifs_compr_present(c, UBIFS_COMPR_LZO)) return UBIFS_COMPR_LZO; @@ -174,7 +177,8 @@ static int create_default_filesystem(struct ubifs_info *c) tmp64 = (long long)max_buds * c->leb_size; if (big_lpt) sup_flags |= UBIFS_FLG_BIGLPT; - sup_flags |= UBIFS_FLG_DOUBLE_HASH; + if (ubifs_default_version > 4) + sup_flags |= UBIFS_FLG_DOUBLE_HASH; if (ubifs_authenticated(c)) { sup_flags |= UBIFS_FLG_AUTHENTICATION; @@ -200,7 +204,7 @@ static int create_default_filesystem(struct ubifs_info *c) sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); sup->fanout = cpu_to_le32(DEFAULT_FANOUT); sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); - sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); + sup->fmt_version = cpu_to_le32(ubifs_default_version); sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); if (c->mount_opts.override_compr) sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); @@ -502,7 +506,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) failed: ubifs_err(c, "bad superblock, error %d", err); - ubifs_dump_node(c, sup); + ubifs_dump_node(c, sup, ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size)); return -EINVAL; } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index c69cdb5e65bc..84a9157dcc32 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -76,7 +76,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs); - if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) + if (ubifs_check_node(c, buf, len, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; if (ch->node_type == UBIFS_PAD_NODE) { @@ -90,7 +90,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (!quiet) { ubifs_err(c, "bad pad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, pad); + ubifs_dump_node(c, pad, len); } return SCANNED_A_BAD_PAD_NODE; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7fc2f3f07c16..f453c37cee37 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -19,13 +19,31 @@ #include <linux/module.h> #include <linux/ctype.h> #include <linux/kthread.h> -#include <linux/parser.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/seq_file.h> -#include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> #include "ubifs.h" +static int ubifs_default_version_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 4 || n > UBIFS_FORMAT_VERSION) + return -EINVAL; + return param_set_int(val, kp); +} + +static const struct kernel_param_ops ubifs_default_version_ops = { + .set = ubifs_default_version_set, + .get = param_get_int, +}; + +int ubifs_default_version = UBIFS_FORMAT_VERSION; +module_param_cb(default_version, &ubifs_default_version_ops, &ubifs_default_version, 0600); + /* * Maximum amount of memory we may 'kmalloc()' without worrying that we are * allocating too much. @@ -36,11 +54,7 @@ static struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ -static struct shrinker ubifs_shrinker_info = { - .scan_objects = ubifs_shrink_scan, - .count_objects = ubifs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *ubifs_shrinker_info; /** * validate_inode - validate inode. @@ -100,7 +114,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) inode = iget_locked(sb, inum); if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode_state_read_once(inode) & I_NEW)) return inode; ui = ubifs_inode(inode); @@ -124,12 +138,12 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) set_nlink(inode, le32_to_cpu(ino->nlink)); i_uid_write(inode, le32_to_cpu(ino->uid)); i_gid_write(inode, le32_to_cpu(ino->gid)); - inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); - inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); - inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); - inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); - inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); - inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); + inode_set_atime(inode, (int64_t)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); + inode_set_mtime(inode, (int64_t)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); + inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); inode->i_mode = le32_to_cpu(ino->mode); inode->i_size = le64_to_cpu(ino->size); @@ -235,7 +249,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) out_invalid: ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err); - ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ); ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: @@ -250,13 +264,14 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) { struct ubifs_inode *ui; - ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); + ui = alloc_inode_sb(sb, ubifs_inode_slab, GFP_NOFS); if (!ui) return NULL; memset((void *)ui + sizeof(struct inode), 0, sizeof(struct ubifs_inode) - sizeof(struct inode)); mutex_init(&ui->ui_mutex); + init_rwsem(&ui->xattr_sem); spin_lock_init(&ui->ui_lock); return &ui->vfs_inode; }; @@ -320,7 +335,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) static int ubifs_drop_inode(struct inode *inode) { - int drop = generic_drop_inode(inode); + int drop = inode_generic_drop(inode); if (!drop) drop = fscrypt_drop_inode(inode); @@ -343,7 +358,7 @@ static void ubifs_evict_inode(struct inode *inode) goto out; dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); - ubifs_assert(c, !atomic_read(&inode->i_count)); + ubifs_assert(c, !icount_read(inode)); truncate_inode_pages_final(&inode->i_data); @@ -758,10 +773,10 @@ static void init_constants_master(struct ubifs_info *c) * necessary to report something for the 'statfs()' call. * * Subtract the LEB reserved for GC, the LEB which is reserved for - * deletions, minimum LEBs for the index, and assume only one journal - * head is available. + * deletions, minimum LEBs for the index, the LEBs which are reserved + * for each journal head. */ - tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; + tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt; tmp64 *= (long long)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; @@ -814,14 +829,16 @@ static int alloc_wbufs(struct ubifs_info *c) INIT_LIST_HEAD(&c->jheads[i].buds_list); err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); if (err) - return err; + goto out_wbuf; c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; c->jheads[i].wbuf.jhead = i; c->jheads[i].grouped = 1; c->jheads[i].log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->jheads[i].log_hash)) - goto out; + if (IS_ERR(c->jheads[i].log_hash)) { + err = PTR_ERR(c->jheads[i].log_hash); + goto out_log_hash; + } } /* @@ -833,9 +850,18 @@ static int alloc_wbufs(struct ubifs_info *c) return 0; -out: - while (i--) +out_log_hash: + kfree(c->jheads[i].wbuf.buf); + kfree(c->jheads[i].wbuf.inodes); + +out_wbuf: + while (i--) { + kfree(c->jheads[i].wbuf.buf); + kfree(c->jheads[i].wbuf.inodes); kfree(c->jheads[i].log_hash); + } + kfree(c->jheads); + c->jheads = NULL; return err; } @@ -893,8 +919,10 @@ static void free_buds(struct ubifs_info *c) { struct ubifs_bud *bud, *n; - rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) + rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) { + kfree(bud->log_hash); kfree(bud); + } } /** @@ -953,176 +981,137 @@ enum { Opt_auth_key, Opt_auth_hash_name, Opt_ignore, - Opt_err, }; -static const match_table_t tokens = { - {Opt_fast_unmount, "fast_unmount"}, - {Opt_norm_unmount, "norm_unmount"}, - {Opt_bulk_read, "bulk_read"}, - {Opt_no_bulk_read, "no_bulk_read"}, - {Opt_chk_data_crc, "chk_data_crc"}, - {Opt_no_chk_data_crc, "no_chk_data_crc"}, - {Opt_override_compr, "compr=%s"}, - {Opt_auth_key, "auth_key=%s"}, - {Opt_auth_hash_name, "auth_hash_name=%s"}, - {Opt_ignore, "ubi=%s"}, - {Opt_ignore, "vol=%s"}, - {Opt_assert, "assert=%s"}, - {Opt_err, NULL}, +static const struct constant_table ubifs_param_compr[] = { + { "none", UBIFS_COMPR_NONE }, + { "lzo", UBIFS_COMPR_LZO }, + { "zlib", UBIFS_COMPR_ZLIB }, + { "zstd", UBIFS_COMPR_ZSTD }, + {} }; -/** - * parse_standard_option - parse a standard mount option. - * @option: the option to parse - * - * Normally, standard mount options like "sync" are passed to file-systems as - * flags. However, when a "rootflags=" kernel boot parameter is used, they may - * be present in the options string. This function tries to deal with this - * situation and parse standard options. Returns 0 if the option was not - * recognized, and the corresponding integer flag if it was. - * - * UBIFS is only interested in the "sync" option, so do not check for anything - * else. - */ -static int parse_standard_option(const char *option) -{ +static const struct constant_table ubifs_param_assert[] = { + { "report", ASSACT_REPORT }, + { "read-only", ASSACT_RO }, + { "panic", ASSACT_PANIC }, + {} +}; - pr_notice("UBIFS: parse %s\n", option); - if (!strcmp(option, "sync")) - return SB_SYNCHRONOUS; - return 0; -} +static const struct fs_parameter_spec ubifs_fs_param_spec[] = { + fsparam_flag ("fast_unmount", Opt_fast_unmount), + fsparam_flag ("norm_unmount", Opt_norm_unmount), + fsparam_flag ("bulk_read", Opt_bulk_read), + fsparam_flag ("no_bulk_read", Opt_no_bulk_read), + fsparam_flag ("chk_data_crc", Opt_chk_data_crc), + fsparam_flag ("no_chk_data_crc", Opt_no_chk_data_crc), + fsparam_enum ("compr", Opt_override_compr, ubifs_param_compr), + fsparam_enum ("assert", Opt_assert, ubifs_param_assert), + fsparam_string ("auth_key", Opt_auth_key), + fsparam_string ("auth_hash_name", Opt_auth_hash_name), + fsparam_string ("ubi", Opt_ignore), + fsparam_string ("vol", Opt_ignore), + {} +}; + +struct ubifs_fs_context { + struct ubifs_mount_opts mount_opts; + char *auth_key_name; + char *auth_hash_name; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; + unsigned int assert_action:2; +}; /** - * ubifs_parse_options - parse mount parameters. - * @c: UBIFS file-system description object - * @options: parameters to parse - * @is_remount: non-zero if this is FS re-mount + * ubifs_parse_param - parse a parameter. + * @fc: the filesystem context + * @param: the parameter to parse * * This function parses UBIFS mount options and returns zero in case success * and a negative error code in case of failure. */ -static int ubifs_parse_options(struct ubifs_info *c, char *options, - int is_remount) +static int ubifs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 0; - - while ((p = strsep(&options, ","))) { - int token; + struct ubifs_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE); + int opt; - if (!*p) - continue; + opt = fs_parse(fc, ubifs_fs_param_spec, param, &result); + if (opt < 0) + return opt; - token = match_token(p, tokens, args); - switch (token) { + switch (opt) { /* * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. * We accept them in order to be backward-compatible. But this * should be removed at some point. */ - case Opt_fast_unmount: - c->mount_opts.unmount_mode = 2; - break; - case Opt_norm_unmount: - c->mount_opts.unmount_mode = 1; - break; - case Opt_bulk_read: - c->mount_opts.bulk_read = 2; - c->bulk_read = 1; - break; - case Opt_no_bulk_read: - c->mount_opts.bulk_read = 1; - c->bulk_read = 0; - break; - case Opt_chk_data_crc: - c->mount_opts.chk_data_crc = 2; - c->no_chk_data_crc = 0; - break; - case Opt_no_chk_data_crc: - c->mount_opts.chk_data_crc = 1; - c->no_chk_data_crc = 1; - break; - case Opt_override_compr: - { - char *name = match_strdup(&args[0]); - - if (!name) - return -ENOMEM; - if (!strcmp(name, "none")) - c->mount_opts.compr_type = UBIFS_COMPR_NONE; - else if (!strcmp(name, "lzo")) - c->mount_opts.compr_type = UBIFS_COMPR_LZO; - else if (!strcmp(name, "zlib")) - c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; - else if (!strcmp(name, "zstd")) - c->mount_opts.compr_type = UBIFS_COMPR_ZSTD; - else { - ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready? - kfree(name); - return -EINVAL; - } - kfree(name); - c->mount_opts.override_compr = 1; - c->default_compr = c->mount_opts.compr_type; - break; - } - case Opt_assert: - { - char *act = match_strdup(&args[0]); - - if (!act) - return -ENOMEM; - if (!strcmp(act, "report")) - c->assert_action = ASSACT_REPORT; - else if (!strcmp(act, "read-only")) - c->assert_action = ASSACT_RO; - else if (!strcmp(act, "panic")) - c->assert_action = ASSACT_PANIC; - else { - ubifs_err(c, "unknown assert action \"%s\"", act); - kfree(act); - return -EINVAL; - } - kfree(act); - break; - } - case Opt_auth_key: - c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL); - if (!c->auth_key_name) - return -ENOMEM; - break; - case Opt_auth_hash_name: - c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL); - if (!c->auth_hash_name) - return -ENOMEM; - break; - case Opt_ignore: - break; - default: - { - unsigned long flag; - struct super_block *sb = c->vfs_sb; - - flag = parse_standard_option(p); - if (!flag) { - ubifs_err(c, "unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - sb->s_flags |= flag; - break; + case Opt_fast_unmount: + ctx->mount_opts.unmount_mode = 2; + break; + case Opt_norm_unmount: + ctx->mount_opts.unmount_mode = 1; + break; + case Opt_bulk_read: + ctx->mount_opts.bulk_read = 2; + ctx->bulk_read = 1; + break; + case Opt_no_bulk_read: + ctx->mount_opts.bulk_read = 1; + ctx->bulk_read = 0; + break; + case Opt_chk_data_crc: + ctx->mount_opts.chk_data_crc = 2; + ctx->no_chk_data_crc = 0; + break; + case Opt_no_chk_data_crc: + ctx->mount_opts.chk_data_crc = 1; + ctx->no_chk_data_crc = 1; + break; + case Opt_override_compr: + ctx->mount_opts.compr_type = result.uint_32; + ctx->mount_opts.override_compr = 1; + ctx->default_compr = ctx->mount_opts.compr_type; + break; + case Opt_assert: + ctx->assert_action = result.uint_32; + break; + case Opt_auth_key: + if (!is_remount) { + kfree(ctx->auth_key_name); + ctx->auth_key_name = param->string; + param->string = NULL; } + break; + case Opt_auth_hash_name: + if (!is_remount) { + kfree(ctx->auth_hash_name); + ctx->auth_hash_name = param->string; + param->string = NULL; } + break; + case Opt_ignore: + break; } return 0; } +/* + * ubifs_release_options - release mount parameters which have been dumped. + * @c: UBIFS file-system description object + */ +static void ubifs_release_options(struct ubifs_info *c) +{ + kfree(c->auth_key_name); + c->auth_key_name = NULL; + kfree(c->auth_hash_name); + c->auth_hash_name = NULL; +} + /** * destroy_journal - destroy journal data structures. * @c: UBIFS file-system description object @@ -1145,6 +1134,7 @@ static void destroy_journal(struct ubifs_info *c) bud = list_entry(c->old_buds.next, struct ubifs_bud, list); list_del(&bud->list); + kfree(bud->log_hash); kfree(bud); } ubifs_destroy_idx_gc(c); @@ -1225,6 +1215,10 @@ static int mount_ubifs(struct ubifs_info *c) if (err) return err; + err = ubifs_sysfs_register(c); + if (err) + goto out_debugging; + err = check_volume_empty(c); if (err) goto out_free; @@ -1295,7 +1289,7 @@ static int mount_ubifs(struct ubifs_info *c) err = ubifs_read_superblock(c); if (err) - goto out_free; + goto out_auth; c->probing = 0; @@ -1307,18 +1301,18 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_err(c, "'compressor \"%s\" is not compiled in", ubifs_compr_name(c, c->default_compr)); err = -ENOTSUPP; - goto out_free; + goto out_auth; } err = init_constants_sb(c); if (err) - goto out_free; + goto out_auth; sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2; c->cbuf = kmalloc(sz, GFP_NOFS); if (!c->cbuf) { err = -ENOMEM; - goto out_free; + goto out_auth; } err = alloc_wbufs(c); @@ -1328,7 +1322,7 @@ static int mount_ubifs(struct ubifs_info *c) sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); if (!c->ro_mount) { /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + c->bgt = kthread_run(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; @@ -1336,7 +1330,6 @@ static int mount_ubifs(struct ubifs_info *c) c->bgt_name, err); goto out_wbufs; } - wake_up_process(c->bgt); } err = ubifs_read_master(c); @@ -1514,8 +1507,8 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", c->leb_size, c->leb_size >> 10, c->min_io_size, c->max_write_size); - ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", - x, x >> 20, c->main_lebs, + ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), max %d LEBs, journal size %lld bytes (%lld MiB, %d LEBs)", + x, x >> 20, c->main_lebs, c->max_leb_cnt, y, y >> 20, c->log_lebs + c->max_bud_cnt); ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)", c->report_rp_size, c->report_rp_size >> 10); @@ -1536,7 +1529,7 @@ static int mount_ubifs(struct ubifs_info *c) dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); dbg_gen("index LEBs: %d", c->lst.idx_lebs); - dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("total index bytes: %llu (%llu KiB, %llu MiB)", c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, c->bi.old_idx_sz >> 20); dbg_gen("key hash type: %d", c->key_hash_type); @@ -1593,6 +1586,8 @@ out_wbufs: free_wbufs(c); out_cbuf: kfree(c->cbuf); +out_auth: + ubifs_exit_authentication(c); out_free: kfree(c->write_reserve_buf); kfree(c->bu.buf); @@ -1600,6 +1595,8 @@ out_free: vfree(c->sbuf); kfree(c->bottom_up_buf); kfree(c->sup_node); + ubifs_sysfs_unregister(c); +out_debugging: ubifs_debugging_exit(c); return err; } @@ -1632,8 +1629,7 @@ static void ubifs_umount(struct ubifs_info *c) ubifs_lpt_free(c, 0); ubifs_exit_authentication(c); - kfree(c->auth_key_name); - kfree(c->auth_hash_name); + ubifs_release_options(c); kfree(c->cbuf); kfree(c->rcvrd_mst_node); kfree(c->mst_node); @@ -1644,6 +1640,7 @@ static void ubifs_umount(struct ubifs_info *c) kfree(c->bottom_up_buf); kfree(c->sup_node); ubifs_debugging_exit(c); + ubifs_sysfs_unregister(c); } /** @@ -1740,7 +1737,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) goto out; /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + c->bgt = kthread_run(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; @@ -1748,7 +1745,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->bgt_name, err); goto out; } - wake_up_process(c->bgt); c->orph_buf = vmalloc(c->leb_size); if (!c->orph_buf) { @@ -1813,7 +1809,6 @@ out: kthread_stop(c->bgt); c->bgt = NULL; } - free_wbufs(c); kfree(c->write_reserve_buf); c->write_reserve_buf = NULL; vfree(c->ileb_buf); @@ -1951,21 +1946,27 @@ static void ubifs_put_super(struct super_block *sb) mutex_unlock(&c->umount_mutex); } -static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) +static int ubifs_reconfigure(struct fs_context *fc) { + struct ubifs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; int err; struct ubifs_info *c = sb->s_fs_info; sync_filesystem(sb); - dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); + dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, fc->sb_flags); - err = ubifs_parse_options(c, data, 1); - if (err) { - ubifs_err(c, "invalid or unknown remount parameter"); - return err; - } + /* + * Apply the mount option changes. + * auth_key_name and auth_hash_name are ignored on remount. + */ + c->mount_opts = ctx->mount_opts; + c->bulk_read = ctx->bulk_read; + c->no_chk_data_crc = ctx->no_chk_data_crc; + c->default_compr = ctx->default_compr; + c->assert_action = ctx->assert_action; - if (c->ro_mount && !(*flags & SB_RDONLY)) { + if (c->ro_mount && !(fc->sb_flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/W due to prior errors"); return -EROFS; @@ -1977,7 +1978,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) err = ubifs_remount_rw(c); if (err) return err; - } else if (!c->ro_mount && (*flags & SB_RDONLY)) { + } else if (!c->ro_mount && (fc->sb_flags & SB_RDONLY)) { if (c->ro_error) { ubifs_msg(c, "cannot re-mount R/O due to prior errors"); return -EROFS; @@ -2010,18 +2011,17 @@ const struct super_operations ubifs_super_operations = { .evict_inode = ubifs_evict_inode, .statfs = ubifs_statfs, .dirty_inode = ubifs_dirty_inode, - .remount_fs = ubifs_remount_fs, .show_options = ubifs_show_options, .sync_fs = ubifs_sync_fs, }; /** * open_ubi - parse UBI device name string and open the UBI device. - * @name: UBI volume name + * @fc: The filesystem context * @mode: UBI volume open mode * * The primary method of mounting UBIFS is by specifying the UBI volume - * character device node path. However, UBIFS may also be mounted withoug any + * character device node path. However, UBIFS may also be mounted without any * character device node using one of the following methods: * * o ubiX_Y - mount UBI device number X, volume Y; @@ -2034,15 +2034,13 @@ const struct super_operations ubifs_super_operations = { * returns UBI volume description object in case of success and a negative * error code in case of failure. */ -static struct ubi_volume_desc *open_ubi(const char *name, int mode) +static struct ubi_volume_desc *open_ubi(struct fs_context *fc, int mode) { struct ubi_volume_desc *ubi; + const char *name = fc->source; int dev, vol; char *endptr; - if (!name || !*name) - return ERR_PTR(-EINVAL); - /* First, try to open using the device node path method */ ubi = ubi_open_volume_path(name, mode); if (!IS_ERR(ubi)) @@ -2050,14 +2048,14 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) /* Try the "nodev" method */ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') - return ERR_PTR(-EINVAL); + goto invalid_source; /* ubi:NAME method */ if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') return ubi_open_volume_nm(0, name + 4, mode); if (!isdigit(name[3])) - return ERR_PTR(-EINVAL); + goto invalid_source; dev = simple_strtoul(name + 3, &endptr, 0); @@ -2069,7 +2067,7 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) if (*endptr == '_' && isdigit(endptr[1])) { vol = simple_strtoul(endptr + 1, &endptr, 0); if (*endptr != '\0') - return ERR_PTR(-EINVAL); + goto invalid_source; return ubi_open_volume(dev, vol, mode); } @@ -2077,7 +2075,8 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') return ubi_open_volume_nm(dev, ++endptr, mode); - return ERR_PTR(-EINVAL); +invalid_source: + return ERR_PTR(invalf(fc, "Invalid source name")); } static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) @@ -2099,6 +2098,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) mutex_init(&c->bu_mutex); mutex_init(&c->write_reserve_mutex); init_waitqueue_head(&c->cmt_wq); + init_waitqueue_head(&c->reserve_space_wq); + atomic_set(&c->need_wait_space, 0); c->buds = RB_ROOT; c->old_idx = RB_ROOT; c->size_tree = RB_ROOT; @@ -2127,9 +2128,10 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) return c; } -static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +static int ubifs_fill_super(struct super_block *sb, struct fs_context *fc) { struct ubifs_info *c = sb->s_fs_info; + struct ubifs_fs_context *ctx = fc->fs_private; struct inode *root; int err; @@ -2141,13 +2143,22 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out; } - err = ubifs_parse_options(c, data, 0); - if (err) - goto out_close; + /* Copy in parsed mount options */ + c->mount_opts = ctx->mount_opts; + c->auth_key_name = ctx->auth_key_name; + c->auth_hash_name = ctx->auth_hash_name; + c->no_chk_data_crc = ctx->no_chk_data_crc; + c->bulk_read = ctx->bulk_read; + c->default_compr = ctx->default_compr; + c->assert_action = ctx->assert_action; + + /* ubifs_info owns auth strings now */ + ctx->auth_key_name = NULL; + ctx->auth_hash_name = NULL; /* * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For - * UBIFS, I/O is not deferred, it is done immediately in readpage, + * UBIFS, I/O is not deferred, it is done immediately in read_folio, * which means the user would have to wait not just for their own I/O * but the read-ahead I/O as well i.e. completely pointless. * @@ -2159,6 +2170,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) c->vi.vol_id); if (err) goto out_close; + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; @@ -2168,9 +2181,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; -#ifdef CONFIG_UBIFS_FS_XATTR sb->s_xattr = ubifs_xattr_handlers; -#endif fscrypt_set_ops(sb, &ubifs_crypt_operations); mutex_lock(&c->umount_mutex); @@ -2187,12 +2198,17 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_umount; } + generic_set_sb_d_ops(sb); sb->s_root = d_make_root(root); if (!sb->s_root) { err = -ENOMEM; goto out_umount; } + super_set_uuid(sb, c->uuid, sizeof(c->uuid)); + super_set_sysfs_name_generic(sb, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + mutex_unlock(&c->umount_mutex); return 0; @@ -2201,46 +2217,44 @@ out_umount: out_unlock: mutex_unlock(&c->umount_mutex); out_close: + ubifs_release_options(c); ubi_close_volume(c->ubi); out: return err; } -static int sb_test(struct super_block *sb, void *data) +static int sb_test(struct super_block *sb, struct fs_context *fc) { - struct ubifs_info *c1 = data; + struct ubifs_info *c1 = fc->s_fs_info; struct ubifs_info *c = sb->s_fs_info; return c->vi.cdev == c1->vi.cdev; } -static int sb_set(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); -} - -static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, - const char *name, void *data) +static int ubifs_get_tree(struct fs_context *fc) { struct ubi_volume_desc *ubi; struct ubifs_info *c; struct super_block *sb; int err; - dbg_gen("name %s, flags %#x", name, flags); + if (!fc->source || !*fc->source) + return invalf(fc, "No source specified"); + + dbg_gen("name %s, flags %#x", fc->source, fc->sb_flags); /* * Get UBI device number and volume ID. Mount it read-only so far * because this might be a new mount point, and UBI allows only one * read-write user at a time. */ - ubi = open_ubi(name, UBI_READONLY); + ubi = open_ubi(fc, UBI_READONLY); if (IS_ERR(ubi)) { - if (!(flags & SB_SILENT)) + err = PTR_ERR(ubi); + if (!(fc->sb_flags & SB_SILENT)) pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d", - current->pid, name, (int)PTR_ERR(ubi)); - return ERR_CAST(ubi); + current->pid, fc->source, err); + return err; } c = alloc_ubifs_info(ubi); @@ -2248,10 +2262,11 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, err = -ENOMEM; goto out_close; } + fc->s_fs_info = c; dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, sb_test, sb_set, flags, c); + sb = sget_fc(fc, sb_test, set_anon_super_fc); if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); @@ -2263,12 +2278,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if (!!(flags & SB_RDONLY) != c1->ro_mount) { + if (!!(fc->sb_flags & SB_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } } else { - err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); + err = ubifs_fill_super(sb, fc); if (err) goto out_deact; /* We do not support atime */ @@ -2282,13 +2297,14 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, /* 'fill_super()' opens ubi again so we must close it here */ ubi_close_volume(ubi); - return dget(sb->s_root); + fc->root = dget(sb->s_root); + return 0; out_deact: deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); - return ERR_PTR(err); + return err; } static void kill_ubifs_super(struct super_block *s) @@ -2298,10 +2314,61 @@ static void kill_ubifs_super(struct super_block *s) kfree(c); } +static void ubifs_free_fc(struct fs_context *fc) +{ + struct ubifs_fs_context *ctx = fc->fs_private; + + if (ctx) { + kfree(ctx->auth_key_name); + kfree(ctx->auth_hash_name); + kfree(ctx); + } +} + +static const struct fs_context_operations ubifs_context_ops = { + .free = ubifs_free_fc, + .parse_param = ubifs_parse_param, + .get_tree = ubifs_get_tree, + .reconfigure = ubifs_reconfigure, +}; + +static int ubifs_init_fs_context(struct fs_context *fc) +{ + struct ubifs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct ubifs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) { + /* Iniitialize for first mount */ + ctx->no_chk_data_crc = 1; + ctx->assert_action = ASSACT_RO; + } else { + struct ubifs_info *c = fc->root->d_sb->s_fs_info; + + /* + * Preserve existing options across remounts. + * auth_key_name and auth_hash_name are not remountable. + */ + ctx->mount_opts = c->mount_opts; + ctx->bulk_read = c->bulk_read; + ctx->no_chk_data_crc = c->no_chk_data_crc; + ctx->default_compr = c->default_compr; + ctx->assert_action = c->assert_action; + } + + fc->ops = &ubifs_context_ops; + fc->fs_private = ctx; + + return 0; +} + static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, - .mount = ubifs_mount, + .init_fs_context = ubifs_init_fs_context, + .parameters = ubifs_fs_param_spec, .kill_sb = kill_ubifs_super, }; MODULE_ALIAS_FS("ubifs"); @@ -2317,7 +2384,7 @@ static void inode_slab_ctor(void *obj) static int __init ubifs_init(void) { - int err; + int err = -ENOMEM; BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); @@ -2378,34 +2445,45 @@ static int __init ubifs_init(void) ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", sizeof(struct ubifs_inode), 0, - SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT | - SLAB_ACCOUNT, &inode_slab_ctor); + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, + &inode_slab_ctor); if (!ubifs_inode_slab) return -ENOMEM; - err = register_shrinker(&ubifs_shrinker_info); - if (err) + ubifs_shrinker_info = shrinker_alloc(0, "ubifs-slab"); + if (!ubifs_shrinker_info) goto out_slab; + ubifs_shrinker_info->count_objects = ubifs_shrink_count; + ubifs_shrinker_info->scan_objects = ubifs_shrink_scan; + + shrinker_register(ubifs_shrinker_info); + err = ubifs_compressors_init(); if (err) goto out_shrinker; dbg_debugfs_init(); + err = ubifs_sysfs_init(); + if (err) + goto out_dbg; + err = register_filesystem(&ubifs_fs_type); if (err) { pr_err("UBIFS error (pid %d): cannot register file system, error %d", current->pid, err); - goto out_dbg; + goto out_sysfs; } return 0; +out_sysfs: + ubifs_sysfs_exit(); out_dbg: dbg_debugfs_exit(); ubifs_compressors_exit(); out_shrinker: - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); out_slab: kmem_cache_destroy(ubifs_inode_slab); return err; @@ -2419,8 +2497,9 @@ static void __exit ubifs_exit(void) WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); + ubifs_sysfs_exit(); ubifs_compressors_exit(); - unregister_shrinker(&ubifs_shrinker_info); + shrinker_free(ubifs_shrinker_info); /* * Make sure all delayed rcu free inodes are flushed before we diff --git a/fs/ubifs/sysfs.c b/fs/ubifs/sysfs.c new file mode 100644 index 000000000000..aae32222f11b --- /dev/null +++ b/fs/ubifs/sysfs.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file is part of UBIFS. + * + * Copyright (C) 2021 Cisco Systems + * + * Author: Stefan Schaeckeler + */ + + +#include <linux/fs.h> +#include "ubifs.h" + +enum attr_id_t { + attr_errors_magic, + attr_errors_node, + attr_errors_crc, +}; + +struct ubifs_attr { + struct attribute attr; + enum attr_id_t attr_id; +}; + +#define UBIFS_ATTR(_name, _mode, _id) \ +static struct ubifs_attr ubifs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ +} + +#define UBIFS_ATTR_FUNC(_name, _mode) UBIFS_ATTR(_name, _mode, _name) + +UBIFS_ATTR_FUNC(errors_magic, 0444); +UBIFS_ATTR_FUNC(errors_crc, 0444); +UBIFS_ATTR_FUNC(errors_node, 0444); + +#define ATTR_LIST(name) (&ubifs_attr_##name.attr) + +static struct attribute *ubifs_attrs[] = { + ATTR_LIST(errors_magic), + ATTR_LIST(errors_node), + ATTR_LIST(errors_crc), + NULL, +}; +ATTRIBUTE_GROUPS(ubifs); + +static ssize_t ubifs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ubifs_info *sbi = container_of(kobj, struct ubifs_info, + kobj); + + struct ubifs_attr *a = container_of(attr, struct ubifs_attr, attr); + + switch (a->attr_id) { + case attr_errors_magic: + return sysfs_emit(buf, "%u\n", sbi->stats->magic_errors); + case attr_errors_node: + return sysfs_emit(buf, "%u\n", sbi->stats->node_errors); + case attr_errors_crc: + return sysfs_emit(buf, "%u\n", sbi->stats->crc_errors); + } + return 0; +}; + +static void ubifs_sb_release(struct kobject *kobj) +{ + struct ubifs_info *c = container_of(kobj, struct ubifs_info, kobj); + + complete(&c->kobj_unregister); +} + +static const struct sysfs_ops ubifs_attr_ops = { + .show = ubifs_attr_show, +}; + +static const struct kobj_type ubifs_sb_ktype = { + .default_groups = ubifs_groups, + .sysfs_ops = &ubifs_attr_ops, + .release = ubifs_sb_release, +}; + +static const struct kobj_type ubifs_ktype = { + .sysfs_ops = &ubifs_attr_ops, +}; + +static struct kset ubifs_kset = { + .kobj = {.ktype = &ubifs_ktype}, +}; + +int ubifs_sysfs_register(struct ubifs_info *c) +{ + int ret, n; + char dfs_dir_name[UBIFS_DFS_DIR_LEN]; + + c->stats = kzalloc(sizeof(struct ubifs_stats_info), GFP_KERNEL); + if (!c->stats) { + ret = -ENOMEM; + goto out_last; + } + n = snprintf(dfs_dir_name, UBIFS_DFS_DIR_LEN, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + + if (n >= UBIFS_DFS_DIR_LEN) { + /* The array size is too small */ + ret = -EINVAL; + goto out_free; + } + + c->kobj.kset = &ubifs_kset; + init_completion(&c->kobj_unregister); + + ret = kobject_init_and_add(&c->kobj, &ubifs_sb_ktype, NULL, + "%s", dfs_dir_name); + if (ret) + goto out_put; + + return 0; + +out_put: + kobject_put(&c->kobj); + wait_for_completion(&c->kobj_unregister); +out_free: + kfree(c->stats); +out_last: + ubifs_err(c, "cannot create sysfs entry for ubifs%d_%d, error %d\n", + c->vi.ubi_num, c->vi.vol_id, ret); + return ret; +} + +void ubifs_sysfs_unregister(struct ubifs_info *c) +{ + kobject_del(&c->kobj); + kobject_put(&c->kobj); + wait_for_completion(&c->kobj_unregister); + + kfree(c->stats); +} + +int __init ubifs_sysfs_init(void) +{ + int ret; + + kobject_set_name(&ubifs_kset.kobj, "ubifs"); + ubifs_kset.kobj.parent = fs_kobj; + ret = kset_register(&ubifs_kset); + if (ret) + kset_put(&ubifs_kset); + + return ret; +} + +void ubifs_sysfs_exit(void) +{ + kset_unregister(&ubifs_kset); +} diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e8e7b0e9532e..33946b518148 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -44,6 +44,34 @@ enum { NOT_ON_MEDIA = 3, }; +static void do_insert_old_idx(struct ubifs_info *c, + struct ubifs_old_idx *old_idx) +{ + struct ubifs_old_idx *o; + struct rb_node **p, *parent = NULL; + + p = &c->old_idx.rb_node; + while (*p) { + parent = *p; + o = rb_entry(parent, struct ubifs_old_idx, rb); + if (old_idx->lnum < o->lnum) + p = &(*p)->rb_left; + else if (old_idx->lnum > o->lnum) + p = &(*p)->rb_right; + else if (old_idx->offs < o->offs) + p = &(*p)->rb_left; + else if (old_idx->offs > o->offs) + p = &(*p)->rb_right; + else { + ubifs_err(c, "old idx added twice!"); + kfree(old_idx); + return; + } + } + rb_link_node(&old_idx->rb, parent, p); + rb_insert_color(&old_idx->rb, &c->old_idx); +} + /** * insert_old_idx - record an index node obsoleted since the last commit start. * @c: UBIFS file-system description object @@ -69,35 +97,15 @@ enum { */ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) { - struct ubifs_old_idx *old_idx, *o; - struct rb_node **p, *parent = NULL; + struct ubifs_old_idx *old_idx; old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); if (unlikely(!old_idx)) return -ENOMEM; old_idx->lnum = lnum; old_idx->offs = offs; + do_insert_old_idx(c, old_idx); - p = &c->old_idx.rb_node; - while (*p) { - parent = *p; - o = rb_entry(parent, struct ubifs_old_idx, rb); - if (lnum < o->lnum) - p = &(*p)->rb_left; - else if (lnum > o->lnum) - p = &(*p)->rb_right; - else if (offs < o->offs) - p = &(*p)->rb_left; - else if (offs > o->offs) - p = &(*p)->rb_right; - else { - ubifs_err(c, "old idx added twice!"); - kfree(old_idx); - return 0; - } - } - rb_link_node(&old_idx->rb, parent, p); - rb_insert_color(&old_idx->rb, &c->old_idx); return 0; } @@ -199,23 +207,6 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(c, !ubifs_zn_obsolete(znode)); - __set_bit(OBSOLETE_ZNODE, &znode->flags); - - if (znode->level != 0) { - int i; - const int n = zn->child_cnt; - - /* The children now have new parent */ - for (i = 0; i < n; i++) { - struct ubifs_zbranch *zbr = &zn->zbranch[i]; - - if (zbr->znode) - zbr->znode->parent = zn; - } - } - - atomic_long_inc(&c->dirty_zn_cnt); return zn; } @@ -234,6 +225,42 @@ static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) } /** + * replace_znode - replace old znode with new znode. + * @c: UBIFS file-system description object + * @new_zn: new znode + * @old_zn: old znode + * @zbr: the branch of parent znode + * + * Replace old znode with new znode in TNC. + */ +static void replace_znode(struct ubifs_info *c, struct ubifs_znode *new_zn, + struct ubifs_znode *old_zn, struct ubifs_zbranch *zbr) +{ + ubifs_assert(c, !ubifs_zn_obsolete(old_zn)); + __set_bit(OBSOLETE_ZNODE, &old_zn->flags); + + if (old_zn->level != 0) { + int i; + const int n = new_zn->child_cnt; + + /* The children now have new parent */ + for (i = 0; i < n; i++) { + struct ubifs_zbranch *child = &new_zn->zbranch[i]; + + if (child->znode) + child->znode->parent = new_zn; + } + } + + zbr->znode = new_zn; + zbr->lnum = 0; + zbr->offs = 0; + zbr->len = 0; + + atomic_long_inc(&c->dirty_zn_cnt); +} + +/** * dirty_cow_znode - ensure a znode is not being committed. * @c: UBIFS file-system description object * @zbr: branch of znode to check @@ -265,21 +292,32 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, return zn; if (zbr->len) { - err = insert_old_idx(c, zbr->lnum, zbr->offs); - if (unlikely(err)) - return ERR_PTR(err); + struct ubifs_old_idx *old_idx; + + old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); + if (unlikely(!old_idx)) { + err = -ENOMEM; + goto out; + } + old_idx->lnum = zbr->lnum; + old_idx->offs = zbr->offs; + err = add_idx_dirt(c, zbr->lnum, zbr->len); - } else - err = 0; + if (err) { + kfree(old_idx); + goto out; + } - zbr->znode = zn; - zbr->lnum = 0; - zbr->offs = 0; - zbr->len = 0; + do_insert_old_idx(c, old_idx); + } + + replace_znode(c, zn, znode, zbr); - if (unlikely(err)) - return ERR_PTR(err); return zn; + +out: + kfree(zn); + return ERR_PTR(err); } /** @@ -316,7 +354,7 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, dent); if (err) { dump_stack(); - ubifs_dump_node(c, dent); + ubifs_dump_node(c, dent, zbr->len); return err; } @@ -349,7 +387,7 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, node); if (err) { dump_stack(); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return err; } @@ -360,7 +398,6 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, /** * lnc_free - remove a leaf node from the leaf node cache. * @zbr: zbranch of leaf node - * @node: leaf node */ static void lnc_free(struct ubifs_zbranch *zbr) { @@ -378,7 +415,7 @@ static void lnc_free(struct ubifs_zbranch *zbr) * * This function reads a "hashed" node defined by @zbr from the leaf node cache * (in it is there) or from the hash media, in which case the node is also - * added to LNC. Returns zero in case of success or a negative negative error + * added to LNC. Returns zero in case of success or a negative error * code in case of failure. */ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, @@ -892,7 +929,7 @@ static int fallible_resolve_collision(struct ubifs_info *c, int adding) { struct ubifs_znode *o_znode = NULL, *znode = *zn; - int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; + int o_n, err, cmp, unsure = 0, nn = *n; cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); if (unlikely(cmp < 0)) @@ -1514,8 +1551,8 @@ out: */ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) { - int n, err = 0, lnum = -1, uninitialized_var(offs); - int uninitialized_var(len); + int n, err = 0, lnum = -1, offs; + int len; unsigned int block = key_block(c, &bu->key); struct ubifs_znode *znode; @@ -1700,7 +1737,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf, goto out_err; } - err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); + err = ubifs_check_node(c, buf, zbr->len, zbr->lnum, zbr->offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE); goto out; @@ -1734,7 +1771,7 @@ out_err: err = -EINVAL; out: ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, zbr->len); dump_stack(); return err; } @@ -2885,6 +2922,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) err = PTR_ERR(xent); if (err == -ENOENT) break; + kfree(pxent); return err; } @@ -2892,12 +2930,11 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) dbg_tnc("xent '%s', ino %lu", xent->name, (unsigned long)xattr_inum); - ubifs_evict_xattr_inode(c, xattr_inum); - fname_name(&nm) = xent->name; fname_len(&nm) = le16_to_cpu(xent->nlen); err = ubifs_tnc_remove_nm(c, &key1, &nm); if (err) { + kfree(pxent); kfree(xent); return err; } @@ -2906,6 +2943,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) highest_ino_key(c, &key2, xattr_inum); err = ubifs_tnc_remove_range(c, &key1, &key2); if (err) { + kfree(pxent); kfree(xent); return err; } @@ -3051,6 +3089,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c) cnext = cnext->cnext; if (ubifs_zn_obsolete(znode)) kfree(znode); + else if (!ubifs_zn_cow(znode)) { + /* + * Don't forget to update clean znode count after + * committing failed, because ubifs will check this + * count while closing tnc. Non-obsolete znode could + * be re-dirtied during committing process, so dirty + * flag is untrustable. The flag 'COW_ZNODE' is set + * for each dirty znode before committing, and it is + * cleared as long as the znode become clean, so we + * can statistic clean znode count according to this + * flag. + */ + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } } while (cnext && cnext != c->cnext); } @@ -3061,14 +3114,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) void ubifs_tnc_close(struct ubifs_info *c) { tnc_destroy_cnext(c); - if (c->zroot.znode) { - long n, freed; - - n = atomic_long_read(&c->clean_zn_cnt); - freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode); - ubifs_assert(c, freed == n); - atomic_long_sub(n, &ubifs_clean_zn_cnt); - } + ubifs_destroy_tnc_tree(c); kfree(c->gap_lebs); kfree(c->ilebs); destroy_old_idx(c); @@ -3466,7 +3512,7 @@ out_unlock: /** * dbg_check_inode_size - check if inode size is correct. * @c: UBIFS file-system description object - * @inum: inode number + * @inode: inode to check * @size: inode size * * This function makes sure that the inode size (@size) is correct and it does diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 234be1c4dc87..7c43e0ccf6d4 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -657,6 +657,8 @@ static int get_znodes_to_commit(struct ubifs_info *c) znode->alt = 0; cnext = find_next_dirty(znode); if (!cnext) { + ubifs_assert(c, !znode->parent); + znode->cparent = NULL; znode->cnext = c->cnext; break; } @@ -700,7 +702,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_is_chk_index(c) && !(prandom_u32() & 7)) + if (dbg_is_chk_index(c) && !get_random_u32_below(8)) return -ENOSPC; return 0; } @@ -930,7 +932,7 @@ static int write_index(struct ubifs_info *c) * flag cleared before %COW_ZNODE. Specifically, it matters in * the 'dirty_cow_znode()' function. This is the reason for the * first barrier. Also, we want the bit changes to be seen to - * other threads ASAP, to avoid unnecesarry copying, which is + * other threads ASAP, to avoid unnecessary copying, which is * the reason for the second barrier. */ clear_bit(DIRTY_ZNODE, &znode->flags); diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index 49cb34c3f324..10b222dc6a53 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -126,8 +126,8 @@ int ubifs_search_zbranch(const struct ubifs_info *c, const struct ubifs_znode *znode, const union ubifs_key *key, int *n) { - int beg = 0, end = znode->child_cnt, uninitialized_var(mid); - int uninitialized_var(cmp); + int beg = 0, end = znode->child_cnt, mid; + int cmp; const struct ubifs_zbranch *zbr = &znode->zbranch[0]; ubifs_assert(c, end > beg); @@ -251,6 +251,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c, } /** + * ubifs_destroy_tnc_tree - destroy all znodes connected to the TNC tree. + * @c: UBIFS file-system description object + * + * This function destroys the whole TNC tree and updates clean global znode + * count. + */ +void ubifs_destroy_tnc_tree(struct ubifs_info *c) +{ + long n, freed; + + if (!c->zroot.znode) + return; + + n = atomic_long_read(&c->clean_zn_cnt); + freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode); + ubifs_assert(c, freed == n); + atomic_long_sub(n, &ubifs_clean_zn_cnt); + + c->zroot.znode = NULL; +} + +/** * read_znode - read an indexing node from flash and fill znode. * @c: UBIFS file-system description object * @zzbr: the zbranch describing the node to read @@ -299,7 +321,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, c->fanout, znode->child_cnt); ubifs_err(c, "max levels %d, znode level %d", UBIFS_MAX_LEVELS, znode->level); - err = 1; goto out_dump; } @@ -320,7 +341,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, zbr->lnum >= c->leb_cnt || zbr->offs < 0 || zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { ubifs_err(c, "bad branch %d", i); - err = 2; goto out_dump; } @@ -333,7 +353,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, default: ubifs_err(c, "bad key type at slot %d: %d", i, key_type(c, &zbr->key)); - err = 3; goto out_dump; } @@ -346,7 +365,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, ubifs_err(c, "bad target node (type %d) length (%d)", type, zbr->len); ubifs_err(c, "have to be %d", c->ranges[type].len); - err = 4; goto out_dump; } } else if (zbr->len < c->ranges[type].min_len || @@ -356,7 +374,6 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, ubifs_err(c, "have to be in range of %d-%d", c->ranges[type].min_len, c->ranges[type].max_len); - err = 5; goto out_dump; } } @@ -374,13 +391,11 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, cmp = keys_cmp(c, key1, key2); if (cmp > 0) { ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1); - err = 6; goto out_dump; } else if (cmp == 0 && !is_hash_key(c, key1)) { /* These can only be keys with colliding hash */ ubifs_err(c, "keys %d and %d are not hashed but equivalent", i, i + 1); - err = 7; goto out_dump; } } @@ -389,8 +404,8 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, return 0; out_dump: - ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err); - ubifs_dump_node(c, idx); + ubifs_err(c, "bad indexing node at LEB %d:%d", lnum, offs); + ubifs_dump_node(c, idx, c->max_idx_node_sz); kfree(idx); return -EINVAL; } @@ -455,8 +470,7 @@ out: * @node: node is returned here * * This function reads a node defined by @zbr from the flash media. Returns - * zero in case of success or a negative negative error code in case of - * failure. + * zero in case of success or a negative error code in case of failure. */ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *node) @@ -489,7 +503,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, zbr->lnum, zbr->offs); dbg_tnck(key, "looked for key "); dbg_tnck(&key1, "but found node's key "); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return -EINVAL; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index bff682309fbe..118392aa9f2a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -27,9 +27,11 @@ #include <linux/security.h> #include <linux/xattr.h> #include <linux/random.h> +#include <linux/sysfs.h> +#include <linux/completion.h> #include <crypto/hash_info.h> #include <crypto/hash.h> -#include <crypto/algapi.h> +#include <crypto/utils.h> #include <linux/fscrypt.h> @@ -122,15 +124,8 @@ #define OLD_ZNODE_AGE 20 #define YOUNG_ZNODE_AGE 5 -/* - * Some compressors, like LZO, may end up with more data then the input buffer. - * So UBIFS always allocates larger output buffer, to be sure the compressor - * will not corrupt memory in case of worst case compression. - */ -#define WORST_COMPR_FACTOR 2 - #ifdef CONFIG_FS_ENCRYPTION -#define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE +#define UBIFS_CIPHER_BLOCK_SIZE FSCRYPT_CONTENTS_ALIGNMENT #else #define UBIFS_CIPHER_BLOCK_SIZE 0 #endif @@ -139,7 +134,7 @@ * How much memory is needed for a buffer where we compress a data node. */ #define COMPRESSED_DATA_NODE_BUF_SZ \ - (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) + (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) /* Maximum expected tree height for use by bottom_up_buf */ #define BOTTOM_UP_HEIGHT 64 @@ -268,6 +263,8 @@ enum { ASSACT_PANIC, }; +struct folio; + /** * struct ubifs_old_idx - index node obsoleted since last commit start. * @rb: rb-tree node @@ -356,6 +353,7 @@ struct ubifs_gced_idx_leb { * @ui_mutex: serializes inode write-back with the rest of VFS operations, * serializes "clean <-> dirty" state changes, serializes bulk-read, * protects @dirty, @bulk_read, @ui_size, and @xattr_size + * @xattr_sem: serilizes write operations (remove|set|create) on xattr * @ui_lock: protects @synced_i_size * @synced_i_size: synchronized size of inode, i.e. the value of inode size * currently stored on the flash; used only for regular file @@ -367,11 +365,12 @@ struct ubifs_gced_idx_leb { * @read_in_a_row: number of consecutive pages read in a row (for bulk read) * @data_len: length of the data attached to the inode * @data: inode's data + * @i_crypt_info: inode's fscrypt information * * @ui_mutex exists for two main reasons. At first it prevents inodes from * being written back while UBIFS changing them, being in the middle of an VFS * operation. This way UBIFS makes sure the inode fields are consistent. For - * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and + * example, in 'ubifs_rename()' we change 4 inodes simultaneously, and * write-back must not write any of them before we have finished. * * The second reason is budgeting - UBIFS has to budget all operations. If an @@ -409,6 +408,7 @@ struct ubifs_inode { unsigned int bulk_read:1; unsigned int compr_type:2; struct mutex ui_mutex; + struct rw_semaphore xattr_sem; spinlock_t ui_lock; loff_t synced_i_size; loff_t ui_size; @@ -417,6 +417,9 @@ struct ubifs_inode { pgoff_t read_in_a_row; int data_len; void *data; +#ifdef CONFIG_FS_ENCRYPTION + struct fscrypt_inode_info *i_crypt_info; +#endif }; /** @@ -831,16 +834,12 @@ struct ubifs_node_range { * struct ubifs_compressor - UBIFS compressor description structure. * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) * @cc: cryptoapi compressor handle - * @comp_mutex: mutex used during compression - * @decomp_mutex: mutex used during decompression * @name: compressor name * @capi_name: cryptoapi compressor name */ struct ubifs_compressor { int compr_type; - struct crypto_comp *cc; - struct mutex *comp_mutex; - struct mutex *decomp_mutex; + struct crypto_acomp *cc; const char *name; const char *capi_name; }; @@ -912,8 +911,6 @@ struct ubifs_budget_req { * @rb: rb-tree node of rb-tree of orphans sorted by inode number * @list: list head of list of orphans in order added * @new_list: list head of list of orphans added since the last commit - * @child_list: list of xattr childs if this orphan hosts xattrs, list head - * if this orphan is a xattr, not used otherwise. * @cnext: next orphan to commit * @dnext: next orphan to delete * @inum: inode number @@ -925,7 +922,6 @@ struct ubifs_orphan { struct rb_node rb; struct list_head list; struct list_head new_list; - struct list_head child_list; struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; @@ -988,6 +984,18 @@ struct ubifs_budg_info { int dent_budget; }; +/** + * ubifs_stats_info - per-FS statistics information. + * @magic_errors: number of bad magic numbers (will be reset with a new mount). + * @node_errors: number of bad nodes (will be reset with a new mount). + * @crc_errors: number of bad crcs (will be reset with a new mount). + */ +struct ubifs_stats_info { + unsigned int magic_errors; + unsigned int node_errors; + unsigned int crc_errors; +}; + struct ubifs_debug_info; /** @@ -1024,6 +1032,8 @@ struct ubifs_debug_info; * @bg_bud_bytes: number of bud bytes when background commit is initiated * @old_buds: buds to be released after commit ends * @max_bud_cnt: maximum number of buds + * @need_wait_space: Non %0 means space reservation tasks need to wait in queue + * @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0 * * @commit_sem: synchronizes committer with other processes * @cmt_state: commit state @@ -1249,6 +1259,10 @@ struct ubifs_debug_info; * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information + * @stats: statistics exported over sysfs + * + * @kobj: kobject for /sys/fs/ubifs/ + * @kobj_unregister: completion to unregister sysfs kobject */ struct ubifs_info { struct super_block *vfs_sb; @@ -1278,12 +1292,17 @@ struct ubifs_info { long long bg_bud_bytes; struct list_head old_buds; int max_bud_cnt; + atomic_t need_wait_space; + wait_queue_head_t reserve_space_wq; struct rw_semaphore commit_sem; int cmt_state; spinlock_t cs_lock; wait_queue_head_t cmt_wq; + struct kobject kobj; + struct completion kobj_unregister; + unsigned int big_lpt:1; unsigned int space_fixup:1; unsigned int double_hash:1; @@ -1491,6 +1510,7 @@ struct ubifs_info { struct ubifs_mount_opts mount_opts; struct ubifs_debug_info *dbg; + struct ubifs_stats_info *stats; }; extern struct list_head ubifs_infos; @@ -1504,6 +1524,7 @@ extern const struct file_operations ubifs_dir_operations; extern const struct inode_operations ubifs_dir_inode_operations; extern const struct inode_operations ubifs_symlink_inode_operations; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; +extern int ubifs_default_version; /* auth.c */ static inline int ubifs_authenticated(const struct ubifs_info *c) @@ -1591,8 +1612,13 @@ static inline int ubifs_check_hmac(const struct ubifs_info *c, return crypto_memneq(expected, got, c->hmac_desc_len); } +#ifdef CONFIG_UBIFS_FS_AUTHENTICATION void ubifs_bad_hash(const struct ubifs_info *c, const void *node, const u8 *hash, int lnum, int offs); +#else +static inline void ubifs_bad_hash(const struct ubifs_info *c, const void *node, + const u8 *hash, int lnum, int offs) {}; +#endif int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf, const u8 *expected); @@ -1718,10 +1744,10 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs); int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, int offs, int hmac_offs); -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc); void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad); -void ubifs_crc_node(struct ubifs_info *c, void *buf, int len); +void ubifs_crc_node(void *buf, int len); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len, int hmac_offs, int pad); @@ -1762,9 +1788,10 @@ int ubifs_consolidate_log(struct ubifs_info *c); /* journal.c */ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, const struct fscrypt_name *nm, const struct inode *inode, - int deletion, int xent); + int deletion, int xent, int in_orphan); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, - const union ubifs_key *key, const void *buf, int len); + const union ubifs_key *key, struct folio *folio, + size_t offset, int len); int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir, @@ -1779,7 +1806,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *new_dir, const struct inode *new_inode, const struct fscrypt_name *new_nm, - const struct inode *whiteout, int sync); + const struct inode *whiteout, int sync, int delete_orphan); int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, loff_t old_size, loff_t new_size); int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, @@ -1866,6 +1893,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c, struct ubifs_znode *znode); long ubifs_destroy_tnc_subtree(const struct ubifs_info *c, struct ubifs_znode *zr); +void ubifs_destroy_tnc_tree(struct ubifs_info *c); struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr, struct ubifs_znode *parent, int iip); @@ -1988,30 +2016,30 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); -int ubifs_setattr(struct dentry *dentry, struct iattr *attr); -int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags); +int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); +int ubifs_update_time(struct inode *inode, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, - umode_t mode); -int ubifs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); + umode_t mode, bool is_xattr); +int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ -extern const struct xattr_handler *ubifs_xattr_handlers[]; -ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_xattr_set(struct inode *host, const char *name, const void *value, size_t size, int flags, bool check_lock); ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, size_t size); #ifdef CONFIG_UBIFS_FS_XATTR -void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum); +extern const struct xattr_handler * const ubifs_xattr_handlers[]; +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_purge_xattrs(struct inode *host); #else -static inline void ubifs_evict_xattr_inode(struct ubifs_info *c, - ino_t xattr_inum) { } +#define ubifs_listxattr NULL +#define ubifs_xattr_handlers NULL static inline int ubifs_purge_xattrs(struct inode *host) { return 0; @@ -2049,6 +2077,9 @@ int ubifs_recover_size(struct ubifs_info *c, bool in_place); void ubifs_destroy_size_tree(struct ubifs_info *c); /* ioctl.c */ +int ubifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); +int ubifs_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void ubifs_set_inode_flags(struct inode *inode); #ifdef CONFIG_COMPAT @@ -2060,8 +2091,20 @@ int __init ubifs_compressors_init(void); void ubifs_compressors_exit(void); void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type); +void ubifs_compress_folio(const struct ubifs_info *c, struct folio *folio, + size_t offset, int in_len, void *out_buf, + int *out_len, int *compr_type); int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len, void *out, int *out_len, int compr_type); +int ubifs_decompress_folio(const struct ubifs_info *c, const void *buf, + int len, struct folio *folio, size_t offset, + int *out_len, int compr_type); + +/* sysfs.c */ +int ubifs_sysfs_init(void); +void ubifs_sysfs_exit(void); +int ubifs_sysfs_register(struct ubifs_info *c); +void ubifs_sysfs_unregister(struct ubifs_info *c); #include "debug.h" #include "misc.h" diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 9aefbb60074f..c21a0c2b3e90 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -48,19 +48,6 @@ #include <linux/slab.h> #include <linux/xattr.h> -/* - * Extended attribute type constants. - * - * USER_XATTR: user extended attribute ("user.*") - * TRUSTED_XATTR: trusted extended attribute ("trusted.*) - * SECURITY_XATTR: security extended attribute ("security.*") - */ -enum { - USER_XATTR, - TRUSTED_XATTR, - SECURITY_XATTR, -}; - static const struct inode_operations empty_iops; static const struct file_operations empty_fops; @@ -110,7 +97,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (err) return err; - inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); + inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_budg; @@ -134,7 +121,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -149,7 +136,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, if (strcmp(fname_name(nm), UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT) == 0) host_ui->flags |= UBIFS_CRYPT_FL; - err = ubifs_jnl_update(c, host, nm, inode, 0, 1); + err = ubifs_jnl_update(c, host, nm, inode, 0, 1, 0); if (err) goto out_cancel; ubifs_set_inode_flags(host); @@ -208,16 +195,14 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, err = -ENOMEM; goto out_free; } - mutex_lock(&ui->ui_mutex); kfree(ui->data); ui->data = buf; inode->i_size = ui->ui_size = size; old_size = ui->data_len; ui->data_len = size; - mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -285,6 +270,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value, if (!xent) return -ENOMEM; + down_write(&ubifs_inode(host)->xattr_sem); /* * The extended attribute entries are stored in LNC, so multiple * look-ups do not involve reading the flash. @@ -319,6 +305,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value, iput(inode); out_free: + up_write(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; } @@ -341,25 +328,25 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, if (!xent) return -ENOMEM; + down_read(&ubifs_inode(host)->xattr_sem); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { if (err == -ENOENT) err = -ENODATA; - goto out_unlock; + goto out_cleanup; } inode = iget_xattr(c, le64_to_cpu(xent->inum)); if (IS_ERR(inode)) { err = PTR_ERR(inode); - goto out_unlock; + goto out_cleanup; } ui = ubifs_inode(inode); ubifs_assert(c, inode->i_size == ui->data_len); ubifs_assert(c, ubifs_inode(host)->xattr_size > ui->data_len); - mutex_lock(&ui->ui_mutex); if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { @@ -372,9 +359,9 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, err = ui->data_len; out_iput: - mutex_unlock(&ui->ui_mutex); iput(inode); -out_unlock: +out_cleanup: + up_read(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; } @@ -406,16 +393,21 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, dentry, size); + down_read(&host_ui->xattr_sem); len = host_ui->xattr_names + host_ui->xattr_cnt; - if (!buffer) + if (!buffer) { /* * We should return the minimum buffer size which will fit a * null-terminated list of all the extended attribute names. */ - return len; + err = len; + goto out_err; + } - if (len > size) - return -ERANGE; + if (len > size) { + err = -ERANGE; + goto out_err; + } lowest_xent_key(c, &key, host->i_ino); while (1) { @@ -437,8 +429,9 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) pxent = xent; key_read(c, &xent->key, &key); } - kfree(pxent); + up_read(&host_ui->xattr_sem); + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; @@ -446,6 +439,10 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) ubifs_assert(c, written <= size); return written; + +out_err: + up_read(&host_ui->xattr_sem); + return err; } static int remove_xattr(struct ubifs_info *c, struct inode *host, @@ -464,7 +461,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = current_time(host); + inode_set_ctime_current(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); @@ -498,12 +495,13 @@ int ubifs_purge_xattrs(struct inode *host) struct fscrypt_name nm = {0}; int err; - if (ubifs_inode(host)->xattr_cnt < ubifs_xattr_max_cnt(c)) + if (ubifs_inode(host)->xattr_cnt <= ubifs_xattr_max_cnt(c)) return 0; ubifs_warn(c, "inode %lu has too many xattrs, doing a non-atomic deletion", host->i_ino); + down_write(&ubifs_inode(host)->xattr_sem); lowest_xent_key(c, &key, host->i_ino); while (1) { xent = ubifs_tnc_next_ent(c, &key, &nm); @@ -521,57 +519,38 @@ int ubifs_purge_xattrs(struct inode *host) ubifs_err(c, "dead directory entry '%s', error %d", xent->name, err); ubifs_ro_mode(c, err); - kfree(pxent); - return err; + goto out_err; } ubifs_assert(c, ubifs_inode(xino)->xattr); clear_nlink(xino); err = remove_xattr(c, host, xino, &nm); + iput(xino); if (err) { - kfree(pxent); - iput(xino); ubifs_err(c, "cannot remove xattr, error %d", err); - return err; + goto out_err; } - iput(xino); - kfree(pxent); pxent = xent; key_read(c, &xent->key, &key); } - kfree(pxent); + up_write(&ubifs_inode(host)->xattr_sem); + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; } return 0; -} - -/** - * ubifs_evict_xattr_inode - Evict an xattr inode. - * @c: UBIFS file-system description object - * @xattr_inum: xattr inode number - * - * When an inode that hosts xattrs is being removed we have to make sure - * that cached inodes of the xattrs also get removed from the inode cache - * otherwise we'd waste memory. This function looks up an inode from the - * inode cache and clears the link counter such that iput() will evict - * the inode. - */ -void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum) -{ - struct inode *inode; - inode = ilookup(c->vfs_sb, xattr_inum); - if (inode) { - clear_nlink(inode); - iput(inode); - } +out_err: + kfree(pxent); + kfree(xent); + up_write(&ubifs_inode(host)->xattr_sem); + return err; } static int ubifs_xattr_remove(struct inode *host, const char *name) @@ -592,6 +571,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) if (!xent) return -ENOMEM; + down_write(&ubifs_inode(host)->xattr_sem); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { @@ -616,6 +596,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) iput(inode); out_free: + up_write(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; } @@ -657,7 +638,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, int err; err = security_inode_init_security(inode, dentry, qstr, - &init_xattrs, 0); + &init_xattrs, NULL); if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", @@ -679,6 +660,7 @@ static int xattr_get(const struct xattr_handler *handler, } static int xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -714,7 +696,7 @@ static const struct xattr_handler ubifs_security_xattr_handler = { }; #endif -const struct xattr_handler *ubifs_xattr_handlers[] = { +const struct xattr_handler * const ubifs_xattr_handlers[] = { &ubifs_user_xattr_handler, &ubifs_trusted_xattr_handler, #ifdef CONFIG_UBIFS_FS_SECURITY |
