diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/osc')
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_cache.c | 155 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_cl_internal.h | 11 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_internal.h | 19 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_io.c | 79 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_object.c | 19 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_page.c | 98 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_request.c | 86 |
7 files changed, 370 insertions, 97 deletions
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index b0f030c6c9c9..0490478393df 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -142,10 +142,7 @@ static const char *oes_strings[] = { static inline struct osc_extent *rb_extent(struct rb_node *n) { - if (!n) - return NULL; - - return container_of(n, struct osc_extent, oe_node); + return rb_entry_safe(n, struct osc_extent, oe_node); } static inline struct osc_extent *next_extent(struct osc_extent *ext) @@ -247,7 +244,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, goto out; } - if (ext->oe_dlmlock) { + if (ext->oe_dlmlock && !ldlm_is_failed(ext->oe_dlmlock)) { struct ldlm_extent *extent; extent = &ext->oe_dlmlock->l_policy_data.l_extent; @@ -1004,6 +1001,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, env = cl_env_get(&refcheck); io = &osc_env_info(env)->oti_io; io->ci_obj = cl_object_top(osc2cl(obj)); + io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); if (rc < 0) goto out; @@ -1884,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, oap, osc, rc); } +struct extent_rpc_data { + struct list_head *erd_rpc_list; + unsigned int erd_page_count; + unsigned int erd_max_pages; + unsigned int erd_max_chunks; +}; + +static inline unsigned osc_extent_chunks(const struct osc_extent *ext) +{ + struct client_obd *cli = osc_cli(ext->oe_obj); + unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; + + return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1; +} + /** * Try to add extent to one RPC. We need to think about the following things: * - # of pages must not be over max_pages_per_rpc * - extent must be compatible with previous ones */ static int try_to_add_extent_for_io(struct client_obd *cli, - struct osc_extent *ext, struct list_head *rpclist, - unsigned int *pc, unsigned int *max_pages) + struct osc_extent *ext, + struct extent_rpc_data *data) { struct osc_extent *tmp; + unsigned int chunk_count; struct osc_async_page *oap = list_first_entry(&ext->oe_pages, struct osc_async_page, oap_pending_item); @@ -1901,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd *cli, EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), ext); - *max_pages = max(ext->oe_mppr, *max_pages); - if (*pc + ext->oe_nr_pages > *max_pages) + chunk_count = osc_extent_chunks(ext); + if (chunk_count > data->erd_max_chunks) + return 0; + + data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages); + if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages) return 0; - list_for_each_entry(tmp, rpclist, oe_link) { + list_for_each_entry(tmp, data->erd_rpc_list, oe_link) { struct osc_async_page *oap2; oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page, oap_pending_item); EASSERT(tmp->oe_owner == current, tmp); if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) { - CDEBUG(D_CACHE, "Do not permit different type of IO" - " for a same RPC\n"); + CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n"); return 0; } @@ -1926,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd *cli, break; } - *pc += ext->oe_nr_pages; - list_move_tail(&ext->oe_link, rpclist); + data->erd_max_chunks -= chunk_count; + data->erd_page_count += ext->oe_nr_pages; + list_move_tail(&ext->oe_link, data->erd_rpc_list); ext->oe_owner = current; return 1; } +static inline unsigned osc_max_write_chunks(const struct client_obd *cli) +{ + /* + * LU-8135: + * + * The maximum size of a single transaction is about 64MB in ZFS. + * #define DMU_MAX_ACCESS (64 * 1024 * 1024) + * + * Since ZFS is a copy-on-write file system, a single dirty page in + * a chunk will result in the rewrite of the whole chunk, therefore + * an RPC shouldn't be allowed to contain too many chunks otherwise + * it will make transaction size much bigger than 64MB, especially + * with big block size for ZFS. + * + * This piece of code is to make sure that OSC won't send write RPCs + * with too many chunks. The maximum chunk size that an RPC can cover + * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally + * OST should tell the client what the biggest transaction size is, + * but it's good enough for now. + * + * This limitation doesn't apply to ldiskfs, which allows as many + * chunks in one RPC as we want. However, it won't have any benefits + * to have too many discontiguous pages in one RPC. Therefore, it + * can only have 256 chunks at most in one RPC. + */ + return min(PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits, 256); +} + /** * In order to prevent multiple ptlrpcd from breaking contiguous extents, * get_write_extent() takes all appropriate extents in atomic. @@ -1951,26 +1997,28 @@ static unsigned int get_write_extents(struct osc_object *obj, struct client_obd *cli = osc_cli(obj); struct osc_extent *ext; struct osc_extent *temp; - unsigned int page_count = 0; - unsigned int max_pages = cli->cl_max_pages_per_rpc; + struct extent_rpc_data data = { + .erd_rpc_list = rpclist, + .erd_page_count = 0, + .erd_max_pages = cli->cl_max_pages_per_rpc, + .erd_max_chunks = osc_max_write_chunks(cli), + }; LASSERT(osc_object_is_locked(obj)); list_for_each_entry_safe(ext, temp, &obj->oo_hp_exts, oe_link) { LASSERT(ext->oe_state == OES_CACHE); - if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count, - &max_pages)) - return page_count; - EASSERT(ext->oe_nr_pages <= max_pages, ext); + if (!try_to_add_extent_for_io(cli, ext, &data)) + return data.erd_page_count; + EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext); } - if (page_count == max_pages) - return page_count; + if (data.erd_page_count == data.erd_max_pages) + return data.erd_page_count; while (!list_empty(&obj->oo_urgent_exts)) { ext = list_entry(obj->oo_urgent_exts.next, struct osc_extent, oe_link); - if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count, - &max_pages)) - return page_count; + if (!try_to_add_extent_for_io(cli, ext, &data)) + return data.erd_page_count; if (!ext->oe_intree) continue; @@ -1981,13 +2029,12 @@ static unsigned int get_write_extents(struct osc_object *obj, ext->oe_owner)) continue; - if (!try_to_add_extent_for_io(cli, ext, rpclist, - &page_count, &max_pages)) - return page_count; + if (!try_to_add_extent_for_io(cli, ext, &data)) + return data.erd_page_count; } } - if (page_count == max_pages) - return page_count; + if (data.erd_page_count == data.erd_max_pages) + return data.erd_page_count; ext = first_extent(obj); while (ext) { @@ -1998,13 +2045,12 @@ static unsigned int get_write_extents(struct osc_object *obj, continue; } - if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count, - &max_pages)) - return page_count; + if (!try_to_add_extent_for_io(cli, ext, &data)) + return data.erd_page_count; ext = next_extent(ext); } - return page_count; + return data.erd_page_count; } static int @@ -2089,27 +2135,29 @@ osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli, struct osc_extent *ext; struct osc_extent *next; LIST_HEAD(rpclist); - unsigned int page_count = 0; - unsigned int max_pages = cli->cl_max_pages_per_rpc; + struct extent_rpc_data data = { + .erd_rpc_list = &rpclist, + .erd_page_count = 0, + .erd_max_pages = cli->cl_max_pages_per_rpc, + .erd_max_chunks = UINT_MAX, + }; int rc = 0; LASSERT(osc_object_is_locked(osc)); list_for_each_entry_safe(ext, next, &osc->oo_reading_exts, oe_link) { EASSERT(ext->oe_state == OES_LOCK_DONE, ext); - if (!try_to_add_extent_for_io(cli, ext, &rpclist, &page_count, - &max_pages)) + if (!try_to_add_extent_for_io(cli, ext, &data)) break; osc_extent_state_set(ext, OES_RPC); - EASSERT(ext->oe_nr_pages <= max_pages, ext); + EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext); } - LASSERT(page_count <= max_pages); + LASSERT(data.erd_page_count <= data.erd_max_pages); - osc_update_pending(osc, OBD_BRW_READ, -page_count); + osc_update_pending(osc, OBD_BRW_READ, -data.erd_page_count); if (!list_empty(&rpclist)) { osc_object_unlock(osc); - LASSERT(page_count > 0); rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_READ); LASSERT(list_empty(&rpclist)); @@ -2710,8 +2758,8 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, /** * Called by osc_io_setattr_start() to freeze and destroy covering extents. */ -int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj, __u64 size) +int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, + u64 size, struct osc_extent **extp) { struct client_obd *cli = osc_cli(obj); struct osc_extent *ext; @@ -2808,9 +2856,11 @@ again: /* we need to hold this extent in OES_TRUNC state so * that no writeback will happen. This is to avoid * BUG 17397. + * Only partial truncate can reach here, if @size is + * not zero, the caller should provide a valid @extp. */ - LASSERT(!oio->oi_trunc); - oio->oi_trunc = osc_extent_get(ext); + LASSERT(!*extp); + *extp = osc_extent_get(ext); OSC_EXTENT_DUMP(D_CACHE, ext, "trunc at %llu\n", size); } @@ -2836,13 +2886,10 @@ again: /** * Called after osc_io_setattr_end to add oio->oi_trunc back to cache. */ -void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj) +void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext) { - struct osc_extent *ext = oio->oi_trunc; - - oio->oi_trunc = NULL; if (ext) { + struct osc_object *obj = ext->oe_obj; bool unplug = false; EASSERT(ext->oe_nr_pages > 0, ext); @@ -3183,8 +3230,10 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io, /* page is top page. */ info->oti_next_index = osc_index(ops) + 1; if (cl_page_own(env, io, page) == 0) { - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageDirty(cl_page_vmpage(page)))); + if (page->cp_type == CPT_CACHEABLE && + PageDirty(cl_page_vmpage(page))) + CL_PAGE_DEBUG(D_ERROR, env, page, + "discard dirty page?\n"); /* discard the page */ cl_page_discard(env, io, page); diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h index cce55a9689f0..c09ab97d64ae 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h @@ -159,6 +159,10 @@ struct osc_object { /* Protect osc_lock this osc_object has */ spinlock_t oo_ol_spin; struct list_head oo_ol_list; + + /** number of active IOs of this object */ + atomic_t oo_nr_ios; + wait_queue_head_t oo_io_waitq; }; static inline void osc_object_lock(struct osc_object *obj) @@ -399,10 +403,9 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, struct osc_page *ops); int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, struct list_head *list, int cmd, int brw_flags); -int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj, __u64 size); -void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj); +int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, + u64 size, struct osc_extent **extp); +void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext); int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end, int hp, int discard); int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h index 688783dcc1e4..8abd83f26716 100644 --- a/drivers/staging/lustre/lustre/osc/osc_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_internal.h @@ -114,9 +114,9 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, struct ptlrpc_request_set *rqset, int async, int agl); int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, union ldlm_policy_data *policy, __u32 mode, - __u64 *flags, void *data, struct lustre_handle *lockh, - int unref); + enum ldlm_type type, union ldlm_policy_data *policy, + enum ldlm_mode mode, __u64 *flags, void *data, + struct lustre_handle *lockh, int unref); int osc_setattr_async(struct obd_export *exp, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, @@ -181,6 +181,8 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d) return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev); } +extern struct lu_kmem_descr osc_caches[]; + extern struct kmem_cache *osc_quota_kmem; struct osc_quota_info { /** linkage for quota hash table */ @@ -218,4 +220,15 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, struct osc_object *obj, pgoff_t index, enum osc_dap_flags flags); +int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc); + +/** osc shrink list to link all osc client obd */ +extern struct list_head osc_shrink_list; +/** spin lock to protect osc_shrink_list */ +extern spinlock_t osc_shrink_lock; +unsigned long osc_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc); +unsigned long osc_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc); + #endif /* OSC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c index 228a97c098fe..0b4cc4283b05 100644 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ b/drivers/staging/lustre/lustre/osc/osc_io.c @@ -37,6 +37,8 @@ #define DEBUG_SUBSYSTEM S_OSC +#include "../include/lustre_obdo.h" + #include "osc_cl_internal.h" /** \addtogroup osc @@ -97,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env, ldlm_lock_decref(&lockh, dlmlock->l_req_mode); } + ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc; ra->cra_end = cl_index(osc2cl(osc), dlmlock->l_policy_data.l_extent.end); ra->cra_release = osc_read_ahead_release; @@ -136,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env, LASSERT(qin->pl_nr > 0); - CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt); + CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt); osc = cl2osc(ios->cis_obj); cli = osc_cli(osc); @@ -207,6 +210,18 @@ static int osc_io_submit(const struct lu_env *env, if (queued > 0) result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags); + /* Update c/mtime for sync write. LU-7310 */ + if (qout->pl_nr > 0 && !result) { + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + struct cl_object *obj = ios->cis_obj; + + cl_object_attr_lock(obj); + attr->cat_mtime = LTIME_S(CURRENT_TIME); + attr->cat_ctime = attr->cat_mtime; + cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME); + cl_object_attr_unlock(obj); + } + CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result); return qout->pl_nr > 0 ? 0 : result; } @@ -330,8 +345,25 @@ static int osc_io_commit_async(const struct lu_env *env, return result; } -static int osc_io_rw_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) +static int osc_io_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_object *osc = cl2osc(ios->cis_obj); + struct obd_import *imp = osc_cli(osc)->cl_import; + int rc = -EIO; + + spin_lock(&imp->imp_lock); + if (likely(!imp->imp_invalid)) { + atomic_inc(&osc->oo_nr_ios); + rc = 0; + } + spin_unlock(&imp->imp_lock); + + return rc; +} + +static int osc_io_write_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; struct osc_io *oio = osc_env_io(env); @@ -342,7 +374,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env, unsigned long max_pages; if (cl_io_is_append(io)) - return 0; + return osc_io_iter_init(env, ios); npages = io->u.ci_rw.crw_count >> PAGE_SHIFT; if (io->u.ci_rw.crw_pos & ~PAGE_MASK) @@ -374,11 +406,21 @@ static int osc_io_rw_iter_init(const struct lu_env *env, (void)ptlrpcd_queue_work(cli->cl_lru_work); } - return 0; + return osc_io_iter_init(env, ios); } -static void osc_io_rw_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) +static void osc_io_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_object *osc = cl2osc(ios->cis_obj); + + LASSERT(atomic_read(&osc->oo_nr_ios) > 0); + if (atomic_dec_and_test(&osc->oo_nr_ios)) + wake_up_all(&osc->oo_io_waitq); +} + +static void osc_io_write_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) { struct osc_io *oio = osc_env_io(env); struct osc_object *osc = cl2osc(ios->cis_obj); @@ -389,6 +431,8 @@ static void osc_io_rw_iter_fini(const struct lu_env *env, oio->oi_lru_reserved = 0; } oio->oi_write_osclock = NULL; + + osc_io_iter_fini(env, ios); } static int osc_io_fault_start(const struct lu_env *env, @@ -479,7 +523,8 @@ static int osc_io_setattr_start(const struct lu_env *env, /* truncate cache dirty pages first */ if (cl_io_is_trunc(io)) - result = osc_cache_truncate_start(env, oio, cl2osc(obj), size); + result = osc_cache_truncate_start(env, cl2osc(obj), size, + &oio->oi_trunc); if (result == 0 && oio->oi_lockless == 0) { cl_object_attr_lock(obj); @@ -589,10 +634,8 @@ static void osc_io_setattr_end(const struct lu_env *env, __u64 size = io->u.ci_setattr.sa_attr.lvb_size; osc_trunc_check(env, io, oio, size); - if (oio->oi_trunc) { - osc_cache_truncate_end(env, oio, cl2osc(obj)); - oio->oi_trunc = NULL; - } + osc_cache_truncate_end(env, oio->oi_trunc); + oio->oi_trunc = NULL; } } @@ -669,7 +712,7 @@ static int osc_io_data_version_start(const struct lu_env *env, ptlrpc_request_set_replen(req); req->rq_interpret_reply = osc_data_version_interpret; - CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*dva) > sizeof(req->rq_async_args)); dva = ptlrpc_req_async_args(req); dva->dva_oio = oio; @@ -832,17 +875,21 @@ static void osc_io_end(const struct lu_env *env, static const struct cl_io_operations osc_io_ops = { .op = { [CIT_READ] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_read_start, .cio_fini = osc_io_fini }, [CIT_WRITE] = { - .cio_iter_init = osc_io_rw_iter_init, - .cio_iter_fini = osc_io_rw_iter_fini, + .cio_iter_init = osc_io_write_iter_init, + .cio_iter_fini = osc_io_write_iter_fini, .cio_start = osc_io_write_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini }, [CIT_SETATTR] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_setattr_start, .cio_end = osc_io_setattr_end }, @@ -851,6 +898,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_end = osc_io_data_version_end, }, [CIT_FAULT] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_fault_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c index e0c3324857dd..d3e5ca7db7b2 100644 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ b/drivers/staging/lustre/lustre/osc/osc_object.c @@ -78,6 +78,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, INIT_LIST_HEAD(&osc->oo_write_item); INIT_LIST_HEAD(&osc->oo_read_item); + atomic_set(&osc->oo_nr_ios, 0); + init_waitqueue_head(&osc->oo_io_waitq); + osc->oo_root.rb_node = NULL; INIT_LIST_HEAD(&osc->oo_hp_exts); INIT_LIST_HEAD(&osc->oo_urgent_exts); @@ -112,6 +115,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj) LASSERT(atomic_read(&osc->oo_nr_reads) == 0); LASSERT(atomic_read(&osc->oo_nr_writes) == 0); LASSERT(list_empty(&osc->oo_ol_list)); + LASSERT(!atomic_read(&osc->oo_nr_ios)); lu_object_fini(obj); kmem_cache_free(osc_object_kmem, osc); @@ -444,4 +448,19 @@ struct lu_object *osc_object_alloc(const struct lu_env *env, return obj; } +int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc) +{ + struct l_wait_info lwi = { 0 }; + + CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n", + osc, atomic_read(&osc->oo_nr_ios)); + + l_wait_event(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios), &lwi); + + /* Discard all pages of this object. */ + osc_cache_truncate_start(env, osc, 0, NULL); + + return 0; +} + /** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index e356e4af08e1..ab9d0d7bb943 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -370,12 +370,17 @@ static int osc_cache_too_much(struct client_obd *cli) return lru_shrink_min(cli); } else { time64_t duration = ktime_get_real_seconds(); + long timediff; /* knock out pages by duration of no IO activity */ duration -= cli->cl_lru_last_used; - duration >>= 6; /* approximately 1 minute */ - if (duration > 0 && - pages >= div64_s64((s64)budget, duration)) + /* + * The difference shouldn't be more than 70 years + * so we can safely case to a long. Round to + * approximately 1 minute. + */ + timediff = (long)(duration >> 6); + if (timediff > 0 && pages >= budget / timediff) return lru_shrink_min(cli); } return 0; @@ -943,4 +948,91 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli) cli->cl_max_rpcs_in_flight; } +/** + * Return how many LRU pages in the cache of all OSC devices + * + * Return: return # of cached LRU pages times reclaimation tendency + * SHRINK_STOP if it cannot do any scanning in this time + */ +unsigned long osc_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc) +{ + struct client_obd *cli; + unsigned long cached = 0; + + spin_lock(&osc_shrink_lock); + list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list) + cached += atomic_long_read(&cli->cl_lru_in_list); + spin_unlock(&osc_shrink_lock); + + return (cached * sysctl_vfs_cache_pressure) / 100; +} + +/** + * Scan and try to reclaim sc->nr_to_scan cached LRU pages + * + * Return: number of cached LRU pages reclaimed + * SHRINK_STOP if it cannot do any scanning in this time + * + * Linux kernel will loop calling this shrinker scan routine with + * sc->nr_to_scan = SHRINK_BATCH(128 for now) until kernel got enough memory. + * + * If sc->nr_to_scan is 0, the VM is querying the cache size, we don't need + * to scan and try to reclaim LRU pages, just return 0 and + * osc_cache_shrink_count() will report the LRU page number. + */ +unsigned long osc_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc) +{ + struct client_obd *stop_anchor = NULL; + struct client_obd *cli; + struct lu_env *env; + long shrank = 0; + int refcheck; + int rc; + + if (!sc->nr_to_scan) + return 0; + + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return SHRINK_STOP; + + spin_lock(&osc_shrink_lock); + while (!list_empty(&osc_shrink_list)) { + cli = list_entry(osc_shrink_list.next, struct client_obd, + cl_shrink_list); + + if (!stop_anchor) + stop_anchor = cli; + else if (cli == stop_anchor) + break; + + list_move_tail(&cli->cl_shrink_list, &osc_shrink_list); + spin_unlock(&osc_shrink_lock); + + /* shrink no more than max_pages_per_rpc for an OSC */ + rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) > + cli->cl_max_pages_per_rpc ? + cli->cl_max_pages_per_rpc : + sc->nr_to_scan - shrank, true); + if (rc > 0) + shrank += rc; + + if (shrank >= sc->nr_to_scan) + goto out; + + spin_lock(&osc_shrink_lock); + } + spin_unlock(&osc_shrink_lock); + +out: + cl_env_put(env, &refcheck); + + return shrank; +} + /** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 7143564ae7e7..c4cfe18c3294 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -43,6 +43,7 @@ #include "../include/lprocfs_status.h" #include "../include/lustre/lustre_ioctl.h" #include "../include/lustre_debug.h" +#include "../include/lustre_obdo.h" #include "../include/lustre_param.h" #include "../include/lustre_fid.h" #include "../include/obd_class.h" @@ -250,7 +251,7 @@ int osc_setattr_async(struct obd_export *exp, struct obdo *oa, req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret; - CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args)); sa = ptlrpc_req_async_args(req); sa->sa_oa = oa; sa->sa_upcall = upcall; @@ -348,7 +349,7 @@ int osc_punch_base(struct obd_export *exp, struct obdo *oa, ptlrpc_request_set_replen(req); req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret; - CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args)); sa = ptlrpc_req_async_args(req); sa->sa_oa = oa; sa->sa_upcall = upcall; @@ -429,7 +430,7 @@ int osc_sync_base(struct osc_object *obj, struct obdo *oa, ptlrpc_request_set_replen(req); req->rq_interpret_reply = osc_sync_interpret; - CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*fa) > sizeof(req->rq_async_args)); fa = ptlrpc_req_async_args(req); fa->fa_obj = obj; fa->fa_oa = oa; @@ -1170,7 +1171,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, } ptlrpc_request_set_replen(req); - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); aa->aa_oa = oa; aa->aa_requested_nob = requested_nob; @@ -1757,7 +1758,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, cl_req_attr_set(env, osc2cl(obj), crattr); lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); INIT_LIST_HEAD(&aa->aa_oaps); list_splice_init(&rpc_list, &aa->aa_oaps); @@ -2038,7 +2039,7 @@ no_match: if (!rc) { struct osc_enqueue_args *aa; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); aa->oa_exp = exp; aa->oa_mode = einfo->ei_mode; @@ -2080,9 +2081,9 @@ no_match: } int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, union ldlm_policy_data *policy, __u32 mode, - __u64 *flags, void *data, struct lustre_handle *lockh, - int unref) + enum ldlm_type type, union ldlm_policy_data *policy, + enum ldlm_mode mode, __u64 *flags, void *data, + struct lustre_handle *lockh, int unref) { struct obd_device *obd = exp->exp_obd; __u64 lflags = *flags; @@ -2195,7 +2196,7 @@ static int osc_statfs_async(struct obd_export *exp, } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); aa->aa_oi = oinfo; @@ -2400,7 +2401,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, struct osc_brw_async_args *aa; struct obdo *oa; - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); if (!oa) { @@ -2479,6 +2480,33 @@ static int osc_disconnect(struct obd_export *exp) return rc; } +static int osc_ldlm_resource_invalidate(struct cfs_hash *hs, + struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *arg) +{ + struct ldlm_resource *res = cfs_hash_object(hs, hnode); + struct osc_object *osc = NULL; + struct lu_env *env = arg; + struct ldlm_lock *lock; + + lock_res(res); + list_for_each_entry(lock, &res->lr_granted, l_res_link) { + if (lock->l_ast_data && !osc) { + osc = lock->l_ast_data; + cl_object_get(osc2cl(osc)); + } + lock->l_ast_data = NULL; + } + unlock_res(res); + + if (osc) { + osc_object_invalidate(env, osc); + cl_object_put(env, osc2cl(osc)); + } + + return 0; +} + static int osc_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) @@ -2506,17 +2534,18 @@ static int osc_import_event(struct obd_device *obd, struct lu_env *env; int refcheck; + ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); + env = cl_env_get(&refcheck); if (!IS_ERR(env)) { - /* Reset grants */ - cli = &obd->u.cli; - /* all pages go to failing rpcs due to the invalid - * import - */ - osc_io_unplug(env, cli, NULL); + osc_io_unplug(env, &obd->u.cli, NULL); - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); + cfs_hash_for_each_nolock(ns->ns_rs_hash, + osc_ldlm_resource_invalidate, + env, 0); cl_env_put(env, &refcheck); + + ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); } else { rc = PTR_ERR(env); } @@ -2646,6 +2675,11 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) INIT_LIST_HEAD(&cli->cl_grant_shrink_list); ns_register_cancel(obd->obd_namespace, osc_cancel_weight); + + spin_lock(&osc_shrink_lock); + list_add_tail(&cli->cl_shrink_list, &osc_shrink_list); + spin_unlock(&osc_shrink_lock); + return rc; out_ptlrpcd_work: @@ -2699,6 +2733,10 @@ static int osc_cleanup(struct obd_device *obd) struct client_obd *cli = &obd->u.cli; int rc; + spin_lock(&osc_shrink_lock); + list_del(&cli->cl_shrink_list); + spin_unlock(&osc_shrink_lock); + /* lru cleanup */ if (cli->cl_cache) { LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0); @@ -2766,7 +2804,14 @@ static struct obd_ops osc_obd_ops = { .quotactl = osc_quotactl, }; -extern struct lu_kmem_descr osc_caches[]; +struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list); +DEFINE_SPINLOCK(osc_shrink_lock); + +static struct shrinker osc_cache_shrinker = { + .count_objects = osc_cache_shrink_count, + .scan_objects = osc_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; static int __init osc_init(void) { @@ -2792,6 +2837,8 @@ static int __init osc_init(void) if (rc) goto out_kmem; + register_shrinker(&osc_cache_shrinker); + /* This is obviously too much memory, only prevent overflow here */ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) { rc = -EINVAL; @@ -2830,6 +2877,7 @@ out_kmem: static void /*__exit*/ osc_exit(void) { + unregister_shrinker(&osc_cache_shrinker); class_unregister_type(LUSTRE_OSC_NAME); lu_kmem_fini(osc_caches); ptlrpc_free_rq_pool(osc_rq_pool); |