summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lustre/osc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/osc')
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c155
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cl_internal.h11
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_internal.h19
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_io.c79
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_object.c19
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_page.c98
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_request.c86
7 files changed, 370 insertions, 97 deletions
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index b0f030c6c9c9..0490478393df 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -142,10 +142,7 @@ static const char *oes_strings[] = {
static inline struct osc_extent *rb_extent(struct rb_node *n)
{
- if (!n)
- return NULL;
-
- return container_of(n, struct osc_extent, oe_node);
+ return rb_entry_safe(n, struct osc_extent, oe_node);
}
static inline struct osc_extent *next_extent(struct osc_extent *ext)
@@ -247,7 +244,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
goto out;
}
- if (ext->oe_dlmlock) {
+ if (ext->oe_dlmlock && !ldlm_is_failed(ext->oe_dlmlock)) {
struct ldlm_extent *extent;
extent = &ext->oe_dlmlock->l_policy_data.l_extent;
@@ -1004,6 +1001,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
env = cl_env_get(&refcheck);
io = &osc_env_info(env)->oti_io;
io->ci_obj = cl_object_top(osc2cl(obj));
+ io->ci_ignore_layout = 1;
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
if (rc < 0)
goto out;
@@ -1884,16 +1882,32 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
oap, osc, rc);
}
+struct extent_rpc_data {
+ struct list_head *erd_rpc_list;
+ unsigned int erd_page_count;
+ unsigned int erd_max_pages;
+ unsigned int erd_max_chunks;
+};
+
+static inline unsigned osc_extent_chunks(const struct osc_extent *ext)
+{
+ struct client_obd *cli = osc_cli(ext->oe_obj);
+ unsigned ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
+
+ return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1;
+}
+
/**
* Try to add extent to one RPC. We need to think about the following things:
* - # of pages must not be over max_pages_per_rpc
* - extent must be compatible with previous ones
*/
static int try_to_add_extent_for_io(struct client_obd *cli,
- struct osc_extent *ext, struct list_head *rpclist,
- unsigned int *pc, unsigned int *max_pages)
+ struct osc_extent *ext,
+ struct extent_rpc_data *data)
{
struct osc_extent *tmp;
+ unsigned int chunk_count;
struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
struct osc_async_page,
oap_pending_item);
@@ -1901,19 +1915,22 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE),
ext);
- *max_pages = max(ext->oe_mppr, *max_pages);
- if (*pc + ext->oe_nr_pages > *max_pages)
+ chunk_count = osc_extent_chunks(ext);
+ if (chunk_count > data->erd_max_chunks)
+ return 0;
+
+ data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages);
+ if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages)
return 0;
- list_for_each_entry(tmp, rpclist, oe_link) {
+ list_for_each_entry(tmp, data->erd_rpc_list, oe_link) {
struct osc_async_page *oap2;
oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page,
oap_pending_item);
EASSERT(tmp->oe_owner == current, tmp);
if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) {
- CDEBUG(D_CACHE, "Do not permit different type of IO"
- " for a same RPC\n");
+ CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n");
return 0;
}
@@ -1926,12 +1943,41 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
break;
}
- *pc += ext->oe_nr_pages;
- list_move_tail(&ext->oe_link, rpclist);
+ data->erd_max_chunks -= chunk_count;
+ data->erd_page_count += ext->oe_nr_pages;
+ list_move_tail(&ext->oe_link, data->erd_rpc_list);
ext->oe_owner = current;
return 1;
}
+static inline unsigned osc_max_write_chunks(const struct client_obd *cli)
+{
+ /*
+ * LU-8135:
+ *
+ * The maximum size of a single transaction is about 64MB in ZFS.
+ * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+ *
+ * Since ZFS is a copy-on-write file system, a single dirty page in
+ * a chunk will result in the rewrite of the whole chunk, therefore
+ * an RPC shouldn't be allowed to contain too many chunks otherwise
+ * it will make transaction size much bigger than 64MB, especially
+ * with big block size for ZFS.
+ *
+ * This piece of code is to make sure that OSC won't send write RPCs
+ * with too many chunks. The maximum chunk size that an RPC can cover
+ * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
+ * OST should tell the client what the biggest transaction size is,
+ * but it's good enough for now.
+ *
+ * This limitation doesn't apply to ldiskfs, which allows as many
+ * chunks in one RPC as we want. However, it won't have any benefits
+ * to have too many discontiguous pages in one RPC. Therefore, it
+ * can only have 256 chunks at most in one RPC.
+ */
+ return min(PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits, 256);
+}
+
/**
* In order to prevent multiple ptlrpcd from breaking contiguous extents,
* get_write_extent() takes all appropriate extents in atomic.
@@ -1951,26 +1997,28 @@ static unsigned int get_write_extents(struct osc_object *obj,
struct client_obd *cli = osc_cli(obj);
struct osc_extent *ext;
struct osc_extent *temp;
- unsigned int page_count = 0;
- unsigned int max_pages = cli->cl_max_pages_per_rpc;
+ struct extent_rpc_data data = {
+ .erd_rpc_list = rpclist,
+ .erd_page_count = 0,
+ .erd_max_pages = cli->cl_max_pages_per_rpc,
+ .erd_max_chunks = osc_max_write_chunks(cli),
+ };
LASSERT(osc_object_is_locked(obj));
list_for_each_entry_safe(ext, temp, &obj->oo_hp_exts, oe_link) {
LASSERT(ext->oe_state == OES_CACHE);
- if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
- &max_pages))
- return page_count;
- EASSERT(ext->oe_nr_pages <= max_pages, ext);
+ if (!try_to_add_extent_for_io(cli, ext, &data))
+ return data.erd_page_count;
+ EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext);
}
- if (page_count == max_pages)
- return page_count;
+ if (data.erd_page_count == data.erd_max_pages)
+ return data.erd_page_count;
while (!list_empty(&obj->oo_urgent_exts)) {
ext = list_entry(obj->oo_urgent_exts.next,
struct osc_extent, oe_link);
- if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
- &max_pages))
- return page_count;
+ if (!try_to_add_extent_for_io(cli, ext, &data))
+ return data.erd_page_count;
if (!ext->oe_intree)
continue;
@@ -1981,13 +2029,12 @@ static unsigned int get_write_extents(struct osc_object *obj,
ext->oe_owner))
continue;
- if (!try_to_add_extent_for_io(cli, ext, rpclist,
- &page_count, &max_pages))
- return page_count;
+ if (!try_to_add_extent_for_io(cli, ext, &data))
+ return data.erd_page_count;
}
}
- if (page_count == max_pages)
- return page_count;
+ if (data.erd_page_count == data.erd_max_pages)
+ return data.erd_page_count;
ext = first_extent(obj);
while (ext) {
@@ -1998,13 +2045,12 @@ static unsigned int get_write_extents(struct osc_object *obj,
continue;
}
- if (!try_to_add_extent_for_io(cli, ext, rpclist, &page_count,
- &max_pages))
- return page_count;
+ if (!try_to_add_extent_for_io(cli, ext, &data))
+ return data.erd_page_count;
ext = next_extent(ext);
}
- return page_count;
+ return data.erd_page_count;
}
static int
@@ -2089,27 +2135,29 @@ osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli,
struct osc_extent *ext;
struct osc_extent *next;
LIST_HEAD(rpclist);
- unsigned int page_count = 0;
- unsigned int max_pages = cli->cl_max_pages_per_rpc;
+ struct extent_rpc_data data = {
+ .erd_rpc_list = &rpclist,
+ .erd_page_count = 0,
+ .erd_max_pages = cli->cl_max_pages_per_rpc,
+ .erd_max_chunks = UINT_MAX,
+ };
int rc = 0;
LASSERT(osc_object_is_locked(osc));
list_for_each_entry_safe(ext, next, &osc->oo_reading_exts, oe_link) {
EASSERT(ext->oe_state == OES_LOCK_DONE, ext);
- if (!try_to_add_extent_for_io(cli, ext, &rpclist, &page_count,
- &max_pages))
+ if (!try_to_add_extent_for_io(cli, ext, &data))
break;
osc_extent_state_set(ext, OES_RPC);
- EASSERT(ext->oe_nr_pages <= max_pages, ext);
+ EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext);
}
- LASSERT(page_count <= max_pages);
+ LASSERT(data.erd_page_count <= data.erd_max_pages);
- osc_update_pending(osc, OBD_BRW_READ, -page_count);
+ osc_update_pending(osc, OBD_BRW_READ, -data.erd_page_count);
if (!list_empty(&rpclist)) {
osc_object_unlock(osc);
- LASSERT(page_count > 0);
rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_READ);
LASSERT(list_empty(&rpclist));
@@ -2710,8 +2758,8 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
/**
* Called by osc_io_setattr_start() to freeze and destroy covering extents.
*/
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
- struct osc_object *obj, __u64 size)
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
+ u64 size, struct osc_extent **extp)
{
struct client_obd *cli = osc_cli(obj);
struct osc_extent *ext;
@@ -2808,9 +2856,11 @@ again:
/* we need to hold this extent in OES_TRUNC state so
* that no writeback will happen. This is to avoid
* BUG 17397.
+ * Only partial truncate can reach here, if @size is
+ * not zero, the caller should provide a valid @extp.
*/
- LASSERT(!oio->oi_trunc);
- oio->oi_trunc = osc_extent_get(ext);
+ LASSERT(!*extp);
+ *extp = osc_extent_get(ext);
OSC_EXTENT_DUMP(D_CACHE, ext,
"trunc at %llu\n", size);
}
@@ -2836,13 +2886,10 @@ again:
/**
* Called after osc_io_setattr_end to add oio->oi_trunc back to cache.
*/
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
- struct osc_object *obj)
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext)
{
- struct osc_extent *ext = oio->oi_trunc;
-
- oio->oi_trunc = NULL;
if (ext) {
+ struct osc_object *obj = ext->oe_obj;
bool unplug = false;
EASSERT(ext->oe_nr_pages > 0, ext);
@@ -3183,8 +3230,10 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
/* page is top page. */
info->oti_next_index = osc_index(ops) + 1;
if (cl_page_own(env, io, page) == 0) {
- KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
- !PageDirty(cl_page_vmpage(page))));
+ if (page->cp_type == CPT_CACHEABLE &&
+ PageDirty(cl_page_vmpage(page)))
+ CL_PAGE_DEBUG(D_ERROR, env, page,
+ "discard dirty page?\n");
/* discard the page */
cl_page_discard(env, io, page);
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index cce55a9689f0..c09ab97d64ae 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -159,6 +159,10 @@ struct osc_object {
/* Protect osc_lock this osc_object has */
spinlock_t oo_ol_spin;
struct list_head oo_ol_list;
+
+ /** number of active IOs of this object */
+ atomic_t oo_nr_ios;
+ wait_queue_head_t oo_io_waitq;
};
static inline void osc_object_lock(struct osc_object *obj)
@@ -399,10 +403,9 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
struct osc_page *ops);
int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
struct list_head *list, int cmd, int brw_flags);
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
- struct osc_object *obj, __u64 size);
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
- struct osc_object *obj);
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
+ u64 size, struct osc_extent **extp);
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext);
int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
pgoff_t start, pgoff_t end, int hp, int discard);
int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 688783dcc1e4..8abd83f26716 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -114,9 +114,9 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
struct ptlrpc_request_set *rqset, int async, int agl);
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, union ldlm_policy_data *policy, __u32 mode,
- __u64 *flags, void *data, struct lustre_handle *lockh,
- int unref);
+ enum ldlm_type type, union ldlm_policy_data *policy,
+ enum ldlm_mode mode, __u64 *flags, void *data,
+ struct lustre_handle *lockh, int unref);
int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
@@ -181,6 +181,8 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
}
+extern struct lu_kmem_descr osc_caches[];
+
extern struct kmem_cache *osc_quota_kmem;
struct osc_quota_info {
/** linkage for quota hash table */
@@ -218,4 +220,15 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
enum osc_dap_flags flags);
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
+
+/** osc shrink list to link all osc client obd */
+extern struct list_head osc_shrink_list;
+/** spin lock to protect osc_shrink_list */
+extern spinlock_t osc_shrink_lock;
+unsigned long osc_cache_shrink_count(struct shrinker *sk,
+ struct shrink_control *sc);
+unsigned long osc_cache_shrink_scan(struct shrinker *sk,
+ struct shrink_control *sc);
+
#endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 228a97c098fe..0b4cc4283b05 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -37,6 +37,8 @@
#define DEBUG_SUBSYSTEM S_OSC
+#include "../include/lustre_obdo.h"
+
#include "osc_cl_internal.h"
/** \addtogroup osc
@@ -97,6 +99,7 @@ static int osc_io_read_ahead(const struct lu_env *env,
ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
}
+ ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc;
ra->cra_end = cl_index(osc2cl(osc),
dlmlock->l_policy_data.l_extent.end);
ra->cra_release = osc_read_ahead_release;
@@ -136,7 +139,7 @@ static int osc_io_submit(const struct lu_env *env,
LASSERT(qin->pl_nr > 0);
- CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt);
+ CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt);
osc = cl2osc(ios->cis_obj);
cli = osc_cli(osc);
@@ -207,6 +210,18 @@ static int osc_io_submit(const struct lu_env *env,
if (queued > 0)
result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);
+ /* Update c/mtime for sync write. LU-7310 */
+ if (qout->pl_nr > 0 && !result) {
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ struct cl_object *obj = ios->cis_obj;
+
+ cl_object_attr_lock(obj);
+ attr->cat_mtime = LTIME_S(CURRENT_TIME);
+ attr->cat_ctime = attr->cat_mtime;
+ cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME);
+ cl_object_attr_unlock(obj);
+ }
+
CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
return qout->pl_nr > 0 ? 0 : result;
}
@@ -330,8 +345,25 @@ static int osc_io_commit_async(const struct lu_env *env,
return result;
}
-static int osc_io_rw_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
+static int osc_io_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct obd_import *imp = osc_cli(osc)->cl_import;
+ int rc = -EIO;
+
+ spin_lock(&imp->imp_lock);
+ if (likely(!imp->imp_invalid)) {
+ atomic_inc(&osc->oo_nr_ios);
+ rc = 0;
+ }
+ spin_unlock(&imp->imp_lock);
+
+ return rc;
+}
+
+static int osc_io_write_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
struct osc_io *oio = osc_env_io(env);
@@ -342,7 +374,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
unsigned long max_pages;
if (cl_io_is_append(io))
- return 0;
+ return osc_io_iter_init(env, ios);
npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
@@ -374,11 +406,21 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
(void)ptlrpcd_queue_work(cli->cl_lru_work);
}
- return 0;
+ return osc_io_iter_init(env, ios);
}
-static void osc_io_rw_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
+static void osc_io_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+
+ LASSERT(atomic_read(&osc->oo_nr_ios) > 0);
+ if (atomic_dec_and_test(&osc->oo_nr_ios))
+ wake_up_all(&osc->oo_io_waitq);
+}
+
+static void osc_io_write_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
{
struct osc_io *oio = osc_env_io(env);
struct osc_object *osc = cl2osc(ios->cis_obj);
@@ -389,6 +431,8 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
oio->oi_lru_reserved = 0;
}
oio->oi_write_osclock = NULL;
+
+ osc_io_iter_fini(env, ios);
}
static int osc_io_fault_start(const struct lu_env *env,
@@ -479,7 +523,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
/* truncate cache dirty pages first */
if (cl_io_is_trunc(io))
- result = osc_cache_truncate_start(env, oio, cl2osc(obj), size);
+ result = osc_cache_truncate_start(env, cl2osc(obj), size,
+ &oio->oi_trunc);
if (result == 0 && oio->oi_lockless == 0) {
cl_object_attr_lock(obj);
@@ -589,10 +634,8 @@ static void osc_io_setattr_end(const struct lu_env *env,
__u64 size = io->u.ci_setattr.sa_attr.lvb_size;
osc_trunc_check(env, io, oio, size);
- if (oio->oi_trunc) {
- osc_cache_truncate_end(env, oio, cl2osc(obj));
- oio->oi_trunc = NULL;
- }
+ osc_cache_truncate_end(env, oio->oi_trunc);
+ oio->oi_trunc = NULL;
}
}
@@ -669,7 +712,7 @@ static int osc_io_data_version_start(const struct lu_env *env,
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = osc_data_version_interpret;
- CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*dva) > sizeof(req->rq_async_args));
dva = ptlrpc_req_async_args(req);
dva->dva_oio = oio;
@@ -832,17 +875,21 @@ static void osc_io_end(const struct lu_env *env,
static const struct cl_io_operations osc_io_ops = {
.op = {
[CIT_READ] = {
+ .cio_iter_init = osc_io_iter_init,
+ .cio_iter_fini = osc_io_iter_fini,
.cio_start = osc_io_read_start,
.cio_fini = osc_io_fini
},
[CIT_WRITE] = {
- .cio_iter_init = osc_io_rw_iter_init,
- .cio_iter_fini = osc_io_rw_iter_fini,
+ .cio_iter_init = osc_io_write_iter_init,
+ .cio_iter_fini = osc_io_write_iter_fini,
.cio_start = osc_io_write_start,
.cio_end = osc_io_end,
.cio_fini = osc_io_fini
},
[CIT_SETATTR] = {
+ .cio_iter_init = osc_io_iter_init,
+ .cio_iter_fini = osc_io_iter_fini,
.cio_start = osc_io_setattr_start,
.cio_end = osc_io_setattr_end
},
@@ -851,6 +898,8 @@ static const struct cl_io_operations osc_io_ops = {
.cio_end = osc_io_data_version_end,
},
[CIT_FAULT] = {
+ .cio_iter_init = osc_io_iter_init,
+ .cio_iter_fini = osc_io_iter_fini,
.cio_start = osc_io_fault_start,
.cio_end = osc_io_end,
.cio_fini = osc_io_fini
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index e0c3324857dd..d3e5ca7db7b2 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -78,6 +78,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
INIT_LIST_HEAD(&osc->oo_write_item);
INIT_LIST_HEAD(&osc->oo_read_item);
+ atomic_set(&osc->oo_nr_ios, 0);
+ init_waitqueue_head(&osc->oo_io_waitq);
+
osc->oo_root.rb_node = NULL;
INIT_LIST_HEAD(&osc->oo_hp_exts);
INIT_LIST_HEAD(&osc->oo_urgent_exts);
@@ -112,6 +115,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
LASSERT(list_empty(&osc->oo_ol_list));
+ LASSERT(!atomic_read(&osc->oo_nr_ios));
lu_object_fini(obj);
kmem_cache_free(osc_object_kmem, osc);
@@ -444,4 +448,19 @@ struct lu_object *osc_object_alloc(const struct lu_env *env,
return obj;
}
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc)
+{
+ struct l_wait_info lwi = { 0 };
+
+ CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n",
+ osc, atomic_read(&osc->oo_nr_ios));
+
+ l_wait_event(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios), &lwi);
+
+ /* Discard all pages of this object. */
+ osc_cache_truncate_start(env, osc, 0, NULL);
+
+ return 0;
+}
+
/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index e356e4af08e1..ab9d0d7bb943 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -370,12 +370,17 @@ static int osc_cache_too_much(struct client_obd *cli)
return lru_shrink_min(cli);
} else {
time64_t duration = ktime_get_real_seconds();
+ long timediff;
/* knock out pages by duration of no IO activity */
duration -= cli->cl_lru_last_used;
- duration >>= 6; /* approximately 1 minute */
- if (duration > 0 &&
- pages >= div64_s64((s64)budget, duration))
+ /*
+ * The difference shouldn't be more than 70 years
+ * so we can safely case to a long. Round to
+ * approximately 1 minute.
+ */
+ timediff = (long)(duration >> 6);
+ if (timediff > 0 && pages >= budget / timediff)
return lru_shrink_min(cli);
}
return 0;
@@ -943,4 +948,91 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli)
cli->cl_max_rpcs_in_flight;
}
+/**
+ * Return how many LRU pages in the cache of all OSC devices
+ *
+ * Return: return # of cached LRU pages times reclaimation tendency
+ * SHRINK_STOP if it cannot do any scanning in this time
+ */
+unsigned long osc_cache_shrink_count(struct shrinker *sk,
+ struct shrink_control *sc)
+{
+ struct client_obd *cli;
+ unsigned long cached = 0;
+
+ spin_lock(&osc_shrink_lock);
+ list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list)
+ cached += atomic_long_read(&cli->cl_lru_in_list);
+ spin_unlock(&osc_shrink_lock);
+
+ return (cached * sysctl_vfs_cache_pressure) / 100;
+}
+
+/**
+ * Scan and try to reclaim sc->nr_to_scan cached LRU pages
+ *
+ * Return: number of cached LRU pages reclaimed
+ * SHRINK_STOP if it cannot do any scanning in this time
+ *
+ * Linux kernel will loop calling this shrinker scan routine with
+ * sc->nr_to_scan = SHRINK_BATCH(128 for now) until kernel got enough memory.
+ *
+ * If sc->nr_to_scan is 0, the VM is querying the cache size, we don't need
+ * to scan and try to reclaim LRU pages, just return 0 and
+ * osc_cache_shrink_count() will report the LRU page number.
+ */
+unsigned long osc_cache_shrink_scan(struct shrinker *sk,
+ struct shrink_control *sc)
+{
+ struct client_obd *stop_anchor = NULL;
+ struct client_obd *cli;
+ struct lu_env *env;
+ long shrank = 0;
+ int refcheck;
+ int rc;
+
+ if (!sc->nr_to_scan)
+ return 0;
+
+ if (!(sc->gfp_mask & __GFP_FS))
+ return SHRINK_STOP;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return SHRINK_STOP;
+
+ spin_lock(&osc_shrink_lock);
+ while (!list_empty(&osc_shrink_list)) {
+ cli = list_entry(osc_shrink_list.next, struct client_obd,
+ cl_shrink_list);
+
+ if (!stop_anchor)
+ stop_anchor = cli;
+ else if (cli == stop_anchor)
+ break;
+
+ list_move_tail(&cli->cl_shrink_list, &osc_shrink_list);
+ spin_unlock(&osc_shrink_lock);
+
+ /* shrink no more than max_pages_per_rpc for an OSC */
+ rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) >
+ cli->cl_max_pages_per_rpc ?
+ cli->cl_max_pages_per_rpc :
+ sc->nr_to_scan - shrank, true);
+ if (rc > 0)
+ shrank += rc;
+
+ if (shrank >= sc->nr_to_scan)
+ goto out;
+
+ spin_lock(&osc_shrink_lock);
+ }
+ spin_unlock(&osc_shrink_lock);
+
+out:
+ cl_env_put(env, &refcheck);
+
+ return shrank;
+}
+
/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 7143564ae7e7..c4cfe18c3294 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -43,6 +43,7 @@
#include "../include/lprocfs_status.h"
#include "../include/lustre/lustre_ioctl.h"
#include "../include/lustre_debug.h"
+#include "../include/lustre_obdo.h"
#include "../include/lustre_param.h"
#include "../include/lustre_fid.h"
#include "../include/obd_class.h"
@@ -250,7 +251,7 @@ int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_setattr_interpret;
- CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
sa->sa_oa = oa;
sa->sa_upcall = upcall;
@@ -348,7 +349,7 @@ int osc_punch_base(struct obd_export *exp, struct obdo *oa,
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
- CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
sa->sa_oa = oa;
sa->sa_upcall = upcall;
@@ -429,7 +430,7 @@ int osc_sync_base(struct osc_object *obj, struct obdo *oa,
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = osc_sync_interpret;
- CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*fa) > sizeof(req->rq_async_args));
fa = ptlrpc_req_async_args(req);
fa->fa_obj = obj;
fa->fa_oa = oa;
@@ -1170,7 +1171,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
}
ptlrpc_request_set_replen(req);
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
aa->aa_oa = oa;
aa->aa_requested_nob = requested_nob;
@@ -1757,7 +1758,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
cl_req_attr_set(env, osc2cl(obj), crattr);
lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
INIT_LIST_HEAD(&aa->aa_oaps);
list_splice_init(&rpc_list, &aa->aa_oaps);
@@ -2038,7 +2039,7 @@ no_match:
if (!rc) {
struct osc_enqueue_args *aa;
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
aa->oa_exp = exp;
aa->oa_mode = einfo->ei_mode;
@@ -2080,9 +2081,9 @@ no_match:
}
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, union ldlm_policy_data *policy, __u32 mode,
- __u64 *flags, void *data, struct lustre_handle *lockh,
- int unref)
+ enum ldlm_type type, union ldlm_policy_data *policy,
+ enum ldlm_mode mode, __u64 *flags, void *data,
+ struct lustre_handle *lockh, int unref)
{
struct obd_device *obd = exp->exp_obd;
__u64 lflags = *flags;
@@ -2195,7 +2196,7 @@ static int osc_statfs_async(struct obd_export *exp,
}
req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
aa->aa_oi = oinfo;
@@ -2400,7 +2401,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
struct osc_brw_async_args *aa;
struct obdo *oa;
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
if (!oa) {
@@ -2479,6 +2480,33 @@ static int osc_disconnect(struct obd_export *exp)
return rc;
}
+static int osc_ldlm_resource_invalidate(struct cfs_hash *hs,
+ struct cfs_hash_bd *bd,
+ struct hlist_node *hnode, void *arg)
+{
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ struct osc_object *osc = NULL;
+ struct lu_env *env = arg;
+ struct ldlm_lock *lock;
+
+ lock_res(res);
+ list_for_each_entry(lock, &res->lr_granted, l_res_link) {
+ if (lock->l_ast_data && !osc) {
+ osc = lock->l_ast_data;
+ cl_object_get(osc2cl(osc));
+ }
+ lock->l_ast_data = NULL;
+ }
+ unlock_res(res);
+
+ if (osc) {
+ osc_object_invalidate(env, osc);
+ cl_object_put(env, osc2cl(osc));
+ }
+
+ return 0;
+}
+
static int osc_import_event(struct obd_device *obd,
struct obd_import *imp,
enum obd_import_event event)
@@ -2506,17 +2534,18 @@ static int osc_import_event(struct obd_device *obd,
struct lu_env *env;
int refcheck;
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+
env = cl_env_get(&refcheck);
if (!IS_ERR(env)) {
- /* Reset grants */
- cli = &obd->u.cli;
- /* all pages go to failing rpcs due to the invalid
- * import
- */
- osc_io_unplug(env, cli, NULL);
+ osc_io_unplug(env, &obd->u.cli, NULL);
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ osc_ldlm_resource_invalidate,
+ env, 0);
cl_env_put(env, &refcheck);
+
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
} else {
rc = PTR_ERR(env);
}
@@ -2646,6 +2675,11 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
+
+ spin_lock(&osc_shrink_lock);
+ list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
+ spin_unlock(&osc_shrink_lock);
+
return rc;
out_ptlrpcd_work:
@@ -2699,6 +2733,10 @@ static int osc_cleanup(struct obd_device *obd)
struct client_obd *cli = &obd->u.cli;
int rc;
+ spin_lock(&osc_shrink_lock);
+ list_del(&cli->cl_shrink_list);
+ spin_unlock(&osc_shrink_lock);
+
/* lru cleanup */
if (cli->cl_cache) {
LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
@@ -2766,7 +2804,14 @@ static struct obd_ops osc_obd_ops = {
.quotactl = osc_quotactl,
};
-extern struct lu_kmem_descr osc_caches[];
+struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list);
+DEFINE_SPINLOCK(osc_shrink_lock);
+
+static struct shrinker osc_cache_shrinker = {
+ .count_objects = osc_cache_shrink_count,
+ .scan_objects = osc_cache_shrink_scan,
+ .seeks = DEFAULT_SEEKS,
+};
static int __init osc_init(void)
{
@@ -2792,6 +2837,8 @@ static int __init osc_init(void)
if (rc)
goto out_kmem;
+ register_shrinker(&osc_cache_shrinker);
+
/* This is obviously too much memory, only prevent overflow here */
if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
rc = -EINVAL;
@@ -2830,6 +2877,7 @@ out_kmem:
static void /*__exit*/ osc_exit(void)
{
+ unregister_shrinker(&osc_cache_shrinker);
class_unregister_type(LUSTRE_OSC_NAME);
lu_kmem_fini(osc_caches);
ptlrpc_free_rq_pool(osc_rq_pool);