From 144cba1493fdd6e3e1980e439a31df877831ebcd Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 11:09:54 +0800 Subject: libceph: allow setting osd_req_op's flags Signed-off-by: Yan, Zheng Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, -- cgit From d50c97b566c5bbf990eff472e9feaa58fdebdd33 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 11:52:20 +0300 Subject: libceph: nuke time_sub() Unused since ceph got merged into mainline I guess. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..85ae9a889a3f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,15 +93,6 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) -{ - BUG_ON(time_after(b, a)); - return (long)a - (long)b; -} - struct ceph_mds_client; /* -- cgit From a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 12:02:17 +0300 Subject: libceph: store timeouts in jiffies, verify user input There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc. There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there. Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 85ae9a889a3f..d73a569f9bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,6 +93,11 @@ enum { CEPH_MOUNT_SHUTDOWN, }; +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) +{ + return timeout ?: MAX_SCHEDULE_TIMEOUT; +} + struct ceph_mds_client; /* -- cgit From f66fd9f0952187d274c13c136b74548f792c1925 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 17:26:13 +0800 Subject: ceph: pre-allocate data structure that tracks caps flushing Signed-off-by: Yan, Zheng --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d73a569f9bf5..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -174,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; -- cgit From b459be739f97e2062b2ba77cfe8ea198dbd58904 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 13:21:07 +0300 Subject: crush: sync up with userspace .. up to ceph.git commit 1db1abc8328d ("crush: eliminate ad hoc diff between kernel and userspace"). This fixes a bunch of recently pulled coding style issues and makes includes a bit cleaner. A patch "crush:Make the function crush_ln static" from Nicholas Krause is folded in as crush_ln() has been made static in userspace as well. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 40 ++++++++++++++++++++++++++++++++++++++-- include/linux/crush/hash.h | 6 ++++++ include/linux/crush/mapper.h | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, -- cgit