summaryrefslogtreecommitdiff
path: root/include/linux/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/ceph')
-rw-r--r--include/linux/ceph/buffer.h1
-rw-r--r--include/linux/ceph/ceph_features.h111
-rw-r--r--include/linux/ceph/ceph_fs.h46
-rw-r--r--include/linux/ceph/decode.h17
-rw-r--r--include/linux/ceph/libceph.h19
-rw-r--r--include/linux/ceph/messenger.h17
-rw-r--r--include/linux/ceph/osd_client.h30
-rw-r--r--include/linux/ceph/osdmap.h114
-rw-r--r--include/linux/ceph/rados.h22
9 files changed, 262 insertions, 115 deletions
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
index 58d19014068f..07ad423cc37f 100644
--- a/include/linux/ceph/buffer.h
+++ b/include/linux/ceph/buffer.h
@@ -17,7 +17,6 @@ struct ceph_buffer {
struct kref kref;
struct kvec vec;
size_t alloc_len;
- bool is_vmalloc;
};
extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 4c42080347af..d12659ce550d 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -4,42 +4,80 @@
/*
* feature bits
*/
-#define CEPH_FEATURE_UID (1<<0)
-#define CEPH_FEATURE_NOSRCADDR (1<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
-#define CEPH_FEATURE_FLOCK (1<<3)
-#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
-#define CEPH_FEATURE_MONNAMES (1<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
-#define CEPH_FEATURE_OBJECTLOCATOR (1<<8)
-#define CEPH_FEATURE_PGID64 (1<<9)
-#define CEPH_FEATURE_INCSUBOSDMAP (1<<10)
-#define CEPH_FEATURE_PGPOOL3 (1<<11)
-#define CEPH_FEATURE_OSDREPLYMUX (1<<12)
-#define CEPH_FEATURE_OSDENC (1<<13)
-#define CEPH_FEATURE_OMAP (1<<14)
-#define CEPH_FEATURE_MONENC (1<<15)
-#define CEPH_FEATURE_QUERY_T (1<<16)
-#define CEPH_FEATURE_INDEP_PG_MAP (1<<17)
-#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
-#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19)
-#define CEPH_FEATURE_MON_NULLROUTE (1<<20)
-#define CEPH_FEATURE_MON_GV (1<<21)
-#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
-#define CEPH_FEATURE_MSG_AUTH (1<<23)
-#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
-#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
-#define CEPH_FEATURE_CREATEPOOLID (1<<26)
-#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
-#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
-#define CEPH_FEATURE_MDSENC (1<<29)
-#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
+#define CEPH_FEATURE_UID (1ULL<<0)
+#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
+#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
+#define CEPH_FEATURE_FLOCK (1ULL<<3)
+#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
+#define CEPH_FEATURE_MONNAMES (1ULL<<5)
+#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
+#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
+#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
+#define CEPH_FEATURE_PGID64 (1ULL<<9)
+#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
+#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
+#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
+#define CEPH_FEATURE_OSDENC (1ULL<<13)
+#define CEPH_FEATURE_OMAP (1ULL<<14)
+#define CEPH_FEATURE_MONENC (1ULL<<15)
+#define CEPH_FEATURE_QUERY_T (1ULL<<16)
+#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
+#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
+#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
+#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
+#define CEPH_FEATURE_MON_GV (1ULL<<21)
+#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
+#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
+#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
+#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
+#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
+#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
+#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
+#define CEPH_FEATURE_MDSENC (1ULL<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
+#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
+#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
+#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
+#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
+#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
+#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
+#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
+#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
+#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
+/* The process supports new-style OSDMap encoding. Monitors also use
+ this bit to determine if peers support NAK messages. */
+#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
+#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
+#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
+#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
+
+/*
+ * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
+ * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
+ * to mean 33 bit ~0, and introduce a helper below to do the
+ * translation.
+ *
+ * This was introduced by ceph.git commit
+ * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
+ * and fixed by ceph.git commit
+ * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
+ */
+#define CEPH_FEATURE_RESERVED (1ULL<<63)
+
+/*
+ * Translate a feature vector into a usable one.  A vector that has
+ * CEPH_FEATURE_RESERVED set is replaced by the low 33 bits all set;
+ * any other vector is returned unchanged.
+ */
+static inline u64 ceph_sanitize_features(u64 features)
+{
+	/* bits 0..32, i.e. everything through OSD_SNAPMAPPER */
+	const u64 through_snapmapper = 0x1ffffffffull;
+
+	if (!(features & CEPH_FEATURE_RESERVED))
+		return features;
+
+	return through_snapmapper;
+}
/*
* Features supported.
*/
-#define CEPH_FEATURES_SUPPORTED_DEFAULT \
+#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_PGID64 | \
@@ -48,7 +86,13 @@
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
- CEPH_FEATURE_OSDHASHPSPOOL)
+ CEPH_FEATURE_OSDHASHPSPOOL | \
+ CEPH_FEATURE_OSD_CACHEPOOL | \
+ CEPH_FEATURE_CRUSH_V2 | \
+ CEPH_FEATURE_EXPORT_PEER | \
+ CEPH_FEATURE_OSDMAP_ENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES3 | \
+ CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
@@ -56,4 +100,5 @@
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC)
+
#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 2ad7b860f062..5f6db18d72e8 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -53,6 +53,29 @@ struct ceph_file_layout {
__le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
} __attribute__ ((packed));
+#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
+#define ceph_file_layout_stripe_count(l) \
+ ((__s32)le32_to_cpu((l).fl_stripe_count))
+#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
+#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
+#define ceph_file_layout_object_su(l) \
+ ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
+#define ceph_file_layout_pg_pool(l) \
+ ((__s32)le32_to_cpu((l).fl_pg_pool))
+
+/* stripe width == stripe unit * stripe count, both read from the layout */
+static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
+{
+	unsigned stripe_unit = le32_to_cpu(l->fl_stripe_unit);
+	unsigned stripe_count = le32_to_cpu(l->fl_stripe_count);
+
+	return stripe_unit * stripe_count;
+}
+
+/* "period" == bytes before we start on a new set of objects */
+static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
+{
+	/* period == object size * stripe count */
+	unsigned object_size = le32_to_cpu(l->fl_object_size);
+	unsigned stripe_count = le32_to_cpu(l->fl_stripe_count);
+
+	return object_size * stripe_count;
+}
+
#define CEPH_MIN_STRIPE_UNIT 65536
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
@@ -282,6 +305,8 @@ enum {
CEPH_SESSION_RENEWCAPS,
CEPH_SESSION_STALE,
CEPH_SESSION_RECALL_STATE,
+ CEPH_SESSION_FLUSHMSG,
+ CEPH_SESSION_FLUSHMSG_ACK,
};
extern const char *ceph_session_op_name(int op);
@@ -307,6 +332,7 @@ enum {
CEPH_MDS_OP_LOOKUPHASH = 0x00102,
CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
CEPH_MDS_OP_LOOKUPINO = 0x00104,
+ CEPH_MDS_OP_LOOKUPNAME = 0x00105,
CEPH_MDS_OP_SETXATTR = 0x01105,
CEPH_MDS_OP_RMXATTR = 0x01106,
@@ -348,8 +374,9 @@ extern const char *ceph_mds_op_name(int op);
/*
* Ceph setxattr request flags.
*/
-#define CEPH_XATTR_CREATE 1
-#define CEPH_XATTR_REPLACE 2
+#define CEPH_XATTR_CREATE (1 << 0)
+#define CEPH_XATTR_REPLACE (1 << 1)
+#define CEPH_XATTR_REMOVE (1U << 31) /* 1U: shifting int 1 into bit 31 is undefined */
union ceph_mds_request_args {
struct {
@@ -394,8 +421,8 @@ union ceph_mds_request_args {
struct {
__u8 rule; /* currently fcntl or flock */
__u8 type; /* shared, exclusive, remove*/
+ __le64 owner; /* owner of the lock */
__le64 pid; /* process id requesting the lock */
- __le64 pid_namespace;
__le64 start; /* initial location to lock */
__le64 length; /* num bytes to lock from start */
__u8 wait; /* will caller wait for lock to become available? */
@@ -457,7 +484,8 @@ struct ceph_mds_reply_cap {
__u8 flags; /* CEPH_CAP_FLAG_* */
} __attribute__ ((packed));
-#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */
+#define CEPH_CAP_FLAG_AUTH (1 << 0) /* cap is issued by auth mds */
+#define CEPH_CAP_FLAG_RELEASE (1 << 1) /* release the cap */
/* inode record, for bundling with mds reply */
struct ceph_mds_reply_inode {
@@ -505,8 +533,8 @@ struct ceph_filelock {
__le64 start;/* file offset to start lock at */
__le64 length; /* num bytes to lock; 0 for all following start */
__le64 client; /* which client holds the lock */
+ __le64 owner; /* owner of the lock */
__le64 pid; /* process id holding the lock on the client */
- __le64 pid_namespace;
__u8 type; /* shared lock, exclusive lock, or unlock */
} __attribute__ ((packed));
@@ -658,6 +686,14 @@ struct ceph_mds_caps {
__le32 time_warp_seq;
} __attribute__ ((packed));
+/*
+ * Packed record: one 64-bit and three 32-bit little-endian fields followed
+ * by a one-byte flags field.
+ * NOTE(review): judging by the name, this describes a cap as known to a
+ * peer MDS -- confirm against the code that encodes/decodes it.
+ */
+struct ceph_mds_cap_peer {
+ __le64 cap_id;
+ __le32 seq;
+ __le32 mseq;
+ __le32 mds;
+ __u8 flags;
+} __attribute__ ((packed));
+
/* cap release msg head */
struct ceph_mds_cap_release {
__le32 num; /* number of cap_items that follow */
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 0442c3d800f0..a6ef9cc267ec 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -8,23 +8,6 @@
#include <linux/ceph/types.h>
-/* This seemed to be the easiest place to define these */
-
-#define U8_MAX ((u8)(~0U))
-#define U16_MAX ((u16)(~0U))
-#define U32_MAX ((u32)(~0U))
-#define U64_MAX ((u64)(~0ULL))
-
-#define S8_MAX ((s8)(U8_MAX >> 1))
-#define S16_MAX ((s16)(U16_MAX >> 1))
-#define S32_MAX ((s32)(U32_MAX >> 1))
-#define S64_MAX ((s64)(U64_MAX >> 1LL))
-
-#define S8_MIN ((s8)(-S8_MAX - 1))
-#define S16_MIN ((s16)(-S16_MAX - 1))
-#define S32_MIN ((s32)(-S32_MAX - 1))
-#define S64_MIN ((s64)(-S64_MAX - 1LL))
-
/*
* in all cases,
* void **p pointer to position pointer
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2e3024881a5e..2f49aa4c4f7f 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -122,8 +122,8 @@ struct ceph_client {
int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
- u32 supported_features;
- u32 required_features;
+ u64 supported_features;
+ u64 required_features;
struct ceph_messenger msgr; /* messenger instance */
struct ceph_mon_client monc;
@@ -173,15 +173,18 @@ static inline int calc_pages_for(u64 off, u64 len)
(off >> PAGE_CACHE_SHIFT);
}
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
/* ceph_common.c */
extern bool libceph_compatible(void *data);
extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-extern struct kmem_cache *ceph_inode_cachep;
-extern struct kmem_cache *ceph_cap_cachep;
-extern struct kmem_cache *ceph_dentry_cachep;
-extern struct kmem_cache *ceph_file_cachep;
+extern void *ceph_kvmalloc(size_t size, gfp_t flags);
+extern void ceph_kvfree(const void *ptr);
extern struct ceph_options *ceph_parse_options(char *options,
const char *dev_name, const char *dev_name_end,
@@ -192,8 +195,8 @@ extern int ceph_compare_options(struct ceph_options *new_opt,
struct ceph_client *client);
extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
void *private,
- unsigned supported_features,
- unsigned required_features);
+ u64 supported_features,
+ u64 required_features);
extern u64 ceph_client_id(struct ceph_client *client);
extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 7c1420bb1dce..d21f2dba0731 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,6 +1,7 @@
#ifndef __FS_CEPH_MESSENGER_H
#define __FS_CEPH_MESSENGER_H
+#include <linux/blk_types.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/net.h>
@@ -60,8 +61,8 @@ struct ceph_messenger {
u32 global_seq;
spinlock_t global_seq_lock;
- u32 supported_features;
- u32 required_features;
+ u64 supported_features;
+ u64 required_features;
};
enum ceph_msg_data_type {
@@ -119,8 +120,7 @@ struct ceph_msg_data_cursor {
#ifdef CONFIG_BLOCK
struct { /* bio */
struct bio *bio; /* bio from list */
- unsigned int vector_index; /* vector from bio */
- unsigned int vector_offset; /* bytes from vector */
+ struct bvec_iter bvec_iter;
};
#endif /* CONFIG_BLOCK */
struct { /* pages */
@@ -154,10 +154,9 @@ struct ceph_msg {
struct list_head list_head; /* links for connection lists */
struct kref kref;
- bool front_is_vmalloc;
bool more_to_follow;
bool needs_out_seq;
- int front_max;
+ int front_alloc_len;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool;
@@ -192,7 +191,7 @@ struct ceph_connection {
struct ceph_entity_name peer_name; /* peer name */
- unsigned peer_features;
+ u64 peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
@@ -256,8 +255,8 @@ extern void ceph_msgr_flush(void);
extern void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
- u32 supported_features,
- u32 required_features,
+ u64 supported_features,
+ u64 required_features,
bool nocrc);
extern void ceph_con_init(struct ceph_connection *con, void *private,
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 8f47625a0661..94ec69672164 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -12,12 +12,6 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
-/*
- * Maximum object name size
- * (must be at least as big as RBD_MAX_MD_NAME_LEN -- currently 100)
- */
-#define MAX_OBJ_NAME_SIZE 100
-
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
@@ -49,7 +43,7 @@ struct ceph_osd {
};
-#define CEPH_OSD_MAX_OP 2
+#define CEPH_OSD_MAX_OP 3
enum ceph_osd_data_type {
CEPH_OSD_DATA_TYPE_NONE = 0,
@@ -82,6 +76,7 @@ struct ceph_osd_data {
struct ceph_osd_req_op {
u16 op; /* CEPH_OSD_OP_* */
+ u32 flags; /* CEPH_OSD_OP_FLAG_* */
u32 payload_len;
union {
struct ceph_osd_data raw_data_in;
@@ -108,6 +103,10 @@ struct ceph_osd_req_op {
u32 timeout;
__u8 flag;
} watch;
+ struct {
+ u64 expected_object_size;
+ u64 expected_write_size;
+ } alloc_hint;
};
};
@@ -138,6 +137,7 @@ struct ceph_osd_request {
__le64 *r_request_pool;
void *r_request_pgid;
__le32 *r_request_attempts;
+ bool r_paused;
struct ceph_eversion *r_request_reassert_version;
int r_result;
@@ -158,15 +158,21 @@ struct ceph_osd_request {
struct inode *r_inode; /* for use by callbacks */
void *r_priv; /* ditto */
- char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */
- int r_oid_len;
+ struct ceph_object_locator r_base_oloc;
+ struct ceph_object_id r_base_oid;
+ struct ceph_object_locator r_target_oloc;
+ struct ceph_object_id r_target_oid;
+
u64 r_snapid;
unsigned long r_stamp; /* send OR check time */
- struct ceph_file_layout r_file_layout;
struct ceph_snap_context *r_snapc; /* snap context for writes */
};
+struct ceph_request_redirect {
+ struct ceph_object_locator oloc;
+};
+
struct ceph_osd_event {
u64 cookie;
int one_shot;
@@ -292,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag);
+extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size);
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index d05cc4451af6..561ea896c657 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -35,20 +35,53 @@ struct ceph_pg_pool_info {
u8 object_hash;
u32 pg_num, pgp_num;
int pg_num_mask, pgp_num_mask;
+ s64 read_tier;
+ s64 write_tier; /* wins for read+write ops */
u64 flags;
char *name;
};
+/*
+ * Tell whether OSDs may be shifted for this pool: true for replicated
+ * pools, false for erasure-coded pools.  Any other pool type is a bug.
+ */
+static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
+{
+	switch (pool->type) {
+	case CEPH_POOL_TYPE_REP:
+		return true;
+	case CEPH_POOL_TYPE_EC:
+		return false;
+	default:
+		/*
+		 * BUG() rather than BUG_ON(1): the compiler does not always
+		 * treat BUG_ON(1) as unreachable, which leaves a path that
+		 * falls off the end of this non-void function.
+		 */
+		BUG();
+	}
+}
+
struct ceph_object_locator {
- uint64_t pool;
- char *key;
+ s64 pool;
+};
+
+/*
+ * Maximum object name length supported by the kernel client
+ *
+ * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
+ */
+#define CEPH_MAX_OID_NAME_LEN 100
+
+/*
+ * Fixed-size object name buffer plus its length.  ceph_oid_set_name()
+ * copies exactly name_len bytes and writes no terminator, so name[] must
+ * not be treated as a NUL-terminated string; always pair it with name_len.
+ */
+struct ceph_object_id {
+ char name[CEPH_MAX_OID_NAME_LEN];
+ int name_len;
};
struct ceph_pg_mapping {
struct rb_node node;
struct ceph_pg pgid;
- int len;
- int osds[];
+
+ union {
+ struct {
+ int len;
+ int osds[];
+ } pg_temp;
+ struct {
+ int osd;
+ } primary_temp;
+ };
};
struct ceph_osdmap {
@@ -65,44 +98,60 @@ struct ceph_osdmap {
struct ceph_entity_addr *osd_addr;
struct rb_root pg_temp;
+ struct rb_root primary_temp;
+
+ u32 *osd_primary_affinity;
+
struct rb_root pg_pools;
u32 pool_max;
/* the CRUSH map specifies the mapping of placement groups to
* the list of osds that store+replicate them. */
struct crush_map *crush;
+
+ struct mutex crush_scratch_mutex;
+ int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
};
-/*
- * file layout helpers
- */
-#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
-#define ceph_file_layout_stripe_count(l) \
- ((__s32)le32_to_cpu((l).fl_stripe_count))
-#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
-#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
-#define ceph_file_layout_object_su(l) \
- ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_pool(l) \
- ((__s32)le32_to_cpu((l).fl_pg_pool))
-
-static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
+static inline void ceph_oid_set_name(struct ceph_object_id *oid,
+ const char *name)
{
- return le32_to_cpu(l->fl_stripe_unit) *
- le32_to_cpu(l->fl_stripe_count);
+ int len;
+
+ len = strlen(name);
+ if (len > sizeof(oid->name)) {
+ WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n",
+ name, len, sizeof(oid->name));
+ len = sizeof(oid->name);
+ }
+
+ memcpy(oid->name, name, len);
+ oid->name_len = len;
}
-/* "period" == bytes before i start on a new set of objects */
-static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
+static inline void ceph_oid_copy(struct ceph_object_id *dest,
+ struct ceph_object_id *src)
{
- return le32_to_cpu(l->fl_object_size) *
- le32_to_cpu(l->fl_stripe_count);
+ BUG_ON(src->name_len > sizeof(dest->name));
+ memcpy(dest->name, src->name, src->name_len);
+ dest->name_len = src->name_len;
}
+/*
+ * Return 1 if @osd is a valid index into the map's osd_state[] array and
+ * that entry has CEPH_OSD_EXISTS set, 0 otherwise.
+ */
+static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd)
+{
+	/* reject out-of-range ids before indexing osd_state[] */
+	if (osd < 0 || osd >= map->max_osd)
+		return 0;
+
+	return (map->osd_state[osd] & CEPH_OSD_EXISTS) != 0;
+}
static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
{
- return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
+ return ceph_osd_exists(map, osd) &&
+ (map->osd_state[osd] & CEPH_OSD_UP);
+}
+
+static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd)
+{
+ return !ceph_osd_is_up(map, osd);
}
static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
@@ -111,6 +160,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
}
extern char *ceph_osdmap_state_str(char *str, int len, int state);
+extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
int osd)
@@ -143,7 +193,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
return 0;
}
-extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
+extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *map,
struct ceph_messenger *msgr);
@@ -155,14 +205,20 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 *bno, u64 *oxoff, u64 *oxlen);
/* calculate mapping of object to a placement group */
-extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
- struct ceph_osdmap *osdmap, uint64_t pool);
+extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
+ struct ceph_object_locator *oloc,
+ struct ceph_object_id *oid,
+ struct ceph_pg *pg_out);
+
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
struct ceph_pg pgid,
- int *acting);
+ int *osds, int *primary);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
+extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
+ u64 id);
+
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 68c96a508ac2..f20e0d8a2155 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -81,8 +81,9 @@ struct ceph_pg_v1 {
*/
#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
-#define CEPH_PG_TYPE_REP 1
-#define CEPH_PG_TYPE_RAID4 2
+#define CEPH_POOL_TYPE_REP 1
+#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */
+#define CEPH_POOL_TYPE_EC 3
/*
* stable_mod func is used to control number of placement groups.
@@ -133,6 +134,10 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSD_IN 0x10000
#define CEPH_OSD_OUT 0
+/* osd primary-affinity. fixed point value: 0x10000 == baseline */
+#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000
+#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000
+
/*
* osd map flag bits
@@ -227,6 +232,9 @@ enum {
CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+ /* hints */
+ CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35,
+
/** multi **/
CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
@@ -344,6 +352,10 @@ enum {
CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
+ CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */
+ CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */
+ CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */
+ CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */
};
enum {
@@ -378,7 +390,7 @@ enum {
*/
struct ceph_osd_op {
__le16 op; /* CEPH_OSD_OP_* */
- __le32 flags; /* CEPH_OSD_FLAG_* */
+ __le32 flags; /* CEPH_OSD_OP_FLAG_* */
union {
struct {
__le64 offset, length;
@@ -412,6 +424,10 @@ struct ceph_osd_op {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
+ struct {
+ __le64 expected_object_size;
+ __le64 expected_write_size;
+ } __attribute__ ((packed)) alloc_hint;
};
__le32 payload_len;
} __attribute__ ((packed));