Skip to content

Commit 779fe0f

Browse files
ukernel authored and idryomov committed
ceph: rados pool namespace support
This patch adds code that decodes pool namespace information in cap messages and request replies. The pool namespace is saved in i_layout and will be passed to libceph when doing reads/writes. Signed-off-by: Yan, Zheng <[email protected]>
1 parent cd08e0a commit 779fe0f

File tree

8 files changed

+159
-77
lines changed

8 files changed

+159
-77
lines changed

fs/ceph/addr.c

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,14 +1730,16 @@ enum {
17301730
POOL_WRITE = 2,
17311731
};
17321732

1733-
static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
1733+
static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
1734+
s64 pool, struct ceph_string *pool_ns)
17341735
{
17351736
struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
17361737
struct ceph_mds_client *mdsc = fsc->mdsc;
17371738
struct ceph_osd_request *rd_req = NULL, *wr_req = NULL;
17381739
struct rb_node **p, *parent;
17391740
struct ceph_pool_perm *perm;
17401741
struct page **pages;
1742+
size_t pool_ns_len;
17411743
int err = 0, err2 = 0, have = 0;
17421744

17431745
down_read(&mdsc->pool_perm_rwsem);
@@ -1749,17 +1751,31 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
17491751
else if (pool > perm->pool)
17501752
p = &(*p)->rb_right;
17511753
else {
1752-
have = perm->perm;
1753-
break;
1754+
int ret = ceph_compare_string(pool_ns,
1755+
perm->pool_ns,
1756+
perm->pool_ns_len);
1757+
if (ret < 0)
1758+
p = &(*p)->rb_left;
1759+
else if (ret > 0)
1760+
p = &(*p)->rb_right;
1761+
else {
1762+
have = perm->perm;
1763+
break;
1764+
}
17541765
}
17551766
}
17561767
up_read(&mdsc->pool_perm_rwsem);
17571768
if (*p)
17581769
goto out;
17591770

1760-
dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool);
1771+
if (pool_ns)
1772+
dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n",
1773+
pool, (int)pool_ns->len, pool_ns->str);
1774+
else
1775+
dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool);
17611776

17621777
down_write(&mdsc->pool_perm_rwsem);
1778+
p = &mdsc->pool_perm_tree.rb_node;
17631779
parent = NULL;
17641780
while (*p) {
17651781
parent = *p;
@@ -1769,8 +1785,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
17691785
else if (pool > perm->pool)
17701786
p = &(*p)->rb_right;
17711787
else {
1772-
have = perm->perm;
1773-
break;
1788+
int ret = ceph_compare_string(pool_ns,
1789+
perm->pool_ns,
1790+
perm->pool_ns_len);
1791+
if (ret < 0)
1792+
p = &(*p)->rb_left;
1793+
else if (ret > 0)
1794+
p = &(*p)->rb_right;
1795+
else {
1796+
have = perm->perm;
1797+
break;
1798+
}
17741799
}
17751800
}
17761801
if (*p) {
@@ -1788,6 +1813,8 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
17881813
rd_req->r_flags = CEPH_OSD_FLAG_READ;
17891814
osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0);
17901815
rd_req->r_base_oloc.pool = pool;
1816+
if (pool_ns)
1817+
rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns);
17911818
ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino);
17921819

17931820
err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS);
@@ -1841,14 +1868,20 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
18411868
goto out_unlock;
18421869
}
18431870

1844-
perm = kmalloc(sizeof(*perm), GFP_NOFS);
1871+
pool_ns_len = pool_ns ? pool_ns->len : 0;
1872+
perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS);
18451873
if (!perm) {
18461874
err = -ENOMEM;
18471875
goto out_unlock;
18481876
}
18491877

18501878
perm->pool = pool;
18511879
perm->perm = have;
1880+
perm->pool_ns_len = pool_ns_len;
1881+
if (pool_ns_len > 0)
1882+
memcpy(perm->pool_ns, pool_ns->str, pool_ns_len);
1883+
perm->pool_ns[pool_ns_len] = 0;
1884+
18521885
rb_link_node(&perm->node, parent, p);
18531886
rb_insert_color(&perm->node, &mdsc->pool_perm_tree);
18541887
err = 0;
@@ -1860,19 +1893,20 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool)
18601893
out:
18611894
if (!err)
18621895
err = have;
1863-
dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err);
1896+
if (pool_ns)
1897+
dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n",
1898+
pool, (int)pool_ns->len, pool_ns->str, err);
1899+
else
1900+
dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err);
18641901
return err;
18651902
}
18661903

18671904
int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
18681905
{
18691906
s64 pool;
1907+
struct ceph_string *pool_ns;
18701908
int ret, flags;
18711909

1872-
/* does not support pool namespace yet */
1873-
if (ci->i_pool_ns_len)
1874-
return -EIO;
1875-
18761910
if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
18771911
NOPOOLPERM))
18781912
return 0;
@@ -1896,7 +1930,9 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
18961930
return 0;
18971931
}
18981932

1899-
ret = __ceph_pool_perm_get(ci, pool);
1933+
pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
1934+
ret = __ceph_pool_perm_get(ci, pool, pool_ns);
1935+
ceph_put_string(pool_ns);
19001936
if (ret < 0)
19011937
return ret;
19021938

@@ -1907,8 +1943,9 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
19071943
flags |= CEPH_I_POOL_WR;
19081944

19091945
spin_lock(&ci->i_ceph_lock);
1910-
if (pool == ci->i_layout.pool_id) {
1911-
ci->i_ceph_flags = flags;
1946+
if (pool == ci->i_layout.pool_id &&
1947+
pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) {
1948+
ci->i_ceph_flags |= flags;
19121949
} else {
19131950
pool = ci->i_layout.pool_id;
19141951
flags = ci->i_ceph_flags;

fs/ceph/caps.c

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2779,12 +2779,11 @@ static void invalidate_aliases(struct inode *inode)
27792779
*/
27802780
static void handle_cap_grant(struct ceph_mds_client *mdsc,
27812781
struct inode *inode, struct ceph_mds_caps *grant,
2782-
u64 inline_version,
2783-
void *inline_data, int inline_len,
2782+
struct ceph_string **pns, u64 inline_version,
2783+
void *inline_data, u32 inline_len,
27842784
struct ceph_buffer *xattr_buf,
27852785
struct ceph_mds_session *session,
2786-
struct ceph_cap *cap, int issued,
2787-
u32 pool_ns_len)
2786+
struct ceph_cap *cap, int issued)
27882787
__releases(ci->i_ceph_lock)
27892788
__releases(mdsc->snap_rwsem)
27902789
{
@@ -2896,11 +2895,18 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
28962895
if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
28972896
/* file layout may have changed */
28982897
s64 old_pool = ci->i_layout.pool_id;
2898+
struct ceph_string *old_ns;
2899+
28992900
ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout);
2900-
ci->i_pool_ns_len = pool_ns_len;
2901-
if (ci->i_layout.pool_id != old_pool)
2901+
old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
2902+
lockdep_is_held(&ci->i_ceph_lock));
2903+
rcu_assign_pointer(ci->i_layout.pool_ns, *pns);
2904+
2905+
if (ci->i_layout.pool_id != old_pool || *pns != old_ns)
29022906
ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
29032907

2908+
*pns = old_ns;
2909+
29042910
/* size/truncate_seq? */
29052911
queue_trunc = ceph_fill_file_size(inode, issued,
29062912
le32_to_cpu(grant->truncate_seq),
@@ -3423,20 +3429,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
34233429
struct ceph_cap *cap;
34243430
struct ceph_mds_caps *h;
34253431
struct ceph_mds_cap_peer *peer = NULL;
3426-
struct ceph_snap_realm *realm;
3432+
struct ceph_snap_realm *realm = NULL;
3433+
struct ceph_string *pool_ns = NULL;
34273434
int mds = session->s_mds;
34283435
int op, issued;
34293436
u32 seq, mseq;
34303437
struct ceph_vino vino;
3431-
u64 cap_id;
3432-
u64 size, max_size;
34333438
u64 tid;
34343439
u64 inline_version = 0;
34353440
void *inline_data = NULL;
34363441
u32 inline_len = 0;
34373442
void *snaptrace;
34383443
size_t snaptrace_len;
3439-
u32 pool_ns_len = 0;
34403444
void *p, *end;
34413445

34423446
dout("handle_caps from mds%d\n", mds);
@@ -3450,11 +3454,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
34503454
op = le32_to_cpu(h->op);
34513455
vino.ino = le64_to_cpu(h->ino);
34523456
vino.snap = CEPH_NOSNAP;
3453-
cap_id = le64_to_cpu(h->cap_id);
34543457
seq = le32_to_cpu(h->seq);
34553458
mseq = le32_to_cpu(h->migrate_seq);
3456-
size = le64_to_cpu(h->size);
3457-
max_size = le64_to_cpu(h->max_size);
34583459

34593460
snaptrace = h + 1;
34603461
snaptrace_len = le32_to_cpu(h->snap_trace_len);
@@ -3493,6 +3494,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
34933494
u64 flush_tid;
34943495
u32 caller_uid, caller_gid;
34953496
u32 osd_epoch_barrier;
3497+
u32 pool_ns_len;
34963498
/* version >= 5 */
34973499
ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
34983500
/* version >= 6 */
@@ -3502,6 +3504,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
35023504
ceph_decode_32_safe(&p, end, caller_gid, bad);
35033505
/* version >= 8 */
35043506
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
3507+
if (pool_ns_len > 0) {
3508+
ceph_decode_need(&p, end, pool_ns_len, bad);
3509+
pool_ns = ceph_find_or_create_string(p, pool_ns_len);
3510+
p += pool_ns_len;
3511+
}
35053512
}
35063513

35073514
/* lookup ino */
@@ -3522,7 +3529,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
35223529
cap = ceph_get_cap(mdsc, NULL);
35233530
cap->cap_ino = vino.ino;
35243531
cap->queue_release = 1;
3525-
cap->cap_id = cap_id;
3532+
cap->cap_id = le64_to_cpu(h->cap_id);
35263533
cap->mseq = mseq;
35273534
cap->seq = seq;
35283535
spin_lock(&session->s_cap_lock);
@@ -3557,10 +3564,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
35573564
}
35583565
handle_cap_import(mdsc, inode, h, peer, session,
35593566
&cap, &issued);
3560-
handle_cap_grant(mdsc, inode, h,
3567+
handle_cap_grant(mdsc, inode, h, &pool_ns,
35613568
inline_version, inline_data, inline_len,
3562-
msg->middle, session, cap, issued,
3563-
pool_ns_len);
3569+
msg->middle, session, cap, issued);
35643570
if (realm)
35653571
ceph_put_snap_realm(mdsc, realm);
35663572
goto done_unlocked;
@@ -3582,10 +3588,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
35823588
case CEPH_CAP_OP_GRANT:
35833589
__ceph_caps_issued(ci, &issued);
35843590
issued |= __ceph_caps_dirty(ci);
3585-
handle_cap_grant(mdsc, inode, h,
3591+
handle_cap_grant(mdsc, inode, h, &pool_ns,
35863592
inline_version, inline_data, inline_len,
3587-
msg->middle, session, cap, issued,
3588-
pool_ns_len);
3593+
msg->middle, session, cap, issued);
35893594
goto done_unlocked;
35903595

35913596
case CEPH_CAP_OP_FLUSH_ACK:
@@ -3616,6 +3621,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
36163621
mutex_unlock(&session->s_mutex);
36173622
done_unlocked:
36183623
iput(inode);
3624+
ceph_put_string(pool_ns);
36193625
return;
36203626

36213627
bad:

fs/ceph/inode.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
447447

448448
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
449449
RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
450-
ci->i_pool_ns_len = 0;
451450

452451
ci->i_fragtree = RB_ROOT;
453452
mutex_init(&ci->i_fragtree_mutex);
@@ -571,7 +570,7 @@ void ceph_destroy_inode(struct inode *inode)
571570
if (ci->i_xattrs.prealloc_blob)
572571
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
573572

574-
ceph_put_string(ci->i_layout.pool_ns);
573+
ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
575574

576575
call_rcu(&inode->i_rcu, ceph_i_callback);
577576
}
@@ -736,6 +735,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
736735
int issued = 0, implemented, new_issued;
737736
struct timespec mtime, atime, ctime;
738737
struct ceph_buffer *xattr_blob = NULL;
738+
struct ceph_string *pool_ns = NULL;
739739
struct ceph_cap *new_cap = NULL;
740740
int err = 0;
741741
bool wake = false;
@@ -763,6 +763,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
763763
iinfo->xattr_len);
764764
}
765765

766+
if (iinfo->pool_ns_len > 0)
767+
pool_ns = ceph_find_or_create_string(iinfo->pool_ns_data,
768+
iinfo->pool_ns_len);
769+
766770
spin_lock(&ci->i_ceph_lock);
767771

768772
/*
@@ -818,11 +822,18 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
818822
if (new_version ||
819823
(new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
820824
s64 old_pool = ci->i_layout.pool_id;
825+
struct ceph_string *old_ns;
826+
821827
ceph_file_layout_from_legacy(&ci->i_layout, &info->layout);
822-
ci->i_pool_ns_len = iinfo->pool_ns_len;
823-
if (ci->i_layout.pool_id != old_pool)
828+
old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
829+
lockdep_is_held(&ci->i_ceph_lock));
830+
rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns);
831+
832+
if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns)
824833
ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
825834

835+
pool_ns = old_ns;
836+
826837
queue_trunc = ceph_fill_file_size(inode, issued,
827838
le32_to_cpu(info->truncate_seq),
828839
le64_to_cpu(info->truncate_size),
@@ -989,6 +1000,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
9891000
ceph_put_cap(mdsc, new_cap);
9901001
if (xattr_blob)
9911002
ceph_buffer_put(xattr_blob);
1003+
ceph_put_string(pool_ns);
9921004
return err;
9931005
}
9941006

fs/ceph/ioctl.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
213213
ceph_ino(inode), dl.object_no);
214214

215215
oloc.pool = ci->i_layout.pool_id;
216+
oloc.pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
216217
ceph_oid_printf(&oid, "%s", dl.object_name);
217218

218219
r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
220+
221+
ceph_oloc_destroy(&oloc);
219222
if (r < 0) {
220223
up_read(&osdc->lock);
221224
return r;

0 commit comments

Comments
 (0)