Skip to content

Commit fe33032

Browse files
ukernel authored and idryomov committed
ceph: add mount option to limit caps count
If the number of caps exceeds the limit, ceph_trim_dentries() also trims dentries with valid leases. Trimming a dentry releases its reference to the associated inode, which may evict the inode and release its caps. By default, there is no limit on the caps count. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
1 parent 37c4efc commit fe33032

8 files changed

Lines changed: 91 additions & 21 deletions

File tree

Documentation/filesystems/ceph.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ Mount Options
118118
of a non-responsive Ceph file system. The default is 30
119119
seconds.
120120

121+
caps_max=X
122+
Specify the maximum number of caps to hold. Unused caps are released
123+
when the number of caps exceeds the limit. The default is 0 (no limit).
124+
121125
rbytes
122126
When stat() is called on a directory, set st_size to 'rbytes',
123127
the summation of file sizes over all files nested beneath that

fs/ceph/caps.c

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,17 @@ void ceph_caps_finalize(struct ceph_mds_client *mdsc)
148148
spin_unlock(&mdsc->caps_list_lock);
149149
}
150150

151-
void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
151+
void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
152+
struct ceph_mount_options *fsopt)
152153
{
153154
spin_lock(&mdsc->caps_list_lock);
154-
mdsc->caps_min_count += delta;
155-
BUG_ON(mdsc->caps_min_count < 0);
155+
mdsc->caps_min_count = fsopt->max_readdir;
156+
if (mdsc->caps_min_count < 1024)
157+
mdsc->caps_min_count = 1024;
158+
mdsc->caps_use_max = fsopt->caps_max;
159+
if (mdsc->caps_use_max > 0 &&
160+
mdsc->caps_use_max < mdsc->caps_min_count)
161+
mdsc->caps_use_max = mdsc->caps_min_count;
156162
spin_unlock(&mdsc->caps_list_lock);
157163
}
158164

@@ -272,6 +278,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
272278
if (!err) {
273279
BUG_ON(have + alloc != need);
274280
ctx->count = need;
281+
ctx->used = 0;
275282
}
276283

277284
spin_lock(&mdsc->caps_list_lock);
@@ -295,13 +302,24 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
295302
}
296303

297304
void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
298-
struct ceph_cap_reservation *ctx)
305+
struct ceph_cap_reservation *ctx)
299306
{
307+
bool reclaim = false;
308+
if (!ctx->count)
309+
return;
310+
300311
dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
301312
spin_lock(&mdsc->caps_list_lock);
302313
__ceph_unreserve_caps(mdsc, ctx->count);
303314
ctx->count = 0;
315+
316+
if (mdsc->caps_use_max > 0 &&
317+
mdsc->caps_use_count > mdsc->caps_use_max)
318+
reclaim = true;
304319
spin_unlock(&mdsc->caps_list_lock);
320+
321+
if (reclaim)
322+
ceph_reclaim_caps_nr(mdsc, ctx->used);
305323
}
306324

307325
struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -346,6 +364,7 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
346364
BUG_ON(list_empty(&mdsc->caps_list));
347365

348366
ctx->count--;
367+
ctx->used++;
349368
mdsc->caps_reserve_count--;
350369
mdsc->caps_use_count++;
351370

@@ -500,12 +519,12 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
500519
static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
501520
struct ceph_inode_info *ci)
502521
{
503-
struct ceph_mount_options *ma = mdsc->fsc->mount_options;
522+
struct ceph_mount_options *opt = mdsc->fsc->mount_options;
504523

505524
ci->i_hold_caps_min = round_jiffies(jiffies +
506-
ma->caps_wanted_delay_min * HZ);
525+
opt->caps_wanted_delay_min * HZ);
507526
ci->i_hold_caps_max = round_jiffies(jiffies +
508-
ma->caps_wanted_delay_max * HZ);
527+
opt->caps_wanted_delay_max * HZ);
509528
dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
510529
ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
511530
}

fs/ceph/dir.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1224,6 +1224,7 @@ enum {
12241224

12251225
struct ceph_lease_walk_control {
12261226
bool dir_lease;
1227+
bool expire_dir_lease;
12271228
unsigned long nr_to_scan;
12281229
unsigned long dir_lease_ttl;
12291230
};
@@ -1345,16 +1346,31 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
13451346
/* Move dentry to tail of dir lease list if we don't want
13461347
* to delete it. So dentries in the list are checked in a
13471348
* round robin manner */
1348-
return TOUCH;
1349+
if (!lwc->expire_dir_lease)
1350+
return TOUCH;
1351+
if (dentry->d_lockref.count > 0 ||
1352+
(di->flags & CEPH_DENTRY_REFERENCED))
1353+
return TOUCH;
1354+
/* invalidate dir lease */
1355+
di->lease_shared_gen = 0;
13491356
}
13501357
return DELETE;
13511358
}
13521359

13531360
int ceph_trim_dentries(struct ceph_mds_client *mdsc)
13541361
{
13551362
struct ceph_lease_walk_control lwc;
1363+
unsigned long count;
13561364
unsigned long freed;
13571365

1366+
spin_lock(&mdsc->caps_list_lock);
1367+
if (mdsc->caps_use_max > 0 &&
1368+
mdsc->caps_use_count > mdsc->caps_use_max)
1369+
count = mdsc->caps_use_count - mdsc->caps_use_max;
1370+
else
1371+
count = 0;
1372+
spin_unlock(&mdsc->caps_list_lock);
1373+
13581374
lwc.dir_lease = false;
13591375
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
13601376
freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
@@ -1365,6 +1381,8 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
13651381
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
13661382

13671383
lwc.dir_lease = true;
1384+
lwc.expire_dir_lease = freed < count;
1385+
lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
13681386
freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
13691387
if (!lwc.nr_to_scan) /* more to check */
13701388
return -EAGAIN;

fs/ceph/mds_client.c

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1965,6 +1965,18 @@ void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
19651965
}
19661966
}
19671967

1968+
void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
1969+
{
1970+
int val;
1971+
if (!nr)
1972+
return;
1973+
val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
1974+
if (!(val % CEPH_CAPS_PER_RELEASE)) {
1975+
atomic_set(&mdsc->cap_reclaim_pending, 0);
1976+
ceph_queue_cap_reclaim_work(mdsc);
1977+
}
1978+
}
1979+
19681980
/*
19691981
* requests
19701982
*/
@@ -2878,7 +2890,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
28782890
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
28792891
req->r_op == CEPH_MDS_OP_LSSNAP))
28802892
ceph_readdir_prepopulate(req, req->r_session);
2881-
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
28822893
}
28832894
current->journal_info = NULL;
28842895
mutex_unlock(&req->r_fill_mutex);
@@ -2887,12 +2898,18 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
28872898
if (realm)
28882899
ceph_put_snap_realm(mdsc, realm);
28892900

2890-
if (err == 0 && req->r_target_inode &&
2891-
test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
2892-
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
2893-
spin_lock(&ci->i_unsafe_lock);
2894-
list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
2895-
spin_unlock(&ci->i_unsafe_lock);
2901+
if (err == 0) {
2902+
if (req->r_target_inode &&
2903+
test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
2904+
struct ceph_inode_info *ci =
2905+
ceph_inode(req->r_target_inode);
2906+
spin_lock(&ci->i_unsafe_lock);
2907+
list_add_tail(&req->r_unsafe_target_item,
2908+
&ci->i_unsafe_iops);
2909+
spin_unlock(&ci->i_unsafe_lock);
2910+
}
2911+
2912+
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
28962913
}
28972914
out_err:
28982915
mutex_lock(&mdsc->mutex);
@@ -4083,13 +4100,14 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
40834100
spin_lock_init(&mdsc->cap_dirty_lock);
40844101
init_waitqueue_head(&mdsc->cap_flushing_wq);
40854102
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
4103+
atomic_set(&mdsc->cap_reclaim_pending, 0);
40864104

40874105
spin_lock_init(&mdsc->dentry_list_lock);
40884106
INIT_LIST_HEAD(&mdsc->dentry_leases);
40894107
INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
40904108

40914109
ceph_caps_init(mdsc);
4092-
ceph_adjust_min_caps(mdsc, fsc->min_caps);
4110+
ceph_adjust_caps_max_min(mdsc, fsc->mount_options);
40934111

40944112
spin_lock_init(&mdsc->snapid_map_lock);
40954113
mdsc->snapid_map_tree = RB_ROOT;

fs/ceph/mds_client.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ struct ceph_mds_client {
379379
wait_queue_head_t cap_flushing_wq;
380380

381381
struct work_struct cap_reclaim_work;
382+
atomic_t cap_reclaim_pending;
382383

383384
/*
384385
* Cap reservations
@@ -396,6 +397,7 @@ struct ceph_mds_client {
396397
unreserved) */
397398
int caps_total_count; /* total caps allocated */
398399
int caps_use_count; /* in use */
400+
int caps_use_max; /* max used caps */
399401
int caps_reserve_count; /* unused, reserved */
400402
int caps_avail_count; /* unused, unreserved */
401403
int caps_min_count; /* keep at least this many
@@ -465,6 +467,7 @@ extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
465467
extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
466468
struct ceph_mds_session *session);
467469
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
470+
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
468471
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
469472

470473
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,

fs/ceph/super.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ enum {
133133
Opt_rasize,
134134
Opt_caps_wanted_delay_min,
135135
Opt_caps_wanted_delay_max,
136+
Opt_caps_max,
136137
Opt_readdir_max_entries,
137138
Opt_readdir_max_bytes,
138139
Opt_congestion_kb,
@@ -175,6 +176,7 @@ static match_table_t fsopt_tokens = {
175176
{Opt_rasize, "rasize=%d"},
176177
{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
177178
{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
179+
{Opt_caps_max, "caps_max=%d"},
178180
{Opt_readdir_max_entries, "readdir_max_entries=%d"},
179181
{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
180182
{Opt_congestion_kb, "write_congestion_kb=%d"},
@@ -286,6 +288,11 @@ static int parse_fsopt_token(char *c, void *private)
286288
return -EINVAL;
287289
fsopt->caps_wanted_delay_max = intval;
288290
break;
291+
case Opt_caps_max:
292+
if (intval < 0)
293+
return -EINVAL;
294+
fsopt->caps_max = intval;
295+
break;
289296
case Opt_readdir_max_entries:
290297
if (intval < 1)
291298
return -EINVAL;
@@ -576,6 +583,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
576583
seq_printf(m, ",rasize=%d", fsopt->rasize);
577584
if (fsopt->congestion_kb != default_congestion_kb())
578585
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
586+
if (fsopt->caps_max)
587+
seq_printf(m, ",caps_max=%d", fsopt->caps_max);
579588
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
580589
seq_printf(m, ",caps_wanted_delay_min=%d",
581590
fsopt->caps_wanted_delay_min);
@@ -683,9 +692,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
683692
if (!fsc->wb_pagevec_pool)
684693
goto fail_cap_wq;
685694

686-
/* caps */
687-
fsc->min_caps = fsopt->max_readdir;
688-
689695
return fsc;
690696

691697
fail_cap_wq:

fs/ceph/super.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct ceph_mount_options {
7979
int rasize; /* max readahead */
8080
int congestion_kb; /* max writeback in flight */
8181
int caps_wanted_delay_min, caps_wanted_delay_max;
82+
int caps_max;
8283
int max_readdir; /* max readdir result (entries) */
8384
int max_readdir_bytes; /* max readdir result (bytes) */
8485

@@ -100,7 +101,6 @@ struct ceph_fs_client {
100101
struct ceph_client *client;
101102

102103
unsigned long mount_state;
103-
int min_caps; /* min caps i added */
104104
loff_t max_file_size;
105105

106106
struct ceph_mds_client *mdsc;
@@ -668,7 +668,8 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
668668

669669
extern void ceph_caps_init(struct ceph_mds_client *mdsc);
670670
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
671-
extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
671+
extern void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
672+
struct ceph_mount_options *fsopt);
672673
extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
673674
struct ceph_cap_reservation *ctx, int need);
674675
extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,

include/linux/ceph/types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ struct ceph_vino {
2424
/* context for the caps reservation mechanism */
2525
struct ceph_cap_reservation {
2626
int count;
27+
int used;
2728
};
2829

2930

0 commit comments

Comments
 (0)