Skip to content

Commit 185efc0

Browse files
Johannes Weiner authored and torvalds committed
memcg: Revert "memcg: add memory.vmscan_stat"
Revert the post-3.0 commit 82f9d48 ("memcg: add memory.vmscan_stat").

The implementation of per-memcg reclaim statistics violates how memcg hierarchies usually behave: hierarchically.

The reclaim statistics are accounted to child memcgs and the parent hitting the limit, but not to hierarchy levels in between. Usually, hierarchical statistics are perfectly recursive, with each level representing the sum of itself and all its children.

Since this exports statistics to userspace, this may lead to confusion and problems with changing things after the release, so revert it now, we can try again later.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent a4d3e9e commit 185efc0

File tree

5 files changed

+18
-303
lines changed

5 files changed

+18
-303
lines changed

Documentation/cgroups/memory.txt

Lines changed: 1 addition & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ will be charged as a new owner of it.
380380

381381
5.2 stat file
382382

383-
5.2.1 memory.stat file includes following statistics
383+
memory.stat file includes following statistics
384384

385385
# per-memory cgroup local status
386386
cache - # of bytes of page cache memory.
@@ -438,89 +438,6 @@ Note:
438438
file_mapped is accounted only when the memory cgroup is owner of page
439439
cache.)
440440

441-
5.2.2 memory.vmscan_stat
442-
443-
memory.vmscan_stat includes statistics information for memory scanning and
444-
freeing, reclaiming. The statistics shows memory scanning information since
445-
memory cgroup creation and can be reset to 0 by writing 0 as
446-
447-
#echo 0 > ../memory.vmscan_stat
448-
449-
This file contains following statistics.
450-
451-
[param]_[file_or_anon]_pages_by_[reason]_[under_heararchy]
452-
[param]_elapsed_ns_by_[reason]_[under_hierarchy]
453-
454-
For example,
455-
456-
scanned_file_pages_by_limit indicates the number of scanned
457-
file pages at vmscan.
458-
459-
Now, 3 parameters are supported
460-
461-
scanned - the number of pages scanned by vmscan
462-
rotated - the number of pages activated at vmscan
463-
freed - the number of pages freed by vmscan
464-
465-
If "rotated" is high against scanned/freed, the memcg seems busy.
466-
467-
Now, 2 reason are supported
468-
469-
limit - the memory cgroup's limit
470-
system - global memory pressure + softlimit
471-
(global memory pressure not under softlimit is not handled now)
472-
473-
When under_hierarchy is added in the tail, the number indicates the
474-
total memcg scan of its children and itself.
475-
476-
elapsed_ns is a elapsed time in nanosecond. This may include sleep time
477-
and not indicates CPU usage. So, please take this as just showing
478-
latency.
479-
480-
Here is an example.
481-
482-
# cat /cgroup/memory/A/memory.vmscan_stat
483-
scanned_pages_by_limit 9471864
484-
scanned_anon_pages_by_limit 6640629
485-
scanned_file_pages_by_limit 2831235
486-
rotated_pages_by_limit 4243974
487-
rotated_anon_pages_by_limit 3971968
488-
rotated_file_pages_by_limit 272006
489-
freed_pages_by_limit 2318492
490-
freed_anon_pages_by_limit 962052
491-
freed_file_pages_by_limit 1356440
492-
elapsed_ns_by_limit 351386416101
493-
scanned_pages_by_system 0
494-
scanned_anon_pages_by_system 0
495-
scanned_file_pages_by_system 0
496-
rotated_pages_by_system 0
497-
rotated_anon_pages_by_system 0
498-
rotated_file_pages_by_system 0
499-
freed_pages_by_system 0
500-
freed_anon_pages_by_system 0
501-
freed_file_pages_by_system 0
502-
elapsed_ns_by_system 0
503-
scanned_pages_by_limit_under_hierarchy 9471864
504-
scanned_anon_pages_by_limit_under_hierarchy 6640629
505-
scanned_file_pages_by_limit_under_hierarchy 2831235
506-
rotated_pages_by_limit_under_hierarchy 4243974
507-
rotated_anon_pages_by_limit_under_hierarchy 3971968
508-
rotated_file_pages_by_limit_under_hierarchy 272006
509-
freed_pages_by_limit_under_hierarchy 2318492
510-
freed_anon_pages_by_limit_under_hierarchy 962052
511-
freed_file_pages_by_limit_under_hierarchy 1356440
512-
elapsed_ns_by_limit_under_hierarchy 351386416101
513-
scanned_pages_by_system_under_hierarchy 0
514-
scanned_anon_pages_by_system_under_hierarchy 0
515-
scanned_file_pages_by_system_under_hierarchy 0
516-
rotated_pages_by_system_under_hierarchy 0
517-
rotated_anon_pages_by_system_under_hierarchy 0
518-
rotated_file_pages_by_system_under_hierarchy 0
519-
freed_pages_by_system_under_hierarchy 0
520-
freed_anon_pages_by_system_under_hierarchy 0
521-
freed_file_pages_by_system_under_hierarchy 0
522-
elapsed_ns_by_system_under_hierarchy 0
523-
524441
5.3 swappiness
525442

526443
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.

include/linux/memcontrol.h

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
3939
struct mem_cgroup *mem_cont,
4040
int active, int file);
4141

42-
struct memcg_scanrecord {
43-
struct mem_cgroup *mem; /* scanend memory cgroup */
44-
struct mem_cgroup *root; /* scan target hierarchy root */
45-
int context; /* scanning context (see memcontrol.c) */
46-
unsigned long nr_scanned[2]; /* the number of scanned pages */
47-
unsigned long nr_rotated[2]; /* the number of rotated pages */
48-
unsigned long nr_freed[2]; /* the number of freed pages */
49-
unsigned long elapsed; /* nsec of time elapsed while scanning */
50-
};
51-
5242
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
5343
/*
5444
* All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page);
127117
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
128118
struct task_struct *p);
129119

130-
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
131-
gfp_t gfp_mask, bool noswap,
132-
struct memcg_scanrecord *rec);
133-
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
134-
gfp_t gfp_mask, bool noswap,
135-
struct zone *zone,
136-
struct memcg_scanrecord *rec,
137-
unsigned long *nr_scanned);
138-
139120
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
140121
extern int do_swap_account;
141122
#endif

include/linux/swap.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page)
252252
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
253253
gfp_t gfp_mask, nodemask_t *mask);
254254
extern int __isolate_lru_page(struct page *page, int mode, int file);
255+
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
256+
gfp_t gfp_mask, bool noswap);
257+
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
258+
gfp_t gfp_mask, bool noswap,
259+
struct zone *zone,
260+
unsigned long *nr_scanned);
255261
extern unsigned long shrink_all_memory(unsigned long nr_pages);
256262
extern int vm_swappiness;
257263
extern int remove_mapping(struct address_space *mapping, struct page *page);

mm/memcontrol.c

Lines changed: 6 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
204204
static void mem_cgroup_threshold(struct mem_cgroup *mem);
205205
static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
206206

207-
enum {
208-
SCAN_BY_LIMIT,
209-
SCAN_BY_SYSTEM,
210-
NR_SCAN_CONTEXT,
211-
SCAN_BY_SHRINK, /* not recorded now */
212-
};
213-
214-
enum {
215-
SCAN,
216-
SCAN_ANON,
217-
SCAN_FILE,
218-
ROTATE,
219-
ROTATE_ANON,
220-
ROTATE_FILE,
221-
FREED,
222-
FREED_ANON,
223-
FREED_FILE,
224-
ELAPSED,
225-
NR_SCANSTATS,
226-
};
227-
228-
struct scanstat {
229-
spinlock_t lock;
230-
unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
231-
unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
232-
};
233-
234-
const char *scanstat_string[NR_SCANSTATS] = {
235-
"scanned_pages",
236-
"scanned_anon_pages",
237-
"scanned_file_pages",
238-
"rotated_pages",
239-
"rotated_anon_pages",
240-
"rotated_file_pages",
241-
"freed_pages",
242-
"freed_anon_pages",
243-
"freed_file_pages",
244-
"elapsed_ns",
245-
};
246-
#define SCANSTAT_WORD_LIMIT "_by_limit"
247-
#define SCANSTAT_WORD_SYSTEM "_by_system"
248-
#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
249-
250-
251207
/*
252208
* The memory controller data structure. The memory controller controls both
253209
* page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
313269

314270
/* For oom notifier event fd */
315271
struct list_head oom_notify;
316-
/* For recording LRU-scan statistics */
317-
struct scanstat scanstat;
272+
318273
/*
319274
* Should we move charges of a task when a task is moved into this
320275
* mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
16781633
}
16791634
#endif
16801635

1681-
static void __mem_cgroup_record_scanstat(unsigned long *stats,
1682-
struct memcg_scanrecord *rec)
1683-
{
1684-
1685-
stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
1686-
stats[SCAN_ANON] += rec->nr_scanned[0];
1687-
stats[SCAN_FILE] += rec->nr_scanned[1];
1688-
1689-
stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
1690-
stats[ROTATE_ANON] += rec->nr_rotated[0];
1691-
stats[ROTATE_FILE] += rec->nr_rotated[1];
1692-
1693-
stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
1694-
stats[FREED_ANON] += rec->nr_freed[0];
1695-
stats[FREED_FILE] += rec->nr_freed[1];
1696-
1697-
stats[ELAPSED] += rec->elapsed;
1698-
}
1699-
1700-
static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
1701-
{
1702-
struct mem_cgroup *mem;
1703-
int context = rec->context;
1704-
1705-
if (context >= NR_SCAN_CONTEXT)
1706-
return;
1707-
1708-
mem = rec->mem;
1709-
spin_lock(&mem->scanstat.lock);
1710-
__mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
1711-
spin_unlock(&mem->scanstat.lock);
1712-
1713-
mem = rec->root;
1714-
spin_lock(&mem->scanstat.lock);
1715-
__mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
1716-
spin_unlock(&mem->scanstat.lock);
1717-
}
1718-
17191636
/*
17201637
* Scan the hierarchy if needed to reclaim memory. We remember the last child
17211638
* we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,25 +1657,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
17401657
bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
17411658
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
17421659
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1743-
struct memcg_scanrecord rec;
17441660
unsigned long excess;
1745-
unsigned long scanned;
1661+
unsigned long nr_scanned;
17461662

17471663
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
17481664

17491665
/* If memsw_is_minimum==1, swap-out is of-no-use. */
17501666
if (!check_soft && !shrink && root_mem->memsw_is_minimum)
17511667
noswap = true;
17521668

1753-
if (shrink)
1754-
rec.context = SCAN_BY_SHRINK;
1755-
else if (check_soft)
1756-
rec.context = SCAN_BY_SYSTEM;
1757-
else
1758-
rec.context = SCAN_BY_LIMIT;
1759-
1760-
rec.root = root_mem;
1761-
17621669
while (1) {
17631670
victim = mem_cgroup_select_victim(root_mem);
17641671
if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
17991706
css_put(&victim->css);
18001707
continue;
18011708
}
1802-
rec.mem = victim;
1803-
rec.nr_scanned[0] = 0;
1804-
rec.nr_scanned[1] = 0;
1805-
rec.nr_rotated[0] = 0;
1806-
rec.nr_rotated[1] = 0;
1807-
rec.nr_freed[0] = 0;
1808-
rec.nr_freed[1] = 0;
1809-
rec.elapsed = 0;
18101709
/* we use swappiness of local cgroup */
18111710
if (check_soft) {
18121711
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1813-
noswap, zone, &rec, &scanned);
1814-
*total_scanned += scanned;
1712+
noswap, zone, &nr_scanned);
1713+
*total_scanned += nr_scanned;
18151714
} else
18161715
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1817-
noswap, &rec);
1818-
mem_cgroup_record_scanstat(&rec);
1716+
noswap);
18191717
css_put(&victim->css);
18201718
/*
18211719
* At shrinking usage, we can't check we should stop here or
@@ -3854,18 +3752,14 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
38543752
/* try to free all pages in this cgroup */
38553753
shrink = 1;
38563754
while (nr_retries && mem->res.usage > 0) {
3857-
struct memcg_scanrecord rec;
38583755
int progress;
38593756

38603757
if (signal_pending(current)) {
38613758
ret = -EINTR;
38623759
goto out;
38633760
}
3864-
rec.context = SCAN_BY_SHRINK;
3865-
rec.mem = mem;
3866-
rec.root = mem;
38673761
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
3868-
false, &rec);
3762+
false);
38693763
if (!progress) {
38703764
nr_retries--;
38713765
/* maybe some writeback is necessary */
@@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
47094603
}
47104604
#endif /* CONFIG_NUMA */
47114605

4712-
static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
4713-
struct cftype *cft,
4714-
struct cgroup_map_cb *cb)
4715-
{
4716-
struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4717-
char string[64];
4718-
int i;
4719-
4720-
for (i = 0; i < NR_SCANSTATS; i++) {
4721-
strcpy(string, scanstat_string[i]);
4722-
strcat(string, SCANSTAT_WORD_LIMIT);
4723-
cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
4724-
}
4725-
4726-
for (i = 0; i < NR_SCANSTATS; i++) {
4727-
strcpy(string, scanstat_string[i]);
4728-
strcat(string, SCANSTAT_WORD_SYSTEM);
4729-
cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
4730-
}
4731-
4732-
for (i = 0; i < NR_SCANSTATS; i++) {
4733-
strcpy(string, scanstat_string[i]);
4734-
strcat(string, SCANSTAT_WORD_LIMIT);
4735-
strcat(string, SCANSTAT_WORD_HIERARCHY);
4736-
cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
4737-
}
4738-
for (i = 0; i < NR_SCANSTATS; i++) {
4739-
strcpy(string, scanstat_string[i]);
4740-
strcat(string, SCANSTAT_WORD_SYSTEM);
4741-
strcat(string, SCANSTAT_WORD_HIERARCHY);
4742-
cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
4743-
}
4744-
return 0;
4745-
}
4746-
4747-
static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
4748-
unsigned int event)
4749-
{
4750-
struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4751-
4752-
spin_lock(&mem->scanstat.lock);
4753-
memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
4754-
memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
4755-
spin_unlock(&mem->scanstat.lock);
4756-
return 0;
4757-
}
4758-
4759-
47604606
static struct cftype mem_cgroup_files[] = {
47614607
{
47624608
.name = "usage_in_bytes",
@@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
48274673
.mode = S_IRUGO,
48284674
},
48294675
#endif
4830-
{
4831-
.name = "vmscan_stat",
4832-
.read_map = mem_cgroup_vmscan_stat_read,
4833-
.trigger = mem_cgroup_reset_vmscan_stat,
4834-
},
48354676
};
48364677

48374678
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
50954936
atomic_set(&mem->refcnt, 1);
50964937
mem->move_charge_at_immigrate = 0;
50974938
mutex_init(&mem->thresholds_lock);
5098-
spin_lock_init(&mem->scanstat.lock);
50994939
return &mem->css;
51004940
free_out:
51014941
__mem_cgroup_free(mem);

0 commit comments

Comments
 (0)