Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
1d62e436 | 9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
d1a4c0b3 GC |
10 | { |
11 | /* | |
3e32cb2e | 12 | * The root cgroup does not use page_counters, but rather, |
d1a4c0b3 GC |
13 | * rely on the data already collected by the network |
14 | * subsystem | |
15 | */ | |
d1a4c0b3 | 16 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
3e32cb2e JW |
17 | struct page_counter *counter_parent = NULL; |
18 | struct cg_proto *cg_proto, *parent_cg; | |
d1a4c0b3 GC |
19 | |
20 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
21 | if (!cg_proto) | |
6bc10349 | 22 | return 0; |
d1a4c0b3 | 23 | |
2e685cad EB |
24 | cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; |
25 | cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; | |
26 | cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; | |
27 | cg_proto->memory_pressure = 0; | |
28 | cg_proto->memcg = memcg; | |
d1a4c0b3 GC |
29 | |
30 | parent_cg = tcp_prot.proto_cgroup(parent); | |
31 | if (parent_cg) | |
3e32cb2e | 32 | counter_parent = &parent_cg->memory_allocated; |
d1a4c0b3 | 33 | |
3e32cb2e | 34 | page_counter_init(&cg_proto->memory_allocated, counter_parent); |
908c7f19 | 35 | percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); |
d1a4c0b3 | 36 | |
6bc10349 | 37 | return 0; |
d1a4c0b3 GC |
38 | } |
39 | EXPORT_SYMBOL(tcp_init_cgroup); | |
40 | ||
1d62e436 | 41 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
d1a4c0b3 | 42 | { |
d1a4c0b3 | 43 | struct cg_proto *cg_proto; |
d1a4c0b3 GC |
44 | |
45 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
46 | if (!cg_proto) | |
47 | return; | |
48 | ||
2e685cad | 49 | percpu_counter_destroy(&cg_proto->sockets_allocated); |
f48b80a5 VD |
50 | |
51 | if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
52 | static_key_slow_dec(&memcg_socket_limit_enabled); | |
53 | ||
d1a4c0b3 GC |
54 | } |
55 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 | 56 | |
3e32cb2e | 57 | static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) |
3aaabe23 | 58 | { |
3aaabe23 | 59 | struct cg_proto *cg_proto; |
3aaabe23 GC |
60 | int i; |
61 | int ret; | |
62 | ||
63 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
64 | if (!cg_proto) | |
65 | return -EINVAL; | |
66 | ||
3e32cb2e | 67 | ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); |
3aaabe23 GC |
68 | if (ret) |
69 | return ret; | |
70 | ||
71 | for (i = 0; i < 3; i++) | |
3e32cb2e | 72 | cg_proto->sysctl_mem[i] = min_t(long, nr_pages, |
2e685cad | 73 | sysctl_tcp_mem[i]); |
3aaabe23 | 74 | |
3e32cb2e | 75 | if (nr_pages == PAGE_COUNTER_MAX) |
3f134619 | 76 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); |
3e32cb2e | 77 | else { |
3f134619 GC |
78 | /* |
79 | * The active bit needs to be written after the static_key | |
80 | * update. This is what guarantees that the socket activation | |
81 | * function is the last one to run. See sock_update_memcg() for | |
82 | * details, and note that we don't mark any socket as belonging | |
83 | * to this memcg until that flag is up. | |
84 | * | |
85 | * We need to do this, because static_keys will span multiple | |
86 | * sites, but we can't control their order. If we mark a socket | |
87 | * as accounted, but the accounting functions are not patched in | |
88 | * yet, we'll lose accounting. | |
89 | * | |
90 | * We never race with the readers in sock_update_memcg(), | |
91 | * because when this value change, the code to process it is not | |
92 | * patched in yet. | |
93 | * | |
94 | * The activated bit is used to guarantee that no two writers | |
95 | * will do the update in the same memcg. Without that, we can't | |
96 | * properly shutdown the static key. | |
97 | */ | |
98 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
99 | static_key_slow_inc(&memcg_socket_limit_enabled); | |
100 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
101 | } | |
3aaabe23 GC |
102 | |
103 | return 0; | |
104 | } | |
105 | ||
3e32cb2e JW |
/*
 * Attribute selectors stored in cftype.private; the read, write and reset
 * handlers in this file switch on them to pick the page counter field.
 */
enum {
	RES_USAGE,	/* current usage */
	RES_LIMIT,	/* configured limit */
	RES_MAX_USAGE,	/* usage watermark */
	RES_FAILCNT,	/* failure count */
};
112 | ||
/* Serialises limit updates performed through tcp_cgroup_write(). */
static DEFINE_MUTEX(tcp_limit_mutex);
114 | ||
451af504 TH |
115 | static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, |
116 | char *buf, size_t nbytes, loff_t off) | |
3aaabe23 | 117 | { |
451af504 | 118 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); |
3e32cb2e | 119 | unsigned long nr_pages; |
3aaabe23 GC |
120 | int ret = 0; |
121 | ||
451af504 TH |
122 | buf = strstrip(buf); |
123 | ||
124 | switch (of_cft(of)->private) { | |
3aaabe23 GC |
125 | case RES_LIMIT: |
126 | /* see memcontrol.c */ | |
650c5e56 | 127 | ret = page_counter_memparse(buf, "-1", &nr_pages); |
3aaabe23 GC |
128 | if (ret) |
129 | break; | |
3e32cb2e JW |
130 | mutex_lock(&tcp_limit_mutex); |
131 | ret = tcp_update_limit(memcg, nr_pages); | |
132 | mutex_unlock(&tcp_limit_mutex); | |
3aaabe23 GC |
133 | break; |
134 | default: | |
135 | ret = -EINVAL; | |
136 | break; | |
137 | } | |
451af504 | 138 | return ret ?: nbytes; |
3aaabe23 GC |
139 | } |
140 | ||
182446d0 | 141 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
3aaabe23 | 142 | { |
182446d0 | 143 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3e32cb2e | 144 | struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); |
3aaabe23 GC |
145 | u64 val; |
146 | ||
147 | switch (cft->private) { | |
148 | case RES_LIMIT: | |
3e32cb2e JW |
149 | if (!cg_proto) |
150 | return PAGE_COUNTER_MAX; | |
151 | val = cg_proto->memory_allocated.limit; | |
152 | val *= PAGE_SIZE; | |
3aaabe23 | 153 | break; |
5a6dd343 | 154 | case RES_USAGE: |
3e32cb2e JW |
155 | if (!cg_proto) |
156 | val = atomic_long_read(&tcp_memory_allocated); | |
157 | else | |
158 | val = page_counter_read(&cg_proto->memory_allocated); | |
159 | val *= PAGE_SIZE; | |
5a6dd343 | 160 | break; |
ffea59e5 | 161 | case RES_FAILCNT: |
3e32cb2e JW |
162 | if (!cg_proto) |
163 | return 0; | |
164 | val = cg_proto->memory_allocated.failcnt; | |
165 | break; | |
0850f0f5 | 166 | case RES_MAX_USAGE: |
3e32cb2e JW |
167 | if (!cg_proto) |
168 | return 0; | |
169 | val = cg_proto->memory_allocated.watermark; | |
170 | val *= PAGE_SIZE; | |
ffea59e5 | 171 | break; |
3aaabe23 GC |
172 | default: |
173 | BUG(); | |
174 | } | |
175 | return val; | |
176 | } | |
177 | ||
6770c64e TH |
178 | static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, |
179 | char *buf, size_t nbytes, loff_t off) | |
ffea59e5 GC |
180 | { |
181 | struct mem_cgroup *memcg; | |
ffea59e5 GC |
182 | struct cg_proto *cg_proto; |
183 | ||
6770c64e | 184 | memcg = mem_cgroup_from_css(of_css(of)); |
ffea59e5 GC |
185 | cg_proto = tcp_prot.proto_cgroup(memcg); |
186 | if (!cg_proto) | |
6770c64e | 187 | return nbytes; |
ffea59e5 | 188 | |
6770c64e | 189 | switch (of_cft(of)->private) { |
0850f0f5 | 190 | case RES_MAX_USAGE: |
3e32cb2e | 191 | page_counter_reset_watermark(&cg_proto->memory_allocated); |
0850f0f5 | 192 | break; |
ffea59e5 | 193 | case RES_FAILCNT: |
3e32cb2e | 194 | cg_proto->memory_allocated.failcnt = 0; |
ffea59e5 GC |
195 | break; |
196 | } | |
197 | ||
6770c64e | 198 | return nbytes; |
ffea59e5 GC |
199 | } |
200 | ||
676f7c8f TH |
201 | static struct cftype tcp_files[] = { |
202 | { | |
203 | .name = "kmem.tcp.limit_in_bytes", | |
451af504 | 204 | .write = tcp_cgroup_write, |
676f7c8f TH |
205 | .read_u64 = tcp_cgroup_read, |
206 | .private = RES_LIMIT, | |
207 | }, | |
208 | { | |
209 | .name = "kmem.tcp.usage_in_bytes", | |
210 | .read_u64 = tcp_cgroup_read, | |
211 | .private = RES_USAGE, | |
212 | }, | |
213 | { | |
214 | .name = "kmem.tcp.failcnt", | |
215 | .private = RES_FAILCNT, | |
6770c64e | 216 | .write = tcp_cgroup_reset, |
676f7c8f TH |
217 | .read_u64 = tcp_cgroup_read, |
218 | }, | |
219 | { | |
220 | .name = "kmem.tcp.max_usage_in_bytes", | |
221 | .private = RES_MAX_USAGE, | |
6770c64e | 222 | .write = tcp_cgroup_reset, |
676f7c8f TH |
223 | .read_u64 = tcp_cgroup_read, |
224 | }, | |
6bc10349 | 225 | { } /* terminate */ |
676f7c8f | 226 | }; |
6bc10349 TH |
227 | |
228 | static int __init tcp_memcontrol_init(void) | |
229 | { | |
2cf669a5 | 230 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); |
6bc10349 TH |
231 | return 0; |
232 | } | |
233 | __initcall(tcp_memcontrol_init); |