Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
d1a4c0b3 GC |
9 | static void memcg_tcp_enter_memory_pressure(struct sock *sk) |
10 | { | |
c48e074c | 11 | if (sk->sk_cgrp->memory_pressure) |
2e685cad | 12 | sk->sk_cgrp->memory_pressure = 1; |
d1a4c0b3 GC |
13 | } |
14 | EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); | |
15 | ||
1d62e436 | 16 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
d1a4c0b3 GC |
17 | { |
18 | /* | |
19 | * The root cgroup does not use res_counters, but rather, | |
20 | * rely on the data already collected by the network | |
21 | * subsystem | |
22 | */ | |
23 | struct res_counter *res_parent = NULL; | |
24 | struct cg_proto *cg_proto, *parent_cg; | |
d1a4c0b3 GC |
25 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
26 | ||
27 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
28 | if (!cg_proto) | |
6bc10349 | 29 | return 0; |
d1a4c0b3 | 30 | |
2e685cad EB |
31 | cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; |
32 | cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; | |
33 | cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; | |
34 | cg_proto->memory_pressure = 0; | |
35 | cg_proto->memcg = memcg; | |
d1a4c0b3 GC |
36 | |
37 | parent_cg = tcp_prot.proto_cgroup(parent); | |
38 | if (parent_cg) | |
2e685cad | 39 | res_parent = &parent_cg->memory_allocated; |
d1a4c0b3 | 40 | |
2e685cad EB |
41 | res_counter_init(&cg_proto->memory_allocated, res_parent); |
42 | percpu_counter_init(&cg_proto->sockets_allocated, 0); | |
d1a4c0b3 | 43 | |
6bc10349 | 44 | return 0; |
d1a4c0b3 GC |
45 | } |
46 | EXPORT_SYMBOL(tcp_init_cgroup); | |
47 | ||
1d62e436 | 48 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
d1a4c0b3 | 49 | { |
d1a4c0b3 | 50 | struct cg_proto *cg_proto; |
d1a4c0b3 GC |
51 | |
52 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
53 | if (!cg_proto) | |
54 | return; | |
55 | ||
2e685cad | 56 | percpu_counter_destroy(&cg_proto->sockets_allocated); |
d1a4c0b3 GC |
57 | } |
58 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 GC |
59 | |
60 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |
61 | { | |
3aaabe23 GC |
62 | struct cg_proto *cg_proto; |
63 | u64 old_lim; | |
64 | int i; | |
65 | int ret; | |
66 | ||
67 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
68 | if (!cg_proto) | |
69 | return -EINVAL; | |
70 | ||
6de5a8bf SZ |
71 | if (val > RES_COUNTER_MAX) |
72 | val = RES_COUNTER_MAX; | |
3aaabe23 | 73 | |
2e685cad EB |
74 | old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); |
75 | ret = res_counter_set_limit(&cg_proto->memory_allocated, val); | |
3aaabe23 GC |
76 | if (ret) |
77 | return ret; | |
78 | ||
79 | for (i = 0; i < 3; i++) | |
2e685cad EB |
80 | cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, |
81 | sysctl_tcp_mem[i]); | |
3aaabe23 | 82 | |
6de5a8bf | 83 | if (val == RES_COUNTER_MAX) |
3f134619 | 84 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); |
6de5a8bf | 85 | else if (val != RES_COUNTER_MAX) { |
3f134619 GC |
86 | /* |
87 | * The active bit needs to be written after the static_key | |
88 | * update. This is what guarantees that the socket activation | |
89 | * function is the last one to run. See sock_update_memcg() for | |
90 | * details, and note that we don't mark any socket as belonging | |
91 | * to this memcg until that flag is up. | |
92 | * | |
93 | * We need to do this, because static_keys will span multiple | |
94 | * sites, but we can't control their order. If we mark a socket | |
95 | * as accounted, but the accounting functions are not patched in | |
96 | * yet, we'll lose accounting. | |
97 | * | |
98 | * We never race with the readers in sock_update_memcg(), | |
99 | * because when this value change, the code to process it is not | |
100 | * patched in yet. | |
101 | * | |
102 | * The activated bit is used to guarantee that no two writers | |
103 | * will do the update in the same memcg. Without that, we can't | |
104 | * properly shutdown the static key. | |
105 | */ | |
106 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
107 | static_key_slow_inc(&memcg_socket_limit_enabled); | |
108 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
109 | } | |
3aaabe23 GC |
110 | |
111 | return 0; | |
112 | } | |
113 | ||
182446d0 | 114 | static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, |
3aaabe23 GC |
115 | const char *buffer) |
116 | { | |
182446d0 | 117 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3aaabe23 GC |
118 | unsigned long long val; |
119 | int ret = 0; | |
120 | ||
121 | switch (cft->private) { | |
122 | case RES_LIMIT: | |
123 | /* see memcontrol.c */ | |
124 | ret = res_counter_memparse_write_strategy(buffer, &val); | |
125 | if (ret) | |
126 | break; | |
127 | ret = tcp_update_limit(memcg, val); | |
128 | break; | |
129 | default: | |
130 | ret = -EINVAL; | |
131 | break; | |
132 | } | |
133 | return ret; | |
134 | } | |
135 | ||
136 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | |
137 | { | |
3aaabe23 GC |
138 | struct cg_proto *cg_proto; |
139 | ||
140 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
141 | if (!cg_proto) | |
142 | return default_val; | |
143 | ||
2e685cad | 144 | return res_counter_read_u64(&cg_proto->memory_allocated, type); |
3aaabe23 GC |
145 | } |
146 | ||
5a6dd343 GC |
147 | static u64 tcp_read_usage(struct mem_cgroup *memcg) |
148 | { | |
5a6dd343 GC |
149 | struct cg_proto *cg_proto; |
150 | ||
151 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
152 | if (!cg_proto) | |
153 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | |
154 | ||
2e685cad | 155 | return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); |
5a6dd343 GC |
156 | } |
157 | ||
182446d0 | 158 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
3aaabe23 | 159 | { |
182446d0 | 160 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3aaabe23 GC |
161 | u64 val; |
162 | ||
163 | switch (cft->private) { | |
164 | case RES_LIMIT: | |
6de5a8bf | 165 | val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); |
3aaabe23 | 166 | break; |
5a6dd343 GC |
167 | case RES_USAGE: |
168 | val = tcp_read_usage(memcg); | |
169 | break; | |
ffea59e5 | 170 | case RES_FAILCNT: |
0850f0f5 GC |
171 | case RES_MAX_USAGE: |
172 | val = tcp_read_stat(memcg, cft->private, 0); | |
ffea59e5 | 173 | break; |
3aaabe23 GC |
174 | default: |
175 | BUG(); | |
176 | } | |
177 | return val; | |
178 | } | |
179 | ||
182446d0 | 180 | static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) |
ffea59e5 GC |
181 | { |
182 | struct mem_cgroup *memcg; | |
ffea59e5 GC |
183 | struct cg_proto *cg_proto; |
184 | ||
182446d0 | 185 | memcg = mem_cgroup_from_css(css); |
ffea59e5 GC |
186 | cg_proto = tcp_prot.proto_cgroup(memcg); |
187 | if (!cg_proto) | |
188 | return 0; | |
ffea59e5 GC |
189 | |
190 | switch (event) { | |
0850f0f5 | 191 | case RES_MAX_USAGE: |
2e685cad | 192 | res_counter_reset_max(&cg_proto->memory_allocated); |
0850f0f5 | 193 | break; |
ffea59e5 | 194 | case RES_FAILCNT: |
2e685cad | 195 | res_counter_reset_failcnt(&cg_proto->memory_allocated); |
ffea59e5 GC |
196 | break; |
197 | } | |
198 | ||
199 | return 0; | |
200 | } | |
201 | ||
676f7c8f TH |
202 | static struct cftype tcp_files[] = { |
203 | { | |
204 | .name = "kmem.tcp.limit_in_bytes", | |
205 | .write_string = tcp_cgroup_write, | |
206 | .read_u64 = tcp_cgroup_read, | |
207 | .private = RES_LIMIT, | |
208 | }, | |
209 | { | |
210 | .name = "kmem.tcp.usage_in_bytes", | |
211 | .read_u64 = tcp_cgroup_read, | |
212 | .private = RES_USAGE, | |
213 | }, | |
214 | { | |
215 | .name = "kmem.tcp.failcnt", | |
216 | .private = RES_FAILCNT, | |
217 | .trigger = tcp_cgroup_reset, | |
218 | .read_u64 = tcp_cgroup_read, | |
219 | }, | |
220 | { | |
221 | .name = "kmem.tcp.max_usage_in_bytes", | |
222 | .private = RES_MAX_USAGE, | |
223 | .trigger = tcp_cgroup_reset, | |
224 | .read_u64 = tcp_cgroup_read, | |
225 | }, | |
6bc10349 | 226 | { } /* terminate */ |
676f7c8f | 227 | }; |
6bc10349 TH |
228 | |
229 | static int __init tcp_memcontrol_init(void) | |
230 | { | |
231 | WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); | |
232 | return 0; | |
233 | } | |
234 | __initcall(tcp_memcontrol_init); |