Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/ptlrpc/sec_bulk.c | |
37 | * | |
38 | * Author: Eric Mei <ericm@clusterfs.com> | |
39 | */ | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_SEC | |
42 | ||
43 | #include <linux/libcfs/libcfs.h> | |
44 | #include <linux/crypto.h> | |
45 | ||
46 | #include <obd.h> | |
47 | #include <obd_cksum.h> | |
48 | #include <obd_class.h> | |
49 | #include <obd_support.h> | |
50 | #include <lustre_net.h> | |
51 | #include <lustre_import.h> | |
52 | #include <lustre_dlm.h> | |
53 | #include <lustre_sec.h> | |
54 | ||
55 | #include "ptlrpc_internal.h" | |
56 | ||
57 | /**************************************** | |
58 | * bulk encryption page pools * | |
59 | ****************************************/ | |
60 | ||
61 | ||
ae18c5c6 AM |
/* how many pointers fit into one page-sized array */
#define POINTERS_PER_PAGE	(PAGE_CACHE_SIZE / sizeof(void *))
/* every pool is one page-sized array of struct page pointers */
#define PAGES_PER_POOL		(POINTERS_PER_PAGE)

/* upper bound of epp_idle_idx, and the weight of its previous value */
#define IDLE_IDX_MAX		(100)
#define IDLE_IDX_WEIGHT		(3)

/* seconds without pool access after which the pools count as fully idle */
#define CACHE_QUIESCENT_PERIOD	(20)
69 | ||
70 | static struct ptlrpc_enc_page_pool { | |
71 | /* | |
72 | * constants | |
73 | */ | |
74 | unsigned long epp_max_pages; /* maximum pages can hold, const */ | |
75 | unsigned int epp_max_pools; /* number of pools, const */ | |
76 | ||
77 | /* | |
78 | * wait queue in case of not enough free pages. | |
79 | */ | |
80 | wait_queue_head_t epp_waitq; /* waiting threads */ | |
81 | unsigned int epp_waitqlen; /* wait queue length */ | |
82 | unsigned long epp_pages_short; /* # of pages wanted of in-q users */ | |
83 | unsigned int epp_growing:1; /* during adding pages */ | |
84 | ||
85 | /* | |
86 | * indicating how idle the pools are, from 0 to MAX_IDLE_IDX | |
87 | * this is counted based on each time when getting pages from | |
88 | * the pools, not based on time. which means in case that system | |
89 | * is idled for a while but the idle_idx might still be low if no | |
90 | * activities happened in the pools. | |
91 | */ | |
92 | unsigned long epp_idle_idx; | |
93 | ||
94 | /* last shrink time due to mem tight */ | |
95 | long epp_last_shrink; | |
96 | long epp_last_access; | |
97 | ||
98 | /* | |
99 | * in-pool pages bookkeeping | |
100 | */ | |
101 | spinlock_t epp_lock; /* protect following fields */ | |
102 | unsigned long epp_total_pages; /* total pages in pools */ | |
103 | unsigned long epp_free_pages; /* current pages available */ | |
104 | ||
105 | /* | |
106 | * statistics | |
107 | */ | |
108 | unsigned long epp_st_max_pages; /* # of pages ever reached */ | |
109 | unsigned int epp_st_grows; /* # of grows */ | |
110 | unsigned int epp_st_grow_fails; /* # of add pages failures */ | |
111 | unsigned int epp_st_shrinks; /* # of shrinks */ | |
112 | unsigned long epp_st_access; /* # of access */ | |
113 | unsigned long epp_st_missings; /* # of cache missing */ | |
114 | unsigned long epp_st_lowfree; /* lowest free pages reached */ | |
115 | unsigned int epp_st_max_wqlen; /* highest waitqueue length */ | |
116 | cfs_time_t epp_st_max_wait; /* in jeffies */ | |
117 | /* | |
118 | * pointers to pools | |
119 | */ | |
120 | struct page ***epp_pools; | |
121 | } page_pools; | |
122 | ||
d7e09d03 PT |
123 | /* |
124 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools | |
125 | */ | |
73bb1da6 | 126 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) |
d7e09d03 PT |
127 | { |
128 | int rc; | |
129 | ||
130 | spin_lock(&page_pools.epp_lock); | |
131 | ||
73bb1da6 | 132 | rc = seq_printf(m, |
d7e09d03 PT |
133 | "physical pages: %lu\n" |
134 | "pages per pool: %lu\n" | |
135 | "max pages: %lu\n" | |
136 | "max pools: %u\n" | |
137 | "total pages: %lu\n" | |
138 | "total free: %lu\n" | |
139 | "idle index: %lu/100\n" | |
140 | "last shrink: %lds\n" | |
141 | "last access: %lds\n" | |
142 | "max pages reached: %lu\n" | |
143 | "grows: %u\n" | |
144 | "grows failure: %u\n" | |
145 | "shrinks: %u\n" | |
146 | "cache access: %lu\n" | |
147 | "cache missing: %lu\n" | |
148 | "low free mark: %lu\n" | |
149 | "max waitqueue depth: %u\n" | |
150 | "max wait time: "CFS_TIME_T"/%u\n" | |
151 | , | |
4f6cc9ab | 152 | totalram_pages, |
d7e09d03 PT |
153 | PAGES_PER_POOL, |
154 | page_pools.epp_max_pages, | |
155 | page_pools.epp_max_pools, | |
156 | page_pools.epp_total_pages, | |
157 | page_pools.epp_free_pages, | |
158 | page_pools.epp_idle_idx, | |
159 | cfs_time_current_sec() - page_pools.epp_last_shrink, | |
160 | cfs_time_current_sec() - page_pools.epp_last_access, | |
161 | page_pools.epp_st_max_pages, | |
162 | page_pools.epp_st_grows, | |
163 | page_pools.epp_st_grow_fails, | |
164 | page_pools.epp_st_shrinks, | |
165 | page_pools.epp_st_access, | |
166 | page_pools.epp_st_missings, | |
167 | page_pools.epp_st_lowfree, | |
168 | page_pools.epp_st_max_wqlen, | |
169 | page_pools.epp_st_max_wait, HZ | |
170 | ); | |
171 | ||
172 | spin_unlock(&page_pools.epp_lock); | |
173 | return rc; | |
174 | } | |
175 | ||
176 | static void enc_pools_release_free_pages(long npages) | |
177 | { | |
178 | int p_idx, g_idx; | |
179 | int p_idx_max1, p_idx_max2; | |
180 | ||
181 | LASSERT(npages > 0); | |
182 | LASSERT(npages <= page_pools.epp_free_pages); | |
183 | LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages); | |
184 | ||
185 | /* max pool index before the release */ | |
186 | p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL; | |
187 | ||
188 | page_pools.epp_free_pages -= npages; | |
189 | page_pools.epp_total_pages -= npages; | |
190 | ||
191 | /* max pool index after the release */ | |
192 | p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 : | |
193 | ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL); | |
194 | ||
195 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
196 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
197 | LASSERT(page_pools.epp_pools[p_idx]); | |
198 | ||
199 | while (npages--) { | |
200 | LASSERT(page_pools.epp_pools[p_idx]); | |
201 | LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); | |
202 | ||
203 | __free_page(page_pools.epp_pools[p_idx][g_idx]); | |
204 | page_pools.epp_pools[p_idx][g_idx] = NULL; | |
205 | ||
206 | if (++g_idx == PAGES_PER_POOL) { | |
207 | p_idx++; | |
208 | g_idx = 0; | |
209 | } | |
210 | }; | |
211 | ||
212 | /* free unused pools */ | |
213 | while (p_idx_max1 < p_idx_max2) { | |
214 | LASSERT(page_pools.epp_pools[p_idx_max2]); | |
215 | OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE); | |
216 | page_pools.epp_pools[p_idx_max2] = NULL; | |
217 | p_idx_max2--; | |
218 | } | |
219 | } | |
220 | ||
221 | /* | |
d7e09d03 PT |
222 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. |
223 | */ | |
3bb22ec5 PT |
224 | static unsigned long enc_pools_shrink_count(struct shrinker *s, |
225 | struct shrink_control *sc) | |
d7e09d03 | 226 | { |
3bb22ec5 PT |
227 | /* |
228 | * if no pool access for a long time, we consider it's fully idle. | |
229 | * a little race here is fine. | |
230 | */ | |
231 | if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > | |
232 | CACHE_QUIESCENT_PERIOD)) { | |
d7e09d03 | 233 | spin_lock(&page_pools.epp_lock); |
3bb22ec5 | 234 | page_pools.epp_idle_idx = IDLE_IDX_MAX; |
d7e09d03 PT |
235 | spin_unlock(&page_pools.epp_lock); |
236 | } | |
237 | ||
3bb22ec5 PT |
238 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); |
239 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | |
240 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | |
241 | } | |
242 | ||
243 | /* | |
244 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | |
245 | */ | |
246 | static unsigned long enc_pools_shrink_scan(struct shrinker *s, | |
247 | struct shrink_control *sc) | |
248 | { | |
249 | spin_lock(&page_pools.epp_lock); | |
250 | sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, | |
251 | page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); | |
252 | if (sc->nr_to_scan > 0) { | |
253 | enc_pools_release_free_pages(sc->nr_to_scan); | |
254 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | |
255 | (long)sc->nr_to_scan, page_pools.epp_free_pages); | |
256 | ||
257 | page_pools.epp_st_shrinks++; | |
258 | page_pools.epp_last_shrink = cfs_time_current_sec(); | |
259 | } | |
260 | spin_unlock(&page_pools.epp_lock); | |
261 | ||
d7e09d03 PT |
262 | /* |
263 | * if no pool access for a long time, we consider it's fully idle. | |
264 | * a little race here is fine. | |
265 | */ | |
266 | if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > | |
267 | CACHE_QUIESCENT_PERIOD)) { | |
268 | spin_lock(&page_pools.epp_lock); | |
269 | page_pools.epp_idle_idx = IDLE_IDX_MAX; | |
270 | spin_unlock(&page_pools.epp_lock); | |
271 | } | |
272 | ||
273 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | |
3bb22ec5 | 274 | return sc->nr_to_scan; |
d7e09d03 PT |
275 | } |
276 | ||
277 | static inline | |
278 | int npages_to_npools(unsigned long npages) | |
279 | { | |
280 | return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); | |
281 | } | |
282 | ||
283 | /* | |
284 | * return how many pages cleaned up. | |
285 | */ | |
286 | static unsigned long enc_pools_cleanup(struct page ***pools, int npools) | |
287 | { | |
288 | unsigned long cleaned = 0; | |
289 | int i, j; | |
290 | ||
291 | for (i = 0; i < npools; i++) { | |
292 | if (pools[i]) { | |
293 | for (j = 0; j < PAGES_PER_POOL; j++) { | |
294 | if (pools[i][j]) { | |
295 | __free_page(pools[i][j]); | |
296 | cleaned++; | |
297 | } | |
298 | } | |
299 | OBD_FREE(pools[i], PAGE_CACHE_SIZE); | |
300 | pools[i] = NULL; | |
301 | } | |
302 | } | |
303 | ||
304 | return cleaned; | |
305 | } | |
306 | ||
307 | /* | |
308 | * merge @npools pointed by @pools which contains @npages new pages | |
309 | * into current pools. | |
310 | * | |
311 | * we have options to avoid most memory copy with some tricks. but we choose | |
312 | * the simplest way to avoid complexity. It's not frequently called. | |
313 | */ | |
314 | static void enc_pools_insert(struct page ***pools, int npools, int npages) | |
315 | { | |
316 | int freeslot; | |
317 | int op_idx, np_idx, og_idx, ng_idx; | |
318 | int cur_npools, end_npools; | |
319 | ||
320 | LASSERT(npages > 0); | |
321 | LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages); | |
322 | LASSERT(npages_to_npools(npages) == npools); | |
323 | LASSERT(page_pools.epp_growing); | |
324 | ||
325 | spin_lock(&page_pools.epp_lock); | |
326 | ||
327 | /* | |
328 | * (1) fill all the free slots of current pools. | |
329 | */ | |
330 | /* free slots are those left by rent pages, and the extra ones with | |
331 | * index >= total_pages, locate at the tail of last pool. */ | |
332 | freeslot = page_pools.epp_total_pages % PAGES_PER_POOL; | |
333 | if (freeslot != 0) | |
334 | freeslot = PAGES_PER_POOL - freeslot; | |
335 | freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages; | |
336 | ||
337 | op_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
338 | og_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
339 | np_idx = npools - 1; | |
340 | ng_idx = (npages - 1) % PAGES_PER_POOL; | |
341 | ||
342 | while (freeslot) { | |
343 | LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL); | |
344 | LASSERT(pools[np_idx][ng_idx] != NULL); | |
345 | ||
346 | page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; | |
347 | pools[np_idx][ng_idx] = NULL; | |
348 | ||
349 | freeslot--; | |
350 | ||
351 | if (++og_idx == PAGES_PER_POOL) { | |
352 | op_idx++; | |
353 | og_idx = 0; | |
354 | } | |
355 | if (--ng_idx < 0) { | |
356 | if (np_idx == 0) | |
357 | break; | |
358 | np_idx--; | |
359 | ng_idx = PAGES_PER_POOL - 1; | |
360 | } | |
361 | } | |
362 | ||
363 | /* | |
364 | * (2) add pools if needed. | |
365 | */ | |
366 | cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) / | |
367 | PAGES_PER_POOL; | |
368 | end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) / | |
369 | PAGES_PER_POOL; | |
370 | LASSERT(end_npools <= page_pools.epp_max_pools); | |
371 | ||
372 | np_idx = 0; | |
373 | while (cur_npools < end_npools) { | |
374 | LASSERT(page_pools.epp_pools[cur_npools] == NULL); | |
375 | LASSERT(np_idx < npools); | |
376 | LASSERT(pools[np_idx] != NULL); | |
377 | ||
378 | page_pools.epp_pools[cur_npools++] = pools[np_idx]; | |
379 | pools[np_idx++] = NULL; | |
380 | } | |
381 | ||
382 | page_pools.epp_total_pages += npages; | |
383 | page_pools.epp_free_pages += npages; | |
384 | page_pools.epp_st_lowfree = page_pools.epp_free_pages; | |
385 | ||
386 | if (page_pools.epp_total_pages > page_pools.epp_st_max_pages) | |
387 | page_pools.epp_st_max_pages = page_pools.epp_total_pages; | |
388 | ||
389 | CDEBUG(D_SEC, "add %d pages to total %lu\n", npages, | |
390 | page_pools.epp_total_pages); | |
391 | ||
392 | spin_unlock(&page_pools.epp_lock); | |
393 | } | |
394 | ||
395 | static int enc_pools_add_pages(int npages) | |
396 | { | |
397 | static DEFINE_MUTEX(add_pages_mutex); | |
398 | struct page ***pools; | |
399 | int npools, alloced = 0; | |
400 | int i, j, rc = -ENOMEM; | |
401 | ||
402 | if (npages < PTLRPC_MAX_BRW_PAGES) | |
403 | npages = PTLRPC_MAX_BRW_PAGES; | |
404 | ||
405 | mutex_lock(&add_pages_mutex); | |
406 | ||
407 | if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages) | |
408 | npages = page_pools.epp_max_pages - page_pools.epp_total_pages; | |
409 | LASSERT(npages > 0); | |
410 | ||
411 | page_pools.epp_st_grows++; | |
412 | ||
413 | npools = npages_to_npools(npages); | |
414 | OBD_ALLOC(pools, npools * sizeof(*pools)); | |
415 | if (pools == NULL) | |
416 | goto out; | |
417 | ||
418 | for (i = 0; i < npools; i++) { | |
419 | OBD_ALLOC(pools[i], PAGE_CACHE_SIZE); | |
420 | if (pools[i] == NULL) | |
421 | goto out_pools; | |
422 | ||
423 | for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { | |
424 | pools[i][j] = alloc_page(__GFP_IO | | |
425 | __GFP_HIGHMEM); | |
426 | if (pools[i][j] == NULL) | |
427 | goto out_pools; | |
428 | ||
429 | alloced++; | |
430 | } | |
431 | } | |
432 | LASSERT(alloced == npages); | |
433 | ||
434 | enc_pools_insert(pools, npools, npages); | |
435 | CDEBUG(D_SEC, "added %d pages into pools\n", npages); | |
436 | rc = 0; | |
437 | ||
438 | out_pools: | |
439 | enc_pools_cleanup(pools, npools); | |
440 | OBD_FREE(pools, npools * sizeof(*pools)); | |
441 | out: | |
442 | if (rc) { | |
443 | page_pools.epp_st_grow_fails++; | |
444 | CERROR("Failed to allocate %d enc pages\n", npages); | |
445 | } | |
446 | ||
447 | mutex_unlock(&add_pages_mutex); | |
448 | return rc; | |
449 | } | |
450 | ||
451 | static inline void enc_pools_wakeup(void) | |
452 | { | |
453 | LASSERT(spin_is_locked(&page_pools.epp_lock)); | |
454 | LASSERT(page_pools.epp_waitqlen >= 0); | |
455 | ||
456 | if (unlikely(page_pools.epp_waitqlen)) { | |
457 | LASSERT(waitqueue_active(&page_pools.epp_waitq)); | |
458 | wake_up_all(&page_pools.epp_waitq); | |
459 | } | |
460 | } | |
461 | ||
462 | static int enc_pools_should_grow(int page_needed, long now) | |
463 | { | |
464 | /* don't grow if someone else is growing the pools right now, | |
465 | * or the pools has reached its full capacity | |
466 | */ | |
467 | if (page_pools.epp_growing || | |
468 | page_pools.epp_total_pages == page_pools.epp_max_pages) | |
469 | return 0; | |
470 | ||
471 | /* if total pages is not enough, we need to grow */ | |
472 | if (page_pools.epp_total_pages < page_needed) | |
473 | return 1; | |
474 | ||
475 | /* | |
476 | * we wanted to return 0 here if there was a shrink just happened | |
477 | * moment ago, but this may cause deadlock if both client and ost | |
478 | * live on single node. | |
479 | */ | |
480 | #if 0 | |
481 | if (now - page_pools.epp_last_shrink < 2) | |
482 | return 0; | |
483 | #endif | |
484 | ||
485 | /* | |
486 | * here we perhaps need consider other factors like wait queue | |
487 | * length, idle index, etc. ? | |
488 | */ | |
489 | ||
490 | /* grow the pools in any other cases */ | |
491 | return 1; | |
492 | } | |
493 | ||
494 | /* | |
495 | * we allocate the requested pages atomically. | |
496 | */ | |
497 | int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) | |
498 | { | |
499 | wait_queue_t waitlink; | |
500 | unsigned long this_idle = -1; | |
501 | cfs_time_t tick = 0; | |
502 | long now; | |
503 | int p_idx, g_idx; | |
504 | int i; | |
505 | ||
506 | LASSERT(desc->bd_iov_count > 0); | |
507 | LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages); | |
508 | ||
509 | /* resent bulk, enc iov might have been allocated previously */ | |
510 | if (desc->bd_enc_iov != NULL) | |
511 | return 0; | |
512 | ||
513 | OBD_ALLOC(desc->bd_enc_iov, | |
514 | desc->bd_iov_count * sizeof(*desc->bd_enc_iov)); | |
515 | if (desc->bd_enc_iov == NULL) | |
516 | return -ENOMEM; | |
517 | ||
518 | spin_lock(&page_pools.epp_lock); | |
519 | ||
520 | page_pools.epp_st_access++; | |
521 | again: | |
522 | if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) { | |
523 | if (tick == 0) | |
524 | tick = cfs_time_current(); | |
525 | ||
526 | now = cfs_time_current_sec(); | |
527 | ||
528 | page_pools.epp_st_missings++; | |
529 | page_pools.epp_pages_short += desc->bd_iov_count; | |
530 | ||
531 | if (enc_pools_should_grow(desc->bd_iov_count, now)) { | |
532 | page_pools.epp_growing = 1; | |
533 | ||
534 | spin_unlock(&page_pools.epp_lock); | |
535 | enc_pools_add_pages(page_pools.epp_pages_short / 2); | |
536 | spin_lock(&page_pools.epp_lock); | |
537 | ||
538 | page_pools.epp_growing = 0; | |
539 | ||
540 | enc_pools_wakeup(); | |
541 | } else { | |
542 | if (++page_pools.epp_waitqlen > | |
543 | page_pools.epp_st_max_wqlen) | |
544 | page_pools.epp_st_max_wqlen = | |
545 | page_pools.epp_waitqlen; | |
546 | ||
547 | set_current_state(TASK_UNINTERRUPTIBLE); | |
548 | init_waitqueue_entry_current(&waitlink); | |
549 | add_wait_queue(&page_pools.epp_waitq, &waitlink); | |
550 | ||
551 | spin_unlock(&page_pools.epp_lock); | |
552 | waitq_wait(&waitlink, TASK_UNINTERRUPTIBLE); | |
553 | remove_wait_queue(&page_pools.epp_waitq, &waitlink); | |
554 | LASSERT(page_pools.epp_waitqlen > 0); | |
555 | spin_lock(&page_pools.epp_lock); | |
556 | page_pools.epp_waitqlen--; | |
557 | } | |
558 | ||
559 | LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count); | |
560 | page_pools.epp_pages_short -= desc->bd_iov_count; | |
561 | ||
562 | this_idle = 0; | |
563 | goto again; | |
564 | } | |
565 | ||
566 | /* record max wait time */ | |
567 | if (unlikely(tick != 0)) { | |
568 | tick = cfs_time_current() - tick; | |
569 | if (tick > page_pools.epp_st_max_wait) | |
570 | page_pools.epp_st_max_wait = tick; | |
571 | } | |
572 | ||
573 | /* proceed with rest of allocation */ | |
574 | page_pools.epp_free_pages -= desc->bd_iov_count; | |
575 | ||
576 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
577 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
578 | ||
579 | for (i = 0; i < desc->bd_iov_count; i++) { | |
580 | LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); | |
581 | desc->bd_enc_iov[i].kiov_page = | |
582 | page_pools.epp_pools[p_idx][g_idx]; | |
583 | page_pools.epp_pools[p_idx][g_idx] = NULL; | |
584 | ||
585 | if (++g_idx == PAGES_PER_POOL) { | |
586 | p_idx++; | |
587 | g_idx = 0; | |
588 | } | |
589 | } | |
590 | ||
591 | if (page_pools.epp_free_pages < page_pools.epp_st_lowfree) | |
592 | page_pools.epp_st_lowfree = page_pools.epp_free_pages; | |
593 | ||
594 | /* | |
595 | * new idle index = (old * weight + new) / (weight + 1) | |
596 | */ | |
597 | if (this_idle == -1) { | |
598 | this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX / | |
599 | page_pools.epp_total_pages; | |
600 | } | |
601 | page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT + | |
602 | this_idle) / | |
603 | (IDLE_IDX_WEIGHT + 1); | |
604 | ||
605 | page_pools.epp_last_access = cfs_time_current_sec(); | |
606 | ||
607 | spin_unlock(&page_pools.epp_lock); | |
608 | return 0; | |
609 | } | |
610 | EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages); | |
611 | ||
612 | void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) | |
613 | { | |
614 | int p_idx, g_idx; | |
615 | int i; | |
616 | ||
617 | if (desc->bd_enc_iov == NULL) | |
618 | return; | |
619 | ||
620 | LASSERT(desc->bd_iov_count > 0); | |
621 | ||
622 | spin_lock(&page_pools.epp_lock); | |
623 | ||
624 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
625 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
626 | ||
627 | LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <= | |
628 | page_pools.epp_total_pages); | |
629 | LASSERT(page_pools.epp_pools[p_idx]); | |
630 | ||
631 | for (i = 0; i < desc->bd_iov_count; i++) { | |
632 | LASSERT(desc->bd_enc_iov[i].kiov_page != NULL); | |
633 | LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); | |
634 | LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL); | |
635 | ||
636 | page_pools.epp_pools[p_idx][g_idx] = | |
637 | desc->bd_enc_iov[i].kiov_page; | |
638 | ||
639 | if (++g_idx == PAGES_PER_POOL) { | |
640 | p_idx++; | |
641 | g_idx = 0; | |
642 | } | |
643 | } | |
644 | ||
645 | page_pools.epp_free_pages += desc->bd_iov_count; | |
646 | ||
647 | enc_pools_wakeup(); | |
648 | ||
649 | spin_unlock(&page_pools.epp_lock); | |
650 | ||
651 | OBD_FREE(desc->bd_enc_iov, | |
652 | desc->bd_iov_count * sizeof(*desc->bd_enc_iov)); | |
653 | desc->bd_enc_iov = NULL; | |
654 | } | |
655 | EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages); | |
656 | ||
657 | /* | |
658 | * we don't do much stuff for add_user/del_user anymore, except adding some | |
659 | * initial pages in add_user() if current pools are empty, rest would be | |
660 | * handled by the pools's self-adaption. | |
661 | */ | |
662 | int sptlrpc_enc_pool_add_user(void) | |
663 | { | |
664 | int need_grow = 0; | |
665 | ||
666 | spin_lock(&page_pools.epp_lock); | |
667 | if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) { | |
668 | page_pools.epp_growing = 1; | |
669 | need_grow = 1; | |
670 | } | |
671 | spin_unlock(&page_pools.epp_lock); | |
672 | ||
673 | if (need_grow) { | |
674 | enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES + | |
675 | PTLRPC_MAX_BRW_PAGES); | |
676 | ||
677 | spin_lock(&page_pools.epp_lock); | |
678 | page_pools.epp_growing = 0; | |
679 | enc_pools_wakeup(); | |
680 | spin_unlock(&page_pools.epp_lock); | |
681 | } | |
682 | return 0; | |
683 | } | |
684 | EXPORT_SYMBOL(sptlrpc_enc_pool_add_user); | |
685 | ||
/*
 * Counterpart of sptlrpc_enc_pool_add_user(); there is nothing to undo,
 * so this only reports success.
 */
int sptlrpc_enc_pool_del_user(void)
{
	int	rc = 0;

	return rc;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
691 | ||
692 | static inline void enc_pools_alloc(void) | |
693 | { | |
694 | LASSERT(page_pools.epp_max_pools); | |
695 | OBD_ALLOC_LARGE(page_pools.epp_pools, | |
696 | page_pools.epp_max_pools * | |
697 | sizeof(*page_pools.epp_pools)); | |
698 | } | |
699 | ||
700 | static inline void enc_pools_free(void) | |
701 | { | |
702 | LASSERT(page_pools.epp_max_pools); | |
703 | LASSERT(page_pools.epp_pools); | |
704 | ||
705 | OBD_FREE_LARGE(page_pools.epp_pools, | |
706 | page_pools.epp_max_pools * | |
707 | sizeof(*page_pools.epp_pools)); | |
708 | } | |
709 | ||
3bb22ec5 PT |
710 | static struct shrinker pools_shrinker = { |
711 | .count_objects = enc_pools_shrink_count, | |
712 | .scan_objects = enc_pools_shrink_scan, | |
713 | .seeks = DEFAULT_SEEKS, | |
714 | }; | |
715 | ||
d7e09d03 PT |
716 | int sptlrpc_enc_pool_init(void) |
717 | { | |
718 | /* | |
719 | * maximum capacity is 1/8 of total physical memory. | |
720 | * is the 1/8 a good number? | |
721 | */ | |
4f6cc9ab | 722 | page_pools.epp_max_pages = totalram_pages / 8; |
d7e09d03 PT |
723 | page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); |
724 | ||
725 | init_waitqueue_head(&page_pools.epp_waitq); | |
726 | page_pools.epp_waitqlen = 0; | |
727 | page_pools.epp_pages_short = 0; | |
728 | ||
729 | page_pools.epp_growing = 0; | |
730 | ||
731 | page_pools.epp_idle_idx = 0; | |
732 | page_pools.epp_last_shrink = cfs_time_current_sec(); | |
733 | page_pools.epp_last_access = cfs_time_current_sec(); | |
734 | ||
735 | spin_lock_init(&page_pools.epp_lock); | |
736 | page_pools.epp_total_pages = 0; | |
737 | page_pools.epp_free_pages = 0; | |
738 | ||
739 | page_pools.epp_st_max_pages = 0; | |
740 | page_pools.epp_st_grows = 0; | |
741 | page_pools.epp_st_grow_fails = 0; | |
742 | page_pools.epp_st_shrinks = 0; | |
743 | page_pools.epp_st_access = 0; | |
744 | page_pools.epp_st_missings = 0; | |
745 | page_pools.epp_st_lowfree = 0; | |
746 | page_pools.epp_st_max_wqlen = 0; | |
747 | page_pools.epp_st_max_wait = 0; | |
748 | ||
749 | enc_pools_alloc(); | |
750 | if (page_pools.epp_pools == NULL) | |
751 | return -ENOMEM; | |
752 | ||
3bb22ec5 | 753 | register_shrinker(&pools_shrinker); |
d7e09d03 PT |
754 | |
755 | return 0; | |
756 | } | |
757 | ||
758 | void sptlrpc_enc_pool_fini(void) | |
759 | { | |
760 | unsigned long cleaned, npools; | |
761 | ||
d7e09d03 PT |
762 | LASSERT(page_pools.epp_pools); |
763 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); | |
764 | ||
3bb22ec5 | 765 | unregister_shrinker(&pools_shrinker); |
d7e09d03 PT |
766 | |
767 | npools = npages_to_npools(page_pools.epp_total_pages); | |
768 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); | |
769 | LASSERT(cleaned == page_pools.epp_total_pages); | |
770 | ||
771 | enc_pools_free(); | |
772 | ||
773 | if (page_pools.epp_st_access > 0) { | |
774 | CDEBUG(D_SEC, | |
775 | "max pages %lu, grows %u, grow fails %u, shrinks %u, " | |
776 | "access %lu, missing %lu, max qlen %u, max wait " | |
777 | CFS_TIME_T"/%d\n", | |
778 | page_pools.epp_st_max_pages, page_pools.epp_st_grows, | |
779 | page_pools.epp_st_grow_fails, | |
780 | page_pools.epp_st_shrinks, page_pools.epp_st_access, | |
781 | page_pools.epp_st_missings, page_pools.epp_st_max_wqlen, | |
782 | page_pools.epp_st_max_wait, HZ); | |
783 | } | |
784 | } | |
785 | ||
786 | ||
787 | static int cfs_hash_alg_id[] = { | |
788 | [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL, | |
789 | [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32, | |
790 | [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32, | |
791 | [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5, | |
792 | [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1, | |
793 | [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256, | |
794 | [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384, | |
795 | [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512, | |
796 | }; | |
797 | const char * sptlrpc_get_hash_name(__u8 hash_alg) | |
798 | { | |
799 | return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]); | |
800 | } | |
801 | EXPORT_SYMBOL(sptlrpc_get_hash_name); | |
802 | ||
803 | __u8 sptlrpc_get_hash_alg(const char *algname) | |
804 | { | |
805 | return cfs_crypto_hash_alg(algname); | |
806 | } | |
807 | EXPORT_SYMBOL(sptlrpc_get_hash_alg); | |
808 | ||
809 | int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed) | |
810 | { | |
811 | struct ptlrpc_bulk_sec_desc *bsd; | |
812 | int size = msg->lm_buflens[offset]; | |
813 | ||
814 | bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); | |
815 | if (bsd == NULL) { | |
816 | CERROR("Invalid bulk sec desc: size %d\n", size); | |
817 | return -EINVAL; | |
818 | } | |
819 | ||
820 | if (swabbed) { | |
821 | __swab32s(&bsd->bsd_nob); | |
822 | } | |
823 | ||
824 | if (unlikely(bsd->bsd_version != 0)) { | |
825 | CERROR("Unexpected version %u\n", bsd->bsd_version); | |
826 | return -EPROTO; | |
827 | } | |
828 | ||
829 | if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) { | |
830 | CERROR("Invalid type %u\n", bsd->bsd_type); | |
831 | return -EPROTO; | |
832 | } | |
833 | ||
834 | /* FIXME more sanity check here */ | |
835 | ||
836 | if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && | |
837 | bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG && | |
838 | bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) { | |
839 | CERROR("Invalid svc %u\n", bsd->bsd_svc); | |
840 | return -EPROTO; | |
841 | } | |
842 | ||
843 | return 0; | |
844 | } | |
845 | EXPORT_SYMBOL(bulk_sec_desc_unpack); | |
846 | ||
847 | int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, | |
848 | void *buf, int buflen) | |
849 | { | |
850 | struct cfs_crypto_hash_desc *hdesc; | |
851 | int hashsize; | |
852 | char hashbuf[64]; | |
853 | unsigned int bufsize; | |
854 | int i, err; | |
855 | ||
856 | LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX); | |
857 | LASSERT(buflen >= 4); | |
858 | ||
859 | hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0); | |
860 | if (IS_ERR(hdesc)) { | |
861 | CERROR("Unable to initialize checksum hash %s\n", | |
862 | cfs_crypto_hash_name(cfs_hash_alg_id[alg])); | |
863 | return PTR_ERR(hdesc); | |
864 | } | |
865 | ||
866 | hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]); | |
867 | ||
868 | for (i = 0; i < desc->bd_iov_count; i++) { | |
869 | cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page, | |
870 | desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK, | |
871 | desc->bd_iov[i].kiov_len); | |
872 | } | |
873 | if (hashsize > buflen) { | |
874 | bufsize = sizeof(hashbuf); | |
875 | err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf, | |
876 | &bufsize); | |
877 | memcpy(buf, hashbuf, buflen); | |
878 | } else { | |
879 | bufsize = buflen; | |
880 | err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf, | |
881 | &bufsize); | |
882 | } | |
883 | ||
884 | if (err) | |
885 | cfs_crypto_hash_final(hdesc, NULL, NULL); | |
886 | return err; | |
887 | } | |
888 | EXPORT_SYMBOL(sptlrpc_get_bulk_checksum); |