Btrfs: Fix free block discard calls down to the block layer
[deliverable/linux.git] / fs / btrfs / zlib.c
CommitLineData
c8b97818
CM
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 *
18 * Based on jffs2 zlib code:
19 * Copyright © 2001-2007 Red Hat, Inc.
20 * Created by David Woodhouse <dwmw2@infradead.org>
21 */
22
23#include <linux/kernel.h>
24#include <linux/slab.h>
25#include <linux/zlib.h>
26#include <linux/zutil.h>
27#include <linux/vmalloc.h>
28#include <linux/init.h>
29#include <linux/err.h>
30#include <linux/sched.h>
31#include <linux/pagemap.h>
32#include <linux/bio.h>
b2950863 33#include "compression.h"
c8b97818
CM
34
/*
 * Plan: call deflate() with avail_in == *sourcelen,
 * avail_out == *dstlen - STREAM_END_SPACE and flush == Z_FINISH.
 * If it doesn't manage to finish, call it again with avail_in == 0
 * and avail_out set to the remaining STREAM_END_SPACE bytes so that
 * it can clean up.
 *
 * Q: Is 12 bytes sufficient?
 */
42#define STREAM_END_SPACE 12
43
44struct workspace {
45 z_stream inf_strm;
46 z_stream def_strm;
47 char *buf;
48 struct list_head list;
49};
50
51static LIST_HEAD(idle_workspace);
52static DEFINE_SPINLOCK(workspace_lock);
53static unsigned long num_workspace;
54static atomic_t alloc_workspace = ATOMIC_INIT(0);
55static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56
57/*
58 * this finds an available zlib workspace or allocates a new one
59 * NULL or an ERR_PTR is returned if things go bad.
60 */
61static struct workspace *find_zlib_workspace(void)
62{
63 struct workspace *workspace;
64 int ret;
65 int cpus = num_online_cpus();
66
67again:
68 spin_lock(&workspace_lock);
69 if (!list_empty(&idle_workspace)) {
70 workspace = list_entry(idle_workspace.next, struct workspace,
71 list);
72 list_del(&workspace->list);
73 num_workspace--;
74 spin_unlock(&workspace_lock);
75 return workspace;
76
77 }
78 spin_unlock(&workspace_lock);
79 if (atomic_read(&alloc_workspace) > cpus) {
80 DEFINE_WAIT(wait);
81 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82 if (atomic_read(&alloc_workspace) > cpus)
83 schedule();
84 finish_wait(&workspace_wait, &wait);
85 goto again;
86 }
87 atomic_inc(&alloc_workspace);
88 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
89 if (!workspace) {
90 ret = -ENOMEM;
91 goto fail;
92 }
93
94 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95 if (!workspace->def_strm.workspace) {
96 ret = -ENOMEM;
97 goto fail;
98 }
99 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100 if (!workspace->inf_strm.workspace) {
101 ret = -ENOMEM;
102 goto fail_inflate;
103 }
104 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105 if (!workspace->buf) {
106 ret = -ENOMEM;
107 goto fail_kmalloc;
108 }
109 return workspace;
110
111fail_kmalloc:
112 vfree(workspace->inf_strm.workspace);
113fail_inflate:
114 vfree(workspace->def_strm.workspace);
115fail:
116 kfree(workspace);
117 atomic_dec(&alloc_workspace);
118 wake_up(&workspace_wait);
119 return ERR_PTR(ret);
120}
121
122/*
123 * put a workspace struct back on the list or free it if we have enough
124 * idle ones sitting around
125 */
126static int free_workspace(struct workspace *workspace)
127{
128 spin_lock(&workspace_lock);
129 if (num_workspace < num_online_cpus()) {
130 list_add_tail(&workspace->list, &idle_workspace);
131 num_workspace++;
132 spin_unlock(&workspace_lock);
133 if (waitqueue_active(&workspace_wait))
134 wake_up(&workspace_wait);
135 return 0;
136 }
137 spin_unlock(&workspace_lock);
138 vfree(workspace->def_strm.workspace);
139 vfree(workspace->inf_strm.workspace);
140 kfree(workspace->buf);
141 kfree(workspace);
142
143 atomic_dec(&alloc_workspace);
144 if (waitqueue_active(&workspace_wait))
145 wake_up(&workspace_wait);
146 return 0;
147}
148
149/*
150 * cleanup function for module exit
151 */
152static void free_workspaces(void)
153{
154 struct workspace *workspace;
155 while(!list_empty(&idle_workspace)) {
156 workspace = list_entry(idle_workspace.next, struct workspace,
157 list);
158 list_del(&workspace->list);
159 vfree(workspace->def_strm.workspace);
160 vfree(workspace->inf_strm.workspace);
161 kfree(workspace->buf);
162 kfree(workspace);
163 atomic_dec(&alloc_workspace);
164 }
165}
166
167/*
168 * given an address space and start/len, compress the bytes.
169 *
170 * pages are allocated to hold the compressed result and stored
171 * in 'pages'
172 *
173 * out_pages is used to return the number of pages allocated. There
174 * may be pages allocated even if we return an error
175 *
176 * total_in is used to return the number of bytes actually read. It
177 * may be smaller then len if we had to exit early because we
178 * ran out of room in the pages array or because we cross the
179 * max_out threshold.
180 *
181 * total_out is used to return the total number of compressed bytes
182 *
183 * max_out tells us the max number of bytes that we're allowed to
184 * stuff into pages
185 */
186int btrfs_zlib_compress_pages(struct address_space *mapping,
187 u64 start, unsigned long len,
188 struct page **pages,
189 unsigned long nr_dest_pages,
190 unsigned long *out_pages,
191 unsigned long *total_in,
192 unsigned long *total_out,
193 unsigned long max_out)
194{
195 int ret;
196 struct workspace *workspace;
197 char *data_in;
198 char *cpage_out;
199 int nr_pages = 0;
200 struct page *in_page = NULL;
201 struct page *out_page = NULL;
202 int out_written = 0;
203 int in_read = 0;
204 unsigned long bytes_left;
205
206 *out_pages = 0;
207 *total_out = 0;
208 *total_in = 0;
209
210 workspace = find_zlib_workspace();
211 if (!workspace)
212 return -1;
213
214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
215 printk(KERN_WARNING "deflateInit failed\n");
216 ret = -1;
217 goto out;
218 }
219
220 workspace->def_strm.total_in = 0;
221 workspace->def_strm.total_out = 0;
222
223 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
224 data_in = kmap(in_page);
225
226 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
227 cpage_out = kmap(out_page);
228 pages[0] = out_page;
229 nr_pages = 1;
230
231 workspace->def_strm.next_in = data_in;
232 workspace->def_strm.next_out = cpage_out;
233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235
236 out_written = 0;
237 in_read = 0;
238
239 while (workspace->def_strm.total_in < len) {
240 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 if (ret != Z_OK) {
242 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
243 ret);
244 zlib_deflateEnd(&workspace->def_strm);
245 ret = -1;
246 goto out;
247 }
248
249 /* we're making it bigger, give up */
250 if (workspace->def_strm.total_in > 8192 &&
251 workspace->def_strm.total_in <
252 workspace->def_strm.total_out) {
253 ret = -1;
254 goto out;
255 }
256 /* we need another page for writing out. Test this
257 * before the total_in so we will pull in a new page for
258 * the stream end if required
259 */
260 if (workspace->def_strm.avail_out == 0) {
261 kunmap(out_page);
262 if (nr_pages == nr_dest_pages) {
263 out_page = NULL;
264 ret = -1;
265 goto out;
266 }
267 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
268 cpage_out = kmap(out_page);
269 pages[nr_pages] = out_page;
270 nr_pages++;
271 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
272 workspace->def_strm.next_out = cpage_out;
273 }
274 /* we're all done */
275 if (workspace->def_strm.total_in >= len)
276 break;
277
278 /* we've read in a full page, get a new one */
279 if (workspace->def_strm.avail_in == 0) {
280 if (workspace->def_strm.total_out > max_out)
281 break;
282
283 bytes_left = len - workspace->def_strm.total_in;
284 kunmap(in_page);
285 page_cache_release(in_page);
286
287 start += PAGE_CACHE_SIZE;
288 in_page = find_get_page(mapping,
289 start >> PAGE_CACHE_SHIFT);
290 data_in = kmap(in_page);
291 workspace->def_strm.avail_in = min(bytes_left,
292 PAGE_CACHE_SIZE);
293 workspace->def_strm.next_in = data_in;
294 }
295 }
296 workspace->def_strm.avail_in = 0;
297 ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
298 zlib_deflateEnd(&workspace->def_strm);
299
300 if (ret != Z_STREAM_END) {
301 ret = -1;
302 goto out;
303 }
304
305 if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
306 ret = -1;
307 goto out;
308 }
309
310 ret = 0;
311 *total_out = workspace->def_strm.total_out;
312 *total_in = workspace->def_strm.total_in;
313out:
314 *out_pages = nr_pages;
315 if (out_page)
316 kunmap(out_page);
317
318 if (in_page) {
319 kunmap(in_page);
320 page_cache_release(in_page);
321 }
322 free_workspace(workspace);
323 return ret;
324}
325
326/*
327 * pages_in is an array of pages with compressed data.
328 *
329 * disk_start is the starting logical offset of this array in the file
330 *
331 * bvec is a bio_vec of pages from the file that we want to decompress into
332 *
333 * vcnt is the count of pages in the biovec
334 *
335 * srclen is the number of bytes in pages_in
336 *
337 * The basic idea is that we have a bio that was created by readpages.
338 * The pages in the bio are for the uncompressed data, and they may not
339 * be contiguous. They all correspond to the range of bytes covered by
340 * the compressed extent.
341 */
342int btrfs_zlib_decompress_biovec(struct page **pages_in,
343 u64 disk_start,
344 struct bio_vec *bvec,
345 int vcnt,
346 size_t srclen)
347{
348 int ret = 0;
349 int wbits = MAX_WBITS;
350 struct workspace *workspace;
351 char *data_in;
352 size_t total_out = 0;
353 unsigned long page_bytes_left;
354 unsigned long page_in_index = 0;
355 unsigned long page_out_index = 0;
356 struct page *page_out;
357 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
358 PAGE_CACHE_SIZE;
359 unsigned long buf_start;
360 unsigned long buf_offset;
361 unsigned long bytes;
362 unsigned long working_bytes;
363 unsigned long pg_offset;
364 unsigned long start_byte;
365 unsigned long current_buf_start;
366 char *kaddr;
367
368 workspace = find_zlib_workspace();
369 if (!workspace)
370 return -ENOMEM;
371
372 data_in = kmap(pages_in[page_in_index]);
373 workspace->inf_strm.next_in = data_in;
5b050f04 374 workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
c8b97818
CM
375 workspace->inf_strm.total_in = 0;
376
377 workspace->inf_strm.total_out = 0;
378 workspace->inf_strm.next_out = workspace->buf;
379 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
380 page_out = bvec[page_out_index].bv_page;
381 page_bytes_left = PAGE_CACHE_SIZE;
382 pg_offset = 0;
383
384 /* If it's deflate, and it's got no preset dictionary, then
385 we can tell zlib to skip the adler32 check. */
386 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
387 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
388 !(((data_in[0]<<8) + data_in[1]) % 31)) {
389
390 wbits = -((data_in[0] >> 4) + 8);
391 workspace->inf_strm.next_in += 2;
392 workspace->inf_strm.avail_in -= 2;
393 }
394
395 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
396 printk(KERN_WARNING "inflateInit failed\n");
397 ret = -1;
398 goto out;
399 }
400 while(workspace->inf_strm.total_in < srclen) {
401 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
402 if (ret != Z_OK && ret != Z_STREAM_END) {
403 break;
404 }
405
406 /*
407 * buf start is the byte offset we're of the start of
408 * our workspace buffer
409 */
410 buf_start = total_out;
411
412 /* total_out is the last byte of the workspace buffer */
413 total_out = workspace->inf_strm.total_out;
414
415 working_bytes = total_out - buf_start;
416
417 /*
418 * start byte is the first byte of the page we're currently
419 * copying into relative to the start of the compressed data.
420 */
421 start_byte = page_offset(page_out) - disk_start;
422
423 if (working_bytes == 0) {
424 /* we didn't make progress in this inflate
425 * call, we're done
426 */
771ed689 427 if (ret != Z_STREAM_END) {
c8b97818 428 ret = -1;
771ed689 429 }
c8b97818
CM
430 break;
431 }
432
433 /* we haven't yet hit data corresponding to this page */
434 if (total_out <= start_byte) {
435 goto next;
436 }
437
438 /*
439 * the start of the data we care about is offset into
440 * the middle of our working buffer
441 */
442 if (total_out > start_byte && buf_start < start_byte) {
443 buf_offset = start_byte - buf_start;
444 working_bytes -= buf_offset;
445 } else {
446 buf_offset = 0;
447 }
448 current_buf_start = buf_start;
449
450 /* copy bytes from the working buffer into the pages */
451 while(working_bytes > 0) {
452 bytes = min(PAGE_CACHE_SIZE - pg_offset,
453 PAGE_CACHE_SIZE - buf_offset);
454 bytes = min(bytes, working_bytes);
455 kaddr = kmap_atomic(page_out, KM_USER0);
456 memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
457 bytes);
458 kunmap_atomic(kaddr, KM_USER0);
459 flush_dcache_page(page_out);
460
461 pg_offset += bytes;
462 page_bytes_left -= bytes;
463 buf_offset += bytes;
464 working_bytes -= bytes;
465 current_buf_start += bytes;
466
467 /* check if we need to pick another page */
468 if (page_bytes_left == 0) {
469 page_out_index++;
470 if (page_out_index >= vcnt) {
471 ret = 0;
472 goto done;
473 }
474 page_out = bvec[page_out_index].bv_page;
475 pg_offset = 0;
476 page_bytes_left = PAGE_CACHE_SIZE;
477 start_byte = page_offset(page_out) - disk_start;
478
479 /*
480 * make sure our new page is covered by this
481 * working buffer
482 */
483 if (total_out <= start_byte) {
484 goto next;
485 }
486
487 /* the next page in the biovec might not
488 * be adjacent to the last page, but it
489 * might still be found inside this working
490 * buffer. bump our offset pointer
491 */
492 if (total_out > start_byte &&
493 current_buf_start < start_byte) {
494 buf_offset = start_byte - buf_start;
495 working_bytes = total_out - start_byte;
496 current_buf_start = buf_start +
497 buf_offset;
498 }
499 }
500 }
501next:
502 workspace->inf_strm.next_out = workspace->buf;
503 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
504
505 if (workspace->inf_strm.avail_in == 0) {
506 unsigned long tmp;
507 kunmap(pages_in[page_in_index]);
508 page_in_index++;
509 if (page_in_index >= total_pages_in) {
510 data_in = NULL;
511 break;
512 }
513 data_in = kmap(pages_in[page_in_index]);
514 workspace->inf_strm.next_in = data_in;
515 tmp = srclen - workspace->inf_strm.total_in;
516 workspace->inf_strm.avail_in = min(tmp,
517 PAGE_CACHE_SIZE);
518 }
519 }
520 if (ret != Z_STREAM_END) {
521 ret = -1;
522 } else {
523 ret = 0;
524 }
525done:
526 zlib_inflateEnd(&workspace->inf_strm);
527 if (data_in)
528 kunmap(pages_in[page_in_index]);
529out:
530 free_workspace(workspace);
531 return ret;
532}
533
534/*
535 * a less complex decompression routine. Our compressed data fits in a
536 * single page, and we want to read a single page out of it.
537 * start_byte tells us the offset into the compressed data we're interested in
538 */
539int btrfs_zlib_decompress(unsigned char *data_in,
540 struct page *dest_page,
541 unsigned long start_byte,
542 size_t srclen, size_t destlen)
543{
544 int ret = 0;
545 int wbits = MAX_WBITS;
546 struct workspace *workspace;
547 unsigned long bytes_left = destlen;
548 unsigned long total_out = 0;
549 char *kaddr;
550
551 if (destlen > PAGE_CACHE_SIZE)
552 return -ENOMEM;
553
554 workspace = find_zlib_workspace();
555 if (!workspace)
556 return -ENOMEM;
557
558 workspace->inf_strm.next_in = data_in;
559 workspace->inf_strm.avail_in = srclen;
560 workspace->inf_strm.total_in = 0;
561
562 workspace->inf_strm.next_out = workspace->buf;
563 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
564 workspace->inf_strm.total_out = 0;
565 /* If it's deflate, and it's got no preset dictionary, then
566 we can tell zlib to skip the adler32 check. */
567 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
568 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
569 !(((data_in[0]<<8) + data_in[1]) % 31)) {
570
571 wbits = -((data_in[0] >> 4) + 8);
572 workspace->inf_strm.next_in += 2;
573 workspace->inf_strm.avail_in -= 2;
574 }
575
576 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
577 printk(KERN_WARNING "inflateInit failed\n");
578 ret = -1;
579 goto out;
580 }
581
582 while(bytes_left > 0) {
583 unsigned long buf_start;
584 unsigned long buf_offset;
585 unsigned long bytes;
586 unsigned long pg_offset = 0;
587
588 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
589 if (ret != Z_OK && ret != Z_STREAM_END) {
590 break;
591 }
592
593 buf_start = total_out;
594 total_out = workspace->inf_strm.total_out;
595
596 if (total_out == buf_start) {
597 ret = -1;
598 break;
599 }
600
601 if (total_out <= start_byte) {
602 goto next;
603 }
604
605 if (total_out > start_byte && buf_start < start_byte) {
606 buf_offset = start_byte - buf_start;
607 } else {
608 buf_offset = 0;
609 }
610
611 bytes = min(PAGE_CACHE_SIZE - pg_offset,
612 PAGE_CACHE_SIZE - buf_offset);
613 bytes = min(bytes, bytes_left);
614
615 kaddr = kmap_atomic(dest_page, KM_USER0);
616 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
617 kunmap_atomic(kaddr, KM_USER0);
618
619 pg_offset += bytes;
620 bytes_left -= bytes;
621next:
622 workspace->inf_strm.next_out = workspace->buf;
623 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
624 }
625 if (ret != Z_STREAM_END && bytes_left != 0) {
626 ret = -1;
627 } else {
628 ret = 0;
629 }
630 zlib_inflateEnd(&workspace->inf_strm);
631out:
632 free_workspace(workspace);
633 return ret;
634}
635
/*
 * Tear-down hook for the zlib code: releases every workspace still
 * sitting on the idle list.
 */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}
This page took 0.051021 seconds and 5 git commands to generate.