[deliverable/linux.git] / fs / btrfs / zlib.c

/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 *
 * Based on jffs2 zlib code:
 * Copyright © 2001-2007 Red Hat, Inc.
 * Created by David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include "compression.h"

/* Plan: call deflate() with avail_in == *sourcelen,
	avail_out = *dstlen - 12 and flush == Z_FINISH.
	If it doesn't manage to finish,	call it again with
	avail_in == 0 and avail_out set to the remaining 12
	bytes for it to clean up.
   Q: Is 12 bytes sufficient?
*/
#define STREAM_END_SPACE 12

/*
 * One reusable set of zlib state.  A workspace is handed out by
 * find_zlib_workspace() and given back with free_workspace().
 */
struct workspace {
	/* inflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
	z_stream inf_strm;
	/* deflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
	z_stream def_strm;
	/* PAGE_CACHE_SIZE scratch buffer that the inflate paths decompress into */
	char *buf;
	/* links the workspace into idle_workspace while it is not in use */
	struct list_head list;
};

/* workspaces that are currently unused, linked through workspace->list */
static LIST_HEAD(idle_workspace);
/*
 * taken around idle_workspace / num_workspace updates in
 * find_zlib_workspace() and free_workspace()
 */
static DEFINE_SPINLOCK(workspace_lock);
/* number of workspaces sitting on idle_workspace */
static unsigned long num_workspace;
/*
 * number of workspaces that exist at all (idle or handed out); bumped when
 * one is allocated and dropped when one is actually freed
 */
static atomic_t alloc_workspace = ATOMIC_INIT(0);
/* find_zlib_workspace() sleeps here when too many workspaces are allocated */
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);

/*
 * Hand out a zlib workspace: reuse an idle one when available, otherwise
 * allocate a fresh one.  When more workspaces than online cpus are already
 * allocated, sleep until somebody returns or frees one and then retry.
 *
 * Returns the workspace on success, or ERR_PTR(-ENOMEM) when an allocation
 * fails.  NULL is never returned, so callers must test with IS_ERR().
 */
static struct workspace *find_zlib_workspace(void)
{
	struct workspace *ws;
	int err = -ENOMEM;	/* every failure below is an allocation failure */
	int limit = num_online_cpus();

	while (1) {
		/* fast path: grab the first idle workspace, if any */
		spin_lock(&workspace_lock);
		if (!list_empty(&idle_workspace)) {
			ws = list_entry(idle_workspace.next, struct workspace,
					list);
			list_del(&ws->list);
			num_workspace--;
			spin_unlock(&workspace_lock);
			return ws;
		}
		spin_unlock(&workspace_lock);

		/* room to allocate another one */
		if (atomic_read(&alloc_workspace) <= limit)
			break;

		/*
		 * too many allocated already: wait for a wake up from
		 * free_workspace() and look at the idle list again.  The
		 * count is re-checked after queueing ourselves so a wake up
		 * in between is not missed.
		 */
		{
			DEFINE_WAIT(wait);

			prepare_to_wait(&workspace_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&alloc_workspace) > limit)
				schedule();
			finish_wait(&workspace_wait, &wait);
		}
	}

	/* account for the new workspace before allocating it */
	atomic_inc(&alloc_workspace);

	ws = kzalloc(sizeof(*ws), GFP_NOFS);
	if (!ws)
		goto fail;

	ws->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	if (!ws->def_strm.workspace)
		goto fail;

	ws->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
	if (!ws->inf_strm.workspace)
		goto fail_inflate;

	ws->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
	if (!ws->buf)
		goto fail_kmalloc;

	return ws;

fail_kmalloc:
	vfree(ws->inf_strm.workspace);
fail_inflate:
	vfree(ws->def_strm.workspace);
fail:
	kfree(ws);
	/* undo the accounting and let a waiter have a go */
	atomic_dec(&alloc_workspace);
	wake_up(&workspace_wait);
	return ERR_PTR(err);
}

/*
 * Give a workspace back.  It is parked on the idle list as long as fewer
 * than num_online_cpus() workspaces are idle; otherwise its memory is
 * released and the allocation count is dropped.  Either way anybody
 * sleeping in find_zlib_workspace() is woken.  Always returns 0.
 */
static int free_workspace(struct workspace *workspace)
{
	int parked = 0;

	spin_lock(&workspace_lock);
	if (num_workspace < num_online_cpus()) {
		list_add_tail(&workspace->list, &idle_workspace);
		num_workspace++;
		parked = 1;
	}
	spin_unlock(&workspace_lock);

	if (!parked) {
		/* enough idle ones around already, really free this one */
		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);

		atomic_dec(&alloc_workspace);
	}

	if (waitqueue_active(&workspace_wait))
		wake_up(&workspace_wait);
	return 0;
}

/*
 * cleanup function for module exit
 */
static void free_workspaces(void)
{
	struct workspace *workspace;
	while(!list_empty(&idle_workspace)) {
		workspace = list_entry(idle_workspace.next, struct workspace,
				       list);
		list_del(&workspace->list);
		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);
		atomic_dec(&alloc_workspace);
	}
}

/*
 * given an address space and start/len, compress the bytes.
 *
 * mapping/start/len describe the source range; the source pages are
 * looked up one at a time with find_get_page().
 *
 * pages are allocated to hold the compressed result and stored
 * in 'pages', which has room for nr_dest_pages entries.
 *
 * out_pages is used to return the number of pages allocated.  There
 * may be pages allocated even if we return an error
 * (NOTE(review): presumably the caller releases pages[0..*out_pages) in
 * that case -- confirm against the caller).
 *
 * total_in is used to return the number of bytes actually read.  It
 * may be smaller than len if we had to exit early because we crossed
 * the max_out threshold.
 *
 * total_out is used to return the total number of compressed bytes
 *
 * max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 *
 * Returns 0 on success.  Returns -1 (with *total_in and *total_out left
 * at 0) when no workspace or output page could be obtained, when zlib
 * reports an error, when we run out of room in the pages array, or when
 * the result is not smaller than the input.
 */
int btrfs_zlib_compress_pages(struct address_space *mapping,
			      u64 start, unsigned long len,
			      struct page **pages,
			      unsigned long nr_dest_pages,
			      unsigned long *out_pages,
			      unsigned long *total_in,
			      unsigned long *total_out,
			      unsigned long max_out)
{
	int ret;
	struct workspace *workspace;
	char *data_in;
	char *cpage_out;
	int nr_pages = 0;
	struct page *in_page = NULL;
	struct page *out_page = NULL;
	unsigned long bytes_left;

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	/* failure is reported as an ERR_PTR, never as NULL */
	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -1;

	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
		printk(KERN_WARNING "deflateInit failed\n");
		ret = -1;
		goto out;
	}

	workspace->def_strm.total_in = 0;
	workspace->def_strm.total_out = 0;

	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
	data_in = kmap(in_page);

	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (!out_page) {
		/* nothing to map; 'out' drops in_page for us */
		ret = -1;
		goto out;
	}
	cpage_out = kmap(out_page);
	pages[0] = out_page;
	nr_pages = 1;

	workspace->def_strm.next_in = data_in;
	workspace->def_strm.next_out = cpage_out;
	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);

	while (workspace->def_strm.total_in < len) {
		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
		if (ret != Z_OK) {
			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
			       ret);
			zlib_deflateEnd(&workspace->def_strm);
			ret = -1;
			goto out;
		}

		/* we're making it bigger, give up */
		if (workspace->def_strm.total_in > 8192 &&
		    workspace->def_strm.total_in <
		    workspace->def_strm.total_out) {
			ret = -1;
			goto out;
		}
		/* we need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required
		 */
		if (workspace->def_strm.avail_out == 0) {
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				/* already unmapped, keep 'out' from doing it again */
				out_page = NULL;
				ret = -1;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			if (!out_page) {
				/*
				 * out_page is NULL now, so 'out' will not
				 * unmap the previous page a second time
				 */
				ret = -1;
				goto out;
			}
			cpage_out = kmap(out_page);
			pages[nr_pages] = out_page;
			nr_pages++;
			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
			workspace->def_strm.next_out = cpage_out;
		}
		/* we're all done */
		if (workspace->def_strm.total_in >= len)
			break;

		/* we've read in a full page, get a new one */
		if (workspace->def_strm.avail_in == 0) {
			/* stop feeding input once we are over the budget */
			if (workspace->def_strm.total_out > max_out)
				break;

			bytes_left = len - workspace->def_strm.total_in;
			kunmap(in_page);
			page_cache_release(in_page);

			start += PAGE_CACHE_SIZE;
			in_page = find_get_page(mapping,
						start >> PAGE_CACHE_SHIFT);
			data_in = kmap(in_page);
			workspace->def_strm.avail_in = min(bytes_left,
							   PAGE_CACHE_SIZE);
			workspace->def_strm.next_in = data_in;
		}
	}
	/* no more input; ask zlib to terminate the stream */
	workspace->def_strm.avail_in = 0;
	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
	zlib_deflateEnd(&workspace->def_strm);

	if (ret != Z_STREAM_END) {
		ret = -1;
		goto out;
	}

	/* compression that does not save space is reported as a failure */
	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
		ret = -1;
		goto out;
	}

	ret = 0;
	*total_out = workspace->def_strm.total_out;
	*total_in = workspace->def_strm.total_in;
out:
	*out_pages = nr_pages;
	if (out_page)
		kunmap(out_page);

	if (in_page) {
		kunmap(in_page);
		page_cache_release(in_page);
	}
	free_workspace(workspace);
	return ret;
}

/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * bvec is a bio_vec of pages from the file that we want to decompress into
 *
 * vcnt is the count of pages in the biovec
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 *
 * The stream is inflated one PAGE_CACHE_SIZE chunk at a time into the
 * workspace buffer, and the parts of each chunk that overlap a bvec page
 * are copied out.
 *
 * Returns 0 once every bvec page has been filled or zlib reported the end
 * of the stream, -ENOMEM when no workspace could be obtained and -1 on
 * any other failure.
 */
int btrfs_zlib_decompress_biovec(struct page **pages_in,
			      u64 disk_start,
			      struct bio_vec *bvec,
			      int vcnt,
			      size_t srclen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	char *data_in;
	size_t total_out = 0;
	unsigned long page_bytes_left;
	unsigned long page_in_index = 0;
	unsigned long page_out_index = 0;
	struct page *page_out;
	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
					PAGE_CACHE_SIZE;
	unsigned long buf_start;
	unsigned long buf_offset;
	unsigned long bytes;
	unsigned long working_bytes;
	unsigned long pg_offset;
	unsigned long start_byte;
	unsigned long current_buf_start;
	char *kaddr;

	/* failure is reported as an ERR_PTR, never as NULL */
	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	data_in = kmap(pages_in[page_in_index]);
	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.total_out = 0;
	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	page_out = bvec[page_out_index].bv_page;
	page_bytes_left = PAGE_CACHE_SIZE;
	pg_offset = 0;

	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		/* negative wbits selects a raw stream; skip the 2 byte header */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		/* 'out' skips the unmap, so drop the first input page here */
		kunmap(pages_in[page_in_index]);
		ret = -1;
		goto out;
	}
	while (workspace->inf_strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		/*
		 * buf_start is the offset, in the decompressed stream, of
		 * the first byte of our workspace buffer
		 */
		buf_start = total_out;

		/* total_out is the stream offset just past the buffer's data */
		total_out = workspace->inf_strm.total_out;

		working_bytes = total_out - buf_start;

		/*
		 * start byte is the first byte of the page we're currently
		 * copying into relative to the start of the compressed data.
		 */
		start_byte = page_offset(page_out) - disk_start;

		if (working_bytes == 0) {
			/* we didn't make progress in this inflate
			 * call, we're done
			 */
			if (ret != Z_STREAM_END)
				ret = -1;
			break;
		}

		/* we haven't yet hit data corresponding to this page */
		if (total_out <= start_byte)
			goto next;

		/*
		 * the start of the data we care about is offset into
		 * the middle of our working buffer
		 */
		if (total_out > start_byte && buf_start < start_byte) {
			buf_offset = start_byte - buf_start;
			working_bytes -= buf_offset;
		} else {
			buf_offset = 0;
		}
		current_buf_start = buf_start;

		/* copy bytes from the working buffer into the pages */
		while (working_bytes > 0) {
			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, working_bytes);
			kaddr = kmap_atomic(page_out, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
			       bytes);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page_out);

			pg_offset += bytes;
			page_bytes_left -= bytes;
			buf_offset += bytes;
			working_bytes -= bytes;
			current_buf_start += bytes;

			/* check if we need to pick another page */
			if (page_bytes_left == 0) {
				page_out_index++;
				if (page_out_index >= vcnt) {
					/* every destination page is filled */
					ret = 0;
					goto done;
				}
				page_out = bvec[page_out_index].bv_page;
				pg_offset = 0;
				page_bytes_left = PAGE_CACHE_SIZE;
				start_byte = page_offset(page_out) - disk_start;

				/*
				 * make sure our new page is covered by this
				 * working buffer
				 */
				if (total_out <= start_byte)
					goto next;

				/* the next page in the biovec might not
				 * be adjacent to the last page, but it
				 * might still be found inside this working
				 * buffer.  bump our offset pointer
				 */
				if (total_out > start_byte &&
				    current_buf_start < start_byte) {
					buf_offset = start_byte - buf_start;
					working_bytes = total_out - start_byte;
					current_buf_start = buf_start +
						buf_offset;
				}
			}
		}
next:
		/* hand the whole workspace buffer back to zlib */
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

		if (workspace->inf_strm.avail_in == 0) {
			unsigned long tmp;
			kunmap(pages_in[page_in_index]);
			page_in_index++;
			if (page_in_index >= total_pages_in) {
				/* nothing is mapped any more, see 'done' */
				data_in = NULL;
				break;
			}
			data_in = kmap(pages_in[page_in_index]);
			workspace->inf_strm.next_in = data_in;
			tmp = srclen - workspace->inf_strm.total_in;
			workspace->inf_strm.avail_in = min(tmp,
							   PAGE_CACHE_SIZE);
		}
	}
	if (ret != Z_STREAM_END)
		ret = -1;
	else
		ret = 0;
done:
	zlib_inflateEnd(&workspace->inf_strm);
	if (data_in)
		kunmap(pages_in[page_in_index]);
out:
	free_workspace(workspace);
	return ret;
}

/*
 * a less complex decompression routine.  Our compressed data fits in a
 * single page, and we want to read a single page out of it.
 *
 * data_in/srclen describe the compressed bytes.
 *
 * start_byte tells us the offset into the decompressed data we're
 * interested in; destlen bytes from there on are copied to the start of
 * dest_page.
 *
 * Returns -ENOMEM when destlen is larger than a page or no workspace could
 * be obtained, -1 when inflate setup fails or the stream did not end and
 * fewer than destlen bytes were produced, and 0 otherwise.
 */
int btrfs_zlib_decompress(unsigned char *data_in,
			  struct page *dest_page,
			  unsigned long start_byte,
			  size_t srclen, size_t destlen)
{
	int ret = 0;
	int wbits = MAX_WBITS;
	struct workspace *workspace;
	unsigned long bytes_left = destlen;
	unsigned long total_out = 0;
	/*
	 * write position in dest_page.  This has to survive across loop
	 * iterations: the wanted range can straddle two inflate chunks, and
	 * the second chunk must land after the first, not on top of it.
	 */
	unsigned long pg_offset = 0;
	char *kaddr;

	if (destlen > PAGE_CACHE_SIZE)
		return -ENOMEM;

	/* failure is reported as an ERR_PTR, never as NULL */
	workspace = find_zlib_workspace();
	if (IS_ERR(workspace))
		return -ENOMEM;

	workspace->inf_strm.next_in = data_in;
	workspace->inf_strm.avail_in = srclen;
	workspace->inf_strm.total_in = 0;

	workspace->inf_strm.next_out = workspace->buf;
	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	workspace->inf_strm.total_out = 0;
	/* If it's deflate, and it's got no preset dictionary, then
	   we can tell zlib to skip the adler32 check. */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		/* negative wbits selects a raw stream; skip the 2 byte header */
		wbits = -((data_in[0] >> 4) + 8);
		workspace->inf_strm.next_in += 2;
		workspace->inf_strm.avail_in -= 2;
	}

	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
		printk(KERN_WARNING "inflateInit failed\n");
		ret = -1;
		goto out;
	}

	while (bytes_left > 0) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;

		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		/* [buf_start, total_out) is the stream range now in ->buf */
		buf_start = total_out;
		total_out = workspace->inf_strm.total_out;

		/* no progress in this call, give up */
		if (total_out == buf_start) {
			ret = -1;
			break;
		}

		/* this chunk ends before the data we want */
		if (total_out <= start_byte)
			goto next;

		/* the data we want may start in the middle of the chunk */
		if (total_out > start_byte && buf_start < start_byte)
			buf_offset = start_byte - buf_start;
		else
			buf_offset = 0;

		bytes = min(PAGE_CACHE_SIZE - pg_offset,
			    PAGE_CACHE_SIZE - buf_offset);
		bytes = min(bytes, bytes_left);

		kaddr = kmap_atomic(dest_page, KM_USER0);
		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
		kunmap_atomic(kaddr, KM_USER0);

		pg_offset += bytes;
		bytes_left -= bytes;
next:
		/* hand the whole workspace buffer back to zlib */
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	}
	if (ret != Z_STREAM_END && bytes_left != 0)
		ret = -1;
	else
		ret = 0;
	zlib_inflateEnd(&workspace->inf_strm);
out:
	free_workspace(workspace);
	return ret;
}

/* Release every idle zlib workspace by calling free_workspaces(). */
void btrfs_zlib_exit(void)
{
	free_workspaces();
}
Commit	Line	Data
c8b97818 CM	1	/*
	2	* Copyright (C) 2008 Oracle. All rights reserved.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public
	6	* License v2 as published by the Free Software Foundation.
	7	*
	8	* This program is distributed in the hope that it will be useful,
	9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	11	* General Public License for more details.
	12	*
	13	* You should have received a copy of the GNU General Public
	14	* License along with this program; if not, write to the
	15	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	16	* Boston, MA 021110-1307, USA.
	17	*
	18	* Based on jffs2 zlib code:
	19	* Copyright © 2001-2007 Red Hat, Inc.
	20	* Created by David Woodhouse <dwmw2@infradead.org>
	21	*/
	22
	23	#include <linux/kernel.h>
	24	#include <linux/slab.h>
	25	#include <linux/zlib.h>
	26	#include <linux/zutil.h>
	27	#include <linux/vmalloc.h>
	28	#include <linux/init.h>
	29	#include <linux/err.h>
	30	#include <linux/sched.h>
	31	#include <linux/pagemap.h>
	32	#include <linux/bio.h>
b2950863	33	#include "compression.h"
c8b97818 CM	34
	35	/* Plan: call deflate() with avail_in == *sourcelen,
	36	avail_out = *dstlen - 12 and flush == Z_FINISH.
	37	If it doesn't manage to finish, call it again with
	38	avail_in == 0 and avail_out set to the remaining 12
	39	bytes for it to clean up.
	40	Q: Is 12 bytes sufficient?
	41	*/
	42	#define STREAM_END_SPACE 12
	43
	44	struct workspace {
	45	z_stream inf_strm;
	46	z_stream def_strm;
	47	char *buf;
	48	struct list_head list;
	49	};
	50
	51	static LIST_HEAD(idle_workspace);
	52	static DEFINE_SPINLOCK(workspace_lock);
	53	static unsigned long num_workspace;
	54	static atomic_t alloc_workspace = ATOMIC_INIT(0);
	55	static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
	56
	57	/*
	58	* this finds an available zlib workspace or allocates a new one
	59	* NULL or an ERR_PTR is returned if things go bad.
	60	*/
	61	static struct workspace *find_zlib_workspace(void)
	62	{
	63	struct workspace *workspace;
	64	int ret;
	65	int cpus = num_online_cpus();
	66
	67	again:
	68	spin_lock(&workspace_lock);
	69	if (!list_empty(&idle_workspace)) {
	70	workspace = list_entry(idle_workspace.next, struct workspace,
	71	list);
	72	list_del(&workspace->list);
	73	num_workspace--;
	74	spin_unlock(&workspace_lock);
	75	return workspace;
	76
	77	}
	78	spin_unlock(&workspace_lock);
	79	if (atomic_read(&alloc_workspace) > cpus) {
	80	DEFINE_WAIT(wait);
	81	prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
	82	if (atomic_read(&alloc_workspace) > cpus)
	83	schedule();
	84	finish_wait(&workspace_wait, &wait);
	85	goto again;
	86	}
	87	atomic_inc(&alloc_workspace);
	88	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
	89	if (!workspace) {
	90	ret = -ENOMEM;
	91	goto fail;
	92	}
	93
	94	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
	95	if (!workspace->def_strm.workspace) {
	96	ret = -ENOMEM;
	97	goto fail;
98	}
99	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100	if (!workspace->inf_strm.workspace) {
101	ret = -ENOMEM;
102	goto fail_inflate;
103	}
104	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105	if (!workspace->buf) {
106	ret = -ENOMEM;
107	goto fail_kmalloc;
108	}
109	return workspace;
110
111	fail_kmalloc:
112	vfree(workspace->inf_strm.workspace);
113	fail_inflate:
114	vfree(workspace->def_strm.workspace);
115	fail:
116	kfree(workspace);
117	atomic_dec(&alloc_workspace);
118	wake_up(&workspace_wait);
119	return ERR_PTR(ret);
120	}
121
122	/*
123	* put a workspace struct back on the list or free it if we have enough
124	* idle ones sitting around
125	*/
126	static int free_workspace(struct workspace *workspace)
127	{
128	spin_lock(&workspace_lock);
129	if (num_workspace < num_online_cpus()) {
130	list_add_tail(&workspace->list, &idle_workspace);
131	num_workspace++;
132	spin_unlock(&workspace_lock);
133	if (waitqueue_active(&workspace_wait))
134	wake_up(&workspace_wait);
135	return 0;
136	}
137	spin_unlock(&workspace_lock);
138	vfree(workspace->def_strm.workspace);
139	vfree(workspace->inf_strm.workspace);
140	kfree(workspace->buf);
141	kfree(workspace);
142
143	atomic_dec(&alloc_workspace);
144	if (waitqueue_active(&workspace_wait))
145	wake_up(&workspace_wait);
146	return 0;
147	}
148
149	/*
150	* cleanup function for module exit
151	*/
152	static void free_workspaces(void)
153	{
154	struct workspace *workspace;
155	while(!list_empty(&idle_workspace)) {
156	workspace = list_entry(idle_workspace.next, struct workspace,
157	list);
158	list_del(&workspace->list);
159	vfree(workspace->def_strm.workspace);
160	vfree(workspace->inf_strm.workspace);
161	kfree(workspace->buf);
162	kfree(workspace);
163	atomic_dec(&alloc_workspace);
164	}
165	}
166
167	/*
168	* given an address space and start/len, compress the bytes.
169	*
170	* pages are allocated to hold the compressed result and stored
171	* in 'pages'
172	*
173	* out_pages is used to return the number of pages allocated. There
174	* may be pages allocated even if we return an error
175	*
176	* total_in is used to return the number of bytes actually read. It
177	* may be smaller then len if we had to exit early because we
178	* ran out of room in the pages array or because we cross the
179	* max_out threshold.
180	*
181	* total_out is used to return the total number of compressed bytes
182	*
183	* max_out tells us the max number of bytes that we're allowed to
184	* stuff into pages
185	*/
186	int btrfs_zlib_compress_pages(struct address_space *mapping,
187	u64 start, unsigned long len,
188	struct page **pages,
189	unsigned long nr_dest_pages,
190	unsigned long *out_pages,
191	unsigned long *total_in,
192	unsigned long *total_out,
193	unsigned long max_out)
194	{
195	int ret;
196	struct workspace *workspace;
197	char *data_in;
198	char *cpage_out;
199	int nr_pages = 0;
200	struct page *in_page = NULL;
201	struct page *out_page = NULL;
202	int out_written = 0;
203	int in_read = 0;
204	unsigned long bytes_left;
205
206	*out_pages = 0;
207	*total_out = 0;
208	*total_in = 0;
209
210	workspace = find_zlib_workspace();
211	if (!workspace)
212	return -1;
213
214	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
215	printk(KERN_WARNING "deflateInit failed\n");
216	ret = -1;
217	goto out;
218	}
219
220	workspace->def_strm.total_in = 0;
221	workspace->def_strm.total_out = 0;
222
223	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
224	data_in = kmap(in_page);
225
226	out_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
227	cpage_out = kmap(out_page);
228	pages[0] = out_page;
229	nr_pages = 1;
230
231	workspace->def_strm.next_in = data_in;
232	workspace->def_strm.next_out = cpage_out;
233	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235
236	out_written = 0;
237	in_read = 0;
238
239	while (workspace->def_strm.total_in < len) {
240	ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241	if (ret != Z_OK) {
242	printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
243	ret);
244	zlib_deflateEnd(&workspace->def_strm);
245	ret = -1;
246	goto out;
247	}
248
249	/* we're making it bigger, give up */
250	if (workspace->def_strm.total_in > 8192 &&
251	workspace->def_strm.total_in <
252	workspace->def_strm.total_out) {
253	ret = -1;
254	goto out;
255	}
256	/* we need another page for writing out. Test this
257	* before the total_in so we will pull in a new page for
258	* the stream end if required
259	*/
260	if (workspace->def_strm.avail_out == 0) {
261	kunmap(out_page);
262	if (nr_pages == nr_dest_pages) {
263	out_page = NULL;
264	ret = -1;
265	goto out;
266	}
267	out_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
268	cpage_out = kmap(out_page);
269	pages[nr_pages] = out_page;
270	nr_pages++;
271	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
272	workspace->def_strm.next_out = cpage_out;
273	}
274	/* we're all done */
275	if (workspace->def_strm.total_in >= len)
276	break;
277
278	/* we've read in a full page, get a new one */
279	if (workspace->def_strm.avail_in == 0) {
280	if (workspace->def_strm.total_out > max_out)
281	break;
282
283	bytes_left = len - workspace->def_strm.total_in;
284	kunmap(in_page);
285	page_cache_release(in_page);
286
287	start += PAGE_CACHE_SIZE;
288	in_page = find_get_page(mapping,
289	start >> PAGE_CACHE_SHIFT);
290	data_in = kmap(in_page);
291	workspace->def_strm.avail_in = min(bytes_left,
292	PAGE_CACHE_SIZE);
293	workspace->def_strm.next_in = data_in;
294	}
295	}
296	workspace->def_strm.avail_in = 0;
297	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
298	zlib_deflateEnd(&workspace->def_strm);
299
300	if (ret != Z_STREAM_END) {
301	ret = -1;
302	goto out;
303	}
304
305	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
306	ret = -1;
307	goto out;
308	}
309
310	ret = 0;
311	*total_out = workspace->def_strm.total_out;
312	*total_in = workspace->def_strm.total_in;
313	out:
314	*out_pages = nr_pages;
315	if (out_page)
316	kunmap(out_page);
317
318	if (in_page) {
319	kunmap(in_page);
320	page_cache_release(in_page);
321	}
322	free_workspace(workspace);
323	return ret;
324	}
325
326	/*
327	* pages_in is an array of pages with compressed data.
328	*
329	* disk_start is the starting logical offset of this array in the file
330	*
331	* bvec is a bio_vec of pages from the file that we want to decompress into
332	*
333	* vcnt is the count of pages in the biovec
334	*
335	* srclen is the number of bytes in pages_in
336	*
337	* The basic idea is that we have a bio that was created by readpages.
338	* The pages in the bio are for the uncompressed data, and they may not
339	* be contiguous. They all correspond to the range of bytes covered by
340	* the compressed extent.
341	*/
342	int btrfs_zlib_decompress_biovec(struct page **pages_in,
343	u64 disk_start,
344	struct bio_vec *bvec,
345	int vcnt,
346	size_t srclen)
347	{
348	int ret = 0;
349	int wbits = MAX_WBITS;
350	struct workspace *workspace;
351	char *data_in;
352	size_t total_out = 0;
353	unsigned long page_bytes_left;
354	unsigned long page_in_index = 0;
355	unsigned long page_out_index = 0;
356	struct page *page_out;
357	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
358	PAGE_CACHE_SIZE;
359	unsigned long buf_start;
360	unsigned long buf_offset;
361	unsigned long bytes;
362	unsigned long working_bytes;
363	unsigned long pg_offset;
364	unsigned long start_byte;
365	unsigned long current_buf_start;
366	char *kaddr;
367
368	workspace = find_zlib_workspace();
369	if (!workspace)
370	return -ENOMEM;
371
372	data_in = kmap(pages_in[page_in_index]);
373	workspace->inf_strm.next_in = data_in;
5b050f04	374	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
c8b97818 CM	375	workspace->inf_strm.total_in = 0;
	376
	377	workspace->inf_strm.total_out = 0;
	378	workspace->inf_strm.next_out = workspace->buf;
	379	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
	380	page_out = bvec[page_out_index].bv_page;
	381	page_bytes_left = PAGE_CACHE_SIZE;
	382	pg_offset = 0;
	383
	384	/* If it's deflate, and it's got no preset dictionary, then
	385	we can tell zlib to skip the adler32 check. */
	386	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	387	((data_in[0] & 0x0f) == Z_DEFLATED) &&
	388	!(((data_in[0]<<8) + data_in[1]) % 31)) {
	389
	390	wbits = -((data_in[0] >> 4) + 8);
	391	workspace->inf_strm.next_in += 2;
	392	workspace->inf_strm.avail_in -= 2;
	393	}
	394
	395	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
	396	printk(KERN_WARNING "inflateInit failed\n");
	397	ret = -1;
	398	goto out;
	399	}
	400	while(workspace->inf_strm.total_in < srclen) {
	401	ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
	402	if (ret != Z_OK && ret != Z_STREAM_END) {
	403	break;
	404	}
	405
	406	/*
	407	* buf start is the byte offset we're of the start of
	408	* our workspace buffer
	409	*/
	410	buf_start = total_out;
	411
	412	/* total_out is the last byte of the workspace buffer */
	413	total_out = workspace->inf_strm.total_out;
	414
	415	working_bytes = total_out - buf_start;
	416
	417	/*
	418	* start byte is the first byte of the page we're currently
	419	* copying into relative to the start of the compressed data.
	420	*/
	421	start_byte = page_offset(page_out) - disk_start;
	422
	423	if (working_bytes == 0) {
	424	/* we didn't make progress in this inflate
	425	* call, we're done
	426	*/
771ed689	427	if (ret != Z_STREAM_END) {
c8b97818	428	ret = -1;
771ed689	429	}
c8b97818 CM	430	break;
	431	}
	432
	433	/* we haven't yet hit data corresponding to this page */
	434	if (total_out <= start_byte) {
	435	goto next;
	436	}
	437
	438	/*
	439	* the start of the data we care about is offset into
	440	* the middle of our working buffer
	441	*/
	442	if (total_out > start_byte && buf_start < start_byte) {
	443	buf_offset = start_byte - buf_start;
	444	working_bytes -= buf_offset;
	445	} else {
	446	buf_offset = 0;
	447	}
	448	current_buf_start = buf_start;
	449
	450	/* copy bytes from the working buffer into the pages */
	451	while(working_bytes > 0) {
	452	bytes = min(PAGE_CACHE_SIZE - pg_offset,
	453	PAGE_CACHE_SIZE - buf_offset);
	454	bytes = min(bytes, working_bytes);
	455	kaddr = kmap_atomic(page_out, KM_USER0);
	456	memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
	457	bytes);
	458	kunmap_atomic(kaddr, KM_USER0);
	459	flush_dcache_page(page_out);
	460
	461	pg_offset += bytes;
	462	page_bytes_left -= bytes;
	463	buf_offset += bytes;
	464	working_bytes -= bytes;
	465	current_buf_start += bytes;
	466
	467	/* check if we need to pick another page */
	468	if (page_bytes_left == 0) {
	469	page_out_index++;
	470	if (page_out_index >= vcnt) {
	471	ret = 0;
	472	goto done;
	473	}
	474	page_out = bvec[page_out_index].bv_page;
	475	pg_offset = 0;
	476	page_bytes_left = PAGE_CACHE_SIZE;
	477	start_byte = page_offset(page_out) - disk_start;
	478
	479	/*
	480	* make sure our new page is covered by this
	481	* working buffer
	482	*/
	483	if (total_out <= start_byte) {
	484	goto next;
	485	}
	486
	487	/* the next page in the biovec might not
	488	* be adjacent to the last page, but it
	489	* might still be found inside this working
	490	* buffer. bump our offset pointer
	491	*/
	492	if (total_out > start_byte &&
	493	current_buf_start < start_byte) {
494	buf_offset = start_byte - buf_start;
495	working_bytes = total_out - start_byte;
496	current_buf_start = buf_start +
497	buf_offset;
498	}
499	}
500	}
501	next:
502	workspace->inf_strm.next_out = workspace->buf;
503	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
504
505	if (workspace->inf_strm.avail_in == 0) {
506	unsigned long tmp;
507	kunmap(pages_in[page_in_index]);
508	page_in_index++;
509	if (page_in_index >= total_pages_in) {
510	data_in = NULL;
511	break;
512	}
513	data_in = kmap(pages_in[page_in_index]);
514	workspace->inf_strm.next_in = data_in;
515	tmp = srclen - workspace->inf_strm.total_in;
516	workspace->inf_strm.avail_in = min(tmp,
517	PAGE_CACHE_SIZE);
518	}
519	}
520	if (ret != Z_STREAM_END) {
521	ret = -1;
522	} else {
523	ret = 0;
524	}
525	done:
526	zlib_inflateEnd(&workspace->inf_strm);
527	if (data_in)
528	kunmap(pages_in[page_in_index]);
529	out:
530	free_workspace(workspace);
531	return ret;
532	}
533
534	/*
535	* a less complex decompression routine. Our compressed data fits in a
536	* single page, and we want to read a single page out of it.
537	* start_byte tells us the offset into the compressed data we're interested in
538	*/
539	int btrfs_zlib_decompress(unsigned char *data_in,
540	struct page *dest_page,
541	unsigned long start_byte,
542	size_t srclen, size_t destlen)
543	{
544	int ret = 0;
545	int wbits = MAX_WBITS;
546	struct workspace *workspace;
547	unsigned long bytes_left = destlen;
548	unsigned long total_out = 0;
549	char *kaddr;
550
551	if (destlen > PAGE_CACHE_SIZE)
552	return -ENOMEM;
553
554	workspace = find_zlib_workspace();
555	if (!workspace)
556	return -ENOMEM;
557
558	workspace->inf_strm.next_in = data_in;
559	workspace->inf_strm.avail_in = srclen;
560	workspace->inf_strm.total_in = 0;
561
562	workspace->inf_strm.next_out = workspace->buf;
563	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
564	workspace->inf_strm.total_out = 0;
565	/* If it's deflate, and it's got no preset dictionary, then
566	we can tell zlib to skip the adler32 check. */
567	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
568	((data_in[0] & 0x0f) == Z_DEFLATED) &&
569	!(((data_in[0]<<8) + data_in[1]) % 31)) {
570
571	wbits = -((data_in[0] >> 4) + 8);
572	workspace->inf_strm.next_in += 2;
573	workspace->inf_strm.avail_in -= 2;
574	}
575
576	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
577	printk(KERN_WARNING "inflateInit failed\n");
578	ret = -1;
579	goto out;
580	}
581
582	while(bytes_left > 0) {
583	unsigned long buf_start;
584	unsigned long buf_offset;
585	unsigned long bytes;
586	unsigned long pg_offset = 0;
587
588	ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
589	if (ret != Z_OK && ret != Z_STREAM_END) {
590	break;
591	}
592
593	buf_start = total_out;
594	total_out = workspace->inf_strm.total_out;
595
596	if (total_out == buf_start) {
597	ret = -1;
598	break;
599	}
600
601	if (total_out <= start_byte) {
602	goto next;
603	}
604
605	if (total_out > start_byte && buf_start < start_byte) {
606	buf_offset = start_byte - buf_start;
607	} else {
608	buf_offset = 0;
609	}
610
611	bytes = min(PAGE_CACHE_SIZE - pg_offset,
612	PAGE_CACHE_SIZE - buf_offset);
613	bytes = min(bytes, bytes_left);
614
615	kaddr = kmap_atomic(dest_page, KM_USER0);
616	memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
617	kunmap_atomic(kaddr, KM_USER0);
618
619	pg_offset += bytes;
620	bytes_left -= bytes;
621	next:
622	workspace->inf_strm.next_out = workspace->buf;
623	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
624	}
625	if (ret != Z_STREAM_END && bytes_left != 0) {
626	ret = -1;
627	} else {
628	ret = 0;
629	}
630	zlib_inflateEnd(&workspace->inf_strm);
631	out:
632	free_workspace(workspace);
633	return ret;
634	}
635
636	void btrfs_zlib_exit(void)
637	{
638	free_workspaces();
639	}