/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>

#include "uverbs.h"

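/*
 * Deferred locked_vm accounting: ib_umem_release_on_close() may run with
 * mmap_sem already held, so the locked page count is adjusted later from
 * the system workqueue using one of these work items.
 */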
struct ib_umem_account_work {
        struct work_struct work;
        struct mm_struct  *mm;
        unsigned long      diff;
};

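/*
 * Undo the work of ib_umem_get(): unmap each chunk from the device, mark
 * the pages dirty if they may have been written through the mapping, and
 * drop the page references taken by get_user_pages().
 */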
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct ib_umem_chunk *chunk, *tmp;
        int i;

        list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
                dma_unmap_sg(dev->dma_device, chunk->page_list,
                             chunk->nents, DMA_BIDIRECTIONAL);
                for (i = 0; i < chunk->nents; ++i) {
                        if (umem->writable && dirty)
                                set_page_dirty_lock(chunk->page_list[i].page);
                        put_page(chunk->page_list[i].page);
                }

                kfree(chunk);
        }
}

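/*
 * ib_umem_get - pin and DMA map a userspace memory region
 * @dev:   IB device the region will be mapped for
 * @mem:   umem structure to fill in
 * @addr:  userspace virtual address of the start of the region
 * @size:  length of the region in bytes
 * @write: whether the region must be writable
 *
 * Pins the pages covering [addr, addr + size), charges them against the
 * caller's RLIMIT_MEMLOCK, and gathers them into DMA-mapped scatterlist
 * chunks on mem->chunk_list.  Returns 0 on success or a negative errno.
 */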
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
                void *addr, size_t size, int write)
{
        struct page **page_list;
        struct ib_umem_chunk *chunk;
        unsigned long locked;
        unsigned long lock_limit;
        unsigned long cur_base;
        unsigned long npages;
        int ret = 0;
        int off;
        int i;

        if (!can_do_mlock())
                return -EPERM;

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;

        mem->user_base = (unsigned long) addr;
        mem->length    = size;
        mem->offset    = (unsigned long) addr & ~PAGE_MASK;
        mem->page_size = PAGE_SIZE;
        mem->writable  = write;

        INIT_LIST_HEAD(&mem->chunk_list);

        npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;

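        /*
         * Pinned pages are charged against RLIMIT_MEMLOCK, so take
         * mmap_sem for writing and check the limit before updating
         * locked_vm; tasks with CAP_IPC_LOCK may exceed it.
         */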
        down_write(&current->mm->mmap_sem);

        locked     = npages + current->mm->locked_vm;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;

        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                ret = -ENOMEM;
                goto out;
        }

        cur_base = (unsigned long) addr & PAGE_MASK;

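        /*
         * page_list is a single page of struct page pointers, so pin the
         * region in batches of at most PAGE_SIZE / sizeof(struct page *)
         * pages per get_user_pages() call.
         */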
        while (npages) {
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(int, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
                                     1, !write, page_list, NULL);

                if (ret < 0)
                        goto out;

                cur_base += ret * PAGE_SIZE;
                npages   -= ret;

                off = 0;

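                /*
                 * Carve the batch just pinned into scatterlist chunks of
                 * at most IB_UMEM_MAX_PAGE_CHUNK pages and DMA map each
                 * chunk for the device.
                 */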
                while (ret) {
                        chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
                                        min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
                                        GFP_KERNEL);
                        if (!chunk) {
                                ret = -ENOMEM;
                                goto out;
                        }

                        chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
                        for (i = 0; i < chunk->nents; ++i) {
                                chunk->page_list[i].page   = page_list[i + off];
                                chunk->page_list[i].offset = 0;
                                chunk->page_list[i].length = PAGE_SIZE;
                        }

                        chunk->nmap = dma_map_sg(dev->dma_device,
                                                 &chunk->page_list[0],
                                                 chunk->nents,
                                                 DMA_BIDIRECTIONAL);
                        if (chunk->nmap <= 0) {
                                for (i = 0; i < chunk->nents; ++i)
                                        put_page(chunk->page_list[i].page);
                                kfree(chunk);

                                ret = -ENOMEM;
                                goto out;
                        }

                        ret -= chunk->nents;
                        off += chunk->nents;
                        list_add_tail(&chunk->list, &mem->chunk_list);
                }

                ret = 0;
        }

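        /*
         * On failure, unpin everything gathered so far; on success,
         * commit the locked page count computed above.
         */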
out:
        if (ret < 0)
                __ib_umem_release(dev, mem, 0);
        else
                current->mm->locked_vm = locked;

        up_write(&current->mm->mmap_sem);
        free_page((unsigned long) page_list);

        return ret;
}

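/*
 * Release a umem from normal process context: unpin its pages and
 * subtract them from the current mm's locked_vm.
 */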
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
{
        __ib_umem_release(dev, umem, 1);

        down_write(&current->mm->mmap_sem);
        current->mm->locked_vm -=
                PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
        up_write(&current->mm->mmap_sem);
}

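/*
 * Work function for the deferred accounting set up by
 * ib_umem_release_on_close(); it adjusts locked_vm and then drops the
 * mm reference held by the work item.
 */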
static void ib_umem_account(void *work_ptr)
{
        struct ib_umem_account_work *work = work_ptr;

        down_write(&work->mm->mmap_sem);
        work->mm->locked_vm -= work->diff;
        up_write(&work->mm->mmap_sem);
        mmput(work->mm);
        kfree(work);
}

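/*
 * Like ib_umem_release(), but safe to call from the file release path,
 * where mmap_sem may already be held; the locked_vm update is deferred
 * to the system workqueue.
 */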
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
{
        struct ib_umem_account_work *work;
        struct mm_struct *mm;

        __ib_umem_release(dev, umem, 1);

        mm = get_task_mm(current);
        if (!mm)
                return;

        /*
         * We may be called with the mm's mmap_sem already held.  This
         * can happen when a userspace munmap() is the call that drops
         * the last reference to our file and calls our release
         * method.  If there are memory regions to destroy, we'll end
         * up here and not be able to take the mmap_sem.  Therefore we
         * defer the locked_vm accounting to the system workqueue.
         */

        work = kmalloc(sizeof *work, GFP_KERNEL);
        if (!work) {
                /* drop the mm reference taken above before bailing out */
                mmput(mm);
                return;
        }

        INIT_WORK(&work->work, ib_umem_account, work);
        work->mm   = mm;
        work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;

        schedule_work(&work->work);
}