2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
#include "drmP.h"
#include "radeon_drm.h"
#include "radeon.h"
#include "radeon_reg.h"
34 * Common GART table functions.
36 int radeon_gart_table_ram_alloc(struct radeon_device
*rdev
)
40 ptr
= pci_alloc_consistent(rdev
->pdev
, rdev
->gart
.table_size
,
41 &rdev
->gart
.table_addr
);
46 if (rdev
->family
== CHIP_RS400
|| rdev
->family
== CHIP_RS480
||
47 rdev
->family
== CHIP_RS690
|| rdev
->family
== CHIP_RS740
) {
48 set_memory_uc((unsigned long)ptr
,
49 rdev
->gart
.table_size
>> PAGE_SHIFT
);
53 memset((void *)rdev
->gart
.ptr
, 0, rdev
->gart
.table_size
);
57 void radeon_gart_table_ram_free(struct radeon_device
*rdev
)
59 if (rdev
->gart
.ptr
== NULL
) {
63 if (rdev
->family
== CHIP_RS400
|| rdev
->family
== CHIP_RS480
||
64 rdev
->family
== CHIP_RS690
|| rdev
->family
== CHIP_RS740
) {
65 set_memory_wb((unsigned long)rdev
->gart
.ptr
,
66 rdev
->gart
.table_size
>> PAGE_SHIFT
);
69 pci_free_consistent(rdev
->pdev
, rdev
->gart
.table_size
,
70 (void *)rdev
->gart
.ptr
,
71 rdev
->gart
.table_addr
);
72 rdev
->gart
.ptr
= NULL
;
73 rdev
->gart
.table_addr
= 0;
76 int radeon_gart_table_vram_alloc(struct radeon_device
*rdev
)
80 if (rdev
->gart
.robj
== NULL
) {
81 r
= radeon_bo_create(rdev
, rdev
->gart
.table_size
,
82 PAGE_SIZE
, true, RADEON_GEM_DOMAIN_VRAM
,
83 NULL
, &rdev
->gart
.robj
);
91 int radeon_gart_table_vram_pin(struct radeon_device
*rdev
)
96 r
= radeon_bo_reserve(rdev
->gart
.robj
, false);
99 r
= radeon_bo_pin(rdev
->gart
.robj
,
100 RADEON_GEM_DOMAIN_VRAM
, &gpu_addr
);
102 radeon_bo_unreserve(rdev
->gart
.robj
);
105 r
= radeon_bo_kmap(rdev
->gart
.robj
, &rdev
->gart
.ptr
);
107 radeon_bo_unpin(rdev
->gart
.robj
);
108 radeon_bo_unreserve(rdev
->gart
.robj
);
109 rdev
->gart
.table_addr
= gpu_addr
;
113 void radeon_gart_table_vram_unpin(struct radeon_device
*rdev
)
117 if (rdev
->gart
.robj
== NULL
) {
120 r
= radeon_bo_reserve(rdev
->gart
.robj
, false);
121 if (likely(r
== 0)) {
122 radeon_bo_kunmap(rdev
->gart
.robj
);
123 radeon_bo_unpin(rdev
->gart
.robj
);
124 radeon_bo_unreserve(rdev
->gart
.robj
);
125 rdev
->gart
.ptr
= NULL
;
129 void radeon_gart_table_vram_free(struct radeon_device
*rdev
)
131 if (rdev
->gart
.robj
== NULL
) {
134 radeon_gart_table_vram_unpin(rdev
);
135 radeon_bo_unref(&rdev
->gart
.robj
);
142 * Common gart functions.
144 void radeon_gart_unbind(struct radeon_device
*rdev
, unsigned offset
,
152 if (!rdev
->gart
.ready
) {
153 WARN(1, "trying to unbind memory from uninitialized GART !\n");
156 t
= offset
/ RADEON_GPU_PAGE_SIZE
;
157 p
= t
/ (PAGE_SIZE
/ RADEON_GPU_PAGE_SIZE
);
158 for (i
= 0; i
< pages
; i
++, p
++) {
159 if (rdev
->gart
.pages
[p
]) {
160 rdev
->gart
.pages
[p
] = NULL
;
161 rdev
->gart
.pages_addr
[p
] = rdev
->dummy_page
.addr
;
162 page_base
= rdev
->gart
.pages_addr
[p
];
163 for (j
= 0; j
< (PAGE_SIZE
/ RADEON_GPU_PAGE_SIZE
); j
++, t
++) {
164 if (rdev
->gart
.ptr
) {
165 radeon_gart_set_page(rdev
, t
, page_base
);
167 page_base
+= RADEON_GPU_PAGE_SIZE
;
172 radeon_gart_tlb_flush(rdev
);
175 int radeon_gart_bind(struct radeon_device
*rdev
, unsigned offset
,
176 int pages
, struct page
**pagelist
, dma_addr_t
*dma_addr
)
183 if (!rdev
->gart
.ready
) {
184 WARN(1, "trying to bind memory to uninitialized GART !\n");
187 t
= offset
/ RADEON_GPU_PAGE_SIZE
;
188 p
= t
/ (PAGE_SIZE
/ RADEON_GPU_PAGE_SIZE
);
190 for (i
= 0; i
< pages
; i
++, p
++) {
191 rdev
->gart
.pages_addr
[p
] = dma_addr
[i
];
192 rdev
->gart
.pages
[p
] = pagelist
[i
];
193 if (rdev
->gart
.ptr
) {
194 page_base
= rdev
->gart
.pages_addr
[p
];
195 for (j
= 0; j
< (PAGE_SIZE
/ RADEON_GPU_PAGE_SIZE
); j
++, t
++) {
196 radeon_gart_set_page(rdev
, t
, page_base
);
197 page_base
+= RADEON_GPU_PAGE_SIZE
;
202 radeon_gart_tlb_flush(rdev
);
206 void radeon_gart_restore(struct radeon_device
*rdev
)
211 if (!rdev
->gart
.ptr
) {
214 for (i
= 0, t
= 0; i
< rdev
->gart
.num_cpu_pages
; i
++) {
215 page_base
= rdev
->gart
.pages_addr
[i
];
216 for (j
= 0; j
< (PAGE_SIZE
/ RADEON_GPU_PAGE_SIZE
); j
++, t
++) {
217 radeon_gart_set_page(rdev
, t
, page_base
);
218 page_base
+= RADEON_GPU_PAGE_SIZE
;
222 radeon_gart_tlb_flush(rdev
);
225 int radeon_gart_init(struct radeon_device
*rdev
)
229 if (rdev
->gart
.pages
) {
232 /* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
233 if (PAGE_SIZE
< RADEON_GPU_PAGE_SIZE
) {
234 DRM_ERROR("Page size is smaller than GPU page size!\n");
237 r
= radeon_dummy_page_init(rdev
);
240 /* Compute table size */
241 rdev
->gart
.num_cpu_pages
= rdev
->mc
.gtt_size
/ PAGE_SIZE
;
242 rdev
->gart
.num_gpu_pages
= rdev
->mc
.gtt_size
/ RADEON_GPU_PAGE_SIZE
;
243 DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
244 rdev
->gart
.num_cpu_pages
, rdev
->gart
.num_gpu_pages
);
245 /* Allocate pages table */
246 rdev
->gart
.pages
= kzalloc(sizeof(void *) * rdev
->gart
.num_cpu_pages
,
248 if (rdev
->gart
.pages
== NULL
) {
249 radeon_gart_fini(rdev
);
252 rdev
->gart
.pages_addr
= kzalloc(sizeof(dma_addr_t
) *
253 rdev
->gart
.num_cpu_pages
, GFP_KERNEL
);
254 if (rdev
->gart
.pages_addr
== NULL
) {
255 radeon_gart_fini(rdev
);
258 /* set GART entry to point to the dummy page by default */
259 for (i
= 0; i
< rdev
->gart
.num_cpu_pages
; i
++) {
260 rdev
->gart
.pages_addr
[i
] = rdev
->dummy_page
.addr
;
265 void radeon_gart_fini(struct radeon_device
*rdev
)
267 if (rdev
->gart
.pages
&& rdev
->gart
.pages_addr
&& rdev
->gart
.ready
) {
269 radeon_gart_unbind(rdev
, 0, rdev
->gart
.num_cpu_pages
);
271 rdev
->gart
.ready
= false;
272 kfree(rdev
->gart
.pages
);
273 kfree(rdev
->gart
.pages_addr
);
274 rdev
->gart
.pages
= NULL
;
275 rdev
->gart
.pages_addr
= NULL
;
277 radeon_dummy_page_fini(rdev
);
283 * TODO bind a default page at vm initialization for default address
285 int radeon_vm_manager_init(struct radeon_device
*rdev
)
289 rdev
->vm_manager
.enabled
= false;
291 /* mark first vm as always in use, it's the system one */
292 /* allocate enough for 2 full VM pts */
293 r
= radeon_sa_bo_manager_init(rdev
, &rdev
->vm_manager
.sa_manager
,
294 rdev
->vm_manager
.max_pfn
* 8 * 2,
295 RADEON_GEM_DOMAIN_VRAM
);
297 dev_err(rdev
->dev
, "failed to allocate vm bo (%dKB)\n",
298 (rdev
->vm_manager
.max_pfn
* 8) >> 10);
302 r
= rdev
->vm_manager
.funcs
->init(rdev
);
304 rdev
->vm_manager
.enabled
= true;
309 /* cs mutex must be lock */
310 static void radeon_vm_unbind_locked(struct radeon_device
*rdev
,
311 struct radeon_vm
*vm
)
313 struct radeon_bo_va
*bo_va
;
319 /* wait for vm use to end */
321 radeon_fence_wait(vm
->fence
, false);
322 radeon_fence_unref(&vm
->fence
);
326 rdev
->vm_manager
.funcs
->unbind(rdev
, vm
);
327 rdev
->vm_manager
.use_bitmap
&= ~(1 << vm
->id
);
328 list_del_init(&vm
->list
);
330 radeon_sa_bo_free(rdev
, &vm
->sa_bo
, NULL
);
333 list_for_each_entry(bo_va
, &vm
->va
, vm_list
) {
334 bo_va
->valid
= false;
338 void radeon_vm_manager_fini(struct radeon_device
*rdev
)
340 if (rdev
->vm_manager
.sa_manager
.bo
== NULL
)
342 radeon_vm_manager_suspend(rdev
);
343 rdev
->vm_manager
.funcs
->fini(rdev
);
344 radeon_sa_bo_manager_fini(rdev
, &rdev
->vm_manager
.sa_manager
);
345 rdev
->vm_manager
.enabled
= false;
348 int radeon_vm_manager_start(struct radeon_device
*rdev
)
350 if (rdev
->vm_manager
.sa_manager
.bo
== NULL
) {
353 return radeon_sa_bo_manager_start(rdev
, &rdev
->vm_manager
.sa_manager
);
356 int radeon_vm_manager_suspend(struct radeon_device
*rdev
)
358 struct radeon_vm
*vm
, *tmp
;
360 radeon_mutex_lock(&rdev
->cs_mutex
);
361 /* unbind all active vm */
362 list_for_each_entry_safe(vm
, tmp
, &rdev
->vm_manager
.lru_vm
, list
) {
363 radeon_vm_unbind_locked(rdev
, vm
);
365 rdev
->vm_manager
.funcs
->fini(rdev
);
366 radeon_mutex_unlock(&rdev
->cs_mutex
);
367 return radeon_sa_bo_manager_suspend(rdev
, &rdev
->vm_manager
.sa_manager
);
370 /* cs mutex must be lock */
371 void radeon_vm_unbind(struct radeon_device
*rdev
, struct radeon_vm
*vm
)
373 mutex_lock(&vm
->mutex
);
374 radeon_vm_unbind_locked(rdev
, vm
);
375 mutex_unlock(&vm
->mutex
);
378 /* cs mutex must be lock & vm mutex must be lock */
379 int radeon_vm_bind(struct radeon_device
*rdev
, struct radeon_vm
*vm
)
381 struct radeon_vm
*vm_evict
;
391 list_del_init(&vm
->list
);
392 list_add_tail(&vm
->list
, &rdev
->vm_manager
.lru_vm
);
397 r
= radeon_sa_bo_new(rdev
, &rdev
->vm_manager
.sa_manager
, &vm
->sa_bo
,
398 RADEON_GPU_PAGE_ALIGN(vm
->last_pfn
* 8),
399 RADEON_GPU_PAGE_SIZE
, false);
401 if (list_empty(&rdev
->vm_manager
.lru_vm
)) {
404 vm_evict
= list_first_entry(&rdev
->vm_manager
.lru_vm
, struct radeon_vm
, list
);
405 radeon_vm_unbind(rdev
, vm_evict
);
408 vm
->pt
= radeon_sa_bo_cpu_addr(vm
->sa_bo
);
409 vm
->pt_gpu_addr
= radeon_sa_bo_gpu_addr(vm
->sa_bo
);
410 memset(vm
->pt
, 0, RADEON_GPU_PAGE_ALIGN(vm
->last_pfn
* 8));
413 /* search for free vm */
414 for (i
= 0; i
< rdev
->vm_manager
.nvm
; i
++) {
415 if (!(rdev
->vm_manager
.use_bitmap
& (1 << i
))) {
420 /* evict vm if necessary */
422 vm_evict
= list_first_entry(&rdev
->vm_manager
.lru_vm
, struct radeon_vm
, list
);
423 radeon_vm_unbind(rdev
, vm_evict
);
428 r
= rdev
->vm_manager
.funcs
->bind(rdev
, vm
, id
);
430 radeon_sa_bo_free(rdev
, &vm
->sa_bo
, NULL
);
433 rdev
->vm_manager
.use_bitmap
|= 1 << id
;
435 list_add_tail(&vm
->list
, &rdev
->vm_manager
.lru_vm
);
436 return radeon_vm_bo_update_pte(rdev
, vm
, rdev
->ring_tmp_bo
.bo
,
437 &rdev
->ring_tmp_bo
.bo
->tbo
.mem
);
440 /* object have to be reserved */
441 int radeon_vm_bo_add(struct radeon_device
*rdev
,
442 struct radeon_vm
*vm
,
443 struct radeon_bo
*bo
,
447 struct radeon_bo_va
*bo_va
, *tmp
;
448 struct list_head
*head
;
449 uint64_t size
= radeon_bo_size(bo
), last_offset
= 0;
452 bo_va
= kzalloc(sizeof(struct radeon_bo_va
), GFP_KERNEL
);
458 bo_va
->soffset
= offset
;
459 bo_va
->eoffset
= offset
+ size
;
460 bo_va
->flags
= flags
;
461 bo_va
->valid
= false;
462 INIT_LIST_HEAD(&bo_va
->bo_list
);
463 INIT_LIST_HEAD(&bo_va
->vm_list
);
464 /* make sure object fit at this offset */
465 if (bo_va
->soffset
>= bo_va
->eoffset
) {
470 last_pfn
= bo_va
->eoffset
/ RADEON_GPU_PAGE_SIZE
;
471 if (last_pfn
> rdev
->vm_manager
.max_pfn
) {
473 dev_err(rdev
->dev
, "va above limit (0x%08X > 0x%08X)\n",
474 last_pfn
, rdev
->vm_manager
.max_pfn
);
478 mutex_lock(&vm
->mutex
);
479 if (last_pfn
> vm
->last_pfn
) {
480 /* release mutex and lock in right order */
481 mutex_unlock(&vm
->mutex
);
482 radeon_mutex_lock(&rdev
->cs_mutex
);
483 mutex_lock(&vm
->mutex
);
484 /* and check again */
485 if (last_pfn
> vm
->last_pfn
) {
486 /* grow va space 32M by 32M */
487 unsigned align
= ((32 << 20) >> 12) - 1;
488 radeon_vm_unbind_locked(rdev
, vm
);
489 vm
->last_pfn
= (last_pfn
+ align
) & ~align
;
491 radeon_mutex_unlock(&rdev
->cs_mutex
);
495 list_for_each_entry(tmp
, &vm
->va
, vm_list
) {
496 if (bo_va
->soffset
>= last_offset
&& bo_va
->eoffset
< tmp
->soffset
) {
497 /* bo can be added before this one */
500 if (bo_va
->soffset
>= tmp
->soffset
&& bo_va
->soffset
< tmp
->eoffset
) {
501 /* bo and tmp overlap, invalid offset */
502 dev_err(rdev
->dev
, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
503 bo
, (unsigned)bo_va
->soffset
, tmp
->bo
,
504 (unsigned)tmp
->soffset
, (unsigned)tmp
->eoffset
);
506 mutex_unlock(&vm
->mutex
);
509 last_offset
= tmp
->eoffset
;
510 head
= &tmp
->vm_list
;
512 list_add(&bo_va
->vm_list
, head
);
513 list_add_tail(&bo_va
->bo_list
, &bo
->va
);
514 mutex_unlock(&vm
->mutex
);
518 static u64
radeon_vm_get_addr(struct radeon_device
*rdev
,
519 struct ttm_mem_reg
*mem
,
524 switch (mem
->mem_type
) {
526 addr
= (mem
->start
<< PAGE_SHIFT
);
527 addr
+= pfn
* RADEON_GPU_PAGE_SIZE
;
528 addr
+= rdev
->vm_manager
.vram_base_offset
;
531 /* offset inside page table */
532 addr
= mem
->start
<< PAGE_SHIFT
;
533 addr
+= pfn
* RADEON_GPU_PAGE_SIZE
;
534 addr
= addr
>> PAGE_SHIFT
;
535 /* page table offset */
536 addr
= rdev
->gart
.pages_addr
[addr
];
537 /* in case cpu page size != gpu page size*/
538 addr
+= (pfn
* RADEON_GPU_PAGE_SIZE
) & (~PAGE_MASK
);
546 /* object have to be reserved & cs mutex took & vm mutex took */
547 int radeon_vm_bo_update_pte(struct radeon_device
*rdev
,
548 struct radeon_vm
*vm
,
549 struct radeon_bo
*bo
,
550 struct ttm_mem_reg
*mem
)
552 struct radeon_bo_va
*bo_va
;
553 unsigned ngpu_pages
, i
;
554 uint64_t addr
= 0, pfn
;
557 /* nothing to do if vm isn't bound */
561 bo_va
= radeon_bo_va(bo
, vm
);
563 dev_err(rdev
->dev
, "bo %p not in vm %p\n", bo
, vm
);
570 ngpu_pages
= radeon_bo_ngpu_pages(bo
);
571 bo_va
->flags
&= ~RADEON_VM_PAGE_VALID
;
572 bo_va
->flags
&= ~RADEON_VM_PAGE_SYSTEM
;
574 if (mem
->mem_type
!= TTM_PL_SYSTEM
) {
575 bo_va
->flags
|= RADEON_VM_PAGE_VALID
;
578 if (mem
->mem_type
== TTM_PL_TT
) {
579 bo_va
->flags
|= RADEON_VM_PAGE_SYSTEM
;
582 pfn
= bo_va
->soffset
/ RADEON_GPU_PAGE_SIZE
;
583 flags
= rdev
->vm_manager
.funcs
->page_flags(rdev
, bo_va
->vm
, bo_va
->flags
);
584 for (i
= 0, addr
= 0; i
< ngpu_pages
; i
++) {
585 if (mem
&& bo_va
->valid
) {
586 addr
= radeon_vm_get_addr(rdev
, mem
, i
);
588 rdev
->vm_manager
.funcs
->set_page(rdev
, bo_va
->vm
, i
+ pfn
, addr
, flags
);
590 rdev
->vm_manager
.funcs
->tlb_flush(rdev
, bo_va
->vm
);
594 /* object have to be reserved */
595 int radeon_vm_bo_rmv(struct radeon_device
*rdev
,
596 struct radeon_vm
*vm
,
597 struct radeon_bo
*bo
)
599 struct radeon_bo_va
*bo_va
;
601 bo_va
= radeon_bo_va(bo
, vm
);
605 radeon_mutex_lock(&rdev
->cs_mutex
);
606 mutex_lock(&vm
->mutex
);
607 radeon_vm_bo_update_pte(rdev
, vm
, bo
, NULL
);
608 radeon_mutex_unlock(&rdev
->cs_mutex
);
609 list_del(&bo_va
->vm_list
);
610 mutex_unlock(&vm
->mutex
);
611 list_del(&bo_va
->bo_list
);
617 void radeon_vm_bo_invalidate(struct radeon_device
*rdev
,
618 struct radeon_bo
*bo
)
620 struct radeon_bo_va
*bo_va
;
622 BUG_ON(!atomic_read(&bo
->tbo
.reserved
));
623 list_for_each_entry(bo_va
, &bo
->va
, bo_list
) {
624 bo_va
->valid
= false;
628 int radeon_vm_init(struct radeon_device
*rdev
, struct radeon_vm
*vm
)
634 mutex_init(&vm
->mutex
);
635 INIT_LIST_HEAD(&vm
->list
);
636 INIT_LIST_HEAD(&vm
->va
);
637 /* SI requires equal sized PTs for all VMs, so always set
638 * last_pfn to max_pfn. cayman allows variable sized
639 * pts so we can grow then as needed. Once we switch
640 * to two level pts we can unify this again.
642 if (rdev
->family
>= CHIP_TAHITI
)
643 vm
->last_pfn
= rdev
->vm_manager
.max_pfn
;
646 /* map the ib pool buffer at 0 in virtual address space, set
649 r
= radeon_vm_bo_add(rdev
, vm
, rdev
->ring_tmp_bo
.bo
, 0,
650 RADEON_VM_PAGE_READABLE
| RADEON_VM_PAGE_SNOOPED
);
654 void radeon_vm_fini(struct radeon_device
*rdev
, struct radeon_vm
*vm
)
656 struct radeon_bo_va
*bo_va
, *tmp
;
659 radeon_mutex_lock(&rdev
->cs_mutex
);
660 mutex_lock(&vm
->mutex
);
661 radeon_vm_unbind_locked(rdev
, vm
);
662 radeon_mutex_unlock(&rdev
->cs_mutex
);
665 r
= radeon_bo_reserve(rdev
->ring_tmp_bo
.bo
, false);
667 bo_va
= radeon_bo_va(rdev
->ring_tmp_bo
.bo
, vm
);
668 list_del_init(&bo_va
->bo_list
);
669 list_del_init(&bo_va
->vm_list
);
670 radeon_bo_unreserve(rdev
->ring_tmp_bo
.bo
);
673 if (!list_empty(&vm
->va
)) {
674 dev_err(rdev
->dev
, "still active bo inside vm\n");
676 list_for_each_entry_safe(bo_va
, tmp
, &vm
->va
, vm_list
) {
677 list_del_init(&bo_va
->vm_list
);
678 r
= radeon_bo_reserve(bo_va
->bo
, false);
680 list_del_init(&bo_va
->bo_list
);
681 radeon_bo_unreserve(bo_va
->bo
);
685 mutex_unlock(&vm
->mutex
);
/* (removed: stray gitweb page-generation footer accidentally captured in the source) */