/*
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
/* Return the part of the buffer that lies in CPU-visible VRAM */
static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
				    struct ttm_mem_reg *mem)
{
	u64 ret = 0;

	if (mem->start << PAGE_SHIFT < adev->mc.visible_vram_size) {
		ret = (u64)((mem->start << PAGE_SHIFT) + mem->size) >
			adev->mc.visible_vram_size ?
			adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
			mem->size;
	}
	return ret;
}
static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
				       struct ttm_mem_reg *old_mem,
				       struct ttm_mem_reg *new_mem)
{
	u64 vis_size;

	if (adev == NULL)
		return;

	if (new_mem) {
		switch (new_mem->mem_type) {
		case TTM_PL_TT:
			atomic64_add(new_mem->size, &adev->gtt_usage);
			break;
		case TTM_PL_VRAM:
			atomic64_add(new_mem->size, &adev->vram_usage);
			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
			atomic64_add(vis_size, &adev->vram_vis_usage);
			break;
		}
	}

	if (old_mem) {
		switch (old_mem->mem_type) {
		case TTM_PL_TT:
			atomic64_sub(old_mem->size, &adev->gtt_usage);
			break;
		case TTM_PL_VRAM:
			atomic64_sub(old_mem->size, &adev->vram_usage);
			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
			atomic64_sub(vis_size, &adev->vram_vis_usage);
			break;
		}
	}
}
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct amdgpu_bo *bo;

	bo = container_of(tbo, struct amdgpu_bo, tbo);

	amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);

	mutex_lock(&bo->adev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->adev->gem.mutex);
	drm_gem_object_release(&bo->gem_base);
	kfree(bo->metadata);
	kfree(bo);
}
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &amdgpu_ttm_bo_destroy)
		return true;
	return false;
}
static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
				      struct ttm_placement *placement,
				      struct ttm_place *placements,
				      u32 domain, u64 flags)
{
	u32 c = 0, i;

	placement->placement = placements;
	placement->busy_placement = placements;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
		    adev->mc.visible_vram_size < adev->mc.real_vram_size) {
			placements[c].fpfn =
				adev->mc.visible_vram_size >> PAGE_SHIFT;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_VRAM | TTM_PL_FLAG_TOPDOWN;
		}
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
				TTM_PL_FLAG_UNCACHED;
		} else {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
		}
	}

	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM |
				TTM_PL_FLAG_UNCACHED;
		} else {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
		}
	}

	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_GDS;
	}
	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_GWS;
	}
	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_OA;
	}

	if (!c) {
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_MASK_CACHING |
			TTM_PL_FLAG_SYSTEM;
	}
	placement->num_placement = c;
	placement->num_busy_placement = c;

	for (i = 0; i < c; i++) {
		if ((flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
		    (placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !placements[i].fpfn)
			placements[i].lpfn =
				adev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			placements[i].lpfn = 0;
	}
}
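
/*
 * Illustrative note (not from the original file): for a request of
 * AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT with no special creation
 * flags, amdgpu_ttm_placement_init() above produces two entries that TTM
 * tries in order, roughly:
 *
 *	placements[0].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 *			      TTM_PL_FLAG_VRAM;
 *	placements[1].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
 *
 * i.e. write-combined VRAM is tried first, with cached GTT as the
 * fallback placement.
 */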
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain)
{
	amdgpu_ttm_placement_init(rbo->adev, &rbo->placement,
				  rbo->placements, domain, rbo->flags);
}
static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
					struct ttm_placement *placement)
{
	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));

	memcpy(bo->placements, placement->placement,
	       placement->num_placement * sizeof(struct ttm_place));
	bo->placement.num_placement = placement->num_placement;
	bo->placement.num_busy_placement = placement->num_busy_placement;
	bo->placement.placement = bo->placements;
	bo->placement.busy_placement = bo->placements;
}
int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
				unsigned long size, int byte_align,
				bool kernel, u32 domain, u64 flags,
				struct sg_table *sg,
				struct ttm_placement *placement,
				struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align;
	size_t acc_size;
	int r;

	/* VI has a hw bug where VM PTEs have to be allocated in groups of 8.
	 * do this as a temporary workaround
	 */
	if (!(domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA))) {
		if (adev->asic_type >= CHIP_TOPAZ) {
			if (byte_align & 0x7fff)
				byte_align = ALIGN(byte_align, 0x8000);
			if (size & 0x7fff)
				size = ALIGN(size, 0x8000);
		}
	}

	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	bo->adev = adev;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT |
				       AMDGPU_GEM_DOMAIN_CPU |
				       AMDGPU_GEM_DOMAIN_GDS |
				       AMDGPU_GEM_DOMAIN_GWS |
				       AMDGPU_GEM_DOMAIN_OA);
	bo->flags = flags;

	amdgpu_fill_placement_to_bo(bo, placement);
	/* Kernel allocations are uninterruptible */
	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, NULL, &amdgpu_ttm_bo_destroy);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_amdgpu_bo_create(bo);

	return 0;
}
int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align,
		     bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg, struct amdgpu_bo **bo_ptr)
{
	struct ttm_placement placement = {0};
	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];

	memset(&placements, 0,
	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));

	amdgpu_ttm_placement_init(adev, &placement,
				  placements, domain, flags);

	return amdgpu_bo_create_restricted(adev, size, byte_align,
					   kernel, domain, flags,
					   sg, &placement, bo_ptr);
}
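
/*
 * Illustrative usage sketch (not part of the original file; the "adev"
 * pointer and surrounding error handling are assumed from the caller):
 * creating a page-sized, kernel-owned buffer in GTT and dropping the
 * reference when done.
 *
 *	struct amdgpu_bo *bo = NULL;
 *	int r;
 *
 *	r = amdgpu_bo_create(adev, PAGE_SIZE, PAGE_SIZE, true,
 *			     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, &bo);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_bo_unref(&bo);
 */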
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		return -EPERM;

	if (bo->kptr) {
		if (ptr)
			*ptr = bo->kptr;
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r)
		return r;
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr)
		*ptr = bo->kptr;
	return 0;
}
void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	ttm_bo_kunmap(&bo->kmap);
}
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;

	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;
	unsigned fpfn, lpfn;

	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == AMDGPU_GEM_DOMAIN_VRAM)
				domain_start = bo->adev->mc.vram_start;
			else
				domain_start = bo->adev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (amdgpu_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	amdgpu_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->adev->mc.visible_vram_size)) {
			if (WARN_ON_ONCE(min_offset >
					 bo->adev->mc.visible_vram_size))
				return -EINVAL;
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
		} else {
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = max_offset >> PAGE_SHIFT;
		}
		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (lpfn && lpfn < bo->placements[i].lpfn)
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);
		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
			bo->adev->vram_pin_size += amdgpu_bo_size(bo);
		else
			bo->adev->gart_pin_size += amdgpu_bo_size(bo);
	} else {
		dev_err(bo->adev->dev, "%p pin failed\n", bo);
	}
	return r;
}
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}
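
/*
 * Illustrative pin/unpin sketch (not part of the original file):
 * amdgpu_bo_reserve()/amdgpu_bo_unreserve() are assumed to be the
 * reservation helpers declared alongside these functions, and "bo" a
 * buffer created with amdgpu_bo_create() above.
 *
 *	u64 gpu_addr;
 *	int r;
 *
 *	r = amdgpu_bo_reserve(bo, false);
 *	if (unlikely(r != 0))
 *		return r;
 *	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr);
 *	amdgpu_bo_unreserve(bo);
 *	...
 *	amdgpu_bo_reserve(bo, false);
 *	amdgpu_bo_unpin(bo);
 *	amdgpu_bo_unreserve(bo);
 */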
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->adev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->adev->vram_pin_size -= amdgpu_bo_size(bo);
		else
			bo->adev->gart_pin_size -= amdgpu_bo_size(bo);
	} else {
		dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (adev->flags & AMDGPU_IS_APU)) {
		/* Useless to evict on IGP chips */
		return 0;
	}
	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
void amdgpu_bo_force_delete(struct amdgpu_device *adev)
{
	struct amdgpu_bo *bo, *n;

	if (list_empty(&adev->gem.objects)) {
		return;
	}
	dev_err(adev->dev, "Userspace still has active objects!\n");
	list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
		mutex_lock(&adev->ddev->struct_mutex);
		dev_err(adev->dev, "%p %p %lu %lu force free\n",
			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
			*((unsigned long *)&bo->gem_base.refcount));
		mutex_lock(&bo->adev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->adev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_unreference(&bo->gem_base);
		mutex_unlock(&adev->ddev->struct_mutex);
	}
}
int amdgpu_bo_init(struct amdgpu_device *adev)
{
	/* Add an MTRR for the VRAM */
	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
					      adev->mc.aper_size);
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 adev->mc.mc_vram_size >> 20,
		 (unsigned long long)adev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits DDR\n", adev->mc.vram_width);
	return amdgpu_ttm_init(adev);
}
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
	amdgpu_ttm_fini(adev);
	arch_phys_wc_del(adev->mc.vram_mtrr);
}
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
			 struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
	if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
		return -EINVAL;

	bo->tiling_flags = tiling_flags;
	return 0;
}
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
}
int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
			   uint32_t metadata_size, uint64_t flags)
{
	void *buffer;

	if (!metadata_size) {
		if (bo->metadata_size) {
			kfree(bo->metadata);
			bo->metadata_size = 0;
		}
		return 0;
	}

	if (metadata == NULL)
		return -EINVAL;

	buffer = kzalloc(metadata_size, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	memcpy(buffer, metadata, metadata_size);
	kfree(bo->metadata);
	bo->metadata_flags = flags;
	bo->metadata = buffer;
	bo->metadata_size = metadata_size;

	return 0;
}
int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
			   size_t buffer_size, uint32_t *metadata_size,
			   uint64_t *flags)
{
	if (!buffer && !metadata_size)
		return -EINVAL;

	if (buffer) {
		if (buffer_size < bo->metadata_size)
			return -EINVAL;

		if (bo->metadata_size)
			memcpy(buffer, bo->metadata, bo->metadata_size);
	}

	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (flags)
		*flags = bo->metadata_flags;

	return 0;
}
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct amdgpu_bo *rbo;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return;

	rbo = container_of(bo, struct amdgpu_bo, tbo);
	amdgpu_vm_bo_invalidate(rbo->adev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	/* move_notify is called before move happens */
	amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);
}
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return 0;

	abo = container_of(bo, struct amdgpu_bo, tbo);
	adev = abo->adev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= adev->mc.visible_vram_size)
		return 0;

	/* the buffer is not in CPU-visible VRAM, try to move it there */
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
	lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < abo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!abo->placements[i].lpfn || abo->placements[i].lpfn > lpfn))
			abo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &abo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &abo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > adev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}
/**
 * amdgpu_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct amdgpu_fence *fence,
		     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, &fence->base);
	else
		reservation_object_add_excl_fence(resv, &fence->base);
}