/*
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 */
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
#include <ttm/ttm_page_alloc.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include "amdgpu.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)

static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);

static struct amdgpu_device *amdgpu_get_adev(struct ttm_bo_device *bdev)
{
        struct amdgpu_mman *mman;
        struct amdgpu_device *adev;

        /* Walk back from the TTM device to the amdgpu device embedding it. */
        mman = container_of(bdev, struct amdgpu_mman, bdev);
        adev = container_of(mman, struct amdgpu_device, mman);
        return adev;
}
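/*
 * Global memory: TTM keeps a single, driver-independent memory accounting
 * object and buffer-object subsystem that every driver instance references.
 * The helpers below take and drop references on those global objects.
 */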
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
        return ttm_mem_global_init(ref->object);
}

static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
        ttm_mem_global_release(ref->object);
}
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
        struct drm_global_reference *global_ref;
        struct amdgpu_ring *ring;
        struct amd_sched_rq *rq;
        int r;

        adev->mman.mem_global_referenced = false;
        global_ref = &adev->mman.mem_global_ref;
        global_ref->global_type = DRM_GLOBAL_TTM_MEM;
        global_ref->size = sizeof(struct ttm_mem_global);
        global_ref->init = &amdgpu_ttm_mem_global_init;
        global_ref->release = &amdgpu_ttm_mem_global_release;
        r = drm_global_item_ref(global_ref);
        if (r) {
                DRM_ERROR("Failed setting up TTM memory accounting subsystem.\n");
                return r;
        }

        adev->mman.bo_global_ref.mem_glob =
                adev->mman.mem_global_ref.object;
        global_ref = &adev->mman.bo_global_ref.ref;
        global_ref->global_type = DRM_GLOBAL_TTM_BO;
        global_ref->size = sizeof(struct ttm_bo_global);
        global_ref->init = &ttm_bo_global_init;
        global_ref->release = &ttm_bo_global_release;
        r = drm_global_item_ref(global_ref);
        if (r) {
                DRM_ERROR("Failed setting up TTM BO subsystem.\n");
                drm_global_item_unref(&adev->mman.mem_global_ref);
                return r;
        }

        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
                                  rq, amdgpu_sched_jobs);
        if (r) {
                DRM_ERROR("Failed setting up TTM BO move run queue.\n");
                drm_global_item_unref(&adev->mman.mem_global_ref);
                drm_global_item_unref(&adev->mman.bo_global_ref.ref);
                return r;
        }

        adev->mman.mem_global_referenced = true;

        return 0;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
        if (adev->mman.mem_global_referenced) {
                amd_sched_entity_fini(adev->mman.entity.sched,
                                      &adev->mman.entity);
                drm_global_item_unref(&adev->mman.bo_global_ref.ref);
                drm_global_item_unref(&adev->mman.mem_global_ref);
                adev->mman.mem_global_referenced = false;
        }
}
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
        return 0;
}
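/*
 * Describe each memory domain to TTM: system pages, the GTT aperture and
 * on-board VRAM, plus the small on-chip GDS/GWS/OA pools, each with its
 * allowed caching attributes and mapping flags.
 */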
static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
                                struct ttm_mem_type_manager *man)
{
        struct amdgpu_device *adev;

        adev = amdgpu_get_adev(bdev);

        switch (type) {
        case TTM_PL_SYSTEM:
                /* System memory */
                man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
                man->available_caching = TTM_PL_MASK_CACHING;
                man->default_caching = TTM_PL_FLAG_CACHED;
                break;
        case TTM_PL_TT:
                man->func = &ttm_bo_manager_func;
                man->gpu_offset = adev->mc.gtt_start;
                man->available_caching = TTM_PL_MASK_CACHING;
                man->default_caching = TTM_PL_FLAG_CACHED;
                man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
                break;
        case TTM_PL_VRAM:
                /* "On-card" video ram */
                man->func = &ttm_bo_manager_func;
                man->gpu_offset = adev->mc.vram_start;
                man->flags = TTM_MEMTYPE_FLAG_FIXED |
                             TTM_MEMTYPE_FLAG_MAPPABLE;
                man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
                man->default_caching = TTM_PL_FLAG_WC;
                break;
        case AMDGPU_PL_GDS:
        case AMDGPU_PL_GWS:
        case AMDGPU_PL_OA:
                /* On-chip GDS memory */
                man->func = &ttm_bo_manager_func;
                man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
                man->available_caching = TTM_PL_FLAG_UNCACHED;
                man->default_caching = TTM_PL_FLAG_UNCACHED;
                break;
        default:
                DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
                return -EINVAL;
        }
        return 0;
}
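/*
 * On eviction, steer a BO to the cheapest domain that can still hold it:
 * VRAM contents normally go to GTT, everything else to system memory.
 * If the copy engine isn't ready yet, VRAM must be evicted straight to
 * CPU-accessible memory.
 */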
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
                               struct ttm_placement *placement)
{
        struct amdgpu_bo *rbo;
        static struct ttm_place placements = {
                .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
        };

        if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
                placement->placement = &placements;
                placement->busy_placement = &placements;
                placement->num_placement = 1;
                placement->num_busy_placement = 1;
                return;
        }
        rbo = container_of(bo, struct amdgpu_bo, tbo);
        switch (bo->mem.mem_type) {
        case TTM_PL_VRAM:
                if (rbo->adev->mman.buffer_funcs_ring->ready == false)
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
                else
                        amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
                break;
        case TTM_PL_TT:
        default:
                amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
        }
        *placement = rbo->placement;
}
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
        struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo);

        /* userptr BOs must never be mapped through the DRM file */
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
        return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
}
static void amdgpu_move_null(struct ttm_buffer_object *bo,
                             struct ttm_mem_reg *new_mem)
{
        struct ttm_mem_reg *old_mem = &bo->mem;

        BUG_ON(old_mem->mm_node != NULL);
        *old_mem = *new_mem;
        new_mem->mm_node = NULL;
}
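/*
 * Blit a BO between placements with the copy engine: translate both TTM
 * offsets into GPU addresses, kick off amdgpu_copy_buffer() and let TTM
 * tear down the old placement once the copy fence signals.
 */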
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
                            bool evict, bool no_wait_gpu,
                            struct ttm_mem_reg *new_mem,
                            struct ttm_mem_reg *old_mem)
{
        struct amdgpu_device *adev;
        struct amdgpu_ring *ring;
        uint64_t old_start, new_start;
        struct fence *fence;
        int r;

        adev = amdgpu_get_adev(bo->bdev);
        ring = adev->mman.buffer_funcs_ring;
        old_start = old_mem->start << PAGE_SHIFT;
        new_start = new_mem->start << PAGE_SHIFT;

        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
                old_start += adev->mc.vram_start;
                break;
        case TTM_PL_TT:
                old_start += adev->mc.gtt_start;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
                return -EINVAL;
        }
        switch (new_mem->mem_type) {
        case TTM_PL_VRAM:
                new_start += adev->mc.vram_start;
                break;
        case TTM_PL_TT:
                new_start += adev->mc.gtt_start;
                break;
        default:
                DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
                return -EINVAL;
        }
        if (!ring->ready) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }

        BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);

        r = amdgpu_copy_buffer(ring, old_start, new_start,
                               new_mem->num_pages * PAGE_SIZE, /* bytes */
                               bo->resv, &fence);
        if (r)
                return r;

        /* FIXME: handle copy error */
        r = ttm_bo_move_accel_cleanup(bo, fence,
                                      evict, no_wait_gpu, new_mem);
        fence_put(fence);
        return r;
}
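/*
 * VRAM <-> system moves are done in two hops: stage the BO in GTT (the only
 * domain both the GPU and the CPU can address), blit one leg, then let
 * ttm_bo_move_ttm() finish the other leg.
 */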
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
                                bool evict, bool interruptible,
                                bool no_wait_gpu,
                                struct ttm_mem_reg *new_mem)
{
        struct amdgpu_device *adev;
        struct ttm_mem_reg *old_mem = &bo->mem;
        struct ttm_mem_reg tmp_mem;
        struct ttm_place placements;
        struct ttm_placement placement;
        int r;

        adev = amdgpu_get_adev(bo->bdev);
        tmp_mem = *new_mem;
        tmp_mem.mm_node = NULL;
        placement.num_placement = 1;
        placement.placement = &placements;
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
        placements.lpfn = 0;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r))
                return r;

        r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
        if (unlikely(r))
                goto out_cleanup;

        r = ttm_tt_bind(bo->ttm, &tmp_mem);
        if (unlikely(r))
                goto out_cleanup;

        r = amdgpu_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
        if (unlikely(r))
                goto out_cleanup;

        r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
}
static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
                                bool evict, bool interruptible,
                                bool no_wait_gpu,
                                struct ttm_mem_reg *new_mem)
{
        struct amdgpu_device *adev;
        struct ttm_mem_reg *old_mem = &bo->mem;
        struct ttm_mem_reg tmp_mem;
        struct ttm_placement placement;
        struct ttm_place placements;
        int r;

        adev = amdgpu_get_adev(bo->bdev);
        tmp_mem = *new_mem;
        tmp_mem.mm_node = NULL;
        placement.num_placement = 1;
        placement.placement = &placements;
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
        placements.lpfn = 0;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
        if (unlikely(r))
                return r;

        r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
        if (unlikely(r))
                goto out_cleanup;

        r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
        if (unlikely(r))
                goto out_cleanup;

out_cleanup:
        ttm_bo_mem_put(bo, &tmp_mem);
        return r;
}
static int amdgpu_bo_move(struct ttm_buffer_object *bo,
                          bool evict, bool interruptible,
                          bool no_wait_gpu,
                          struct ttm_mem_reg *new_mem)
{
        struct amdgpu_device *adev;
        struct amdgpu_bo *abo;
        struct ttm_mem_reg *old_mem = &bo->mem;
        int r;

        /* Can't move a pinned BO */
        abo = container_of(bo, struct amdgpu_bo, tbo);
        if (WARN_ON_ONCE(abo->pin_count > 0))
                return -EINVAL;

        adev = amdgpu_get_adev(bo->bdev);
        if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
                amdgpu_move_null(bo, new_mem);
                return 0;
        }
        if ((old_mem->mem_type == TTM_PL_TT &&
             new_mem->mem_type == TTM_PL_SYSTEM) ||
            (old_mem->mem_type == TTM_PL_SYSTEM &&
             new_mem->mem_type == TTM_PL_TT)) {
                /* bind is enough */
                amdgpu_move_null(bo, new_mem);
                return 0;
        }
        if (adev->mman.buffer_funcs == NULL ||
            adev->mman.buffer_funcs_ring == NULL ||
            !adev->mman.buffer_funcs_ring->ready) {
                /* use memcpy */
                goto memcpy;
        }

        if (old_mem->mem_type == TTM_PL_VRAM &&
            new_mem->mem_type == TTM_PL_SYSTEM) {
                r = amdgpu_move_vram_ram(bo, evict, interruptible,
                                         no_wait_gpu, new_mem);
        } else if (old_mem->mem_type == TTM_PL_SYSTEM &&
                   new_mem->mem_type == TTM_PL_VRAM) {
                r = amdgpu_move_ram_vram(bo, evict, interruptible,
                                         no_wait_gpu, new_mem);
        } else {
                r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
        }

        if (r) {
memcpy:
                r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
                if (r)
                        return r;
        }

        /* update statistics */
        atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
        return 0;
}
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
        struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
        struct amdgpu_device *adev = amdgpu_get_adev(bdev);

        mem->bus.addr = NULL;
        mem->bus.offset = 0;
        mem->bus.size = mem->num_pages << PAGE_SHIFT;
        mem->bus.base = 0;
        mem->bus.is_iomem = false;
        if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
                return -EINVAL;
        switch (mem->mem_type) {
        case TTM_PL_SYSTEM:
                /* system memory */
                return 0;
        case TTM_PL_TT:
                break;
        case TTM_PL_VRAM:
                mem->bus.offset = mem->start << PAGE_SHIFT;
                /* check if it's visible */
                if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
                        return -EINVAL;
                mem->bus.base = adev->mc.aper_base;
                mem->bus.is_iomem = true;
#ifdef __alpha__
                /*
                 * Alpha: use bus.addr to hold the ioremap() return,
                 * so we can modify bus.base below.
                 */
                if (mem->placement & TTM_PL_FLAG_WC)
                        mem->bus.addr =
                                ioremap_wc(mem->bus.base + mem->bus.offset,
                                           mem->bus.size);
                else
                        mem->bus.addr =
                                ioremap_nocache(mem->bus.base + mem->bus.offset,
                                                mem->bus.size);

                /*
                 * Alpha: Use just the bus offset plus
                 * the hose/domain memory base for bus.base.
                 * It then can be used to build PTEs for VRAM
                 * access, as done in ttm_bo_vm_fault().
                 */
                mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
                        adev->ddev->hose->dense_mem_base;
#endif
                break;
        default:
                return -EINVAL;
        }
        return 0;
}
static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
}
/*
 * TTM backend functions.
 */
struct amdgpu_ttm_gup_task_list {
        struct list_head        list;
        struct task_struct      *task;
};

struct amdgpu_ttm_tt {
        struct ttm_dma_tt       ttm;
        struct amdgpu_device    *adev;
        u64                     offset;
        uint64_t                userptr;
        struct mm_struct        *usermm;
        uint32_t                userflags;
        spinlock_t              guptasklock;
        struct list_head        guptasks;
        atomic_t                mmu_invalidations;
};
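/*
 * Pin the user pages backing a userptr BO. Each in-flight get_user_pages()
 * call is tracked on gtt->guptasks so the MMU notifier can tell whether an
 * invalidation races with the task that is doing the pinning.
 */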
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
        unsigned pinned = 0;
        int r;

        if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
                /* check that we only use anonymous memory
                   to prevent problems with writeback */
                unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
                struct vm_area_struct *vma;

                vma = find_vma(gtt->usermm, gtt->userptr);
                if (!vma || vma->vm_file || vma->vm_end < end)
                        return -EPERM;
        }

        do {
                unsigned num_pages = ttm->num_pages - pinned;
                uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
                struct page **p = pages + pinned;
                struct amdgpu_ttm_gup_task_list guptask;

                guptask.task = current;
                spin_lock(&gtt->guptasklock);
                list_add(&guptask.list, &gtt->guptasks);
                spin_unlock(&gtt->guptasklock);

                r = get_user_pages(userptr, num_pages, write, 0, p, NULL);

                spin_lock(&gtt->guptasklock);
                list_del(&guptask.list);
                spin_unlock(&gtt->guptasklock);

                if (r < 0)
                        goto release_pages;

                pinned += r;

        } while (pinned < ttm->num_pages);

        return 0;

release_pages:
        release_pages(pages, pinned, 0);
        return r;
}
/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
        struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        unsigned nents;
        int r;

        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
        enum dma_data_direction direction = write ?
                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
                                      ttm->num_pages << PAGE_SHIFT,
                                      GFP_KERNEL);
        if (r)
                goto release_sg;

        r = -ENOMEM;
        nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
        if (nents != ttm->sg->nents)
                goto release_sg;

        drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
                                         gtt->ttm.dma_address, ttm->num_pages);

        return 0;

release_sg:
        kfree(ttm->sg);
        return r;
}
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
        struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        struct sg_page_iter sg_iter;

        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
        enum dma_data_direction direction = write ?
                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

        /* double check that we don't free the table twice */
        if (!ttm->sg->sgl)
                return;

        /* free the sg table and pages again */
        dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

        for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) {
                struct page *page = sg_page_iter_page(&sg_iter);
                if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
                        set_page_dirty(page);

                mark_page_accessed(page);
                put_page(page);
        }

        sg_free_table(ttm->sg);
}
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
                                   struct ttm_mem_reg *bo_mem)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        int r;

        if (gtt->userptr) {
                r = amdgpu_ttm_tt_pin_userptr(ttm);
                if (r) {
                        DRM_ERROR("failed to pin userptr\n");
                        return r;
                }
        }
        gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
        if (!ttm->num_pages) {
                WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
                     ttm->num_pages, bo_mem, ttm);
        }

        if (bo_mem->mem_type == AMDGPU_PL_GDS ||
            bo_mem->mem_type == AMDGPU_PL_GWS ||
            bo_mem->mem_type == AMDGPU_PL_OA)
                return -EINVAL;

        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
                             ttm->pages, gtt->ttm.dma_address, flags);
        if (r) {
                DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
                          ttm->num_pages, (unsigned)gtt->offset);
                return r;
        }
        return 0;
}
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;

        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
        if (gtt->adev->gart.ready)
                amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);

        if (gtt->userptr)
                amdgpu_ttm_tt_unpin_userptr(ttm);

        return 0;
}
static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;

        ttm_dma_tt_fini(&gtt->ttm);
        kfree(gtt);
}
static struct ttm_backend_func amdgpu_backend_func = {
        .bind = &amdgpu_ttm_backend_bind,
        .unbind = &amdgpu_ttm_backend_unbind,
        .destroy = &amdgpu_ttm_backend_destroy,
};
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
                                           unsigned long size, uint32_t page_flags,
                                           struct page *dummy_read_page)
{
        struct amdgpu_device *adev;
        struct amdgpu_ttm_tt *gtt;

        adev = amdgpu_get_adev(bdev);

        gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
        if (gtt == NULL)
                return NULL;

        gtt->ttm.ttm.func = &amdgpu_backend_func;
        gtt->adev = adev;
        if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
                kfree(gtt);
                return NULL;
        }
        return &gtt->ttm.ttm;
}
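/*
 * Populate a ttm_tt with backing pages. Userptr BOs only get an empty sg
 * table here (their pages are pinned at bind time), dma-buf imports reuse
 * the exporter's sg table, and everything else comes from the TTM page
 * pool, with swiotlb taking over DMA mapping when a bounce buffer is active.
 */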
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
{
        struct amdgpu_device *adev;
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        unsigned i;
        int r;
        bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

        if (ttm->state != tt_unpopulated)
                return 0;

        if (gtt && gtt->userptr) {
                ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
                if (!ttm->sg)
                        return -ENOMEM;

                ttm->page_flags |= TTM_PAGE_FLAG_SG;
                ttm->state = tt_unbound;
                return 0;
        }

        if (slave && ttm->sg) {
                drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
                                                 gtt->ttm.dma_address, ttm->num_pages);
                ttm->state = tt_unbound;
                return 0;
        }

        adev = amdgpu_get_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
        if (swiotlb_nr_tbl()) {
                return ttm_dma_populate(&gtt->ttm, adev->dev);
        }
#endif

        r = ttm_pool_populate(ttm);
        if (r)
                return r;

        for (i = 0; i < ttm->num_pages; i++) {
                gtt->ttm.dma_address[i] = pci_map_page(adev->pdev, ttm->pages[i],
                                                       0, PAGE_SIZE,
                                                       PCI_DMA_BIDIRECTIONAL);
                if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
                        /* roll back every page mapped so far */
                        while (i--) {
                                pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
                                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
                                gtt->ttm.dma_address[i] = 0;
                        }
                        ttm_pool_unpopulate(ttm);
                        return -EFAULT;
                }
        }
        return 0;
}
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
        struct amdgpu_device *adev;
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        unsigned i;
        bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

        if (gtt && gtt->userptr) {
                kfree(ttm->sg);
                ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
                return;
        }

        if (slave)
                return;

        adev = amdgpu_get_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
        if (swiotlb_nr_tbl()) {
                ttm_dma_unpopulate(&gtt->ttm, adev->dev);
                return;
        }
#endif

        for (i = 0; i < ttm->num_pages; i++) {
                if (gtt->ttm.dma_address[i]) {
                        pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
                                       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
                }
        }

        ttm_pool_unpopulate(ttm);
}
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
                              uint32_t flags)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;

        if (gtt == NULL)
                return -EINVAL;

        gtt->userptr = addr;
        gtt->usermm = current->mm;
        gtt->userflags = flags;
        spin_lock_init(&gtt->guptasklock);
        INIT_LIST_HEAD(&gtt->guptasks);
        atomic_set(&gtt->mmu_invalidations, 0);

        return 0;
}
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;

        if (gtt == NULL)
                return NULL;

        return gtt->usermm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
                                  unsigned long end)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        struct amdgpu_ttm_gup_task_list *entry;
        unsigned long size;

        if (gtt == NULL || !gtt->userptr)
                return false;

        size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
        if (gtt->userptr > end || gtt->userptr + size <= start)
                return false;

        spin_lock(&gtt->guptasklock);
        list_for_each_entry(entry, &gtt->guptasks, list) {
                if (entry->task == current) {
                        spin_unlock(&gtt->guptasklock);
                        return false;
                }
        }
        spin_unlock(&gtt->guptasklock);

        atomic_inc(&gtt->mmu_invalidations);

        return true;
}
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
                                       int *last_invalidated)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        int prev_invalidated = *last_invalidated;

        *last_invalidated = atomic_read(&gtt->mmu_invalidations);
        return prev_invalidated != *last_invalidated;
}
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
        struct amdgpu_ttm_tt *gtt = (void *)ttm;

        if (gtt == NULL)
                return false;

        return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                                 struct ttm_mem_reg *mem)
{
        uint32_t flags = 0;

        if (mem && mem->mem_type != TTM_PL_SYSTEM)
                flags |= AMDGPU_PTE_VALID;

        if (mem && mem->mem_type == TTM_PL_TT) {
                flags |= AMDGPU_PTE_SYSTEM;

                if (ttm->caching_state == tt_cached)
                        flags |= AMDGPU_PTE_SNOOPED;
        }

        if (adev->asic_type >= CHIP_TONGA)
                flags |= AMDGPU_PTE_EXECUTABLE;

        flags |= AMDGPU_PTE_READABLE;

        if (!amdgpu_ttm_tt_is_readonly(ttm))
                flags |= AMDGPU_PTE_WRITEABLE;

        return flags;
}
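/*
 * Custom LRU management: BOs are bucketed by log2 of their page count so
 * that eviction prefers kicking out a few large BOs over many small ones.
 * Each bucket remembers its tail insertion point per memory type and for
 * the swap list.
 */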
static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
{
        struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
        unsigned i, j;

        for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
                struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];

                for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
                        if (&tbo->lru == lru->lru[j])
                                lru->lru[j] = tbo->lru.prev;

                if (&tbo->swap == lru->swap_lru)
                        lru->swap_lru = tbo->swap.prev;
        }
}
static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
{
        struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
        unsigned log2_size = min(ilog2(tbo->num_pages),
                                 AMDGPU_TTM_LRU_SIZE - 1);

        return &adev->mman.log2_size[log2_size];
}
static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
{
        struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
        struct list_head *res = lru->lru[tbo->mem.mem_type];

        lru->lru[tbo->mem.mem_type] = &tbo->lru;

        return res;
}
static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
{
        struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
        struct list_head *res = lru->swap_lru;

        lru->swap_lru = &tbo->swap;

        return res;
}
static struct ttm_bo_driver amdgpu_bo_driver = {
        .ttm_tt_create = &amdgpu_ttm_tt_create,
        .ttm_tt_populate = &amdgpu_ttm_tt_populate,
        .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
        .invalidate_caches = &amdgpu_invalidate_caches,
        .init_mem_type = &amdgpu_init_mem_type,
        .evict_flags = &amdgpu_evict_flags,
        .move = &amdgpu_bo_move,
        .verify_access = &amdgpu_verify_access,
        .move_notify = &amdgpu_bo_move_notify,
        .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
        .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
        .io_mem_free = &amdgpu_ttm_io_mem_free,
        .lru_removal = &amdgpu_ttm_lru_removal,
        .lru_tail = &amdgpu_ttm_lru_tail,
        .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
};
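/*
 * Bring up TTM for this device: reference the globals, initialize the BO
 * device, size the VRAM and GTT heaps, carve out the stolen VGA buffer and
 * register the GDS/GWS/OA pools and debugfs files.
 */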
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
        unsigned i, j;
        int r;

        r = amdgpu_ttm_global_init(adev);
        if (r)
                return r;

        /* No others user of address space so set it to 0 */
        r = ttm_bo_device_init(&adev->mman.bdev,
                               adev->mman.bo_global_ref.ref.object,
                               &amdgpu_bo_driver,
                               adev->ddev->anon_inode->i_mapping,
                               DRM_FILE_PAGE_OFFSET,
                               adev->need_dma32);
        if (r) {
                DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
                return r;
        }

        for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
                struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];

                for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
                        lru->lru[j] = &adev->mman.bdev.man[j].lru;
                lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
        }

        adev->mman.initialized = true;
        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
                           adev->mc.real_vram_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing VRAM heap.\n");
                return r;
        }
        /* Change the size here instead of the init above so only lpfn is affected */
        amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

        r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
                             AMDGPU_GEM_DOMAIN_VRAM,
                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
                             NULL, NULL, &adev->stollen_vga_memory);
        if (r)
                return r;

        r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
        if (r)
                return r;
        r = amdgpu_bo_pin(adev->stollen_vga_memory, AMDGPU_GEM_DOMAIN_VRAM, NULL);
        amdgpu_bo_unreserve(adev->stollen_vga_memory);
        if (r) {
                amdgpu_bo_unref(&adev->stollen_vga_memory);
                return r;
        }
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned)(adev->mc.real_vram_size / (1024 * 1024)));
        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT,
                           adev->mc.gtt_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing GTT heap.\n");
                return r;
        }
        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
                 (unsigned)(adev->mc.gtt_size / (1024 * 1024)));

        /* GDS/GWS/OA sizes are specified in units of the respective shift */
        adev->gds.mem.total_size <<= AMDGPU_GDS_SHIFT;
        adev->gds.mem.gfx_partition_size <<= AMDGPU_GDS_SHIFT;
        adev->gds.mem.cs_partition_size <<= AMDGPU_GDS_SHIFT;
        adev->gds.gws.total_size <<= AMDGPU_GWS_SHIFT;
        adev->gds.gws.gfx_partition_size <<= AMDGPU_GWS_SHIFT;
        adev->gds.gws.cs_partition_size <<= AMDGPU_GWS_SHIFT;
        adev->gds.oa.total_size <<= AMDGPU_OA_SHIFT;
        adev->gds.oa.gfx_partition_size <<= AMDGPU_OA_SHIFT;
        adev->gds.oa.cs_partition_size <<= AMDGPU_OA_SHIFT;

        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
                           adev->gds.mem.total_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing GDS heap.\n");
                return r;
        }
        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
                           adev->gds.gws.total_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing gws heap.\n");
                return r;
        }
        r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
                           adev->gds.oa.total_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing oa heap.\n");
                return r;
        }
        r = amdgpu_ttm_debugfs_init(adev);
        if (r) {
                DRM_ERROR("Failed to init debugfs\n");
                return r;
        }
        return 0;
}
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
        int r;

        if (!adev->mman.initialized)
                return;
        amdgpu_ttm_debugfs_fini(adev);
        if (adev->stollen_vga_memory) {
                r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
                if (r == 0) {
                        amdgpu_bo_unpin(adev->stollen_vga_memory);
                        amdgpu_bo_unreserve(adev->stollen_vga_memory);
                }
                amdgpu_bo_unref(&adev->stollen_vga_memory);
        }
        ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
        ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
        ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
        ttm_bo_device_release(&adev->mman.bdev);
        amdgpu_gart_fini(adev);
        amdgpu_ttm_global_fini(adev);
        adev->mman.initialized = false;
        DRM_INFO("amdgpu: ttm finalized\n");
}
/* this should only be called at bootup or when userspace
 * isn't running */
void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
{
        struct ttm_mem_type_manager *man;

        if (!adev->mman.initialized)
                return;

        man = &adev->mman.bdev.man[TTM_PL_VRAM];
        /* this just adjusts TTM size idea, which sets lpfn to the correct value */
        man->size = size >> PAGE_SHIFT;
}
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct drm_file *file_priv;
        struct amdgpu_device *adev;

        if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
                return -EINVAL;

        file_priv = filp->private_data;
        adev = file_priv->minor->dev->dev_private;
        if (adev == NULL)
                return -EINVAL;

        return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
}
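/*
 * Queue a copy between two GPU addresses on the copy engine. The request
 * is split into packets of at most copy_max_bytes each, optionally
 * synchronized against a reservation object, and submitted through the
 * kernel entity; the returned fence signals completion.
 */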
int amdgpu_copy_buffer(struct amdgpu_ring *ring,
                       uint64_t src_offset,
                       uint64_t dst_offset,
                       uint32_t byte_count,
                       struct reservation_object *resv,
                       struct fence **fence)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;

        uint32_t max_bytes;
        unsigned num_loops, num_dw;
        unsigned i;
        int r;

        max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
        num_loops = DIV_ROUND_UP(byte_count, max_bytes);
        num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;

        /* for IB padding */
        while (num_dw & 0x7)
                num_dw++;

        r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
        if (r)
                return r;

        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
                                     AMDGPU_FENCE_OWNER_UNDEFINED);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
                }
        }

        for (i = 0; i < num_loops; i++) {
                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

                amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
                                        dst_offset, cur_size_in_bytes);

                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
                byte_count -= cur_size_in_bytes;
        }

        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
        r = amdgpu_job_submit(job, ring, &adev->mman.entity,
                              AMDGPU_FENCE_OWNER_UNDEFINED, fence);
        if (r)
                goto error_free;

        return 0;

error_free:
        amdgpu_job_free(job);
        return r;
}
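/*
 * debugfs entries: per-domain drm_mm dumps, TTM page-pool statistics and
 * raw read-only windows into VRAM and (optionally) the GART-visible pages.
 */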
#if defined(CONFIG_DEBUG_FS)

static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *)m->private;
        unsigned ttm_pl = *(int *)node->info_ent->data;
        struct drm_device *dev = node->minor->dev;
        struct amdgpu_device *adev = dev->dev_private;
        struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
        int ret;
        struct ttm_bo_global *glob = adev->mman.bdev.glob;

        spin_lock(&glob->lru_lock);
        ret = drm_mm_dump_table(m, mm);
        spin_unlock(&glob->lru_lock);
        if (ttm_pl == TTM_PL_VRAM)
                seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
                           adev->mman.bdev.man[ttm_pl].size,
                           (u64)atomic64_read(&adev->vram_usage) >> 20,
                           (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
        return ret;
}

static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;

static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
        {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
        {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
        {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
        {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
#endif
};
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
                                    size_t size, loff_t *pos)
{
        struct amdgpu_device *adev = f->f_inode->i_private;
        ssize_t result = 0;
        int r;

        if (size & 0x3 || *pos & 0x3)
                return -EINVAL;

        while (size) {
                unsigned long flags;
                uint32_t value;

                if (*pos >= adev->mc.mc_vram_size)
                        return result;

                spin_lock_irqsave(&adev->mmio_idx_lock, flags);
                WREG32(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
                WREG32(mmMM_INDEX_HI, *pos >> 31);
                value = RREG32(mmMM_DATA);
                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

                r = put_user(value, (uint32_t *)buf);
                if (r)
                        return r;

                result += 4;
                buf += 4;
                *pos += 4;
                size -= 4;
        }

        return result;
}

static const struct file_operations amdgpu_ttm_vram_fops = {
        .owner = THIS_MODULE,
        .read = amdgpu_ttm_vram_read,
        .llseek = default_llseek
};
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS

static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
                                   size_t size, loff_t *pos)
{
        struct amdgpu_device *adev = f->f_inode->i_private;
        ssize_t result = 0;
        int r;

        while (size) {
                loff_t p = *pos / PAGE_SIZE;
                unsigned off = *pos & ~PAGE_MASK;
                size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
                struct page *page;
                void *ptr;

                if (p >= adev->gart.num_cpu_pages)
                        return result;

                page = adev->gart.pages[p];
                if (page) {
                        ptr = kmap(page);
                        ptr += off;

                        r = copy_to_user(buf, ptr, cur_size);
                        kunmap(adev->gart.pages[p]);
                } else
                        r = clear_user(buf, cur_size);

                if (r)
                        return -EFAULT;

                result += cur_size;
                buf += cur_size;
                *pos += cur_size;
                size -= cur_size;
        }

        return result;
}

static const struct file_operations amdgpu_ttm_gtt_fops = {
        .owner = THIS_MODULE,
        .read = amdgpu_ttm_gtt_read,
        .llseek = default_llseek
};

#endif
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
        unsigned count;

        struct drm_minor *minor = adev->ddev->primary;
        struct dentry *ent, *root = minor->debugfs_root;

        ent = debugfs_create_file("amdgpu_vram", S_IFREG | S_IRUGO, root,
                                  adev, &amdgpu_ttm_vram_fops);
        if (IS_ERR(ent))
                return PTR_ERR(ent);
        i_size_write(ent->d_inode, adev->mc.mc_vram_size);
        adev->mman.vram = ent;

#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
        ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root,
                                  adev, &amdgpu_ttm_gtt_fops);
        if (IS_ERR(ent))
                return PTR_ERR(ent);
        i_size_write(ent->d_inode, adev->mc.gtt_size);
        adev->mman.gtt = ent;
#endif

        count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);

#ifdef CONFIG_SWIOTLB
        if (!swiotlb_nr_tbl())
                --count;
#endif

        return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
#else
        return 0;
#endif
}
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
        debugfs_remove(adev->mman.vram);
        adev->mman.vram = NULL;

#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
        debugfs_remove(adev->mman.gtt);
        adev->mman.gtt = NULL;
#endif
#endif
}