/*
 * Copyright (C) 2001 Todd Inglett, IBM Corporation
 *
 * pSeries LPAR support.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
/* Enables debugging of low-level hash table routines - careful! */
#undef DEBUG
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/firmware.h>

#include "plpar_wrappers.h"
#include "pseries.h"
/* Flag bits for H_BULK_REMOVE */
#define HBR_REQUEST	0x4000000000000000UL
#define HBR_RESPONSE	0x8000000000000000UL
#define HBR_END		0xc000000000000000UL
#define HBR_AVPN	0x0200000000000000UL
#define HBR_ANDCOND	0x0100000000000000UL
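
/*
 * As used by the H_BULK_REMOVE callers below: each request occupies two
 * hcall parameters, a control word (HBR_REQUEST, the HPTE index, and
 * HBR_AVPN when the AVPN should be matched) followed by the AVPN itself,
 * so one plpar_hcall9() can carry up to four such pairs; a partially
 * filled parameter list is terminated with HBR_END.
 */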
EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
extern void pSeries_find_serial_port(void);
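
/*
 * vpa_init() registers the per-cpu areas shared with the hypervisor for
 * the given cpu: the Virtual Processor Area itself, the SLB shadow
 * buffer, and, if one has been allocated, the dispatch trace log.
 */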
void vpa_init(int cpu)
{
	int hwcpu = get_hard_smp_processor_id(cpu);
	unsigned long addr;
	long ret;
	struct paca_struct *pp;
	struct dtl_entry *dtl;

	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		lppaca_of(cpu).vmxregs_in_use = 1;

	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		lppaca_of(cpu).ebb_regs_in_use = 1;

	addr = __pa(&lppaca_of(cpu));
	ret = register_vpa(hwcpu, addr);

	if (ret) {
		pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
		       "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
		return;
	}
	/*
	 * PAPR says this feature is SLB-Buffer but firmware never
	 * reports that.  All SPLPAR support SLB shadow buffer.
	 */
	addr = __pa(&slb_shadow[cpu]);
	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
		ret = register_slb_shadow(hwcpu, addr);
		if (ret)
			pr_err("WARNING: SLB shadow buffer registration for "
			       "cpu %d (hw %d) of area %lx failed with %ld\n",
			       cpu, hwcpu, addr, ret);
	}

	/*
	 * Register dispatch trace log, if one has been allocated.
	 */
	pp = &paca[cpu];
	dtl = pp->dispatch_log;
	if (dtl) {
		pp->dtl_ridx = 0;
		pp->dtl_curr = dtl;
		lppaca_of(cpu).dtl_idx = 0;

		/* hypervisor reads buffer length from this field */
		dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
		ret = register_dtl(hwcpu, __pa(dtl));
		if (ret)
			pr_err("WARNING: DTL registration of cpu %d (hw %d) "
			       "failed with %ld\n", smp_processor_id(),
			       hwcpu, ret);
		lppaca_of(cpu).dtl_enable_mask = 2;
	}
}
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
				     unsigned long vpn, unsigned long pa,
				     unsigned long rflags, unsigned long vflags,
				     int psize, int apsize, int ssize)
{
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long slot;
	unsigned long hpte_v, hpte_r;

	if (!(vflags & HPTE_V_BOLTED))
		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
			 hpte_group, vpn, pa, rflags, vflags, psize);

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED))
		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);

	/* Now fill in the actual HPTE */
	/* Set CEC cookie to 0         */
	/* Zero page = 0               */
	/* I-cache Invalidate = 0      */
	/* I-cache synchronize = 0     */
	/* Exact = 0                   */
	flags = 0;

	/* Make pHyp happy */
	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
		hpte_r &= ~_PAGE_COHERENT;
	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
		flags |= H_COALESCE_CAND;

	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
	if (unlikely(lpar_rc == H_PTEG_FULL)) {
		if (!(vflags & HPTE_V_BOLTED))
			pr_devel(" full\n");
		return -1;
	}

	/*
	 * Since we try and ioremap PHBs we don't own, the pte insert
	 * will fail. However we must catch the failure in hash_page
	 * or we will loop forever, so return -2 in this case.
	 */
	if (unlikely(lpar_rc != H_SUCCESS)) {
		if (!(vflags & HPTE_V_BOLTED))
			pr_devel(" lpar err %ld\n", lpar_rc);
		return -2;
	}
	if (!(vflags & HPTE_V_BOLTED))
		pr_devel(" -> slot: %lu\n", slot & 7);

	/*
	 * Because of iSeries, we have to pass down the secondary
	 * bucket bit here as well
	 */
	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
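
/*
 * Serializes hypervisor TLB invalidations issued from this partition;
 * taken by the flush paths below unless MMU_FTR_LOCKLESS_TLBIE is set,
 * to avoid contending for the hypervisor's own tlbie lock.
 */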
static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
{
	unsigned long slot_offset;
	unsigned long lpar_rc;
	int i;
	unsigned long dummy1, dummy2;

	/* pick a random slot to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {

		/* don't remove a bolted entry */
		lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
					   (0x1UL << 4), &dummy1, &dummy2);
		if (lpar_rc == H_SUCCESS)
			return i;

		/*
		 * The test for adjunct partition is performed before the
		 * ANDCOND test.  H_RESOURCE may be returned, so we need to
		 * check for that as well.
		 */
		BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE);

		slot_offset++;
		slot_offset &= 0x7;
	}

	return -1;
}
static void pSeries_lpar_hptab_clear(void)
{
	unsigned long size_bytes = 1UL << ppc64_pft_size;
	unsigned long hpte_count = size_bytes >> 4;
	struct {
		unsigned long pteh;
		unsigned long ptel;
	} ptes[4];
	long lpar_rc;
	unsigned long i, j;

	/*
	 * Read in batches of 4,
	 * invalidate only valid entries not in the VRMA
	 * hpte_count will be a multiple of 4
	 */
	for (i = 0; i < hpte_count; i += 4) {
		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
		if (lpar_rc != H_SUCCESS)
			continue;
		for (j = 0; j < 4; j++) {
			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
				HPTE_V_VRMA_MASK)
				continue;
			if (ptes[j].pteh & HPTE_V_VALID)
				plpar_pte_remove_raw(0, i + j, 0,
					&(ptes[j].pteh), &(ptes[j].ptel));
		}
	}
}
/*
 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
 * the low 3 bits of flags happen to line up.  So no transform is needed.
 * We can probably optimize here and assume the high bits of newpp are
 * already zero.  For now I am paranoid.
 */
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
				       unsigned long newpp,
				       unsigned long vpn,
				       int psize, int apsize,
				       int ssize, int local)
{
	unsigned long lpar_rc;
	unsigned long flags = (newpp & 7) | H_AVPN;
	unsigned long want_v;

	want_v = hpte_encode_avpn(vpn, psize, ssize);

	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
		 want_v, slot, flags, psize);

	lpar_rc = plpar_pte_protect(flags, slot, want_v);

	if (lpar_rc == H_NOT_FOUND) {
		pr_devel("not found !\n");
		return -1;
	}

	pr_devel("ok\n");

	BUG_ON(lpar_rc != H_SUCCESS);

	return 0;
}
static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
{
	unsigned long dword0;
	unsigned long lpar_rc;
	unsigned long dummy_word1;
	unsigned long flags;

	/* Read 1 pte at a time                        */
	/* Do not need RPN to logical page translation */
	/* No cross CEC PFT access                     */
	flags = 0;

	lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);

	BUG_ON(lpar_rc != H_SUCCESS);

	return dword0;
}
static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
{
	unsigned long hash;
	long slot;
	unsigned long want_v, hpte_v;
	int i;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted entries are always in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hpte_v = pSeries_lpar_hpte_getword0(slot);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
					     unsigned long ea,
					     int psize, int ssize)
{
	unsigned long vpn;
	unsigned long lpar_rc, slot, vsid, flags;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	BUG_ON(slot == -1);

	flags = newpp & 7;
	lpar_rc = plpar_pte_protect(flags, slot, 0);

	BUG_ON(lpar_rc != H_SUCCESS);
}
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
					 int psize, int apsize,
					 int ssize, int local)
{
	unsigned long want_v;
	unsigned long lpar_rc;
	unsigned long dummy1, dummy2;

	pr_devel("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
		 slot, vpn, psize, local);

	want_v = hpte_encode_avpn(vpn, psize, ssize);
	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
	if (lpar_rc == H_NOT_FOUND)
		return;

	BUG_ON(lpar_rc != H_SUCCESS);
}
/*
 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
 * to make sure that we avoid bouncing the hypervisor tlbie lock.
 */
#define PPC64_HUGE_HPTE_BATCH 12
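
/*
 * With four (control, AVPN) pairs per H_BULK_REMOVE, a batch of 12
 * translates to at most three hcalls per lock hold.
 */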
static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
					       unsigned long *vpn, int count,
					       int psize, int ssize)
{
	unsigned long param[8];
	int i = 0, pix = 0, rc;
	unsigned long flags = 0;
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);

	for (i = 0; i < count; i++) {

		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
						     ssize, 0);
		} else {
			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
			pix += 2;
			if (pix == 8) {
				rc = plpar_hcall9(H_BULK_REMOVE, param,
						  param[0], param[1], param[2],
						  param[3], param[4], param[5],
						  param[6], param[7]);
				BUG_ON(rc != H_SUCCESS);
				pix = 0;
			}
		}
	}
	if (pix) {
		param[pix] = HBR_END;
		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
				  param[2], param[3], param[4], param[5],
				  param[6], param[7]);
		BUG_ON(rc != H_SUCCESS);
	}

	if (lock_tlbie)
		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
					     unsigned char *hpte_slot_array,
					     unsigned long addr, int psize)
{
	int ssize = 0, i, index = 0;
	unsigned long s_addr = addr;
	unsigned int max_hpte_count, valid;
	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		if (!is_kernel_addr(addr)) {
			ssize = user_segment_size(addr);
			vsid = get_vsid(mm->context.id, addr, ssize);
			WARN_ON(vsid == 0);
		} else {
			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
			ssize = mmu_kernel_ssize;
		}

		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		slot_array[index] = slot;
		vpn_array[index] = vpn;
		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
			/*
			 * Now do a bulk invalidate
			 */
			__pSeries_lpar_hugepage_invalidate(slot_array,
							   vpn_array,
							   PPC64_HUGE_HPTE_BATCH,
							   psize, ssize);
			index = 0;
		} else
			index++;
	}
	if (index)
		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
						   index, psize, ssize);
}
static void pSeries_lpar_hpte_removebolted(unsigned long ea,
					   int psize, int ssize)
{
	unsigned long vpn;
	unsigned long slot, vsid;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	BUG_ON(slot == -1);
	/*
	 * lpar doesn't use the passed actual page size
	 */
	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
}
/*
 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
 * lock.
 */
static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long i, pix, rc;
	unsigned long flags = 0;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	unsigned long param[9];
	unsigned long hash, index, shift, hidx, slot;
	real_pte_t pte;
	int psize, ssize;

	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);

	psize = batch->psize;
	ssize = batch->ssize;
	pix = 0;
	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];
		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
				/*
				 * lpar doesn't use the passed actual page size
				 */
				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
							     0, ssize, local);
			} else {
				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
				param[pix+1] = hpte_encode_avpn(vpn, psize,
								ssize);
				pix += 2;
				if (pix == 8) {
					rc = plpar_hcall9(H_BULK_REMOVE, param,
						param[0], param[1], param[2],
						param[3], param[4], param[5],
						param[6], param[7]);
					BUG_ON(rc != H_SUCCESS);
					pix = 0;
				}
			}
		} pte_iterate_hashed_end();
	}
	if (pix) {
		param[pix] = HBR_END;
		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
				  param[2], param[3], param[4], param[5],
				  param[6], param[7]);
		BUG_ON(rc != H_SUCCESS);
	}

	if (lock_tlbie)
		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
static int __init disable_bulk_remove(char *str)
{
	if (strcmp(str, "off") == 0 &&
	    firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
		printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
		powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
	}
	return 1;
}

__setup("bulk_remove=", disable_bulk_remove);
void __init hpte_init_lpar(void)
{
	ppc_md.hpte_invalidate	= pSeries_lpar_hpte_invalidate;
	ppc_md.hpte_updatepp	= pSeries_lpar_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
	ppc_md.hpte_insert	= pSeries_lpar_hpte_insert;
	ppc_md.hpte_remove	= pSeries_lpar_hpte_remove;
	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
	ppc_md.hpte_clear_all	= pSeries_lpar_hptab_clear;
	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
}
#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;

static int __init cmo_free_hint(char *str)
{
	char *parm;
	parm = strstrip(str);

	if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
		printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n");
		cmo_free_hint_flag = 0;
		return 1;
	}

	cmo_free_hint_flag = 1;
	printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n");

	if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
		return 1;

	return 0;
}

__setup("cmo_free_hint=", cmo_free_hint);
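
/*
 * The hypervisor's CMO page size can be smaller than PAGE_SIZE, so each
 * Linux page is hinted to the hypervisor in cmo_get_page_size()-sized
 * chunks via H_PAGE_INIT.
 */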
static void pSeries_set_page_state(struct page *page, int order,
				   unsigned long state)
{
	int i, j;
	unsigned long cmo_page_sz, addr;

	cmo_page_sz = cmo_get_page_size();
	addr = __pa((unsigned long)page_address(page));

	for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) {
		for (j = 0; j < PAGE_SIZE; j += cmo_page_sz)
			plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0);
	}
}

void arch_free_page(struct page *page, int order)
{
	if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
		return;

	pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
}
EXPORT_SYMBOL(arch_free_page);

#endif
#ifdef CONFIG_TRACEPOINTS
/*
 * We optimise our hcall path by placing hcall_tracepoint_refcount
 * directly in the TOC so we can check if the hcall tracepoints are
 * enabled via a single load.
 */

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;

/*
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this are spinlocks calling H_YIELD on
 * shared processor partitions.
 */
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);

void hcall_tracepoint_regfunc(void)
{
	hcall_tracepoint_refcount++;
}

void hcall_tracepoint_unregfunc(void)
{
	hcall_tracepoint_refcount--;
}

void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
	unsigned long flags;
	unsigned int *depth;

	/*
	 * We cannot call tracepoints inside RCU idle regions which
	 * means we must not trace H_CEDE.
	 */
	if (opcode == H_CEDE)
		return;

	local_irq_save(flags);

	depth = &__get_cpu_var(hcall_trace_depth);

	if (*depth)
		goto out;

	(*depth)++;
	preempt_disable();
	trace_hcall_entry(opcode, args);
	(*depth)--;

out:
	local_irq_restore(flags);
}
void __trace_hcall_exit(long opcode, unsigned long retval,
			unsigned long *retbuf)
{
	unsigned long flags;
	unsigned int *depth;

	if (opcode == H_CEDE)
		return;

	local_irq_save(flags);

	depth = &__get_cpu_var(hcall_trace_depth);

	if (*depth)
		goto out;

	(*depth)++;
	trace_hcall_exit(opcode, retval, retbuf);
	preempt_enable();
	(*depth)--;

out:
	local_irq_restore(flags);
}
#endif
/**
 * h_get_mpp
 * H_GET_MPP hcall returns info in 7 parms
 */
int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
	int rc;
	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];

	rc = plpar_hcall9(H_GET_MPP, retbuf);

	mpp_data->entitled_mem = retbuf[0];
	mpp_data->mapped_mem = retbuf[1];

	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
	mpp_data->pool_num = retbuf[2] & 0xffff;

	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;

	mpp_data->pool_size = retbuf[4];
	mpp_data->loan_request = retbuf[5];
	mpp_data->backing_mem = retbuf[6];

	return rc;
}
EXPORT_SYMBOL(h_get_mpp);
int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
{
	int rc;
	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };

	rc = plpar_hcall9(H_GET_MPP_X, retbuf);

	mpp_x_data->coalesced_bytes = retbuf[0];
	mpp_x_data->pool_coalesced_bytes = retbuf[1];
	mpp_x_data->pool_purr_cycles = retbuf[2];
	mpp_x_data->pool_spurr_cycles = retbuf[3];