Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
038b0a6d | 2 | |
8d379dad JB |
3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | |
090a3f61 | 5 | #include <asm/cpufeature.h> |
59e97e4d | 6 | #include <asm/alternative-asm.h> |
8d379dad | 7 | |
090a3f61 BP |
8 | /* |
9 | * Some CPUs run faster using the string copy instructions (sane microcode). | |
10 | * It is also a lot simpler. Use this when possible. But, don't use string | |
11 | * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the | |
12 | * prefetch distance based on SMP/UP. | |
13 | */ | |
8d379dad | 14 | ALIGN |
090a3f61 | 15 | ENTRY(copy_page) /* copy 4096 bytes from (%rsi) to (%rdi) */ |
8d379dad | 16 | CFI_STARTPROC |
090a3f61 | 17 | ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD /* presumably: jump to the register-based copy unless the CPU has REP_GOOD, in which case the jmp is patched out and we fall through; macro is defined outside this file - confirm */ |
269833bd ML |
18 | movl $4096/8, %ecx /* count = 512 quadwords; the 32-bit write zero-extends into %rcx */ |
19 | rep movsq /* copy %rcx quadwords from (%rsi) to (%rdi); assumes the direction flag is clear - TODO confirm */ | |
8d379dad JB |
20 | ret |
21 | CFI_ENDPROC | |
090a3f61 | 22 | ENDPROC(copy_page) |
1da177e4 | 23 | |
090a3f61 | 24 | ENTRY(copy_page_regs) /* copy 4096 bytes from (%rsi) to (%rdi), 64 bytes per iteration through eight registers */ |
8d379dad | 25 | CFI_STARTPROC |
269833bd | 26 | subq $2*8, %rsp /* reserve two 8-byte stack slots for %rbx and %r12 */ |
42693290 | 27 | CFI_ADJUST_CFA_OFFSET 2*8 |
269833bd | 28 | movq %rbx, (%rsp) /* %rbx and %r12 are used as copy temporaries below, so save them first */ |
8d379dad | 29 | CFI_REL_OFFSET rbx, 0 |
269833bd | 30 | movq %r12, 1*8(%rsp) |
8d379dad | 31 | CFI_REL_OFFSET r12, 1*8 |
7bcd3f34 | 32 | |
269833bd | 33 | movl $(4096/64)-5, %ecx /* 59 blocks in the prefetching loop; the last 5 blocks go through .Loop2 */ |
7bcd3f34 AK |
34 | .p2align 4 |
35 | .Loop64: | |
269833bd ML |
36 | dec %rcx /* sets ZF for the jnz at the bottom; the mov, prefetch and lea in between leave the flags untouched */ |
37 | movq 0x8*0(%rsi), %rax /* load one 64-byte source block into eight registers */ | |
38 | movq 0x8*1(%rsi), %rbx | |
39 | movq 0x8*2(%rsi), %rdx | |
40 | movq 0x8*3(%rsi), %r8 | |
41 | movq 0x8*4(%rsi), %r9 | |
42 | movq 0x8*5(%rsi), %r10 | |
43 | movq 0x8*6(%rsi), %r11 | |
44 | movq 0x8*7(%rsi), %r12 | |
7bcd3f34 AK |
45 | |
46 | prefetcht0 5*64(%rsi) /* prefetch the source block five iterations ahead; with 59 iterations this reaches at most block 63, the last one in the page */ | |
47 | ||
269833bd ML |
48 | movq %rax, 0x8*0(%rdi) /* store the eight registers to the destination block */ |
49 | movq %rbx, 0x8*1(%rdi) | |
50 | movq %rdx, 0x8*2(%rdi) | |
51 | movq %r8, 0x8*3(%rdi) | |
52 | movq %r9, 0x8*4(%rdi) | |
53 | movq %r10, 0x8*5(%rdi) | |
54 | movq %r11, 0x8*6(%rdi) | |
55 | movq %r12, 0x8*7(%rdi) | |
7bcd3f34 | 56 | |
269833bd ML |
57 | leaq 64 (%rsi), %rsi /* advance both pointers by one block; lea keeps the flags from dec intact */ |
58 | leaq 64 (%rdi), %rdi | |
7bcd3f34 | 59 | |
269833bd | 60 | jnz .Loop64 /* repeat until the counter decremented at the loop top reaches zero */ |
7bcd3f34 | 61 | |
269833bd | 62 | movl $5, %ecx /* remaining 5 blocks (5*64 = 320 bytes) */ |
7bcd3f34 AK |
63 | .p2align 4 |
64 | .Loop2: /* same 64-byte copy as .Loop64, but without the prefetch */ | |
269833bd ML |
65 | decl %ecx /* sets ZF for the jnz at the bottom of this loop */ |
66 | ||
67 | movq 0x8*0(%rsi), %rax | |
68 | movq 0x8*1(%rsi), %rbx | |
69 | movq 0x8*2(%rsi), %rdx | |
70 | movq 0x8*3(%rsi), %r8 | |
71 | movq 0x8*4(%rsi), %r9 | |
72 | movq 0x8*5(%rsi), %r10 | |
73 | movq 0x8*6(%rsi), %r11 | |
74 | movq 0x8*7(%rsi), %r12 | |
75 | ||
76 | movq %rax, 0x8*0(%rdi) | |
77 | movq %rbx, 0x8*1(%rdi) | |
78 | movq %rdx, 0x8*2(%rdi) | |
79 | movq %r8, 0x8*3(%rdi) | |
80 | movq %r9, 0x8*4(%rdi) | |
81 | movq %r10, 0x8*5(%rdi) | |
82 | movq %r11, 0x8*6(%rdi) | |
83 | movq %r12, 0x8*7(%rdi) | |
84 | ||
85 | leaq 64(%rdi), %rdi /* lea again, so the flags from decl survive until the jnz */ | |
86 | leaq 64(%rsi), %rsi | |
7bcd3f34 AK |
87 | jnz .Loop2 |
88 | ||
269833bd | 89 | movq (%rsp), %rbx /* restore the saved registers and release the two stack slots */ |
8d379dad | 90 | CFI_RESTORE rbx |
269833bd | 91 | movq 1*8(%rsp), %r12 |
8d379dad | 92 | CFI_RESTORE r12 |
269833bd | 93 | addq $2*8, %rsp |
42693290 | 94 | CFI_ADJUST_CFA_OFFSET -2*8 |
7bcd3f34 | 95 | ret |
8d379dad | 96 | CFI_ENDPROC |
090a3f61 | 97 | ENDPROC(copy_page_regs) |