Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) 1999-2002 Hewlett-Packard Co | |
3 | * Stephane Eranian <eranian@hpl.hp.com> | |
4 | * David Mosberger-Tang <davidm@hpl.hp.com> | |
5 | * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> | |
6 | * | |
7 | * 1/06/01 davidm Tuned for Itanium. | |
8 | * 2/12/02 kchen Tuned for both Itanium and McKinley | |
9 | * 3/08/02 davidm Some more tweaking | |
10 | */ | |
1da177e4 LT |
11 | |
12 | #include <asm/asmmacro.h> | |
13 | #include <asm/page.h> | |
e007c533 | 14 | #include <asm/export.h> |
1da177e4 LT |
15 | |
// Per-CPU tuning constants for clear_page; CONFIG_ITANIUM selects which pair is used.
//   L3_LINE_SIZE   - byte stride of the run-ahead store pointer (dst_fetch), and the
//                    divisor that turns PAGE_SIZE into the main loop's iteration count
//   PREFETCH_LINES - number of run-ahead stores issued before the main loop starts
// The loop count is computed as PAGE_SIZE/L3_LINE_SIZE, so PAGE_SIZE is assumed to be
// a multiple of L3_LINE_SIZE.
16 | #ifdef CONFIG_ITANIUM | |
17 | # define L3_LINE_SIZE 64 // Itanium L3 line size | |
18 | # define PREFETCH_LINES 9 // magic number | |
19 | #else | |
20 | # define L3_LINE_SIZE 128 // McKinley L3 line size | |
21 | # define PREFETCH_LINES 12 // magic number | |
22 | #endif | |
23 | ||
// Symbolic register names used by clear_page:
//   saved_lc   - holds the caller's ar.lc while ar.lc serves as the loop counter
//   dst_fetch  - run-ahead store pointer; starts at in0 and advances by
//                L3_LINE_SIZE on every store through it
//   dst1..dst4 - trailing store pointers, initialised to in0+16, in0+32,
//                in0+48 and in0+64 respectively
//   dst_last   - bound that dst_fetch is compared against; once dst_fetch is no
//                longer below it, the run-ahead store is predicated off
24 | #define saved_lc r2 | |
25 | #define dst_fetch r3 | |
26 | #define dst1 r8 | |
27 | #define dst2 r9 | |
28 | #define dst3 r10 | |
29 | #define dst4 r11 | |
30 | ||
31 | #define dst_last r31 | |
32 | ||
// clear_page: fill the PAGE_SIZE bytes starting at in0 with zeros.
//
// In:       in0 = start address of the page
// Written:  r2, r3, r8-r11, r16, r31, p8; ar.lc is saved on entry and restored on exit
//
// Every store has the form "stf.spill.nta [p] = f0, step": a 16-byte
// floating-point spill of f0 (the architectural +0.0 register) followed by
// p += step, so each store writes 16 zero bytes.
// NOTE(review): stf.spill requires 16-byte aligned addresses; in0 is presumably
// page aligned - confirm against callers.
//
// dst_fetch runs PREFETCH_LINES lines ahead of the other pointers and writes
// bytes 0..15 of each L3_LINE_SIZE-byte line; the dstN pointers follow behind
// and write the rest of each line.
33 | GLOBAL_ENTRY(clear_page) | |
34 | .prologue | |
// Register stack frame: 1 input (in0); no locals, outputs or rotating registers.
35 | .regstk 1,0,0,0 | |
36 | mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until | |
// Unwind annotation: from the following instruction on, ar.lc is kept in saved_lc.
37 | .save ar.lc, saved_lc | |
38 | mov saved_lc = ar.lc | |
39 | ||
40 | .body | |
// Run-ahead loop count: br.cloop repeats while ar.lc != 0, so loading
// PREFETCH_LINES-1 makes the .fetch loop execute PREFETCH_LINES times.
41 | mov ar.lc = (PREFETCH_LINES - 1) | |
42 | mov dst_fetch = in0 | |
43 | adds dst1 = 16, in0 | |
44 | adds dst2 = 32, in0 | |
45 | ;; | |
// Run-ahead loop: zero bytes 0..15 of each of the first PREFETCH_LINES lines.
// dst3 is loop invariant here (it depends only on in0); presumably it sits in
// the loop to occupy an otherwise unused instruction slot.
46 | .fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | |
47 | adds dst3 = 48, in0 // executing this multiple times is harmless | |
48 | br.cloop.sptk.few .fetch | |
49 | ;; | |
// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so the sum below gives
// dst_last = in0 + PAGE_SIZE, i.e. the first byte past the page.
50 | addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch | |
51 | mov ar.lc = r16 // one L3 line per iteration | |
// dst4 is only stored through on the McKinley path below.
52 | adds dst4 = 64, in0 | |
53 | ;; | |
// Main loop: PAGE_SIZE/L3_LINE_SIZE iterations, each completing one line.
54 | #ifdef CONFIG_ITANIUM | |
55 | // Optimized for Itanium | |
// 64-byte line: dst1 and dst2 zero bytes 16..47 here; the dst3 store after
// the #endif zeroes bytes 48..63. All three pointers step by 64 (one line).
56 | 1: stf.spill.nta [dst1] = f0, 64 | |
57 | stf.spill.nta [dst2] = f0, 64 | |
// p8 = (dst_fetch < dst_last): there is still a line ahead to touch.
// It is consumed after the stop (;;) by the predicated store below.
58 | cmp.lt p8,p0=dst_fetch, dst_last | |
59 | ;; | |
60 | #else | |
61 | // Optimized for McKinley | |
// 128-byte line, first group: dst1..dst4 zero bytes 16..79. dst1-dst3 step by
// 64 because they are used twice per iteration; dst4 is used once, so it
// steps by a full 128-byte line.
62 | 1: stf.spill.nta [dst1] = f0, 64 | |
63 | stf.spill.nta [dst2] = f0, 64 | |
64 | stf.spill.nta [dst3] = f0, 64 | |
65 | stf.spill.nta [dst4] = f0, 128 | |
// p8 = (dst_fetch < dst_last): there is still a line ahead to touch.
// It is consumed after the stop (;;) by the predicated store below.
66 | cmp.lt p8,p0=dst_fetch, dst_last | |
67 | ;; | |
// Second group: dst1 and dst2 zero bytes 80..111; the dst3 store after the
// #endif zeroes bytes 112..127.
68 | stf.spill.nta [dst1] = f0, 64 | |
69 | stf.spill.nta [dst2] = f0, 64 | |
70 | #endif | |
// Tail shared by both variants: the last 16 bytes of the current line, then
// (while p8 holds) bytes 0..15 of the line PREFETCH_LINES ahead.
71 | stf.spill.nta [dst3] = f0, 64 | |
72 | (p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE | |
73 | br.cloop.sptk.few 1b | |
74 | ;; | |
75 | mov ar.lc = saved_lc // restore lc | |
76 | br.ret.sptk.many rp | |
77 | END(clear_page) | |
// Export the symbol via the EXPORT_SYMBOL macro (presumably the one provided by
// the <asm/export.h> include near the top of the file).
e007c533 | 78 | EXPORT_SYMBOL(clear_page) |