/[linux-patches]/genpatches-2.6/trunk/2.6.14/1499_16.9_ix87-xcpu-leak.patch
Gentoo

Contents of /genpatches-2.6/trunk/2.6.14/1499_16.9_ix87-xcpu-leak.patch

Parent Directory | Revision Log


Revision 446 - (show annotations) (download) (as text)
Fri Apr 28 14:58:31 2006 UTC (14 years, 7 months ago) by johnm
File MIME type: text/x-diff
File size: 9241 byte(s)
fix build-issues with task thread_info changes on x86_64
1 --- arch/i386/kernel/cpu/amd.c 2006-04-20 21:01:05.000000000 +0000
2 +++ arch/i386/kernel/cpu/amd.c 2006-04-20 21:01:19.000000000 +0000
3 @@ -202,6 +202,8 @@ static void __init init_amd(struct cpuin
4 set_bit(X86_FEATURE_K7, c->x86_capability);
5 break;
6 }
7 + if (c->x86 >= 6)
8 + set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
9
10 display_cacheinfo(c);
11
12 --- arch/x86_64/kernel/process.c 2006-04-20 21:01:05.000000000 +0000
13 +++ arch/x86_64/kernel/process.c 2006-04-20 21:01:19.000000000 +0000
14 @@ -504,8 +504,6 @@ struct task_struct *__switch_to(struct t
15 int cpu = smp_processor_id();
16 struct tss_struct *tss = &per_cpu(init_tss, cpu);
17
18 - unlazy_fpu(prev_p);
19 -
20 /*
21 * Reload esp0, LDT and the page table pointer:
22 */
23 @@ -568,6 +566,12 @@ struct task_struct *__switch_to(struct t
24 prev->userrsp = read_pda(oldrsp);
25 write_pda(oldrsp, next->userrsp);
26 write_pda(pcurrent, next_p);
27 +
28 + /* This must be here to ensure both math_state_restore() and
29 + kernel_fpu_begin() work consistently.
30 + And the AMD workaround requires it to be after DS reload. */
31 + unlazy_fpu(prev_p);
32 +
33 write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
34
35 /*
36 --- arch/x86_64/kernel/setup.c 2006-04-20 21:01:05.000000000 +0000
37 +++ arch/x86_64/kernel/setup.c 2006-04-20 21:01:19.000000000 +0000
38 @@ -862,6 +862,10 @@ static int __init init_amd(struct cpuinf
39 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
40 set_bit(X86_FEATURE_K8_C, &c->x86_capability);
41
42 + /* Enable workaround for FXSAVE leak */
43 + if (c->x86 >= 6)
44 + set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
45 +
46 r = get_model_name(c);
47 if (!r) {
48 switch (c->x86) {
49 --- include/asm-i386/cpufeature.h 2006-04-20 21:01:05.000000000 +0000
50 +++ include/asm-i386/cpufeature.h 2006-04-20 21:01:19.000000000 +0000
51 @@ -70,6 +70,7 @@
52 #define X86_FEATURE_P3 (3*32+ 6) /* P3 */
53 #define X86_FEATURE_P4 (3*32+ 7) /* P4 */
54
55 +#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
56 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
57 #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
58 #define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
59 --- include/asm-i386/i387.h 2006-04-20 21:01:05.000000000 +0000
60 +++ include/asm-i386/i387.h 2006-04-20 21:01:19.000000000 +0000
61 @@ -13,6 +13,7 @@
62
63 #include <linux/sched.h>
64 #include <linux/init.h>
65 +#include <linux/kernel_stat.h>
66 #include <asm/processor.h>
67 #include <asm/sigcontext.h>
68 #include <asm/user.h>
69 @@ -38,17 +39,38 @@ extern void init_fpu(struct task_struct
70 extern void kernel_fpu_begin(void);
71 #define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
72
73 +/* We need a safe address that is cheap to find and that is already
74 + in L1 during context switch. The best choices are unfortunately
75 + different for UP and SMP */
76 +#ifdef CONFIG_SMP
77 +#define safe_address (__per_cpu_offset[0])
78 +#else
79 +#define safe_address (kstat_cpu(0).cpustat.user)
80 +#endif
81 +
82 /*
83 * These must be called with preempt disabled
84 */
85 static inline void __save_init_fpu( struct task_struct *tsk )
86 {
87 + /* Use more nops than strictly needed in case the compiler
88 + varies code */
89 alternative_input(
90 - "fnsave %1 ; fwait ;" GENERIC_NOP2,
91 - "fxsave %1 ; fnclex",
92 + "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
93 + "fxsave %[fx]\n"
94 + "bt $7,%[fsw] ; jc 1f ; fnclex\n1:",
95 X86_FEATURE_FXSR,
96 - "m" (tsk->thread.i387.fxsave)
97 - :"memory");
98 + [fx] "m" (tsk->thread.i387.fxsave),
99 + [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
100 + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
101 + is pending. Clear the x87 state here by setting it to fixed
102 + values. __per_cpu_offset[0] is a random variable that should be in L1 */
103 + alternative_input(
104 + GENERIC_NOP8 GENERIC_NOP2,
105 + "emms\n\t" /* clear stack tags */
106 + "fildl %[addr]", /* set F?P to defined value */
107 + X86_FEATURE_FXSAVE_LEAK,
108 + [addr] "m" (safe_address));
109 tsk->thread_info->status &= ~TS_USEDFPU;
110 }
111
112 --- include/asm-x86_64/cpufeature.h 2006-04-20 21:01:05.000000000 +0000
113 +++ include/asm-x86_64/cpufeature.h 2006-04-20 21:01:19.000000000 +0000
114 @@ -64,6 +64,7 @@
115 #define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */
116 #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
117
118 +#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */
119 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
120 #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
121 #define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
122 --- include/asm-x86_64/i387.h 2006-04-28 14:59:34.000000000 +0000
123 +++ include/asm-x86_64/i387.h 2006-04-28 15:01:37.000000000 +0000
124 @@ -46,7 +46,7 @@
125 }
126
127 #define clear_fpu(tsk) do { \
128 - if ((tsk)->thread_info->status & TS_USEDFPU) { \
129 + if ((tsk)->thread_info->status & TS_USEDFPU) { \
130 tolerant_fwait(); \
131 (tsk)->thread_info->status &= ~TS_USEDFPU; \
132 stts(); \
133 @@ -72,10 +72,28 @@
134 #define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
135 #define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
136
137 +#define X87_FSW_ES (1 << 7) /* Exception Summary */
138 +
139 +/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
140 + is pending. Clear the x87 state here by setting it to fixed
141 + values. The kernel data segment can be sometimes 0 and sometimes
142 + new user value. Both should be ok.
143 + Use the PDA as safe address because it should be already in L1. */
144 +static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
145 +{
146 + if (unlikely(fx->swd & X87_FSW_ES))
147 + asm volatile("fnclex");
148 + alternative_input(ASM_NOP8 ASM_NOP2,
149 + " emms\n" /* clear stack tags */
150 + " fildl %%gs:0", /* load to clear state */
151 + X86_FEATURE_FXSAVE_LEAK);
152 +}
153 +
154 static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
155 {
156 int err;
157 - asm volatile("1: rex64 ; fxrstor (%[fx])\n\t"
158 +
159 + asm volatile("1: rex64/fxrstor (%[fx])\n\t"
160 "2:\n"
161 ".section .fixup,\"ax\"\n"
162 "3: movl $-1,%[err]\n"
163 @@ -86,7 +104,11 @@
164 " .quad 1b,3b\n"
165 ".previous"
166 : [err] "=r" (err)
167 - : [fx] "r" (fx), "0" (0));
168 +#if 0 /* See comment in __fxsave_clear() below. */
169 + : [fx] "r" (fx), "m" (*fx), "0" (0));
170 +#else
171 + : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
172 +#endif
173 if (unlikely(err))
174 init_fpu(current);
175 return err;
176 @@ -95,7 +117,8 @@
177 static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
178 {
179 int err;
180 - asm volatile("1: rex64 ; fxsave (%[fx])\n\t"
181 +
182 + asm volatile("1: rex64/fxsave (%[fx])\n\t"
183 "2:\n"
184 ".section .fixup,\"ax\"\n"
185 "3: movl $-1,%[err]\n"
186 @@ -105,20 +128,54 @@
187 " .align 8\n"
188 " .quad 1b,3b\n"
189 ".previous"
190 - : [err] "=r" (err)
191 - : [fx] "r" (fx), "0" (0));
192 + : [err] "=r" (err), "=m" (*fx)
193 +#if 0 /* See comment in __fxsave_clear() below. */
194 + : [fx] "r" (fx), "0" (0));
195 +#else
196 + : [fx] "cdaSDb" (fx), "0" (0));
197 +#endif
198 if (unlikely(err))
199 __clear_user(fx, sizeof(struct i387_fxsave_struct));
200 + /* No need to clear here because the caller clears USED_MATH */
201 return err;
202 }
203
204 +static inline void __fxsave_clear(struct task_struct *tsk)
205 +{
206 + /* Using "rex64; fxsave %0" is broken because, if the memory operand
207 + uses any extended registers for addressing, a second REX prefix
208 + will be generated (to the assembler, rex64 followed by semicolon
209 + is a separate instruction), and hence the 64-bitness is lost. */
210 +#if 0
211 + /* Using "fxsaveq %0" would be the ideal choice, but is only supported
212 + starting with gas 2.16. */
213 + __asm__ __volatile__("fxsaveq %0"
214 + : "=m" (tsk->thread.i387.fxsave));
215 +#elif 0
216 + /* Using, as a workaround, the properly prefixed form below isn't
217 + accepted by any binutils version so far released, complaining that
218 + the same type of prefix is used twice if an extended register is
219 + needed for addressing (fix submitted to mainline 2005-11-21). */
220 + __asm__ __volatile__("rex64/fxsave %0"
221 + : "=m" (tsk->thread.i387.fxsave));
222 +#else
223 + /* This, however, we can work around by forcing the compiler to select
224 + an addressing mode that doesn't require extended registers. */
225 + __asm__ __volatile__("rex64/fxsave %P2(%1)"
226 + : "=m" (tsk->thread.i387.fxsave)
227 + : "cdaSDb" (tsk),
228 + "i" (offsetof(__typeof__(*tsk),
229 + thread.i387.fxsave)));
230 +#endif
231 + clear_fpu_state(&tsk->thread.i387.fxsave);
232 +}
233 +
234 static inline void kernel_fpu_begin(void)
235 {
236 struct thread_info *me = current_thread_info();
237 preempt_disable();
238 - if (me->status & TS_USEDFPU) {
239 - asm volatile("rex64 ; fxsave %0 ; fnclex"
240 - : "=m" (me->task->thread.i387.fxsave));
241 + if (me->status & TS_USEDFPU) {
242 + __fxsave_clear(me->task);
243 me->status &= ~TS_USEDFPU;
244 return;
245 }
246 @@ -131,10 +188,9 @@
247 preempt_enable();
248 }
249
250 -static inline void save_init_fpu( struct task_struct *tsk )
251 +static inline void save_init_fpu(struct task_struct *tsk)
252 {
253 - asm volatile( "rex64 ; fxsave %0 ; fnclex"
254 - : "=m" (tsk->thread.i387.fxsave));
255 + __fxsave_clear(tsk);
256 tsk->thread_info->status &= ~TS_USEDFPU;
257 stts();
258 }

  ViewVC Help
Powered by ViewVC 1.1.20