ia64/xen-unstable: xen/arch/ia64/linux-xen/head.S @ 5987:5f1ed597f107

Ensure percpu data area not used before the TR is set.

author:   fred@xuni-t01.sc.intel.com
date:     Tue Aug 23 18:43:18 2005 -0800
parents:  97675c2dbb40
children: 9312a3e8a6f8
/*
 * Here is where the ball gets rolling as far as the kernel is concerned.
 * When control is transferred to _start, the bootloader has already
 * loaded us to the correct address.  All that's left to do here is
 * to set up the kernel's global pointer and jump to the kernel
 * entry point.
 *
 * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 *      Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999 Intel Corp.
 * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
 * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
 *      -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
 */

#include <linux/config.h>

#include <asm/asmmacro.h>
#include <asm/fpu.h>
#include <asm/kregs.h>
#include <asm/mmu_context.h>
#include <asm/offsets.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>

        .section __special_page_section,"ax"

        .global empty_zero_page
empty_zero_page:
        .skip PAGE_SIZE

        .global swapper_pg_dir
swapper_pg_dir:
        .skip PAGE_SIZE

        .rodata
halt_msg:
        stringz "Halting kernel\n"

        .text

        .global start_ap

/*
 * Start the kernel.  When the bootloader passes control to _start(), r28
 * points to the address of the boot parameter area.  Execution reaches
 * here in physical mode.
 */
GLOBAL_ENTRY(_start)
start_ap:
        .prologue
        .save rp, r0            // terminate unwind chain with a NULL rp
        .body

        rsm psr.i | psr.ic
        ;;
        srlz.i
        ;;
        /*
         * Initialize kernel region registers:
         *      rr[0]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[1]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[2]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[3]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[4]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[5]: VHPT enabled, page size = PAGE_SHIFT
         *      rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
         *      rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
         * We initialize all of them to prevent inadvertently assuming
         * something about the state of address translation early in boot.
         */
        movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r7=(0<<61)
        movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r9=(1<<61)
        movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r11=(2<<61)
        movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r13=(3<<61)
        movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r15=(4<<61)
        movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
        movl r17=(5<<61)
        movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
        movl r19=(6<<61)
        movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
        movl r21=(7<<61)
        ;;
        mov rr[r7]=r6
        mov rr[r9]=r8
        mov rr[r11]=r10
        mov rr[r13]=r12
        mov rr[r15]=r14
        mov rr[r17]=r16
        mov rr[r19]=r18
        mov rr[r21]=r20
        ;;
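
        /*
         * For illustration only (a sketch, not part of the original
         * source): each value written above packs a region register as
         * the region ID in bits 8 and up, the log2 page size in bits
         * 7:2, and the VHPT-walker enable in bit 0.  In C, with
         * rr_encode() as a hypothetical helper:
         *
         *      unsigned long rr_encode(unsigned long rid,
         *                              unsigned long page_shift, int vhpt_on)
         *      {
         *              return (rid << 8) | (page_shift << 2) | (vhpt_on ? 1UL : 0UL);
         *      }
         *
         * Regions 0-5 use PAGE_SHIFT with the VHPT enabled; regions 6
         * and 7 use IA64_GRANULE_SHIFT with the VHPT disabled, matching
         * r18/r20 above.
         */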
        /*
         * Now pin mappings into the TLB for kernel text and data
         */
        mov r18=KERNEL_TR_PAGE_SHIFT<<2
        movl r17=KERNEL_START
        ;;
        mov cr.itir=r18
        mov cr.ifa=r17
        mov r16=IA64_TR_KERNEL
        mov r3=ip
        movl r18=PAGE_KERNEL
        ;;
        dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
        ;;
        or r18=r2,r18
        ;;
        srlz.i
        ;;
        itr.i itr[r16]=r18
        ;;
        itr.d dtr[r16]=r18
        ;;
        srlz.i
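
        /*
         * Roughly, in C (a sketch under assumptions: the cr_set_*() and
         * itr_insert_*() helpers are hypothetical stand-ins for the
         * mov-to-cr and itr.i/itr.d instructions above):
         *
         *      unsigned long pa  = ip & ~((1UL << KERNEL_TR_PAGE_SHIFT) - 1);
         *      unsigned long pte = pa | PAGE_KERNEL;   // page base + attributes
         *
         *      cr_set_itir(KERNEL_TR_PAGE_SHIFT << 2); // page size of the pin
         *      cr_set_ifa(KERNEL_START);               // virtual address to map
         *      itr_insert_i(IA64_TR_KERNEL, pte);      // pin instruction TLB entry
         *      itr_insert_d(IA64_TR_KERNEL, pte);      // pin data TLB entry
         */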

        /*
         * Switch into virtual mode:
         */
#ifdef CONFIG_VTI
        movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \
                  |IA64_PSR_DI)
#else // CONFIG_VTI
        movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
                  |IA64_PSR_DI)
#endif // CONFIG_VTI
        ;;
        mov cr.ipsr=r16
        movl r17=1f
        ;;
        mov cr.iip=r17
        mov cr.ifs=r0
        ;;
        rfi
        ;;
1:      // now we are in virtual mode

        // set IVT entry point---can't access I/O ports without it
#ifdef CONFIG_VTI
        movl r3=vmx_ia64_ivt
#else // CONFIG_VTI
        movl r3=ia64_ivt
#endif // CONFIG_VTI
        ;;
        mov cr.iva=r3
        movl r2=FPSR_DEFAULT
        ;;
        srlz.i
        movl gp=__gp

        mov ar.fpsr=r2
        ;;

#define isAP    p2      // are we an Application Processor?
#define isBP    p3      // are we the Bootstrap Processor?

#ifdef CONFIG_SMP
        /*
         * Find the init_task for the currently booting CPU.  At poweron, and in
         * UP mode, task_for_booting_cpu is NULL.
         */
        movl r3=task_for_booting_cpu
        ;;
        ld8 r3=[r3]
        movl r2=init_task
        ;;
        cmp.eq isBP,isAP=r3,r0
        ;;
(isAP)  mov r2=r3
#else
        movl r2=init_task
        cmp.eq isBP,isAP=r0,r0
#endif
        ;;
        tpa r3=r2               // r3 == phys addr of task struct
        mov r16=-1
(isBP)  br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it

        // load mapping for stack (virtaddr in r2, physaddr in r3)
        rsm psr.ic
        movl r17=PAGE_KERNEL
        ;;
        srlz.d
        dep r18=0,r3,0,12
        ;;
        or r18=r17,r18
#ifdef XEN
        dep r2=-1,r3,60,4       // IMVA of task
#else
        dep r2=-1,r3,61,3       // IMVA of task
#endif
        ;;
        mov r17=rr[r2]
        shr.u r16=r3,IA64_GRANULE_SHIFT
        ;;
        dep r17=0,r17,8,24
        ;;
        mov cr.itir=r17
        mov cr.ifa=r2

        mov r19=IA64_TR_CURRENT_STACK
        ;;
        itr.d dtr[r19]=r18
        ;;
        ssm psr.ic
        srlz.d
        ;;
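
        /*
         * The XEN/non-XEN dep above differs only in how many of the top
         * address bits are forced to 1 to form the identity-mapped
         * virtual address (IMVA) of the task page.  As plain C bit
         * manipulation (a sketch, not from the source):
         *
         *      // Xen: set the top 4 bits   => 0xf... prefix
         *      unsigned long xen_imva(unsigned long pa)
         *      { return pa | (0xfUL << 60); }
         *
         *      // Linux: set the top 3 bits => region 7, 0xe... prefix
         *      unsigned long linux_imva(unsigned long pa)
         *      { return pa | (0x7UL << 61); }
         */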

.load_current:
        // load the "current" pointer (r13) and ar.k6 with the current task
#ifdef CONFIG_VTI
        mov r21=r2              // virtual address
        ;;
        bsw.1
        ;;
#else // CONFIG_VTI
        mov IA64_KR(CURRENT)=r2
        mov IA64_KR(CURRENT_STACK)=r16
#endif // CONFIG_VTI
        mov r13=r2
        /*
         * Reserve space at the top of the stack for "struct pt_regs".  Kernel
         * threads don't store interesting values in that structure, but the
         * space still needs to be there because time-critical stuff such as
         * the context switching can be implemented more efficiently (for
         * example, __switch_to() always sets the psr.dfh bit of the task it is
         * switching to).
         */
        addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
        addl r2=IA64_RBS_OFFSET,r2      // initialize the RSE
        mov ar.rsc=0            // place RSE in enforced lazy mode
        ;;
        loadrs                  // clear the dirty partition
        ;;
        mov ar.bspstore=r2      // establish the new RSE stack
        ;;
        mov ar.rsc=0x3          // place RSE in eager mode
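
        /*
         * The stack layout being established above, sketched in C (the
         * IA64_* offsets are the kernel's own constants; task_stack_init()
         * is a hypothetical name used only for exposition):
         *
         *      void task_stack_init(unsigned long task_va,
         *                           unsigned long *sp, unsigned long *bspstore)
         *      {
         *              // memory stack grows down from the top of the stack
         *              // area, leaving room for pt_regs and a 16-byte pad
         *              *sp = task_va + IA64_STK_OFFSET - IA64_PT_REGS_SIZE - 16;
         *
         *              // register backing store grows up from IA64_RBS_OFFSET
         *              *bspstore = task_va + IA64_RBS_OFFSET;
         *      }
         */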

#ifdef XEN
(isBP)  dep r28=-1,r28,60,4     // make address virtual
#else
(isBP)  dep r28=-1,r28,61,3     // make address virtual
#endif
(isBP)  movl r2=ia64_boot_param
        ;;
(isBP)  st8 [r2]=r28            // save the address of the boot param area passed by the bootloader

#ifdef CONFIG_SMP
(isAP)  br.call.sptk.many rp=start_secondary
.ret0:
(isAP)  br.cond.sptk self
#endif

        // This is executed by the bootstrap processor (bsp) only:

#ifdef CONFIG_IA64_FW_EMU
        // initialize PAL & SAL emulator:
        br.call.sptk.many rp=sys_fw_init
.ret1:
#endif
        br.call.sptk.many rp=start_kernel
.ret2:  addl r3=@ltoff(halt_msg),gp
        ;;
        alloc r2=ar.pfs,8,0,2,0
        ;;
        ld8 out0=[r3]
        br.call.sptk.many b0=console_print

self:   hint @pause
        ;;
        br.sptk.many self       // endless loop
        ;;
END(_start)

GLOBAL_ENTRY(ia64_save_debug_regs)
        alloc r16=ar.pfs,1,0,0,0
        mov r20=ar.lc           // preserve ar.lc
        mov ar.lc=IA64_NUM_DBG_REGS-1
        mov r18=0
        add r19=IA64_NUM_DBG_REGS*8,in0
        ;;
1:      mov r16=dbr[r18]
#ifdef CONFIG_ITANIUM
        ;;
        srlz.d
#endif
        mov r17=ibr[r18]
        add r18=1,r18
        ;;
        st8.nta [in0]=r16,8
        st8.nta [r19]=r17,8
        br.cloop.sptk.many 1b
        ;;
        mov ar.lc=r20           // restore ar.lc
        br.ret.sptk.many rp
END(ia64_save_debug_regs)

GLOBAL_ENTRY(ia64_load_debug_regs)
        alloc r16=ar.pfs,1,0,0,0
        lfetch.nta [in0]
        mov r20=ar.lc           // preserve ar.lc
        add r19=IA64_NUM_DBG_REGS*8,in0
        mov ar.lc=IA64_NUM_DBG_REGS-1
        mov r18=-1
        ;;
1:      ld8.nta r16=[in0],8
        ld8.nta r17=[r19],8
        add r18=1,r18
        ;;
        mov dbr[r18]=r16
#ifdef CONFIG_ITANIUM
        ;;
        srlz.d                  // Errata 132 (NoFix status)
#endif
        mov ibr[r18]=r17
        br.cloop.sptk.many 1b
        ;;
        mov ar.lc=r20           // restore ar.lc
        br.ret.sptk.many rp
END(ia64_load_debug_regs)

GLOBAL_ENTRY(__ia64_save_fpu)
        alloc r2=ar.pfs,1,4,0,0
        adds loc0=96*16-16,in0
        adds loc1=96*16-16-128,in0
        ;;
        stf.spill.nta [loc0]=f127,-256
        stf.spill.nta [loc1]=f119,-256
        ;;
        stf.spill.nta [loc0]=f111,-256
        stf.spill.nta [loc1]=f103,-256
        ;;
        stf.spill.nta [loc0]=f95,-256
        stf.spill.nta [loc1]=f87,-256
        ;;
        stf.spill.nta [loc0]=f79,-256
        stf.spill.nta [loc1]=f71,-256
        ;;
        stf.spill.nta [loc0]=f63,-256
        stf.spill.nta [loc1]=f55,-256
        adds loc2=96*16-32,in0
        ;;
        stf.spill.nta [loc0]=f47,-256
        stf.spill.nta [loc1]=f39,-256
        adds loc3=96*16-32-128,in0
        ;;
        stf.spill.nta [loc2]=f126,-256
        stf.spill.nta [loc3]=f118,-256
        ;;
        stf.spill.nta [loc2]=f110,-256
        stf.spill.nta [loc3]=f102,-256
        ;;
        stf.spill.nta [loc2]=f94,-256
        stf.spill.nta [loc3]=f86,-256
        ;;
        stf.spill.nta [loc2]=f78,-256
        stf.spill.nta [loc3]=f70,-256
        ;;
        stf.spill.nta [loc2]=f62,-256
        stf.spill.nta [loc3]=f54,-256
        adds loc0=96*16-48,in0
        ;;
        stf.spill.nta [loc2]=f46,-256
        stf.spill.nta [loc3]=f38,-256
        adds loc1=96*16-48-128,in0
        ;;
        stf.spill.nta [loc0]=f125,-256
        stf.spill.nta [loc1]=f117,-256
        ;;
        stf.spill.nta [loc0]=f109,-256
        stf.spill.nta [loc1]=f101,-256
        ;;
        stf.spill.nta [loc0]=f93,-256
        stf.spill.nta [loc1]=f85,-256
        ;;
        stf.spill.nta [loc0]=f77,-256
        stf.spill.nta [loc1]=f69,-256
        ;;
        stf.spill.nta [loc0]=f61,-256
        stf.spill.nta [loc1]=f53,-256
        adds loc2=96*16-64,in0
        ;;
        stf.spill.nta [loc0]=f45,-256
        stf.spill.nta [loc1]=f37,-256
        adds loc3=96*16-64-128,in0
        ;;
        stf.spill.nta [loc2]=f124,-256
        stf.spill.nta [loc3]=f116,-256
        ;;
        stf.spill.nta [loc2]=f108,-256
        stf.spill.nta [loc3]=f100,-256
        ;;
        stf.spill.nta [loc2]=f92,-256
        stf.spill.nta [loc3]=f84,-256
        ;;
        stf.spill.nta [loc2]=f76,-256
        stf.spill.nta [loc3]=f68,-256
        ;;
        stf.spill.nta [loc2]=f60,-256
        stf.spill.nta [loc3]=f52,-256
        adds loc0=96*16-80,in0
        ;;
        stf.spill.nta [loc2]=f44,-256
        stf.spill.nta [loc3]=f36,-256
        adds loc1=96*16-80-128,in0
        ;;
        stf.spill.nta [loc0]=f123,-256
        stf.spill.nta [loc1]=f115,-256
        ;;
        stf.spill.nta [loc0]=f107,-256
        stf.spill.nta [loc1]=f99,-256
        ;;
        stf.spill.nta [loc0]=f91,-256
        stf.spill.nta [loc1]=f83,-256
        ;;
        stf.spill.nta [loc0]=f75,-256
        stf.spill.nta [loc1]=f67,-256
        ;;
        stf.spill.nta [loc0]=f59,-256
        stf.spill.nta [loc1]=f51,-256
        adds loc2=96*16-96,in0
        ;;
        stf.spill.nta [loc0]=f43,-256
        stf.spill.nta [loc1]=f35,-256
        adds loc3=96*16-96-128,in0
        ;;
        stf.spill.nta [loc2]=f122,-256
        stf.spill.nta [loc3]=f114,-256
        ;;
        stf.spill.nta [loc2]=f106,-256
        stf.spill.nta [loc3]=f98,-256
        ;;
        stf.spill.nta [loc2]=f90,-256
        stf.spill.nta [loc3]=f82,-256
        ;;
        stf.spill.nta [loc2]=f74,-256
        stf.spill.nta [loc3]=f66,-256
        ;;
        stf.spill.nta [loc2]=f58,-256
        stf.spill.nta [loc3]=f50,-256
        adds loc0=96*16-112,in0
        ;;
        stf.spill.nta [loc2]=f42,-256
        stf.spill.nta [loc3]=f34,-256
        adds loc1=96*16-112-128,in0
        ;;
        stf.spill.nta [loc0]=f121,-256
        stf.spill.nta [loc1]=f113,-256
        ;;
        stf.spill.nta [loc0]=f105,-256
        stf.spill.nta [loc1]=f97,-256
        ;;
        stf.spill.nta [loc0]=f89,-256
        stf.spill.nta [loc1]=f81,-256
        ;;
        stf.spill.nta [loc0]=f73,-256
        stf.spill.nta [loc1]=f65,-256
        ;;
        stf.spill.nta [loc0]=f57,-256
        stf.spill.nta [loc1]=f49,-256
        adds loc2=96*16-128,in0
        ;;
        stf.spill.nta [loc0]=f41,-256
        stf.spill.nta [loc1]=f33,-256
        adds loc3=96*16-128-128,in0
        ;;
        stf.spill.nta [loc2]=f120,-256
        stf.spill.nta [loc3]=f112,-256
        ;;
        stf.spill.nta [loc2]=f104,-256
        stf.spill.nta [loc3]=f96,-256
        ;;
        stf.spill.nta [loc2]=f88,-256
        stf.spill.nta [loc3]=f80,-256
        ;;
        stf.spill.nta [loc2]=f72,-256
        stf.spill.nta [loc3]=f64,-256
        ;;
        stf.spill.nta [loc2]=f56,-256
        stf.spill.nta [loc3]=f48,-256
        ;;
        stf.spill.nta [loc2]=f40
        stf.spill.nta [loc3]=f32
        br.ret.sptk.many rp
END(__ia64_save_fpu)

GLOBAL_ENTRY(__ia64_load_fpu)
        alloc r2=ar.pfs,1,2,0,0
        adds r3=128,in0
        adds r14=256,in0
        adds r15=384,in0
        mov loc0=512
        mov loc1=-1024+16
        ;;
        ldf.fill.nta f32=[in0],loc0
        ldf.fill.nta f40=[ r3],loc0
        ldf.fill.nta f48=[r14],loc0
        ldf.fill.nta f56=[r15],loc0
        ;;
        ldf.fill.nta f64=[in0],loc0
        ldf.fill.nta f72=[ r3],loc0
        ldf.fill.nta f80=[r14],loc0
        ldf.fill.nta f88=[r15],loc0
        ;;
        ldf.fill.nta f96=[in0],loc1
        ldf.fill.nta f104=[ r3],loc1
        ldf.fill.nta f112=[r14],loc1
        ldf.fill.nta f120=[r15],loc1
        ;;
        ldf.fill.nta f33=[in0],loc0
        ldf.fill.nta f41=[ r3],loc0
        ldf.fill.nta f49=[r14],loc0
        ldf.fill.nta f57=[r15],loc0
        ;;
        ldf.fill.nta f65=[in0],loc0
        ldf.fill.nta f73=[ r3],loc0
        ldf.fill.nta f81=[r14],loc0
        ldf.fill.nta f89=[r15],loc0
        ;;
        ldf.fill.nta f97=[in0],loc1
        ldf.fill.nta f105=[ r3],loc1
        ldf.fill.nta f113=[r14],loc1
        ldf.fill.nta f121=[r15],loc1
        ;;
        ldf.fill.nta f34=[in0],loc0
        ldf.fill.nta f42=[ r3],loc0
        ldf.fill.nta f50=[r14],loc0
        ldf.fill.nta f58=[r15],loc0
        ;;
        ldf.fill.nta f66=[in0],loc0
        ldf.fill.nta f74=[ r3],loc0
        ldf.fill.nta f82=[r14],loc0
        ldf.fill.nta f90=[r15],loc0
        ;;
        ldf.fill.nta f98=[in0],loc1
        ldf.fill.nta f106=[ r3],loc1
        ldf.fill.nta f114=[r14],loc1
        ldf.fill.nta f122=[r15],loc1
        ;;
        ldf.fill.nta f35=[in0],loc0
        ldf.fill.nta f43=[ r3],loc0
        ldf.fill.nta f51=[r14],loc0
        ldf.fill.nta f59=[r15],loc0
        ;;
        ldf.fill.nta f67=[in0],loc0
        ldf.fill.nta f75=[ r3],loc0
        ldf.fill.nta f83=[r14],loc0
        ldf.fill.nta f91=[r15],loc0
        ;;
        ldf.fill.nta f99=[in0],loc1
        ldf.fill.nta f107=[ r3],loc1
        ldf.fill.nta f115=[r14],loc1
        ldf.fill.nta f123=[r15],loc1
        ;;
        ldf.fill.nta f36=[in0],loc0
        ldf.fill.nta f44=[ r3],loc0
        ldf.fill.nta f52=[r14],loc0
        ldf.fill.nta f60=[r15],loc0
        ;;
        ldf.fill.nta f68=[in0],loc0
        ldf.fill.nta f76=[ r3],loc0
        ldf.fill.nta f84=[r14],loc0
        ldf.fill.nta f92=[r15],loc0
        ;;
        ldf.fill.nta f100=[in0],loc1
        ldf.fill.nta f108=[ r3],loc1
        ldf.fill.nta f116=[r14],loc1
        ldf.fill.nta f124=[r15],loc1
        ;;
        ldf.fill.nta f37=[in0],loc0
        ldf.fill.nta f45=[ r3],loc0
        ldf.fill.nta f53=[r14],loc0
        ldf.fill.nta f61=[r15],loc0
        ;;
        ldf.fill.nta f69=[in0],loc0
        ldf.fill.nta f77=[ r3],loc0
        ldf.fill.nta f85=[r14],loc0
        ldf.fill.nta f93=[r15],loc0
        ;;
        ldf.fill.nta f101=[in0],loc1
        ldf.fill.nta f109=[ r3],loc1
        ldf.fill.nta f117=[r14],loc1
        ldf.fill.nta f125=[r15],loc1
        ;;
        ldf.fill.nta f38 =[in0],loc0
        ldf.fill.nta f46 =[ r3],loc0
        ldf.fill.nta f54 =[r14],loc0
        ldf.fill.nta f62 =[r15],loc0
        ;;
        ldf.fill.nta f70 =[in0],loc0
        ldf.fill.nta f78 =[ r3],loc0
        ldf.fill.nta f86 =[r14],loc0
        ldf.fill.nta f94 =[r15],loc0
        ;;
        ldf.fill.nta f102=[in0],loc1
        ldf.fill.nta f110=[ r3],loc1
        ldf.fill.nta f118=[r14],loc1
        ldf.fill.nta f126=[r15],loc1
        ;;
        ldf.fill.nta f39 =[in0],loc0
        ldf.fill.nta f47 =[ r3],loc0
        ldf.fill.nta f55 =[r14],loc0
        ldf.fill.nta f63 =[r15],loc0
        ;;
        ldf.fill.nta f71 =[in0],loc0
        ldf.fill.nta f79 =[ r3],loc0
        ldf.fill.nta f87 =[r14],loc0
        ldf.fill.nta f95 =[r15],loc0
        ;;
        ldf.fill.nta f103=[in0]
        ldf.fill.nta f111=[ r3]
        ldf.fill.nta f119=[r14]
        ldf.fill.nta f127=[r15]
        br.ret.sptk.many rp
END(__ia64_load_fpu)
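
/*
 * The fill sequence above walks four pointers (in0, in0+128, in0+256,
 * in0+384) through the 96 x 16-byte save area for f32-f127 with post-
 * increments of +512, +512, -1024+16, so each pointer covers one quarter
 * of every 512-byte block.  A self-contained C check of that coverage
 * (a sketch, not from the source):
 *
 *      #include <stdio.h>
 *
 *      int main(void)
 *      {
 *              int seen[96] = { 0 }, stride[3] = { 512, 512, -1024 + 16 };
 *              long p[4] = { 0, 128, 256, 384 };
 *
 *              for (int group = 0; group < 24; group++)
 *                      for (int i = 0; i < 4; i++) {
 *                              seen[p[i] / 16]++;              // one 16-byte fp slot
 *                              p[i] += stride[group % 3];      // post-increment
 *                      }
 *              for (int i = 0; i < 96; i++)    // prints nothing: every
 *                      if (seen[i] != 1)       // slot is hit exactly once
 *                              printf("slot %d: %d\n", i, seen[i]);
 *              return 0;
 *      }
 */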

GLOBAL_ENTRY(__ia64_init_fpu)
        stf.spill [sp]=f0               // M3
        mov f32=f0                      // F
        nop.b 0

        ldfps f33,f34=[sp]              // M0
        ldfps f35,f36=[sp]              // M1
        mov f37=f0                      // F
        ;;

        setf.s f38=r0                   // M2
        setf.s f39=r0                   // M3
        mov f40=f0                      // F

        ldfps f41,f42=[sp]              // M0
        ldfps f43,f44=[sp]              // M1
        mov f45=f0                      // F

        setf.s f46=r0                   // M2
        setf.s f47=r0                   // M3
        mov f48=f0                      // F

        ldfps f49,f50=[sp]              // M0
        ldfps f51,f52=[sp]              // M1
        mov f53=f0                      // F

        setf.s f54=r0                   // M2
        setf.s f55=r0                   // M3
        mov f56=f0                      // F

        ldfps f57,f58=[sp]              // M0
        ldfps f59,f60=[sp]              // M1
        mov f61=f0                      // F

        setf.s f62=r0                   // M2
        setf.s f63=r0                   // M3
        mov f64=f0                      // F

        ldfps f65,f66=[sp]              // M0
        ldfps f67,f68=[sp]              // M1
        mov f69=f0                      // F

        setf.s f70=r0                   // M2
        setf.s f71=r0                   // M3
        mov f72=f0                      // F

        ldfps f73,f74=[sp]              // M0
        ldfps f75,f76=[sp]              // M1
        mov f77=f0                      // F

        setf.s f78=r0                   // M2
        setf.s f79=r0                   // M3
        mov f80=f0                      // F

        ldfps f81,f82=[sp]              // M0
        ldfps f83,f84=[sp]              // M1
        mov f85=f0                      // F

        setf.s f86=r0                   // M2
        setf.s f87=r0                   // M3
        mov f88=f0                      // F

        /*
         * When the instructions are cached, it would be faster to initialize
         * the remaining registers with simply mov instructions (F-unit).
         * This gets the time down to ~29 cycles.  However, this would use up
         * 33 bundles, whereas continuing with the above pattern yields
         * 10 bundles and ~30 cycles.
         */

        ldfps f89,f90=[sp]              // M0
        ldfps f91,f92=[sp]              // M1
        mov f93=f0                      // F

        setf.s f94=r0                   // M2
        setf.s f95=r0                   // M3
        mov f96=f0                      // F

        ldfps f97,f98=[sp]              // M0
        ldfps f99,f100=[sp]             // M1
        mov f101=f0                     // F

        setf.s f102=r0                  // M2
        setf.s f103=r0                  // M3
        mov f104=f0                     // F

        ldfps f105,f106=[sp]            // M0
        ldfps f107,f108=[sp]            // M1
        mov f109=f0                     // F

        setf.s f110=r0                  // M2
        setf.s f111=r0                  // M3
        mov f112=f0                     // F

        ldfps f113,f114=[sp]            // M0
        ldfps f115,f116=[sp]            // M1
        mov f117=f0                     // F

        setf.s f118=r0                  // M2
        setf.s f119=r0                  // M3
        mov f120=f0                     // F

        ldfps f121,f122=[sp]            // M0
        ldfps f123,f124=[sp]            // M1
        mov f125=f0                     // F

        setf.s f126=r0                  // M2
        setf.s f127=r0                  // M3
        br.ret.sptk.many rp             // F
END(__ia64_init_fpu)

/*
 * Switch execution mode from virtual to physical
 *
 * Inputs:
 *      r16 = new psr to establish
 * Output:
 *      r19 = old virtual address of ar.bsp
 *      r20 = old virtual address of sp
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_phys)
{
        alloc r2=ar.pfs,0,0,0,0
        rsm psr.i | psr.ic              // disable interrupts and interrupt collection
        mov r15=ip
}
        ;;
{
        flushrs                         // must be first insn in group
        srlz.i
}
        ;;
        mov cr.ipsr=r16                 // set new PSR
        add r3=1f-ia64_switch_mode_phys,r15

        mov r19=ar.bsp
        mov r20=sp
        mov r14=rp                      // get return address into a general register
        ;;

        // going to physical mode, use tpa to translate virt->phys
        tpa r17=r19
        tpa r3=r3
        tpa sp=sp
        tpa r14=r14
        ;;

        mov r18=ar.rnat                 // save ar.rnat
        mov ar.bspstore=r17             // this steps on ar.rnat
        mov cr.iip=r3
        mov cr.ifs=r0
        ;;
        mov ar.rnat=r18                 // restore ar.rnat
        rfi                             // must be last insn in group
        ;;
1:      mov rp=r14
        br.ret.sptk.many rp
END(ia64_switch_mode_phys)

/*
 * Switch execution mode from physical to virtual
 *
 * Inputs:
 *      r16 = new psr to establish
 *      r19 = new bspstore to establish
 *      r20 = new sp to establish
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_virt)
{
        alloc r2=ar.pfs,0,0,0,0
        rsm psr.i | psr.ic              // disable interrupts and interrupt collection
        mov r15=ip
}
        ;;
{
        flushrs                         // must be first insn in group
        srlz.i
}
        ;;
        mov cr.ipsr=r16                 // set new PSR
        add r3=1f-ia64_switch_mode_virt,r15

        mov r14=rp                      // get return address into a general register
        ;;

        // going to virtual
        //   - for code addresses, set upper bits of addr to KERNEL_START
        //   - for stack addresses, copy from input argument
        movl r18=KERNEL_START
        dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
        dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
        mov sp=r20
        ;;
        or r3=r3,r18
        or r14=r14,r18
        ;;

        mov r18=ar.rnat                 // save ar.rnat
        mov ar.bspstore=r19             // this steps on ar.rnat
        mov cr.iip=r3
        mov cr.ifs=r0
        ;;
        mov ar.rnat=r18                 // restore ar.rnat
        rfi                             // must be last insn in group
        ;;
1:      mov rp=r14
        br.ret.sptk.many rp
END(ia64_switch_mode_virt)
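
/*
 * The dep/or pairs in ia64_switch_mode_virt rewrite a physical code
 * address into its pinned virtual alias: keep only the offset within
 * the mapped page, then OR in KERNEL_START.  In C (a sketch;
 * to_kernel_va() is a hypothetical name):
 *
 *      unsigned long to_kernel_va(unsigned long pa)
 *      {
 *              unsigned long off = pa & ((1UL << KERNEL_TR_PAGE_SHIFT) - 1);
 *              return KERNEL_START | off;
 *      }
 */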

GLOBAL_ENTRY(ia64_delay_loop)
        .prologue
{       nop 0                   // work around GAS unwind info generation bug...
        .save ar.lc,r2
        mov r2=ar.lc
        .body
        ;;
        mov ar.lc=r32
}
        ;;
        // force loop to be 32-byte aligned (GAS bug means we cannot use .align
        // inside function body without corrupting unwind info).
{ nop 0 }
1:      br.cloop.sptk.few 1b
        ;;
        mov ar.lc=r2
        br.ret.sptk.many rp
END(ia64_delay_loop)

/*
 * Return a CPU-local timestamp in nano-seconds.  This timestamp is NOT
 * synchronized across CPUs, so its return value must never be compared
 * against the values returned on another CPU.  The usage in
 * kernel/sched.c ensures that.
 *
 * The return-value of sched_clock() is NOT supposed to wrap around.
 * If it did, it would cause some scheduling hiccups (at the worst).
 * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
 * that would happen only once every 5+ years.
 *
 * The code below basically calculates:
 *
 *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
 *
 * except that the multiplication and the shift are done with 128-bit
 * intermediate precision so that we can produce a full 64-bit result.
 */
GLOBAL_ENTRY(sched_clock)
#ifdef XEN
        movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
#else
        addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
#endif
        mov.m r9=ar.itc         // fetch cycle-counter (35 cyc)
        ;;
        ldf8 f8=[r8]
        ;;
        setf.sig f9=r9          // certain to stall, so issue it _after_ ldf8...
        ;;
        xmpy.lu f10=f9,f8       // calculate low 64 bits of 128-bit product (4 cyc)
        xmpy.hu f11=f9,f8       // calculate high 64 bits of 128-bit product
        ;;
        getf.sig r8=f10         // (5 cyc)
        getf.sig r9=f11
        ;;
        shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
        br.ret.sptk.many rp
END(sched_clock)
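
/*
 * The 128-bit multiply-and-shift above, as self-contained C (a sketch,
 * not from the source; GCC's __int128 stands in for the xmpy.lu/xmpy.hu
 * pair plus shrp):
 *
 *      #include <stdint.h>
 *
 *      uint64_t cycles_to_ns(uint64_t itc, uint64_t nsec_per_cyc,
 *                            unsigned shift)
 *      {
 *              // full 128-bit product, then take bits shift..shift+63,
 *              // exactly what xmpy.lu/xmpy.hu plus shrp compute
 *              unsigned __int128 prod = (unsigned __int128)itc * nsec_per_cyc;
 *              return (uint64_t)(prod >> shift);
 *      }
 *
 * invoked, in effect, as cycles_to_ns(ia64_get_itc(),
 * local_cpu_data->nsec_per_cyc, IA64_NSEC_PER_CYC_SHIFT).
 */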

GLOBAL_ENTRY(start_kernel_thread)
        .prologue
        .save rp, r0                    // this is the end of the call-chain
        .body
        alloc r2 = ar.pfs, 0, 0, 2, 0
        mov out0 = r9
        mov out1 = r11;;
        br.call.sptk.many rp = kernel_thread_helper;;
        mov out0 = r8
        br.call.sptk.many rp = sys_exit;;
1:      br.sptk.few 1b                  // not reached
END(start_kernel_thread)

#ifdef CONFIG_IA64_BRL_EMU

/*
 * Assembly routines used by brl_emu.c to set preserved register state.
 */

#define SET_REG(reg)                            \
 GLOBAL_ENTRY(ia64_set_##reg);                  \
        alloc r16=ar.pfs,1,0,0,0;               \
        mov reg=r32;                            \
        ;;                                      \
        br.ret.sptk.many rp;                    \
 END(ia64_set_##reg)

SET_REG(b1);
SET_REG(b2);
SET_REG(b3);
SET_REG(b4);
SET_REG(b5);

#endif /* CONFIG_IA64_BRL_EMU */

#ifdef CONFIG_SMP

/*
 * This routine handles spinlock contention.  It uses a non-standard calling
 * convention to avoid converting leaf routines into interior routines.
 * Because of this special convention, there are several restrictions:
 *
 * - do not use gp relative variables; this code is called from the kernel
 *   and from modules, so r1 is undefined.
 * - do not use stacked registers; the caller owns them.
 * - do not use the scratch stack space; the caller owns it.
 * - do not use any registers other than the ones listed below
 *
 * Inputs:
 *   ar.pfs - saved CFM of caller
 *   ar.ccv - 0 (and available for use)
 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
 *   r28    - available for use.
 *   r29    - available for use.
 *   r30    - available for use.
 *   r31    - address of lock, available for use.
 *   b6     - return address
 *   p14    - available for use.
 *   p15    - used to track flag status.
 *
 * If you patch this code to use more registers, do not forget to update
 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
 */

#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)

GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
        .prologue
        .save ar.pfs, r0        // this code effectively has a zero frame size
        .save rp, r28
        .body
        nop 0
        tbit.nz p15,p0=r27,IA64_PSR_I_BIT
        .restore sp             // pop existing prologue after next insn
        mov b6 = r28
        .prologue
        .save ar.pfs, r0
        .altrp b6
        .body
        ;;
(p15)   ssm psr.i               // reenable interrupts if they were on
                                // DavidM says that srlz.d is slow and is not required in this case
.wait:
        // exponential backoff, kdb, lockmeter etc. go in here
        hint @pause
        ld4 r30=[r31]           // don't use ld4.bias; if it's contended, we won't write the word
        nop 0
        ;;
        cmp4.ne p14,p0=r30,r0
(p14)   br.cond.sptk.few .wait
(p15)   rsm psr.i               // disable interrupts if we reenabled them
        br.cond.sptk.few b6     // lock is now free, try to acquire
        .global ia64_spinlock_contention_pre3_4_end     // for kernprof
ia64_spinlock_contention_pre3_4_end:
END(ia64_spinlock_contention_pre3_4)

#else

GLOBAL_ENTRY(ia64_spinlock_contention)
        .prologue
        .altrp b6
        .body
        tbit.nz p15,p0=r27,IA64_PSR_I_BIT
        ;;
.wait:
(p15)   ssm psr.i               // reenable interrupts if they were on
                                // DavidM says that srlz.d is slow and is not required in this case
.wait2:
        // exponential backoff, kdb, lockmeter etc. go in here
        hint @pause
        ld4 r30=[r31]           // don't use ld4.bias; if it's contended, we won't write the word
        ;;
        cmp4.ne p14,p0=r30,r0
        mov r30 = 1
(p14)   br.cond.sptk.few .wait2
(p15)   rsm psr.i               // disable interrupts if we reenabled them
        ;;
        cmpxchg4.acq r30=[r31], r30, ar.ccv
        ;;
        cmp4.ne p14,p0=r0,r30
(p14)   br.cond.sptk.few .wait

        br.ret.sptk.many b6     // lock is now taken
END(ia64_spinlock_contention)
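
/*
 * The contention loop above, modeled with C11 atomics (a self-contained
 * sketch of the algorithm only; the real code also toggles psr.i around
 * the wait and must honor the register restrictions listed above):
 *
 *      #include <stdatomic.h>
 *
 *      void spin_wait_and_acquire(atomic_int *lock)
 *      {
 *              int expected;
 *
 *              do {
 *                      // spin read-only while the lock word is non-zero
 *                      while (atomic_load_explicit(lock, memory_order_relaxed))
 *                              ;
 *                      expected = 0;   // then try 0 -> 1 with acquire semantics
 *              } while (!atomic_compare_exchange_strong_explicit(
 *                              lock, &expected, 1,
 *                              memory_order_acquire, memory_order_relaxed));
 *      }
 */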

#endif

#endif /* CONFIG_SMP */