ia64/xen-unstable

changeset 5190:ef94e64d3896

bitkeeper revision 1.1581 (42983526-uYH3-ev0arYC-GCKopWHg)

This patch should make x86-64 XenLinux more stable. Please apply.

# Cleanups and improved stability by avoiding the complex fixup in the
critical section.
# Also fix error_entry when an error code is set.
# Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
# Signed-off-by: Li B Xin <li.b.xin@intel.com>

I think the other patch, pfn_pte_ma.patch, was dropped (i.e. not applied
in BK). Please apply it as well.
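
For reference, the core of the entry.S change is the event re-check that this
patch moves from error_check_event to retint_restore_args: event delivery is
re-enabled on the return path only when events are currently masked but the
interrupted frame had them unmasked. A minimal C sketch of that test follows;
the struct layout and function name are illustrative stand-ins for the
vcpu_info fields and the movb/notb/andb sequence in entry.S:

    #include <stdint.h>

    /* Hypothetical, simplified mirror of the shared-info fields that
     * entry.S reads through %rsi. */
    struct vcpu_info {
        uint8_t evtchn_upcall_pending;
        uint8_t evtchn_upcall_mask;
    };

    /* Non-zero when the return path must jump to restore_all_enable_events:
     * delivery is masked right now, but the interrupted frame (saved_mask,
     * i.e. the EVENT_MASK slot in the frame) had it unmasked, so delivery
     * must be re-enabled before returning. */
    static int must_reenable_events(const struct vcpu_info *v, uint8_t saved_mask)
    {
        return (v->evtchn_upcall_mask & (uint8_t)~saved_mask) & 1;
    }

Centralising this test at retint_restore_args lets the paths that previously
carried their own copy of it (error_check_event) share a single one.
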
author kaf24@firebug.cl.cam.ac.uk
date Sat May 28 09:08:54 2005 +0000 (2005-05-28)
parents 7d0a21c7ba33
children b9f77360e9fc
files linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
line diff
     1.1 --- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S	Sat May 28 09:02:28 2005 +0000
     1.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S	Sat May 28 09:08:54 2005 +0000
     1.3 @@ -511,7 +511,15 @@ retint_check:
     1.4  	movl threadinfo_flags(%rcx),%edx
     1.5  	andl %edi,%edx
     1.6  	jnz  retint_careful
     1.7 -retint_restore_args:				
     1.8 +retint_restore_args:
     1.9 +        movb EVENT_MASK-REST_SKIP(%rsp), %al
    1.10 +        notb %al			# %al == ~saved_mask
    1.11 +        XEN_LOCK_VCPU_INFO_SMP(%rsi)
    1.12 +        andb evtchn_upcall_mask(%rsi),%al
    1.13 +	andb $1,%al			# %al == mask & ~saved_mask
    1.14 +	jnz restore_all_enable_events	# != 0 => reenable event delivery      
    1.15 +        XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
    1.16 +		
    1.17  	RESTORE_ARGS 0,8,0						
    1.18  	testb $3,8(%rsp)                # check CS
    1.19  	jnz  user_mode
    1.20 @@ -627,7 +635,7 @@ ENTRY(spurious_interrupt)
    1.21  	.macro errorentry sym
    1.22          movq (%rsp),%rcx
    1.23          movq 8(%rsp),%r11
    1.24 -        addq $0x18,%rsp /* rsp points to the error code */
    1.25 +        addq $0x10,%rsp /* rsp points to the error code */
    1.26  	pushq %rax
    1.27  	leaq  \sym(%rip),%rax
    1.28  	jmp error_entry
    1.29 @@ -712,27 +720,19 @@ error_call_handler:
    1.30          XEN_SAVE_UPCALL_MASK(%r11,%cl,EVENT_MASK)
    1.31  0:              
    1.32  	call *%rax
    1.33 -error_check_event:
    1.34 -        movb EVENT_MASK(%rsp), %al
    1.35 -        notb %al			# %al == ~saved_mask
    1.36 -        XEN_LOCK_VCPU_INFO_SMP(%rsi)
    1.37 -        andb evtchn_upcall_mask(%rsi),%al
    1.38 -	andb $1,%al			# %al == mask & ~saved_mask
    1.39 -	jnz restore_all_enable_events	# != 0 => reenable event delivery      
    1.40 -        XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
    1.41  error_exit:		
    1.42  	RESTORE_REST
    1.43  /*	cli */
    1.44 +	XEN_GET_VCPU_INFO(%rsi)        
    1.45 +	XEN_BLOCK_EVENTS(%rsi)		
    1.46  	GET_THREAD_INFO(%rcx)	
    1.47 -	testb $3,CS-REST_SKIP(%rsp)
    1.48 +	testb $3,CS-ARGOFFSET(%rsp)
    1.49  	jz retint_kernel
    1.50  	movl  threadinfo_flags(%rcx),%edx
    1.51 -	movl  $_TIF_WORK_MASK,%edi
    1.52 +	movl  $_TIF_WORK_MASK,%edi	
    1.53  	andl  %edi,%edx
    1.54 -	jnz  retint_careful
    1.55 -	RESTORE_ARGS 0,8,0						
    1.56 -        SWITCH_TO_USER 0
    1.57 -	CFI_ENDPROC
    1.58 +	jnz   retint_careful
    1.59 +	jmp   retint_restore_args
    1.60  
    1.61  error_kernelspace:
    1.62           /*
    1.63 @@ -777,132 +777,52 @@ ENTRY(hypervisor_callback)
    1.64  # So, on entry to the handler we detect whether we interrupted an
    1.65  # existing activation in its critical region -- if so, we pop the current
    1.66  # activation and restart the handler using the previous one.
    1.67 -
    1.68  ENTRY(do_hypervisor_callback)   # do_hyperviosr_callback(struct *pt_regs)
    1.69  # Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
    1.70  # see the correct pointer to the pt_regs
    1.71          addq $8, %rsp            # we don't return, adjust the stack frame
    1.72 -        movq RIP(%rsp),%rax
    1.73 -	cmpq $scrit,%rax
    1.74 -	jb   11f
    1.75 -	cmpq $ecrit,%rax
    1.76 -	jb   critical_region_fixup
    1.77  11:	movb $0, EVENT_MASK(%rsp)         
    1.78  	call evtchn_do_upcall
    1.79 -        jmp  error_check_event
    1.80 +        jmp  error_exit
    1.81  
    1.82          ALIGN
    1.83  restore_all_enable_events:  
    1.84  	XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
    1.85 +
    1.86  scrit:	/**** START OF CRITICAL REGION ****/
    1.87  	XEN_TEST_PENDING(%rsi)
    1.88  	jnz  14f			# process more events if necessary...
    1.89  	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
    1.90 -        RESTORE_REST
    1.91          RESTORE_ARGS 0,8,0
    1.92          testb $3,8(%rsp)                # check CS
    1.93          jnz  crit_user_mode
    1.94          orb   $3,1*8(%rsp)
    1.95          iretq
    1.96  crit_user_mode:
    1.97 -        SWITCH_TO_USER 0 
    1.98 +        SWITCH_TO_USER 0
    1.99          
   1.100  14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
   1.101  	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
   1.102 +	SAVE_REST
   1.103          movq %rsp,%rdi                  # set the argument again
   1.104  	jmp  11b
   1.105  ecrit:  /**** END OF CRITICAL REGION ****/
   1.106 -# [How we do the fixup]. We want to merge the current stack frame with the
   1.107 -# just-interrupted frame. How we do this depends on where in the critical
   1.108 -# region the interrupted handler was executing, and so how many saved
   1.109 -# registers are in each frame. We do this quickly using the lookup table
   1.110 -# 'critical_fixup_table'. For each byte offset in the critical region, it
   1.111 -# provides the number of bytes which have already been popped from the
   1.112 -# interrupted stack frame. 
   1.113 -critical_region_fixup:
   1.114 -     	subq $scrit,%rax
   1.115 -	shlq $1,%rax
   1.116 -	addq $critical_fixup_table,%rax
   1.117 -	movzwq (%rax),%rcx
   1.118 -	xorq  %rax,%rax
   1.119 -	movb  %ch,%al
   1.120 -	movb  $0,%ch
   1.121 -#ifdef CONFIG_SMP
   1.122 -	cmpb $0xff,%al
   1.123 -	jne  15f
   1.124 -	add  $1,%al
   1.125 -	GET_THREAD_INFO(%rbp)
   1.126 -	XEN_UNLOCK_VCPU_INFO_SMP(%r11)
   1.127 -15:
   1.128 -#endif
   1.129 -    	movq  %rsp,%rsi
   1.130 -	movq  %rsi,%rdi
   1.131 -	addq  $0xa8,%rax
   1.132 -	addq  %rax,%rdi
   1.133 -	addq  %rcx,%rsi
   1.134 -	shrq  $3,%rcx			# convert words to bytes
   1.135 -	je    17f			# skip loop if nothing to copy
   1.136 -16:	subq  $8,%rsi			# pre-decrementing copy loop
   1.137 -	subq  $8,%rdi
   1.138 -	movq  (%rsi),%rax
   1.139 -	movq  %rax,(%rdi)
   1.140 -	loop  16b
   1.141 -17:	movq  %rdi,%rsp			# final %edi is top of merged stack
   1.142 -	jmp   11b
   1.143 -
   1.144 -critical_fixup_table:
   1.145 -        .byte 0x00,0x00,0x00,0x00                 # testb  $0xff,0x0(%rsi)
   1.146 -        .byte 0x00,0x00,0x00,0x00,0x00,0x00       # jne    <crit_user_mode+0x42>
   1.147 -        .byte 0x00,0x00,0x00,0x00                 # mov    (%rsp),%r15
   1.148 -        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x8(%rsp),%r14
   1.149 -        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x10(%rsp),%r13
   1.150 -        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x18(%rsp),%r12
   1.151 -        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x20(%rsp),%rbp
   1.152 -        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x28(%rsp),%rbx
   1.153 -        .byte 0x00,0x00,0x00,0x00                 # add    $0x30,%rsp
   1.154 -        .byte 0x30,0x30,0x30,0x30                 # mov    (%rsp),%r11
   1.155 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x8(%rsp),%r10
   1.156 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x10(%rsp),%r9
   1.157 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x18(%rsp),%r8
   1.158 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x20(%rsp),%rax
   1.159 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x28(%rsp),%rcx
   1.160 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x30(%rsp),%rdx
   1.161 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x38(%rsp),%rsi
   1.162 -        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x40(%rsp),%rdi
   1.163 -        .byte 0x30,0x30,0x30,0x30                 # add    $0x50,%rsp
   1.164 -        .byte 0x80,0x80,0x80,0x80,0x80            # testb  $0x3,0x8(%rsp)
   1.165 -        .byte 0x80,0x80                           # jne    ffffffff8010dc25 <crit_user_mode>
   1.166 -        .byte 0x80,0x80,0x80,0x80                 # orb    $0x3,0x8(%rsp)
   1.167 -        .byte 0x80,0x80                           # iretq
   1.168 -                                                  # <crit_user_mode>:
   1.169 -        .byte 0x80,0x80,0x80,0x80,0x80,0x80,0x80  # movq   $0x0,%gs:0x60
   1.170 -        .byte 0x80,0x80,0x80,0x80,0x80
   1.171 -        .byte 0x80,0x80,0x80,0x80                 # sub    $0x20,%rsp
   1.172 -        .byte 0x60,0x60,0x60,0x60                 # mov    %rax,(%rsp)
   1.173 -        .byte 0x60,0x60,0x60,0x60,0x60            # mov    %r11,0x8(%rsp)
   1.174 -        .byte 0x60,0x60,0x60,0x60,0x60            # mov    %rcx,0x10(%rsp)
   1.175 -        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x0,0x18(%rsp)
   1.176 -        .byte 0x60,0x60
   1.177 -        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x33,0x28(%rsp)
   1.178 -        .byte 0x60,0x60
   1.179 -        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x2b,0x40(%rsp)
   1.180 -        .byte 0x60,0x60        
   1.181 -        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # mov    $0x17,%rax
   1.182 -        .byte 0x60,0x60                           # syscall
   1.183 -        .byte 0x60,0x60,0x60,0x60,0x60            # movb   $0x1,0x1(%rsi)
   1.184 -        .byte 0x60,0x60,0x60                      # mov    %rsp,%rdi
   1.185 -        .byte 0x60,0x60,0x60,0x60,0x60            # jmpq   <do_hypervisor_callback+0x20>
    1.186 +# At this point, unlike on x86-32, we do not do the fixup: skipping it keeps
    1.187 +# the code simple, and the stack frame is more complex on x86-64 anyway.
    1.188 +# If the kernel is interrupted in the critical section, it will simply do an
    1.189 +# IRET; everything is restored at that point, i.e. execution resumes from the
    1.190 +# interrupted instruction with the same context.
   1.191 +	
   1.192  # Hypervisor uses this for application faults while it executes.
   1.193  ENTRY(failsafe_callback)
   1.194 -        hlt         
   1.195 -#if 0        
   1.196 +	addq $0x10,%rsp /* skip rcx and r11 */	
   1.197  1:	movl (%rsp),%ds
   1.198  2:	movl 8(%rsp),%es
   1.199  3:	movl 16(%rsp),%fs
   1.200  4:	movl 24(%rsp),%gs
   1.201 -        subq $14,%rsp
   1.202 +	addq $0x20,%rsp /* skip the above selectors */		
   1.203  	SAVE_ALL
   1.204 -	jmp  ret_from_exception
   1.205 +	jmp  error_exit
   1.206  .section .fixup,"ax";	\
   1.207  6:	movq $0,(%rsp);	\
   1.208  	jmp 1b;		\
   1.209 @@ -914,13 +834,14 @@ 9:	movq $0,(%rsp);	\
   1.210  	jmp 4b;		\
   1.211  .previous;		\
   1.212  .section __ex_table,"a";\
   1.213 -	.align 8;	\
   1.214 -	.long 1b,6b;	\
   1.215 -	.long 2b,7b;	\
   1.216 -	.long 3b,8b;	\
   1.217 -	.long 4b,9b;	\
   1.218 +	.align 16;	\
   1.219 +	.quad 1b,6b;	\
   1.220 +	.quad 2b,7b;	\
   1.221 +	.quad 3b,8b;	\
   1.222 +	.quad 4b,9b;	\
   1.223  .previous
   1.224 -       
   1.225 + 
   1.226 +#if 0	      
   1.227          .section __ex_table,"a"
   1.228          .align 8
   1.229          .quad gs_change,bad_gs
   1.230 @@ -933,7 +854,8 @@ bad_gs:
   1.231          movl %eax,%gs
   1.232          jmp  2b
   1.233          .previous       
   1.234 -#endif	
   1.235 +#endif
   1.236 +	
   1.237  /*
   1.238   * Create a kernel thread.
   1.239   *
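
A note on the entry.S hunks above before moving to pgtable.h: the reason the
critical_fixup_table could be dropped entirely is that the new return path
just re-runs its own checks. A loose, self-contained C model of the
restore_all_enable_events / scrit..ecrit flow (all names here are
illustrative; the real logic is the assembly above):

    #include <stdint.h>

    struct vcpu_info {                       /* hypothetical, simplified */
        uint8_t evtchn_upcall_pending;
        uint8_t evtchn_upcall_mask;
    };

    /* Stands in for the real upcall handler; assumed to clear
     * evtchn_upcall_pending once the pending events are processed. */
    void evtchn_do_upcall_stub(struct vcpu_info *v);

    /* Loose model: unmask delivery, and as long as an upcall is pending,
     * re-mask, handle it, and come back around via the exit path. */
    static void return_path_model(struct vcpu_info *v)
    {
        v->evtchn_upcall_mask = 0;              /* XEN_UNBLOCK_EVENTS      */
        while (v->evtchn_upcall_pending) {      /* XEN_TEST_PENDING        */
            v->evtchn_upcall_mask = 1;          /* XEN_LOCKED_BLOCK_EVENTS */
            evtchn_do_upcall_stub(v);           /* 11: handle the events   */
            v->evtchn_upcall_mask = 0;          /* unmask and re-check     */
        }
        /* nothing pending: RESTORE_ARGS, then iretq or SWITCH_TO_USER */
    }

Because each pass re-reads the live vcpu_info state, an interrupt landing
anywhere in this window can simply IRET and let the next pass pick up any
pending work, which is what the new comment in the diff describes and why the
per-offset fixup table is no longer needed.
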
     2.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Sat May 28 09:02:28 2005 +0000
     2.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Sat May 28 09:08:54 2005 +0000
     2.3 @@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[PAG
     2.4  	printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), pmd_val(e))
     2.5  #define pud_ERROR(e) \
     2.6  	printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e))
     2.7 -
     2.8  #define pgd_ERROR(e) \
     2.9          printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
    2.10  
    2.11 @@ -299,7 +298,7 @@ static inline pte_t pfn_pte(unsigned lon
    2.12  	return pte;
    2.13  }
    2.14  
    2.15 -#define pfn_pte_ma(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
    2.16 +#define pfn_pte_ma(pfn, prot)	__pte_ma((((pfn) << PAGE_SHIFT) | pgprot_val(prot)) & __supported_pte_mask)
    2.17  /*
    2.18   * The following only work if pte_present() is true.
    2.19   * Undefined behaviour if not..
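
The pgtable.h hunk makes pfn_pte_ma() mask the machine-address PTE value with
__supported_pte_mask, so protection bits the CPU does not implement (for
example NX on processors without it) never reach the hypervisor. A simplified,
self-contained C sketch of the same arithmetic (the constants and names here
are illustrative; the real macro uses the kernel's pgprot types):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define PAGE_NX_BIT (1ULL << 63)       /* no-execute bit of a 64-bit PTE */

    /* Stand-in for the kernel's __supported_pte_mask; on a CPU without NX
     * support the kernel clears the NX bit from this mask at boot. */
    static uint64_t supported_pte_mask = ~PAGE_NX_BIT;

    /* Same shape as the patched pfn_pte_ma(): shift the machine frame
     * number into place, OR in the protection bits, then drop any bits the
     * CPU cannot accept. */
    static uint64_t pfn_pte_ma_val(uint64_t mfn, uint64_t prot)
    {
        return ((mfn << PAGE_SHIFT) | prot) & supported_pte_mask;
    }

    int main(void)
    {
        uint64_t pte = pfn_pte_ma_val(0x1234, 0x63ULL | PAGE_NX_BIT);
        printf("pte = %#llx\n", (unsigned long long)pte);  /* NX stripped */
        return 0;
    }

Masking at construction time means callers of pfn_pte_ma() do not each have to
know whether the current CPU supports every protection bit they pass in.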